From ff9adaf8c2451a36d6d9556717ca6af2a7e3f2ad Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Mar 29 2022 10:55:15 +0000 Subject: import qemu-kvm-6.2.0-8.module+el8.6.0+14324+050a5215 --- diff --git a/.gitignore b/.gitignore index 39356a4..f5dae2e 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/qemu-4.2.0.tar.xz +SOURCES/qemu-6.2.0.tar.xz diff --git a/.qemu-kvm.metadata b/.qemu-kvm.metadata index f479eb3..6f39e05 100644 --- a/.qemu-kvm.metadata +++ b/.qemu-kvm.metadata @@ -1 +1 @@ -b27aa828a8457bd8551ae3c81b80cc365e1f6bfe SOURCES/qemu-4.2.0.tar.xz +68cd61a466170115b88817e2d52db2cd7a92f43a SOURCES/qemu-6.2.0.tar.xz diff --git a/SOURCES/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/SOURCES/0001-redhat-Adding-slirp-to-the-exploded-tree.patch new file mode 100644 index 0000000..43fbac3 --- /dev/null +++ b/SOURCES/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -0,0 +1,17931 @@ +From 0a17d5f6abf800e88069738904e3fcd8427ab28a Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 5 Aug 2021 01:07:55 -0400 +Subject: redhat: Adding slirp to the exploded tree + +RH-Author: Danilo de Paula +Message-id: <20190907020756.8619-1-ddepaula@redhat.com> +Patchwork-id: 90309 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] redhat: Adding slirp to the exploded tree +Bugzilla: +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Wainer dos Santos Moschetta + +Until qemu-kvm-3.1 slirp used to live as a regular folder in qemu-kvm. +After that it got moved into its own submodule. Which means it's not +part of the qemu-kvm git tree anymore. + +This passed unnoticed for RHEL-AV-8.0.1 and 8.1.0 because qemu still ships +the code in the tarball. That's why scratch builds still work (they are based on +the tarball content). + +As we're receiving some CVEs against slirp, we need a way to patch +slirp in RHEL-8.1.0 without handling it as a separate package (as we do for +firmwares).
+ +The simplest solution is to copy the slirp folder from the tarball into the +exploded tree. + +To be able to do that, I had to make some changes: + +slirp needs to be removed from .gitmodules, otherwise git complains +about files on it. + +Since "make -C redhat rh-brew" uses the tarball and applies all the +patches on top of it, we need to remove the folder from the tarball before applying +the patch (because we are actually re-applying them). + +We also need to use --ignore-submodule while generating the patches for +scratch-build, otherwise it will include some weird definition of the +slirp folder in the patch, something that /usr/bin/patch gets mad with. + +After that I compared the patch list, after and before this change, and +saw no major differences. + +This is an exploded-tree-only change and shouldn't be applied to dist-git. + +Signed-off-by: Danilo C. L. de Paula + +Rebase notes (weekly-210217): + - Upstream slirp updated to 8f43a99191afb47ca3f3c6972f6306209f367ece + +Rebase notes (6.1.0-rc2): +- Upstream slirp updated to a88d9ace234a24ce1c17189642ef9104799425e0 + +Merged commits (weekly-210203): + - a3f5f082f Drop bogus IPv6 messages + +Merged commits (weekly-210714): +- ce9ddeef04 Add mtod_check() +- 0609398e76 bootp: limit vendor-specific area to input packet memory buffer +- 377f755273 bootp: check bootp_input buffer size +- 4101e41f0d upd6: check udp6_input buffer size +- 7a663c9667 tftp: check tftp_input buffer size +- 76f81fc22c tftp: introduce a header structure +- 6903e9ba25 udp: check upd_input buffer size +- 8aa4fe0b6d Fix "DHCP broken in libslirp v4.6.0" +--- + .gitmodules | 3 - + slirp/.clang-format | 58 ++ + slirp/.gitignore | 11 + + slirp/.gitlab-ci.yml | 43 + + slirp/.gitpublish | 3 + + slirp/CHANGELOG.md | 184 ++++ + slirp/COPYRIGHT | 62 ++ + slirp/README.md | 60 ++ + slirp/build-aux/git-version-gen | 158 ++++ + slirp/meson.build | 162 ++++ + slirp/meson_options.txt | 2 + + slirp/src/arp_table.c | 94 ++ + slirp/src/bootp.c | 375
++++++++ + slirp/src/bootp.h | 129 +++ + slirp/src/cksum.c | 179 ++++ + slirp/src/debug.h | 59 ++ + slirp/src/dhcpv6.c | 224 +++++ + slirp/src/dhcpv6.h | 68 ++ + slirp/src/dnssearch.c | 306 ++++++ + slirp/src/if.c | 215 +++++ + slirp/src/if.h | 25 + + slirp/src/ip.h | 242 +++++ + slirp/src/ip6.h | 214 +++++ + slirp/src/ip6_icmp.c | 444 +++++++++ + slirp/src/ip6_icmp.h | 220 +++++ + slirp/src/ip6_input.c | 88 ++ + slirp/src/ip6_output.c | 45 + + slirp/src/ip_icmp.c | 524 +++++++++++ + slirp/src/ip_icmp.h | 168 ++++ + slirp/src/ip_input.c | 463 +++++++++ + slirp/src/ip_output.c | 171 ++++ + slirp/src/libslirp-version.h.in | 24 + + slirp/src/libslirp.h | 236 +++++ + slirp/src/libslirp.map | 36 + + slirp/src/main.h | 16 + + slirp/src/mbuf.c | 281 ++++++ + slirp/src/mbuf.h | 192 ++++ + slirp/src/misc.c | 440 +++++++++ + slirp/src/misc.h | 72 ++ + slirp/src/ncsi-pkt.h | 445 +++++++++ + slirp/src/ncsi.c | 197 ++++ + slirp/src/ndp_table.c | 98 ++ + slirp/src/sbuf.c | 168 ++++ + slirp/src/sbuf.h | 27 + + slirp/src/slirp.c | 1387 +++++++++++++++++++++++++++ + slirp/src/slirp.h | 289 ++++++ + slirp/src/socket.c | 1104 ++++++++++++++++++++++ + slirp/src/socket.h | 186 ++++ + slirp/src/state.c | 379 ++++++++ + slirp/src/stream.c | 120 +++ + slirp/src/stream.h | 35 + + slirp/src/tcp.h | 169 ++++ + slirp/src/tcp_input.c | 1552 +++++++++++++++++++++++++++++++ + slirp/src/tcp_output.c | 516 ++++++++++ + slirp/src/tcp_subr.c | 1011 ++++++++++++++++++++ + slirp/src/tcp_timer.c | 286 ++++++ + slirp/src/tcp_timer.h | 130 +++ + slirp/src/tcp_var.h | 161 ++++ + slirp/src/tcpip.h | 104 +++ + slirp/src/tftp.c | 470 ++++++++++ + slirp/src/tftp.h | 58 ++ + slirp/src/udp.c | 425 +++++++++ + slirp/src/udp.h | 96 ++ + slirp/src/udp6.c | 196 ++++ + slirp/src/util.c | 441 +++++++++ + slirp/src/util.h | 203 ++++ + slirp/src/version.c | 8 + + slirp/src/vmstate.c | 444 +++++++++ + slirp/src/vmstate.h | 391 ++++++++ + 69 files changed, 17389 insertions(+), 3 deletions(-) + create mode 100644 
slirp/.clang-format + create mode 100644 slirp/.gitignore + create mode 100644 slirp/.gitlab-ci.yml + create mode 100644 slirp/.gitpublish + create mode 100644 slirp/CHANGELOG.md + create mode 100644 slirp/COPYRIGHT + create mode 100644 slirp/README.md + create mode 100755 slirp/build-aux/git-version-gen + create mode 100644 slirp/meson.build + create mode 100644 slirp/meson_options.txt + create mode 100644 slirp/src/arp_table.c + create mode 100644 slirp/src/bootp.c + create mode 100644 slirp/src/bootp.h + create mode 100644 slirp/src/cksum.c + create mode 100644 slirp/src/debug.h + create mode 100644 slirp/src/dhcpv6.c + create mode 100644 slirp/src/dhcpv6.h + create mode 100644 slirp/src/dnssearch.c + create mode 100644 slirp/src/if.c + create mode 100644 slirp/src/if.h + create mode 100644 slirp/src/ip.h + create mode 100644 slirp/src/ip6.h + create mode 100644 slirp/src/ip6_icmp.c + create mode 100644 slirp/src/ip6_icmp.h + create mode 100644 slirp/src/ip6_input.c + create mode 100644 slirp/src/ip6_output.c + create mode 100644 slirp/src/ip_icmp.c + create mode 100644 slirp/src/ip_icmp.h + create mode 100644 slirp/src/ip_input.c + create mode 100644 slirp/src/ip_output.c + create mode 100644 slirp/src/libslirp-version.h.in + create mode 100644 slirp/src/libslirp.h + create mode 100644 slirp/src/libslirp.map + create mode 100644 slirp/src/main.h + create mode 100644 slirp/src/mbuf.c + create mode 100644 slirp/src/mbuf.h + create mode 100644 slirp/src/misc.c + create mode 100644 slirp/src/misc.h + create mode 100644 slirp/src/ncsi-pkt.h + create mode 100644 slirp/src/ncsi.c + create mode 100644 slirp/src/ndp_table.c + create mode 100644 slirp/src/sbuf.c + create mode 100644 slirp/src/sbuf.h + create mode 100644 slirp/src/slirp.c + create mode 100644 slirp/src/slirp.h + create mode 100644 slirp/src/socket.c + create mode 100644 slirp/src/socket.h + create mode 100644 slirp/src/state.c + create mode 100644 slirp/src/stream.c + create mode 100644 slirp/src/stream.h 
+ create mode 100644 slirp/src/tcp.h + create mode 100644 slirp/src/tcp_input.c + create mode 100644 slirp/src/tcp_output.c + create mode 100644 slirp/src/tcp_subr.c + create mode 100644 slirp/src/tcp_timer.c + create mode 100644 slirp/src/tcp_timer.h + create mode 100644 slirp/src/tcp_var.h + create mode 100644 slirp/src/tcpip.h + create mode 100644 slirp/src/tftp.c + create mode 100644 slirp/src/tftp.h + create mode 100644 slirp/src/udp.c + create mode 100644 slirp/src/udp.h + create mode 100644 slirp/src/udp6.c + create mode 100644 slirp/src/util.c + create mode 100644 slirp/src/util.h + create mode 100644 slirp/src/version.c + create mode 100644 slirp/src/vmstate.c + create mode 100644 slirp/src/vmstate.h + +diff --git a/slirp/.clang-format b/slirp/.clang-format +new file mode 100644 +index 0000000000..17fb49fe65 +--- /dev/null ++++ b/slirp/.clang-format +@@ -0,0 +1,58 @@ ++# https://clang.llvm.org/docs/ClangFormat.html ++# https://clang.llvm.org/docs/ClangFormatStyleOptions.html ++--- ++Language: Cpp ++AlignAfterOpenBracket: Align ++AlignConsecutiveAssignments: false # although we like it, it creates churn ++AlignConsecutiveDeclarations: false ++AlignEscapedNewlinesLeft: true ++AlignOperands: true ++AlignTrailingComments: false # churn ++AllowAllParametersOfDeclarationOnNextLine: true ++AllowShortBlocksOnASingleLine: false ++AllowShortCaseLabelsOnASingleLine: false ++AllowShortFunctionsOnASingleLine: None ++AllowShortIfStatementsOnASingleLine: false ++AllowShortLoopsOnASingleLine: false ++AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account ++AlwaysBreakBeforeMultilineStrings: false ++BinPackArguments: true ++BinPackParameters: true ++BraceWrapping: ++ AfterControlStatement: false ++ AfterEnum: false ++ AfterFunction: true ++ AfterStruct: false ++ AfterUnion: false ++ BeforeElse: false ++ IndentBraces: false ++BreakBeforeBinaryOperators: None ++BreakBeforeBraces: Custom ++BreakBeforeTernaryOperators: false 
++BreakStringLiterals: true ++ColumnLimit: 80 ++ContinuationIndentWidth: 4 ++Cpp11BracedListStyle: false ++DerivePointerAlignment: false ++DisableFormat: false ++IndentCaseLabels: false ++IndentWidth: 4 ++IndentWrappedFunctionNames: false ++KeepEmptyLinesAtTheStartOfBlocks: false ++MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? ++MacroBlockEnd: '.*_END$' ++MaxEmptyLinesToKeep: 2 ++PointerAlignment: Right ++ReflowComments: true ++SortIncludes: false ++SpaceAfterCStyleCast: false ++SpaceBeforeAssignmentOperators: true ++SpaceBeforeParens: ControlStatements ++SpaceInEmptyParentheses: false ++SpacesBeforeTrailingComments: 1 ++SpacesInContainerLiterals: true ++SpacesInParentheses: false ++SpacesInSquareBrackets: false ++Standard: Auto ++UseTab: Never ++... +diff --git a/slirp/CHANGELOG.md b/slirp/CHANGELOG.md +new file mode 100644 +index 0000000000..bd4845ca29 +--- /dev/null ++++ b/slirp/CHANGELOG.md +@@ -0,0 +1,184 @@ ++# Changelog ++ ++All notable changes to this project will be documented in this file. ++ ++The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ++and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ++ ++## [4.6.1] - 2021-06-18 ++ ++### Fixed ++ ++ - Fix DHCP regression introduced in 4.6.0. !95 ++ ++## [4.6.0] - 2021-06-14 ++ ++### Added ++ ++ - mbuf: Add debugging helpers for allocation. !90 ++ ++### Changed ++ ++ - Revert "Set macOS deployment target to macOS 10.4". !93 ++ ++### Fixed ++ ++ - mtod()-related buffer overflows (CVE-2021-3592 #44, CVE-2021-3593 #45, ++ CVE-2021-3594 #47, CVE-2021-3595 #46). ++ - poll_fd: add missing fd registration for UDP and ICMP ++ - ncsi: make ncsi_calculate_checksum work with unaligned data. !89 ++ - Various typos and doc fixes. !88 ++ ++## [4.5.0] - 2021-05-18 ++ ++### Added ++ ++ - IPv6 forwarding. !62 !75 !77 ++ - slirp_neighbor_info() to dump the ARP/NDP tables. !71 ++ ++### Changed ++ ++ - Lazy guest address resolution for IPv6. 
!81 ++ - Improve signal handling when spawning a child. !61 ++ - Set macOS deployment target to macOS 10.4. !72 ++ - slirp_add_hostfwd: Ensure all error paths set errno. !80 ++ - More API documentation. ++ ++### Fixed ++ ++ - Assertion failure on unspecified IPv6 address. !86 ++ - Disable polling for PRI on MacOS, fixing some closing streams issues. !73 ++ - Various memory leak fixes on fastq/batchq. !68 ++ - Memory leak on IPv6 fast-send. !67 ++ - Slow socket response on Windows. !64 ++ - Misc build and code cleanups. !60 !63 !76 !79 !84 ++ ++## [4.4.0] - 2020-12-02 ++ ++### Added ++ ++ - udp, udp6, icmp: handle TTL value. !48 ++ - Enable forwarding ICMP errors. !49 ++ - Add DNS resolving for iOS. !54 ++ ++### Changed ++ ++ - Improve meson subproject() support. !53 ++ - Removed Makefile-based build system. !56 ++ ++### Fixed ++ ++ - socket: consume empty packets. !55 ++ - check pkt_len before reading protocol header (CVE-2020-29129). !57 ++ - ip_stripoptions use memmove (fixes undefined behaviour). !47 ++ - various Coverity-related changes/fixes. ++ ++## [4.3.1] - 2020-07-08 ++ ++### Changed ++ ++ - A silent truncation could occur in `slirp_fmt()`, which will now print a ++ critical message. See also #22. ++ ++### Fixed ++ ++ - CVE-2020-10756 - Drop bogus IPv6 messages that could lead to data leakage. ++ See !44 and !42. ++ - Fix win32 builds by using the SLIRP_PACKED definition. ++ - Various coverity scan errors fixed. !41 ++ - Fix new GCC warnings. 
!43 ++ ++## [4.3.0] - 2020-04-22 ++ ++### Added ++ ++ - `SLIRP_VERSION_STRING` macro, with the git sha suffix when building from git ++ - `SlirpConfig.disable_dns`, to disable DNS redirection #16 ++ ++### Changed ++ ++ - `slirp_version_string()` now has the git sha suffix when building from git ++ - Limit DNS redirection to port 53 #16 ++ ++### Fixed ++ ++ - Fix build regression with mingw & NetBSD ++ - Fix use-after-free in `ip_reass()` (CVE-2020-1983) ++ ++## [4.2.0] - 2020-03-17 ++ ++### Added ++ ++ - New API function `slirp_add_unix`: add a forward rule to a Unix socket. ++ - New API function `slirp_remove_guestfwd`: remove a forward rule previously ++ added by `slirp_add_exec`, `slirp_add_unix` or `slirp_add_guestfwd` ++ - New `SlirpConfig.outbound_addr{,6}` fields to bind output socket to a ++ specific address ++ ++### Changed ++ ++ - socket: do not fallback on host loopback if `get_dns_addr()` failed ++ or the address is in slirp network ++ ++### Fixed ++ ++ - ncsi: fix checksum OOB memory access ++ - `tcp_emu()`: fix OOB accesses ++ - tftp: restrict relative path access ++ - state: fix loading of guestfwd state ++ ++## [4.1.0] - 2019-12-02 ++ ++### Added ++ ++ - The `slirp_new()` API, simpler and more extensible than `slirp_init()`. ++ - Allow custom MTU configuration. ++ - Option to disable host loopback connections. ++ - CI now runs scan-build too. ++ ++### Changed ++ ++ - Disable `tcp_emu()` by default. `tcp_emu()` is known to have caused ++ several CVEs, and not useful today in most cases. The feature can ++ be still enabled by setting `SlirpConfig.enable_emu` to true. ++ - meson build system is now `subproject()` friendly. ++ - Replace remaining `malloc()`/`free()` with glib (which aborts on OOM) ++ - Various code cleanups. ++ ++### Deprecated ++ ++ - The `slirp_init()` API. ++ ++### Fixed ++ ++ - `getpeername()` error after `shutdown(SHUT_WR)`. ++ - Exec forward: correctly parse command lines that contain spaces. ++ - Allow 0.0.0.0 destination address.
++ - Make host receive broadcast packets. ++ - Various memory related fixes (heap overflow, leaks, NULL ++ dereference). ++ - Compilation warnings, dead code. ++ ++## [4.0.0] - 2019-05-24 ++ ++### Added ++ ++ - Installable as a shared library. ++ - meson build system ++ (& make build system for in-tree QEMU integration) ++ ++### Changed ++ ++ - Standalone project, removing any QEMU dependency. ++ - License clarifications. ++ ++[Unreleased]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.6.1...master ++[4.6.1]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.6.0...v4.6.1 ++[4.6.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.5.0...v4.6.0 ++[4.5.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.4.0...v4.5.0 ++[4.4.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.3.1...v4.4.0 ++[4.3.1]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.3.0...v4.3.1 ++[4.3.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.2.0...v4.3.0 ++[4.2.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.1.0...v4.2.0 ++[4.1.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.0.0...v4.1.0 ++[4.0.0]: https://gitlab.freedesktop.org/slirp/libslirp/commits/v4.0.0 +diff --git a/slirp/COPYRIGHT b/slirp/COPYRIGHT +new file mode 100644 +index 0000000000..ed49512dbc +--- /dev/null ++++ b/slirp/COPYRIGHT +@@ -0,0 +1,62 @@ ++Slirp was written by Danny Gasparovski. ++Copyright (c), 1995,1996 All Rights Reserved. ++ ++Slirp is free software; "free" as in you don't have to pay for it, and you ++are free to do whatever you want with it. I do not accept any donations, ++monetary or otherwise, for Slirp. Instead, I would ask you to pass this ++potential donation to your favorite charity. In fact, I encourage ++*everyone* who finds Slirp useful to make a small donation to their ++favorite charity (for example, GreenPeace). This is not a requirement, but ++a suggestion from someone who highly values the service they provide. 
++ ++The copyright terms and conditions: ++ ++---BEGIN--- ++ ++ Copyright (c) 1995,1996 Danny Gasparovski. All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ 1. Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ 2. Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ 3. Neither the name of the copyright holder nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, ++ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY ++ AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ++ DANNY GASPAROVSKI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, ++ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++---END--- ++ ++This basically means you can do anything you want with the software, except ++1) call it your own, and 2) claim warranty on it. There is no warranty for ++this software. None. Nada. If you lose a million dollars while using ++Slirp, that's your loss not mine. So, ***USE AT YOUR OWN RISK!***. ++ ++If these conditions cannot be met due to legal restrictions (E.g. 
where it ++is against the law to give out Software without warranty), you must cease ++using the software and delete all copies you have. ++ ++Slirp uses code that is copyrighted by the following people/organizations: ++ ++Juha Pirkola. ++Gregory M. Christy. ++The Regents of the University of California. ++Carnegie Mellon University. ++The Australian National University. ++RSA Data Security, Inc. ++ ++Please read the top of each source file for the details on the various ++copyrights. +diff --git a/slirp/README.md b/slirp/README.md +new file mode 100644 +index 0000000000..9f9c1b14f6 +--- /dev/null ++++ b/slirp/README.md +@@ -0,0 +1,60 @@ ++# libslirp ++ ++libslirp is a user-mode networking library used by virtual machines, ++containers or various tools. ++ ++## Getting Started ++ ++### Prerequisites ++ ++A C compiler, meson and glib2 development libraries. ++ ++(see also [.gitlab-ci.yml](.gitlab-ci.yml) DEPS variable for the list ++of dependencies on Fedora) ++ ++### Building ++ ++You may build and install the shared library with meson: ++ ++``` sh ++meson build ++ninja -C build install ++``` ++And configure QEMU with --enable-slirp=system to link against it. ++ ++(QEMU may build with the submodule static library using --enable-slirp=git) ++ ++### Testing ++ ++Unfortunately, there are no automated tests available. ++ ++You may run QEMU ``-net user`` linked with your development version. ++ ++## Contributing ++ ++Feel free to open issues on the [project ++issues](https://gitlab.freedesktop.org/slirp/libslirp/issues) page. ++ ++You may clone the [gitlab ++project](https://gitlab.freedesktop.org/slirp/libslirp) and create a ++merge request. ++ ++Contributing with gitlab allows gitlab workflow, tracking issues, ++running CI etc. ++ ++Alternatively, you may send patches to slirp@lists.freedesktop.org ++mailing list. 
++ ++## Versioning ++ ++We intend to use [libtool's ++versioning](https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html) ++for the shared libraries and use [SemVer](http://semver.org/) for ++project versions. ++ ++For the versions available, see the [tags on this ++repository](https://gitlab.freedesktop.org/slirp/libslirp/releases). ++ ++## License ++ ++See the [COPYRIGHT](COPYRIGHT) file for details. +diff --git a/slirp/build-aux/git-version-gen b/slirp/build-aux/git-version-gen +new file mode 100755 +index 0000000000..5617eb8d4e +--- /dev/null ++++ b/slirp/build-aux/git-version-gen +@@ -0,0 +1,158 @@ ++#!/bin/sh ++# Print a version string. ++scriptversion=2010-06-14.19; # UTC ++ ++# Copyright (C) 2007-2010 Free Software Foundation, Inc. ++# ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see <http://www.gnu.org/licenses/>. ++ ++# This script is derived from GIT-VERSION-GEN from GIT: http://git.or.cz/. ++# It may be run two ways: ++# - from a git repository in which the "git describe" command below ++# produces useful output (thus requiring at least one signed tag) ++# - from a non-git-repo directory containing a .tarball-version file, which ++# presumes this script is invoked like "./git-version-gen .tarball-version".
++ ++# In order to use intra-version strings in your project, you will need two ++# separate generated version string files: ++# ++# .tarball-version - present only in a distribution tarball, and not in ++# a checked-out repository. Created with contents that were learned at ++# the last time autoconf was run, and used by git-version-gen. Must not ++# be present in either $(srcdir) or $(builddir) for git-version-gen to ++# give accurate answers during normal development with a checked out tree, ++# but must be present in a tarball when there is no version control system. ++# Therefore, it cannot be used in any dependencies. GNUmakefile has ++# hooks to force a reconfigure at distribution time to get the value ++# correct, without penalizing normal development with extra reconfigures. ++# ++# .version - present in a checked-out repository and in a distribution ++# tarball. Usable in dependencies, particularly for files that don't ++# want to depend on config.h but do want to track version changes. ++# Delete this file prior to any autoconf run where you want to rebuild ++# files to pick up a version string change; and leave it stale to ++# minimize rebuild time after unrelated changes to configure sources. ++# ++# It is probably wise to add these two files to .gitignore, so that you ++# don't accidentally commit either generated file. ++# ++# Use the following line in your configure.ac, so that $(VERSION) will ++# automatically be up-to-date each time configure is run (and note that ++# since configure.ac no longer includes a version string, Makefile rules ++# should not depend on configure.ac for version updates). ++# ++# AC_INIT([GNU project], ++# m4_esyscmd([build-aux/git-version-gen .tarball-version]), ++# [bug-project@example]) ++# ++# Then use the following lines in your Makefile.am, so that .version ++# will be present for dependencies, and so that .tarball-version will ++# exist in distribution tarballs. 
++# ++# BUILT_SOURCES = $(top_srcdir)/.version ++# $(top_srcdir)/.version: ++# echo $(VERSION) > $@-t && mv $@-t $@ ++# dist-hook: ++# echo $(VERSION) > $(distdir)/.tarball-version ++ ++case $# in ++ 1|2) ;; ++ *) echo 1>&2 "Usage: $0 \$srcdir/.tarball-version" \ ++ '[TAG-NORMALIZATION-SED-SCRIPT]' ++ exit 1;; ++esac ++ ++tarball_version_file=$1 ++tag_sed_script="${2:-s/x/x/}" ++nl=' ++' ++ ++# Avoid meddling by environment variable of the same name. ++v= ++ ++# First see if there is a tarball-only version file. ++# then try "git describe", then default. ++if test -f $tarball_version_file ++then ++ v=`cat $tarball_version_file` || exit 1 ++ case $v in ++ *$nl*) v= ;; # reject multi-line output ++ [0-9]*) ;; ++ *) v= ;; ++ esac ++ test -z "$v" \ ++ && echo "$0: WARNING: $tarball_version_file seems to be damaged" 1>&2 ++fi ++ ++if test -n "$v" ++then ++ : # use $v ++elif test -d .git \ ++ && v=`git describe --abbrev=4 --match='v*' HEAD 2>/dev/null \ ++ || git describe --abbrev=4 HEAD 2>/dev/null` \ ++ && v=`printf '%s\n' "$v" | sed "$tag_sed_script"` \ ++ && case $v in ++ v[0-9]*) ;; ++ *) (exit 1) ;; ++ esac ++then ++ # Is this a new git that lists number of commits since the last ++ # tag or the previous older version that did not? ++ # Newer: v6.10-77-g0f8faeb ++ # Older: v6.10-g0f8faeb ++ case $v in ++ *-*-*) : git describe is okay three part flavor ;; ++ *-*) ++ : git describe is older two part flavor ++ # Recreate the number of commits and rewrite such that the ++ # result is the same as if we were using the newer version ++ # of git describe. ++ vtag=`echo "$v" | sed 's/-.*//'` ++ numcommits=`git rev-list "$vtag"..HEAD | wc -l` ++ v=`echo "$v" | sed "s/\(.*\)-\(.*\)/\1-$numcommits-\2/"`; ++ ;; ++ esac ++ ++ # Change the first '-' to a '.', so version-comparing tools work properly. ++ # Remove the "g" in git describe's output string, to save a byte. 
++ v=`echo "$v" | sed 's/-/./;s/\(.*\)-g/\1-/'`; ++else ++ v=UNKNOWN ++fi ++ ++v=`echo "$v" |sed 's/^v//'` ++ ++# Don't declare a version "dirty" merely because a time stamp has changed. ++git update-index --refresh > /dev/null 2>&1 ++ ++dirty=`sh -c 'git diff-index --name-only HEAD' 2>/dev/null` || dirty= ++case "$dirty" in ++ '') ;; ++ *) # Append the suffix only if there isn't one already. ++ case $v in ++ *-dirty) ;; ++ *) v="$v-dirty" ;; ++ esac ;; ++esac ++ ++# Omit the trailing newline, so that m4_esyscmd can use the result directly. ++echo "$v" | tr -d "$nl" ++ ++# Local variables: ++# eval: (add-hook 'write-file-hooks 'time-stamp) ++# time-stamp-start: "scriptversion=" ++# time-stamp-format: "%:y-%02m-%02d.%02H" ++# time-stamp-time-zone: "UTC" ++# time-stamp-end: "; # UTC" ++# End: +diff --git a/slirp/meson.build b/slirp/meson.build +new file mode 100644 +index 0000000000..cb1396ad59 +--- /dev/null ++++ b/slirp/meson.build +@@ -0,0 +1,162 @@ ++project('libslirp', 'c', ++ version : '4.6.1', ++ license : 'BSD-3-Clause', ++ default_options : ['warning_level=1', 'c_std=gnu99'], ++ meson_version : '>= 0.50', ++) ++ ++version = meson.project_version() ++varr = version.split('.') ++major_version = varr[0] ++minor_version = varr[1] ++micro_version = varr[2] ++ ++conf = configuration_data() ++conf.set('SLIRP_MAJOR_VERSION', major_version) ++conf.set('SLIRP_MINOR_VERSION', minor_version) ++conf.set('SLIRP_MICRO_VERSION', micro_version) ++ ++full_version = run_command('build-aux/git-version-gen', ++ '@0@/.tarball-version'.format(meson.current_source_dir()), ++ check : true).stdout().strip() ++if full_version.startswith('UNKNOWN') ++ full_version = meson.project_version() ++elif not full_version.startswith(meson.project_version()) ++ error('meson.build project version @0@ does not match git-describe output @1@' ++ .format(meson.project_version(), full_version)) ++endif ++conf.set_quoted('SLIRP_VERSION_STRING', full_version + get_option('version_suffix')) ++ ++# 
libtool versioning - this applies to libslirp ++# ++# See http://sources.redhat.com/autobook/autobook/autobook_91.html#SEC91 for details ++# ++# - If interfaces have been changed or added, but binary compatibility ++# has been preserved, change: ++# CURRENT += 1 ++# REVISION = 0 ++# AGE += 1 ++# - If binary compatibility has been broken (eg removed or changed ++# interfaces), change: ++# CURRENT += 1 ++# REVISION = 0 ++# AGE = 0 ++# - If the interface is the same as the previous version, but bugs are ++# fixed, change: ++# REVISION += 1 ++lt_current = 3 ++lt_revision = 1 ++lt_age = 3 ++lt_version = '@0@.@1@.@2@'.format(lt_current - lt_age, lt_age, lt_revision) ++ ++host_system = host_machine.system() ++ ++glib_dep = dependency('glib-2.0') ++ ++cc = meson.get_compiler('c') ++ ++platform_deps = [] ++ ++if host_system == 'windows' ++ platform_deps += [ ++ cc.find_library('ws2_32'), ++ cc.find_library('iphlpapi') ++ ] ++elif host_system == 'darwin' ++ platform_deps += [ ++ cc.find_library('resolv') ++ ] ++endif ++ ++cargs = [ ++ '-DG_LOG_DOMAIN="Slirp"', ++] ++ ++if cc.check_header('valgrind/valgrind.h') ++ cargs += [ '-DHAVE_VALGRIND=1' ] ++endif ++ ++sources = [ ++ 'src/arp_table.c', ++ 'src/bootp.c', ++ 'src/cksum.c', ++ 'src/dhcpv6.c', ++ 'src/dnssearch.c', ++ 'src/if.c', ++ 'src/ip6_icmp.c', ++ 'src/ip6_input.c', ++ 'src/ip6_output.c', ++ 'src/ip_icmp.c', ++ 'src/ip_input.c', ++ 'src/ip_output.c', ++ 'src/mbuf.c', ++ 'src/misc.c', ++ 'src/ncsi.c', ++ 'src/ndp_table.c', ++ 'src/sbuf.c', ++ 'src/slirp.c', ++ 'src/socket.c', ++ 'src/state.c', ++ 'src/stream.c', ++ 'src/tcp_input.c', ++ 'src/tcp_output.c', ++ 'src/tcp_subr.c', ++ 'src/tcp_timer.c', ++ 'src/tftp.c', ++ 'src/udp.c', ++ 'src/udp6.c', ++ 'src/util.c', ++ 'src/version.c', ++ 'src/vmstate.c', ++] ++ ++mapfile = 'src/libslirp.map' ++vflag = [] ++vflag_test = '-Wl,--version-script,@0@/@1@'.format(meson.current_source_dir(), mapfile) ++if cc.has_link_argument(vflag_test) ++ vflag += vflag_test ++endif ++ 
++install_devel = not meson.is_subproject() ++ ++configure_file( ++ input : 'src/libslirp-version.h.in', ++ output : 'libslirp-version.h', ++ install : install_devel, ++ install_dir : join_paths(get_option('includedir'), 'slirp'), ++ configuration : conf ++) ++ ++lib = library('slirp', sources, ++ version : lt_version, ++ c_args : cargs, ++ link_args : vflag, ++ link_depends : mapfile, ++ dependencies : [glib_dep, platform_deps], ++ install : install_devel or get_option('default_library') == 'shared', ++) ++ ++if install_devel ++ install_headers(['src/libslirp.h'], subdir : 'slirp') ++ ++ pkg = import('pkgconfig') ++ ++ pkg.generate( ++ version : version, ++ libraries : lib, ++ requires : [ ++ 'glib-2.0', ++ ], ++ name : 'slirp', ++ description : 'User-space network stack', ++ filebase : 'slirp', ++ subdirs : 'slirp', ++ ) ++else ++ if get_option('default_library') == 'both' ++ lib = lib.get_static_lib() ++ endif ++ libslirp_dep = declare_dependency( ++ include_directories: include_directories('.', 'src'), ++ link_with: lib) ++endif +diff --git a/slirp/meson_options.txt b/slirp/meson_options.txt +new file mode 100644 +index 0000000000..27e7c8059c +--- /dev/null ++++ b/slirp/meson_options.txt +@@ -0,0 +1,2 @@ ++option('version_suffix', type: 'string', value: '', ++ description: 'Suffix to append to SLIRP_VERSION_STRING') +diff --git a/slirp/src/arp_table.c b/slirp/src/arp_table.c +new file mode 100644 +index 0000000000..ba8c8a4eee +--- /dev/null ++++ b/slirp/src/arp_table.c +@@ -0,0 +1,94 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * ARP table ++ * ++ * Copyright (c) 2011 AdaCore ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom 
the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "slirp.h" ++ ++#include ++ ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, ++ const uint8_t ethaddr[ETH_ALEN]) ++{ ++ const uint32_t broadcast_addr = ++ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; ++ ArpTable *arptbl = &slirp->arp_table; ++ int i; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ DEBUG_CALL("arp_table_add"); ++ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); ++ DEBUG_ARG("hw addr = %s", slirp_ether_ntoa(ethaddr, ethaddr_str, ++ sizeof(ethaddr_str))); ++ ++ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { ++ /* Do not register broadcast addresses */ ++ return; ++ } ++ ++ /* Search for an entry */ ++ for (i = 0; i < ARP_TABLE_SIZE; i++) { ++ if (arptbl->table[i].ar_sip == ip_addr) { ++ /* Update the entry */ ++ memcpy(arptbl->table[i].ar_sha, ethaddr, ETH_ALEN); ++ return; ++ } ++ } ++ ++ /* No entry found, create a new one */ ++ arptbl->table[arptbl->next_victim].ar_sip = ip_addr; ++ memcpy(arptbl->table[arptbl->next_victim].ar_sha, ethaddr, ETH_ALEN); ++ arptbl->next_victim = (arptbl->next_victim + 1) % ARP_TABLE_SIZE; ++} ++ ++bool arp_table_search(Slirp *slirp, uint32_t ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]) ++{ ++ const uint32_t broadcast_addr = ++ 
~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; ++ ArpTable *arptbl = &slirp->arp_table; ++ int i; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ DEBUG_CALL("arp_table_search"); ++ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); ++ ++ /* If broadcast address */ ++ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { ++ /* return Ethernet broadcast address */ ++ memset(out_ethaddr, 0xff, ETH_ALEN); ++ return 1; ++ } ++ ++ for (i = 0; i < ARP_TABLE_SIZE; i++) { ++ if (arptbl->table[i].ar_sip == ip_addr) { ++ memcpy(out_ethaddr, arptbl->table[i].ar_sha, ETH_ALEN); ++ DEBUG_ARG("found hw addr = %s", ++ slirp_ether_ntoa(out_ethaddr, ethaddr_str, ++ sizeof(ethaddr_str))); ++ return 1; ++ } ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c +new file mode 100644 +index 0000000000..d78d61b44c +--- /dev/null ++++ b/slirp/src/bootp.c +@@ -0,0 +1,375 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * QEMU BOOTP/DHCP server ++ * ++ * Copyright (c) 2004 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#include "slirp.h" ++ ++#if defined(_WIN32) ++/* Windows ntohl() returns an u_long value. ++ * Add a type cast to match the format strings. */ ++#define ntohl(n) ((uint32_t)ntohl(n)) ++#endif ++ ++/* XXX: only DHCP is supported */ ++ ++#define LEASE_TIME (24 * 3600) ++ ++static const uint8_t rfc1533_cookie[] = { RFC1533_COOKIE }; ++ ++#define DPRINTF(fmt, ...) DEBUG_CALL(fmt, ##__VA_ARGS__) ++ ++static BOOTPClient *get_new_addr(Slirp *slirp, struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ BOOTPClient *bc; ++ int i; ++ ++ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { ++ bc = &slirp->bootp_clients[i]; ++ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) ++ goto found; ++ } ++ return NULL; ++found: ++ bc = &slirp->bootp_clients[i]; ++ bc->allocated = 1; ++ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); ++ return bc; ++} ++ ++static BOOTPClient *request_addr(Slirp *slirp, const struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ uint32_t req_addr = ntohl(paddr->s_addr); ++ uint32_t dhcp_addr = ntohl(slirp->vdhcp_startaddr.s_addr); ++ BOOTPClient *bc; ++ ++ if (req_addr >= dhcp_addr && req_addr < (dhcp_addr + NB_BOOTP_CLIENTS)) { ++ bc = &slirp->bootp_clients[req_addr - dhcp_addr]; ++ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) { ++ bc->allocated = 1; ++ return bc; ++ } ++ } ++ return NULL; ++} ++ ++static BOOTPClient *find_addr(Slirp *slirp, struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ BOOTPClient *bc; ++ int i; ++ ++ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { ++ if (!memcmp(macaddr, slirp->bootp_clients[i].macaddr, 6)) ++ goto found; ++ } ++ return NULL; ++found: ++ bc = &slirp->bootp_clients[i]; ++ bc->allocated = 1; ++ paddr->s_addr = 
slirp->vdhcp_startaddr.s_addr + htonl(i); ++ return bc; ++} ++ ++static void dhcp_decode(const struct bootp_t *bp, ++ const uint8_t *bp_end, ++ int *pmsg_type, ++ struct in_addr *preq_addr) ++{ ++ const uint8_t *p; ++ int len, tag; ++ ++ *pmsg_type = 0; ++ preq_addr->s_addr = htonl(0L); ++ ++ p = bp->bp_vend; ++ if (memcmp(p, rfc1533_cookie, 4) != 0) ++ return; ++ p += 4; ++ while (p < bp_end) { ++ tag = p[0]; ++ if (tag == RFC1533_PAD) { ++ p++; ++ } else if (tag == RFC1533_END) { ++ break; ++ } else { ++ p++; ++ if (p >= bp_end) ++ break; ++ len = *p++; ++ if (p + len > bp_end) { ++ break; ++ } ++ DPRINTF("dhcp: tag=%d len=%d\n", tag, len); ++ ++ switch (tag) { ++ case RFC2132_MSG_TYPE: ++ if (len >= 1) ++ *pmsg_type = p[0]; ++ break; ++ case RFC2132_REQ_ADDR: ++ if (len >= 4) { ++ memcpy(&(preq_addr->s_addr), p, 4); ++ } ++ break; ++ default: ++ break; ++ } ++ p += len; ++ } ++ } ++ if (*pmsg_type == DHCPREQUEST && preq_addr->s_addr == htonl(0L) && ++ bp->bp_ciaddr.s_addr) { ++ memcpy(&(preq_addr->s_addr), &bp->bp_ciaddr, 4); ++ } ++} ++ ++static void bootp_reply(Slirp *slirp, ++ const struct bootp_t *bp, ++ const uint8_t *bp_end) ++{ ++ BOOTPClient *bc = NULL; ++ struct mbuf *m; ++ struct bootp_t *rbp; ++ struct sockaddr_in saddr, daddr; ++ struct in_addr preq_addr; ++ int dhcp_msg_type, val; ++ uint8_t *q; ++ uint8_t *end; ++ uint8_t client_ethaddr[ETH_ALEN]; ++ ++ /* extract exact DHCP msg type */ ++ dhcp_decode(bp, bp_end, &dhcp_msg_type, &preq_addr); ++ DPRINTF("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type); ++ if (preq_addr.s_addr != htonl(0L)) ++ DPRINTF(" req_addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); ++ else { ++ DPRINTF("\n"); ++ } ++ ++ if (dhcp_msg_type == 0) ++ dhcp_msg_type = DHCPREQUEST; /* Force reply for old BOOTP clients */ ++ ++ if (dhcp_msg_type != DHCPDISCOVER && dhcp_msg_type != DHCPREQUEST) ++ return; ++ ++ /* Get client's hardware address from bootp request */ ++ memcpy(client_ethaddr, bp->bp_hwaddr, ETH_ALEN); ++ ++ m = 
m_get(slirp); ++ if (!m) { ++ return; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m_inc(m, sizeof(struct bootp_t) + DHCP_OPT_LEN); ++ rbp = (struct bootp_t *)m->m_data; ++ m->m_data += sizeof(struct udpiphdr); ++ memset(rbp, 0, sizeof(struct bootp_t) + DHCP_OPT_LEN); ++ ++ if (dhcp_msg_type == DHCPDISCOVER) { ++ if (preq_addr.s_addr != htonl(0L)) { ++ bc = request_addr(slirp, &preq_addr, client_ethaddr); ++ if (bc) { ++ daddr.sin_addr = preq_addr; ++ } ++ } ++ if (!bc) { ++ new_addr: ++ bc = get_new_addr(slirp, &daddr.sin_addr, client_ethaddr); ++ if (!bc) { ++ DPRINTF("no address left\n"); ++ return; ++ } ++ } ++ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); ++ } else if (preq_addr.s_addr != htonl(0L)) { ++ bc = request_addr(slirp, &preq_addr, client_ethaddr); ++ if (bc) { ++ daddr.sin_addr = preq_addr; ++ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); ++ } else { ++ /* DHCPNAKs should be sent to broadcast */ ++ daddr.sin_addr.s_addr = 0xffffffff; ++ } ++ } else { ++ bc = find_addr(slirp, &daddr.sin_addr, bp->bp_hwaddr); ++ if (!bc) { ++ /* if never assigned, behaves as if it was already ++ assigned (windows fix because it remembers its address) */ ++ goto new_addr; ++ } ++ } ++ ++ /* Update ARP table for this IP address */ ++ arp_table_add(slirp, daddr.sin_addr.s_addr, client_ethaddr); ++ ++ saddr.sin_addr = slirp->vhost_addr; ++ saddr.sin_port = htons(BOOTP_SERVER); ++ ++ daddr.sin_port = htons(BOOTP_CLIENT); ++ ++ rbp->bp_op = BOOTP_REPLY; ++ rbp->bp_xid = bp->bp_xid; ++ rbp->bp_htype = 1; ++ rbp->bp_hlen = 6; ++ memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, ETH_ALEN); ++ ++ rbp->bp_yiaddr = daddr.sin_addr; /* Client IP address */ ++ rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */ ++ ++ q = rbp->bp_vend; ++ end = rbp->bp_vend + DHCP_OPT_LEN; ++ memcpy(q, rfc1533_cookie, 4); ++ q += 4; ++ ++ if (bc) { ++ DPRINTF("%s addr=%08" PRIx32 "\n", ++ (dhcp_msg_type == DHCPDISCOVER) ? 
"offered" : "ack'ed", ++ ntohl(daddr.sin_addr.s_addr)); ++ ++ if (dhcp_msg_type == DHCPDISCOVER) { ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPOFFER; ++ } else /* DHCPREQUEST */ { ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPACK; ++ } ++ ++ if (slirp->bootp_filename) { ++ g_assert(strlen(slirp->bootp_filename) < sizeof(rbp->bp_file)); ++ strcpy(rbp->bp_file, slirp->bootp_filename); ++ } ++ ++ *q++ = RFC2132_SRV_ID; ++ *q++ = 4; ++ memcpy(q, &saddr.sin_addr, 4); ++ q += 4; ++ ++ *q++ = RFC1533_NETMASK; ++ *q++ = 4; ++ memcpy(q, &slirp->vnetwork_mask, 4); ++ q += 4; ++ ++ if (!slirp->restricted) { ++ *q++ = RFC1533_GATEWAY; ++ *q++ = 4; ++ memcpy(q, &saddr.sin_addr, 4); ++ q += 4; ++ ++ *q++ = RFC1533_DNS; ++ *q++ = 4; ++ memcpy(q, &slirp->vnameserver_addr, 4); ++ q += 4; ++ } ++ ++ *q++ = RFC2132_LEASE_TIME; ++ *q++ = 4; ++ val = htonl(LEASE_TIME); ++ memcpy(q, &val, 4); ++ q += 4; ++ ++ if (*slirp->client_hostname) { ++ val = strlen(slirp->client_hostname); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting host name option."); ++ } else { ++ *q++ = RFC1533_HOSTNAME; ++ *q++ = val; ++ memcpy(q, slirp->client_hostname, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->vdomainname) { ++ val = strlen(slirp->vdomainname); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting domain name option."); ++ } else { ++ *q++ = RFC1533_DOMAINNAME; ++ *q++ = val; ++ memcpy(q, slirp->vdomainname, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->tftp_server_name) { ++ val = strlen(slirp->tftp_server_name); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting tftp-server-name option."); ++ } else { ++ *q++ = RFC2132_TFTP_SERVER_NAME; ++ *q++ = val; ++ memcpy(q, slirp->tftp_server_name, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->vdnssearch) { ++ val = slirp->vdnssearch_len; ++ if (q + val >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting domain-search 
option."); ++ } else { ++ memcpy(q, slirp->vdnssearch, val); ++ q += val; ++ } ++ } ++ } else { ++ static const char nak_msg[] = "requested address not available"; ++ ++ DPRINTF("nak'ed addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); ++ ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPNAK; ++ ++ *q++ = RFC2132_MESSAGE; ++ *q++ = sizeof(nak_msg) - 1; ++ memcpy(q, nak_msg, sizeof(nak_msg) - 1); ++ q += sizeof(nak_msg) - 1; ++ } ++ assert(q < end); ++ *q++ = RFC1533_END; ++ ++ daddr.sin_addr.s_addr = 0xffffffffu; ++ ++ assert(q <= end); ++ ++ m->m_len = sizeof(struct bootp_t) + (end - rbp->bp_vend) - sizeof(struct ip) - sizeof(struct udphdr); ++ udp_output(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); ++} ++ ++void bootp_input(struct mbuf *m) ++{ ++ struct bootp_t *bp = mtod_check(m, sizeof(struct bootp_t)); ++ ++ if (bp && bp->bp_op == BOOTP_REQUEST) { ++ bootp_reply(m->slirp, bp, m_end(m)); ++ } ++} +diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h +new file mode 100644 +index 0000000000..31ce5fd33f +--- /dev/null ++++ b/slirp/src/bootp.h +@@ -0,0 +1,129 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* bootp/dhcp defines */ ++ ++#ifndef SLIRP_BOOTP_H ++#define SLIRP_BOOTP_H ++ ++#define BOOTP_SERVER 67 ++#define BOOTP_CLIENT 68 ++ ++#define BOOTP_REQUEST 1 ++#define BOOTP_REPLY 2 ++ ++#define RFC1533_COOKIE 99, 130, 83, 99 ++#define RFC1533_PAD 0 ++#define RFC1533_NETMASK 1 ++#define RFC1533_TIMEOFFSET 2 ++#define RFC1533_GATEWAY 3 ++#define RFC1533_TIMESERVER 4 ++#define RFC1533_IEN116NS 5 ++#define RFC1533_DNS 6 ++#define RFC1533_LOGSERVER 7 ++#define RFC1533_COOKIESERVER 8 ++#define RFC1533_LPRSERVER 9 ++#define RFC1533_IMPRESSSERVER 10 ++#define RFC1533_RESOURCESERVER 11 ++#define RFC1533_HOSTNAME 12 ++#define RFC1533_BOOTFILESIZE 13 ++#define RFC1533_MERITDUMPFILE 14 ++#define RFC1533_DOMAINNAME 15 ++#define RFC1533_SWAPSERVER 16 ++#define RFC1533_ROOTPATH 17 ++#define RFC1533_EXTENSIONPATH 18 ++#define RFC1533_IPFORWARDING 19 ++#define 
RFC1533_IPSOURCEROUTING 20 ++#define RFC1533_IPPOLICYFILTER 21 ++#define RFC1533_IPMAXREASSEMBLY 22 ++#define RFC1533_IPTTL 23 ++#define RFC1533_IPMTU 24 ++#define RFC1533_IPMTUPLATEAU 25 ++#define RFC1533_INTMTU 26 ++#define RFC1533_INTLOCALSUBNETS 27 ++#define RFC1533_INTBROADCAST 28 ++#define RFC1533_INTICMPDISCOVER 29 ++#define RFC1533_INTICMPRESPOND 30 ++#define RFC1533_INTROUTEDISCOVER 31 ++#define RFC1533_INTROUTESOLICIT 32 ++#define RFC1533_INTSTATICROUTES 33 ++#define RFC1533_LLTRAILERENCAP 34 ++#define RFC1533_LLARPCACHETMO 35 ++#define RFC1533_LLETHERNETENCAP 36 ++#define RFC1533_TCPTTL 37 ++#define RFC1533_TCPKEEPALIVETMO 38 ++#define RFC1533_TCPKEEPALIVEGB 39 ++#define RFC1533_NISDOMAIN 40 ++#define RFC1533_NISSERVER 41 ++#define RFC1533_NTPSERVER 42 ++#define RFC1533_VENDOR 43 ++#define RFC1533_NBNS 44 ++#define RFC1533_NBDD 45 ++#define RFC1533_NBNT 46 ++#define RFC1533_NBSCOPE 47 ++#define RFC1533_XFS 48 ++#define RFC1533_XDM 49 ++ ++#define RFC2132_REQ_ADDR 50 ++#define RFC2132_LEASE_TIME 51 ++#define RFC2132_MSG_TYPE 53 ++#define RFC2132_SRV_ID 54 ++#define RFC2132_PARAM_LIST 55 ++#define RFC2132_MESSAGE 56 ++#define RFC2132_MAX_SIZE 57 ++#define RFC2132_RENEWAL_TIME 58 ++#define RFC2132_REBIND_TIME 59 ++#define RFC2132_TFTP_SERVER_NAME 66 ++ ++#define DHCPDISCOVER 1 ++#define DHCPOFFER 2 ++#define DHCPREQUEST 3 ++#define DHCPACK 5 ++#define DHCPNAK 6 ++ ++#define RFC1533_VENDOR_MAJOR 0 ++#define RFC1533_VENDOR_MINOR 0 ++ ++#define RFC1533_VENDOR_MAGIC 128 ++#define RFC1533_VENDOR_ADDPARM 129 ++#define RFC1533_VENDOR_ETHDEV 130 ++#define RFC1533_VENDOR_HOWTO 132 ++#define RFC1533_VENDOR_MNUOPTS 160 ++#define RFC1533_VENDOR_SELECTION 176 ++#define RFC1533_VENDOR_MOTD 184 ++#define RFC1533_VENDOR_NUMOFMOTD 8 ++#define RFC1533_VENDOR_IMG 192 ++#define RFC1533_VENDOR_NUMOFIMG 16 ++ ++#define RFC1533_END 255 ++#define BOOTP_VENDOR_LEN 64 ++#define DHCP_OPT_LEN 312 ++ ++struct bootp_t { ++ struct ip ip; ++ struct udphdr udp; ++ uint8_t bp_op; ++ uint8_t 
bp_htype; ++ uint8_t bp_hlen; ++ uint8_t bp_hops; ++ uint32_t bp_xid; ++ uint16_t bp_secs; ++ uint16_t unused; ++ struct in_addr bp_ciaddr; ++ struct in_addr bp_yiaddr; ++ struct in_addr bp_siaddr; ++ struct in_addr bp_giaddr; ++ uint8_t bp_hwaddr[16]; ++ uint8_t bp_sname[64]; ++ char bp_file[128]; ++ uint8_t bp_vend[]; ++}; ++ ++typedef struct { ++ uint16_t allocated; ++ uint8_t macaddr[6]; ++} BOOTPClient; ++ ++#define NB_BOOTP_CLIENTS 16 ++ ++void bootp_input(struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/cksum.c b/slirp/src/cksum.c +new file mode 100644 +index 0000000000..b1cb97b7e1 +--- /dev/null ++++ b/slirp/src/cksum.c +@@ -0,0 +1,179 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1988, 1992, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 ++ * in_cksum.c,v 1.2 1994/08/02 07:48:16 davidg Exp ++ */ ++ ++#include "slirp.h" ++ ++/* ++ * Checksum routine for Internet Protocol family headers (Portable Version). ++ * ++ * This routine is very heavily used in the network ++ * code and should be modified for each CPU to be as fast as possible. ++ * ++ * XXX Since we will never span more than 1 mbuf, we can optimise this ++ */ ++ ++#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) ++#define REDUCE \ ++ { \ ++ l_util.l = sum; \ ++ sum = l_util.s[0] + l_util.s[1]; \ ++ ADDCARRY(sum); \ ++ } ++ ++int cksum(struct mbuf *m, int len) ++{ ++ register uint16_t *w; ++ register int sum = 0; ++ register int mlen = 0; ++ int byte_swapped = 0; ++ ++ union { ++ uint8_t c[2]; ++ uint16_t s; ++ } s_util; ++ union { ++ uint16_t s[2]; ++ uint32_t l; ++ } l_util; ++ ++ if (m->m_len == 0) ++ goto cont; ++ w = mtod(m, uint16_t *); ++ ++ mlen = m->m_len; ++ ++ if (len < mlen) ++ mlen = len; ++ len -= mlen; ++ /* ++ * Force to even boundary. ++ */ ++ if ((1 & (uintptr_t)w) && (mlen > 0)) { ++ REDUCE; ++ sum <<= 8; ++ s_util.c[0] = *(uint8_t *)w; ++ w = (uint16_t *)((int8_t *)w + 1); ++ mlen--; ++ byte_swapped = 1; ++ } ++ /* ++ * Unroll the loop to make overhead from ++ * branches &c small. 
++ */ ++ while ((mlen -= 32) >= 0) { ++ sum += w[0]; ++ sum += w[1]; ++ sum += w[2]; ++ sum += w[3]; ++ sum += w[4]; ++ sum += w[5]; ++ sum += w[6]; ++ sum += w[7]; ++ sum += w[8]; ++ sum += w[9]; ++ sum += w[10]; ++ sum += w[11]; ++ sum += w[12]; ++ sum += w[13]; ++ sum += w[14]; ++ sum += w[15]; ++ w += 16; ++ } ++ mlen += 32; ++ while ((mlen -= 8) >= 0) { ++ sum += w[0]; ++ sum += w[1]; ++ sum += w[2]; ++ sum += w[3]; ++ w += 4; ++ } ++ mlen += 8; ++ if (mlen == 0 && byte_swapped == 0) ++ goto cont; ++ REDUCE; ++ while ((mlen -= 2) >= 0) { ++ sum += *w++; ++ } ++ ++ if (byte_swapped) { ++ REDUCE; ++ sum <<= 8; ++ if (mlen == -1) { ++ s_util.c[1] = *(uint8_t *)w; ++ sum += s_util.s; ++ mlen = 0; ++ } else ++ ++ mlen = -1; ++ } else if (mlen == -1) ++ s_util.c[0] = *(uint8_t *)w; ++ ++cont: ++ if (len) { ++ DEBUG_ERROR("cksum: out of data"); ++ DEBUG_ERROR(" len = %d", len); ++ } ++ if (mlen == -1) { ++ /* The last mbuf has odd # of bytes. Follow the ++ standard (the odd byte may be shifted left by 8 bits ++ or not as determined by endian-ness of the machine) */ ++ s_util.c[1] = 0; ++ sum += s_util.s; ++ } ++ REDUCE; ++ return (~sum & 0xffff); ++} ++ ++int ip6_cksum(struct mbuf *m) ++{ ++ /* TODO: Optimize this by being able to pass the ip6_pseudohdr to cksum ++ * separately from the mbuf */ ++ struct ip6 save_ip, *ip = mtod(m, struct ip6 *); ++ struct ip6_pseudohdr *ih = mtod(m, struct ip6_pseudohdr *); ++ int sum; ++ ++ save_ip = *ip; ++ ++ ih->ih_src = save_ip.ip_src; ++ ih->ih_dst = save_ip.ip_dst; ++ ih->ih_pl = htonl((uint32_t)ntohs(save_ip.ip_pl)); ++ ih->ih_zero_hi = 0; ++ ih->ih_zero_lo = 0; ++ ih->ih_nh = save_ip.ip_nh; ++ ++ sum = cksum(m, ((int)sizeof(struct ip6_pseudohdr)) + ntohl(ih->ih_pl)); ++ ++ *ip = save_ip; ++ ++ return sum; ++} +diff --git a/slirp/src/debug.h b/slirp/src/debug.h +new file mode 100644 +index 0000000000..0f9f3eff3f +--- /dev/null ++++ b/slirp/src/debug.h +@@ -0,0 +1,59 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * 
Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef DEBUG_H_ ++#define DEBUG_H_ ++ ++#define DBG_CALL (1 << 0) ++#define DBG_MISC (1 << 1) ++#define DBG_ERROR (1 << 2) ++#define DBG_TFTP (1 << 3) ++#define DBG_VERBOSE_CALL (1 << 4) ++ ++extern int slirp_debug; ++ ++#define DEBUG_CALL(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ ++ g_debug(fmt "...", ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_VERBOSE_CALL(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_VERBOSE_CALL)) { \ ++ g_debug(fmt "...", ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_ARG(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ ++ g_debug(" " fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_MISC(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_MISC)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_ERROR(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_ERROR)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_TFTP(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_TFTP)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#endif /* DEBUG_H_ */ +diff --git a/slirp/src/dhcpv6.c b/slirp/src/dhcpv6.c +new file mode 100644 +index 0000000000..77b451b910 +--- /dev/null ++++ b/slirp/src/dhcpv6.c +@@ -0,0 +1,224 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * SLIRP stateless DHCPv6 ++ * ++ * We only support stateless DHCPv6, e.g. for network booting. ++ * See RFC 3315, RFC 3736, RFC 3646 and RFC 5970 for details. ++ * ++ * Copyright 2016 Thomas Huth, Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. 
Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#include "slirp.h" ++#include "dhcpv6.h" ++ ++/* DHCPv6 message types */ ++#define MSGTYPE_REPLY 7 ++#define MSGTYPE_INFO_REQUEST 11 ++ ++/* DHCPv6 option types */ ++#define OPTION_CLIENTID 1 ++#define OPTION_IAADDR 5 ++#define OPTION_ORO 6 ++#define OPTION_DNS_SERVERS 23 ++#define OPTION_BOOTFILE_URL 59 ++ ++struct requested_infos { ++ uint8_t *client_id; ++ int client_id_len; ++ bool want_dns; ++ bool want_boot_url; ++}; ++ ++/** ++ * Analyze the info request message sent by the client to see what data it ++ * provided and what it wants to have. The information is gathered in the ++ * "requested_infos" struct. 
Note that client_id (if provided) points into ++ * the odata region, thus the caller must keep odata valid as long as it ++ * needs to access the requested_infos struct. ++ */ ++static int dhcpv6_parse_info_request(Slirp *slirp, uint8_t *odata, int olen, ++ struct requested_infos *ri) ++{ ++ int i, req_opt; ++ ++ while (olen > 4) { ++ /* Parse one option */ ++ int option = odata[0] << 8 | odata[1]; ++ int len = odata[2] << 8 | odata[3]; ++ ++ if (len + 4 > olen) { ++ slirp->cb->guest_error("Guest sent bad DHCPv6 packet!", ++ slirp->opaque); ++ return -E2BIG; ++ } ++ ++ switch (option) { ++ case OPTION_IAADDR: ++ /* According to RFC3315, we must discard requests with IA option */ ++ return -EINVAL; ++ case OPTION_CLIENTID: ++ if (len > 256) { ++ /* Avoid very long IDs which could cause problems later */ ++ return -E2BIG; ++ } ++ ri->client_id = odata + 4; ++ ri->client_id_len = len; ++ break; ++ case OPTION_ORO: /* Option request option */ ++ if (len & 1) { ++ return -EINVAL; ++ } ++ /* Check which options the client wants to have */ ++ for (i = 0; i < len; i += 2) { ++ req_opt = odata[4 + i] << 8 | odata[4 + i + 1]; ++ switch (req_opt) { ++ case OPTION_DNS_SERVERS: ++ ri->want_dns = true; ++ break; ++ case OPTION_BOOTFILE_URL: ++ ri->want_boot_url = true; ++ break; ++ default: ++ DEBUG_MISC("dhcpv6: Unsupported option request %d", ++ req_opt); ++ } ++ } ++ break; ++ default: ++ DEBUG_MISC("dhcpv6 info req: Unsupported option %d, len=%d", option, ++ len); ++ } ++ ++ odata += len + 4; ++ olen -= len + 4; ++ } ++ ++ return 0; ++} ++ ++ ++/** ++ * Handle information request messages ++ */ ++static void dhcpv6_info_request(Slirp *slirp, struct sockaddr_in6 *srcsas, ++ uint32_t xid, uint8_t *odata, int olen) ++{ ++ struct requested_infos ri = { NULL }; ++ struct sockaddr_in6 sa6, da6; ++ struct mbuf *m; ++ uint8_t *resp; ++ ++ if (dhcpv6_parse_info_request(slirp, odata, olen, &ri) < 0) { ++ return; ++ } ++ ++ m = m_get(slirp); ++ if (!m) { ++ return; ++ } ++ 
memset(m->m_data, 0, m->m_size); ++ m->m_data += IF_MAXLINKHDR; ++ resp = (uint8_t *)m->m_data + sizeof(struct ip6) + sizeof(struct udphdr); ++ ++ /* Fill in response */ ++ *resp++ = MSGTYPE_REPLY; ++ *resp++ = (uint8_t)(xid >> 16); ++ *resp++ = (uint8_t)(xid >> 8); ++ *resp++ = (uint8_t)xid; ++ ++ if (ri.client_id) { ++ *resp++ = OPTION_CLIENTID >> 8; /* option-code high byte */ ++ *resp++ = OPTION_CLIENTID; /* option-code low byte */ ++ *resp++ = ri.client_id_len >> 8; /* option-len high byte */ ++ *resp++ = ri.client_id_len; /* option-len low byte */ ++ memcpy(resp, ri.client_id, ri.client_id_len); ++ resp += ri.client_id_len; ++ } ++ if (ri.want_dns) { ++ *resp++ = OPTION_DNS_SERVERS >> 8; /* option-code high byte */ ++ *resp++ = OPTION_DNS_SERVERS; /* option-code low byte */ ++ *resp++ = 0; /* option-len high byte */ ++ *resp++ = 16; /* option-len low byte */ ++ memcpy(resp, &slirp->vnameserver_addr6, 16); ++ resp += 16; ++ } ++ if (ri.want_boot_url) { ++ uint8_t *sa = slirp->vhost_addr6.s6_addr; ++ int slen, smaxlen; ++ ++ *resp++ = OPTION_BOOTFILE_URL >> 8; /* option-code high byte */ ++ *resp++ = OPTION_BOOTFILE_URL; /* option-code low byte */ ++ smaxlen = (uint8_t *)m->m_data + slirp->if_mtu - (resp + 2); ++ slen = slirp_fmt((char *)resp + 2, smaxlen, ++ "tftp://[%02x%02x:%02x%02x:%02x%02x:%02x%02x:" ++ "%02x%02x:%02x%02x:%02x%02x:%02x%02x]/%s", ++ sa[0], sa[1], sa[2], sa[3], sa[4], sa[5], sa[6], sa[7], ++ sa[8], sa[9], sa[10], sa[11], sa[12], sa[13], sa[14], ++ sa[15], slirp->bootp_filename); ++ *resp++ = slen >> 8; /* option-len high byte */ ++ *resp++ = slen; /* option-len low byte */ ++ resp += slen; ++ } ++ ++ sa6.sin6_addr = slirp->vhost_addr6; ++ sa6.sin6_port = DHCPV6_SERVER_PORT; ++ da6.sin6_addr = srcsas->sin6_addr; ++ da6.sin6_port = srcsas->sin6_port; ++ m->m_data += sizeof(struct ip6) + sizeof(struct udphdr); ++ m->m_len = resp - (uint8_t *)m->m_data; ++ udp6_output(NULL, m, &sa6, &da6); ++} ++ ++/** ++ * Handle DHCPv6 messages sent by the 
client ++ */ ++void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m) ++{ ++ uint8_t *data = (uint8_t *)m->m_data + sizeof(struct udphdr); ++ int data_len = m->m_len - sizeof(struct udphdr); ++ uint32_t xid; ++ ++ if (data_len < 4) { ++ return; ++ } ++ ++ xid = ntohl(*(uint32_t *)data) & 0xffffff; ++ ++ switch (data[0]) { ++ case MSGTYPE_INFO_REQUEST: ++ dhcpv6_info_request(m->slirp, srcsas, xid, &data[4], data_len - 4); ++ break; ++ default: ++ DEBUG_MISC("dhcpv6_input: Unsupported message type 0x%x", data[0]); ++ } ++} +diff --git a/slirp/src/dhcpv6.h b/slirp/src/dhcpv6.h +new file mode 100644 +index 0000000000..d12c49b36c +--- /dev/null ++++ b/slirp/src/dhcpv6.h +@@ -0,0 +1,68 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Definitions and prototypes for SLIRP stateless DHCPv6 ++ * ++ * Copyright 2016 Thomas Huth, Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#ifndef SLIRP_DHCPV6_H ++#define SLIRP_DHCPV6_H ++ ++#define DHCPV6_SERVER_PORT 547 ++ ++#define ALLDHCP_MULTICAST \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01, \ ++ 0x00, \ ++ 0x02 \ ++ } \ ++ } ++ ++#define in6_dhcp_multicast(a) in6_equal(a, &(struct in6_addr)ALLDHCP_MULTICAST) ++ ++void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/dnssearch.c b/slirp/src/dnssearch.c +new file mode 100644 +index 0000000000..55497e860e +--- /dev/null ++++ b/slirp/src/dnssearch.c +@@ -0,0 +1,306 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Domain search option for DHCP (RFC 3397) ++ * ++ * Copyright (c) 2012 Klaus Stengel ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "slirp.h" ++ ++static const uint8_t RFC3397_OPT_DOMAIN_SEARCH = 119; ++static const uint8_t MAX_OPT_LEN = 255; ++static const uint8_t OPT_HEADER_LEN = 2; ++static const uint8_t REFERENCE_LEN = 2; ++ ++struct compact_domain; ++ ++typedef struct compact_domain { ++ struct compact_domain *self; ++ struct compact_domain *refdom; ++ uint8_t *labels; ++ size_t len; ++ size_t common_octets; ++} CompactDomain; ++ ++static size_t domain_suffix_diffoff(const CompactDomain *a, ++ const CompactDomain *b) ++{ ++ size_t la = a->len, lb = b->len; ++ uint8_t *da = a->labels + la, *db = b->labels + lb; ++ size_t i, lm = (la < lb) ? 
la : lb; ++ ++ for (i = 0; i < lm; i++) { ++ da--; ++ db--; ++ if (*da != *db) { ++ break; ++ } ++ } ++ return i; ++} ++ ++static int domain_suffix_ord(const void *cva, const void *cvb) ++{ ++ const CompactDomain *a = cva, *b = cvb; ++ size_t la = a->len, lb = b->len; ++ size_t doff = domain_suffix_diffoff(a, b); ++ uint8_t ca = a->labels[la - doff]; ++ uint8_t cb = b->labels[lb - doff]; ++ ++ if (ca < cb) { ++ return -1; ++ } ++ if (ca > cb) { ++ return 1; ++ } ++ if (la < lb) { ++ return -1; ++ } ++ if (la > lb) { ++ return 1; ++ } ++ return 0; ++} ++ ++static size_t domain_common_label(CompactDomain *a, CompactDomain *b) ++{ ++ size_t res, doff = domain_suffix_diffoff(a, b); ++ uint8_t *first_eq_pos = a->labels + (a->len - doff); ++ uint8_t *label = a->labels; ++ ++ while (*label && label < first_eq_pos) { ++ label += *label + 1; ++ } ++ res = a->len - (label - a->labels); ++ /* only report if it can help to reduce the packet size */ ++ return (res > REFERENCE_LEN) ? res : 0; ++} ++ ++static void domain_fixup_order(CompactDomain *cd, size_t n) ++{ ++ size_t i; ++ ++ for (i = 0; i < n; i++) { ++ CompactDomain *cur = cd + i, *next = cd[i].self; ++ ++ while (!cur->common_octets) { ++ CompactDomain *tmp = next->self; /* backup target value */ ++ ++ next->self = cur; ++ cur->common_octets++; ++ ++ cur = next; ++ next = tmp; ++ } ++ } ++} ++ ++static void domain_mklabels(CompactDomain *cd, const char *input) ++{ ++ uint8_t *len_marker = cd->labels; ++ uint8_t *output = len_marker; /* pre-incremented */ ++ const char *in = input; ++ char cur_chr; ++ size_t len = 0; ++ ++ if (cd->len == 0) { ++ goto fail; ++ } ++ cd->len++; ++ ++ do { ++ cur_chr = *in++; ++ if (cur_chr == '.' 
|| cur_chr == '\0') { ++ len = output - len_marker; ++ if ((len == 0 && cur_chr == '.') || len >= 64) { ++ goto fail; ++ } ++ *len_marker = len; ++ ++ output++; ++ len_marker = output; ++ } else { ++ output++; ++ *output = cur_chr; ++ } ++ } while (cur_chr != '\0'); ++ ++ /* ensure proper zero-termination */ ++ if (len != 0) { ++ *len_marker = 0; ++ cd->len++; ++ } ++ return; ++ ++fail: ++ g_warning("failed to parse domain name '%s'\n", input); ++ cd->len = 0; ++} ++ ++static void domain_mkxrefs(CompactDomain *doms, CompactDomain *last, ++ size_t depth) ++{ ++ CompactDomain *i = doms, *target = doms; ++ ++ do { ++ if (i->labels < target->labels) { ++ target = i; ++ } ++ } while (i++ != last); ++ ++ for (i = doms; i != last; i++) { ++ CompactDomain *group_last; ++ size_t next_depth; ++ ++ if (i->common_octets == depth) { ++ continue; ++ } ++ ++ next_depth = -1; ++ for (group_last = i; group_last != last; group_last++) { ++ size_t co = group_last->common_octets; ++ if (co <= depth) { ++ break; ++ } ++ if (co < next_depth) { ++ next_depth = co; ++ } ++ } ++ domain_mkxrefs(i, group_last, next_depth); ++ ++ i = group_last; ++ if (i == last) { ++ break; ++ } ++ } ++ ++ if (depth == 0) { ++ return; ++ } ++ ++ i = doms; ++ do { ++ if (i != target && i->refdom == NULL) { ++ i->refdom = target; ++ i->common_octets = depth; ++ } ++ } while (i++ != last); ++} ++ ++static size_t domain_compactify(CompactDomain *domains, size_t n) ++{ ++ uint8_t *start = domains->self->labels, *outptr = start; ++ size_t i; ++ ++ for (i = 0; i < n; i++) { ++ CompactDomain *cd = domains[i].self; ++ CompactDomain *rd = cd->refdom; ++ ++ if (rd != NULL) { ++ size_t moff = (rd->labels - start) + (rd->len - cd->common_octets); ++ if (moff < 0x3FFFu) { ++ cd->len -= cd->common_octets - 2; ++ cd->labels[cd->len - 1] = moff & 0xFFu; ++ cd->labels[cd->len - 2] = 0xC0u | (moff >> 8); ++ } ++ } ++ ++ if (cd->labels != outptr) { ++ memmove(outptr, cd->labels, cd->len); ++ cd->labels = outptr; ++ } ++ outptr 
+= cd->len; ++ } ++ return outptr - start; ++} ++ ++int translate_dnssearch(Slirp *s, const char **names) ++{ ++ size_t blocks, bsrc_start, bsrc_end, bdst_start; ++ size_t i, num_domains, memreq = 0; ++ uint8_t *result = NULL, *outptr; ++ CompactDomain *domains = NULL; ++ ++ num_domains = g_strv_length((GStrv)(void *)names); ++ if (num_domains == 0) { ++ return -2; ++ } ++ ++ domains = g_malloc(num_domains * sizeof(*domains)); ++ ++ for (i = 0; i < num_domains; i++) { ++ size_t nlen = strlen(names[i]); ++ memreq += nlen + 2; /* 1 zero octet + 1 label length octet */ ++ domains[i].self = domains + i; ++ domains[i].len = nlen; ++ domains[i].common_octets = 0; ++ domains[i].refdom = NULL; ++ } ++ ++ /* reserve extra 2 header bytes for each 255 bytes of output */ ++ memreq += DIV_ROUND_UP(memreq, MAX_OPT_LEN) * OPT_HEADER_LEN; ++ result = g_malloc(memreq * sizeof(*result)); ++ ++ outptr = result; ++ for (i = 0; i < num_domains; i++) { ++ domains[i].labels = outptr; ++ domain_mklabels(domains + i, names[i]); ++ outptr += domains[i].len; ++ } ++ ++ if (outptr == result) { ++ g_free(domains); ++ g_free(result); ++ return -1; ++ } ++ ++ qsort(domains, num_domains, sizeof(*domains), domain_suffix_ord); ++ domain_fixup_order(domains, num_domains); ++ ++ for (i = 1; i < num_domains; i++) { ++ size_t cl = domain_common_label(domains + i - 1, domains + i); ++ domains[i - 1].common_octets = cl; ++ } ++ ++ domain_mkxrefs(domains, domains + num_domains - 1, 0); ++ memreq = domain_compactify(domains, num_domains); ++ ++ blocks = DIV_ROUND_UP(memreq, MAX_OPT_LEN); ++ bsrc_end = memreq; ++ bsrc_start = (blocks - 1) * MAX_OPT_LEN; ++ bdst_start = bsrc_start + blocks * OPT_HEADER_LEN; ++ memreq += blocks * OPT_HEADER_LEN; ++ ++ while (blocks--) { ++ size_t len = bsrc_end - bsrc_start; ++ memmove(result + bdst_start, result + bsrc_start, len); ++ result[bdst_start - 2] = RFC3397_OPT_DOMAIN_SEARCH; ++ result[bdst_start - 1] = len; ++ bsrc_end = bsrc_start; ++ bsrc_start -= MAX_OPT_LEN; 
++ bdst_start -= MAX_OPT_LEN + OPT_HEADER_LEN; ++ } ++ ++ g_free(domains); ++ s->vdnssearch = result; ++ s->vdnssearch_len = memreq; ++ return 0; ++} +diff --git a/slirp/src/if.c b/slirp/src/if.c +new file mode 100644 +index 0000000000..9a1eec97b8 +--- /dev/null ++++ b/slirp/src/if.c +@@ -0,0 +1,215 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++static void ifs_insque(struct mbuf *ifm, struct mbuf *ifmhead) ++{ ++ ifm->ifs_next = ifmhead->ifs_next; ++ ifmhead->ifs_next = ifm; ++ ifm->ifs_prev = ifmhead; ++ ifm->ifs_next->ifs_prev = ifm; ++} ++ ++static void ifs_remque(struct mbuf *ifm) ++{ ++ ifm->ifs_prev->ifs_next = ifm->ifs_next; ++ ifm->ifs_next->ifs_prev = ifm->ifs_prev; ++} ++ ++void if_init(Slirp *slirp) ++{ ++ slirp->if_fastq.qh_link = slirp->if_fastq.qh_rlink = &slirp->if_fastq; ++ slirp->if_batchq.qh_link = slirp->if_batchq.qh_rlink = &slirp->if_batchq; ++} ++ ++/* ++ * if_output: Queue packet into an output queue. ++ * There are 2 output queue's, if_fastq and if_batchq. ++ * Each output queue is a doubly linked list of double linked lists ++ * of mbufs, each list belonging to one "session" (socket). This ++ * way, we can output packets fairly by sending one packet from each ++ * session, instead of all the packets from one session, then all packets ++ * from the next session, etc. Packets on the if_fastq get absolute ++ * priority, but if one session hogs the link, it gets "downgraded" ++ * to the batchq until it runs out of packets, then it'll return ++ * to the fastq (eg. 
if the user does an ls -alR in a telnet session, ++ * it'll temporarily get downgraded to the batchq) ++ */ ++void if_output(struct socket *so, struct mbuf *ifm) ++{ ++ Slirp *slirp = ifm->slirp; ++ M_DUP_DEBUG(slirp, ifm, 0, 0); ++ ++ struct mbuf *ifq; ++ int on_fastq = 1; ++ ++ DEBUG_CALL("if_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("ifm = %p", ifm); ++ ++ /* ++ * First remove the mbuf from m_usedlist, ++ * since we're gonna use m_next and m_prev ourselves ++ * XXX Shouldn't need this, gotta change dtom() etc. ++ */ ++ if (ifm->m_flags & M_USEDLIST) { ++ remque(ifm); ++ ifm->m_flags &= ~M_USEDLIST; ++ } ++ ++ /* ++ * See if there's already a batchq list for this session. ++ * This can include an interactive session, which should go on fastq, ++ * but gets too greedy... hence it'll be downgraded from fastq to batchq. ++ * We mustn't put this packet back on the fastq (or we'll send it out of ++ * order) ++ * XXX add cache here? ++ */ ++ if (so) { ++ for (ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; ++ (struct quehead *)ifq != &slirp->if_batchq; ifq = ifq->ifq_prev) { ++ if (so == ifq->ifq_so) { ++ /* A match! */ ++ ifm->ifq_so = so; ++ ifs_insque(ifm, ifq->ifs_prev); ++ goto diddit; ++ } ++ } ++ } ++ ++ /* No match, check which queue to put it on */ ++ if (so && (so->so_iptos & IPTOS_LOWDELAY)) { ++ ifq = (struct mbuf *)slirp->if_fastq.qh_rlink; ++ on_fastq = 1; ++ /* ++ * Check if this packet is a part of the last ++ * packet's session ++ */ ++ if (ifq->ifq_so == so) { ++ ifm->ifq_so = so; ++ ifs_insque(ifm, ifq->ifs_prev); ++ goto diddit; ++ } ++ } else { ++ ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; ++ } ++ ++ /* Create a new doubly linked list for this session */ ++ ifm->ifq_so = so; ++ ifs_init(ifm); ++ insque(ifm, ifq); ++ ++diddit: ++ if (so) { ++ /* Update *_queued */ ++ so->so_queued++; ++ so->so_nqueued++; ++ /* ++ * Check if the interactive session should be downgraded to ++ * the batchq. 
A session is downgraded if it has queued 6 ++ * packets without pausing, and at least 3 of those packets ++ * have been sent over the link ++ * (XXX These are arbitrary numbers, probably not optimal..) ++ */ ++ if (on_fastq && ++ ((so->so_nqueued >= 6) && (so->so_nqueued - so->so_queued) >= 3)) { ++ /* Remove from current queue... */ ++ remque(ifm->ifs_next); ++ ++ /* ...And insert in the new. That'll teach ya! */ ++ insque(ifm->ifs_next, &slirp->if_batchq); ++ } ++ } ++ ++ /* ++ * This prevents us from malloc()ing too many mbufs ++ */ ++ if_start(ifm->slirp); ++} ++ ++/* ++ * Send one packet from each session. ++ * If there are packets on the fastq, they are sent FIFO, before ++ * everything else. Then we choose the first packet from each ++ * batchq session (socket) and send it. ++ * For example, if there are 3 ftp sessions fighting for bandwidth, ++ * one packet will be sent from the first session, then one packet ++ * from the second session, then one packet from the third. ++ */ ++void if_start(Slirp *slirp) ++{ ++ uint64_t now = slirp->cb->clock_get_ns(slirp->opaque); ++ bool from_batchq = false; ++ struct mbuf *ifm, *ifm_next, *ifqt; ++ ++ DEBUG_VERBOSE_CALL("if_start"); ++ ++ if (slirp->if_start_busy) { ++ return; ++ } ++ slirp->if_start_busy = true; ++ ++ struct mbuf *batch_head = NULL; ++ if (slirp->if_batchq.qh_link != &slirp->if_batchq) { ++ batch_head = (struct mbuf *)slirp->if_batchq.qh_link; ++ } ++ ++ if (slirp->if_fastq.qh_link != &slirp->if_fastq) { ++ ifm_next = (struct mbuf *)slirp->if_fastq.qh_link; ++ } else if (batch_head) { ++ /* Nothing on fastq, pick up from batchq */ ++ ifm_next = batch_head; ++ from_batchq = true; ++ } else { ++ ifm_next = NULL; ++ } ++ ++ while (ifm_next) { ++ ifm = ifm_next; ++ ++ ifm_next = ifm->ifq_next; ++ if ((struct quehead *)ifm_next == &slirp->if_fastq) { ++ /* No more packets in fastq, switch to batchq */ ++ ifm_next = batch_head; ++ from_batchq = true; ++ } ++ if ((struct quehead *)ifm_next == 
&slirp->if_batchq) { ++ /* end of batchq */ ++ ifm_next = NULL; ++ } ++ ++ /* Try to send packet unless it already expired */ ++ if (ifm->expiration_date >= now && !if_encap(slirp, ifm)) { ++ /* Packet is delayed due to pending ARP or NDP resolution */ ++ continue; ++ } ++ ++ /* Remove it from the queue */ ++ ifqt = ifm->ifq_prev; ++ remque(ifm); ++ ++ /* If there are more packets for this session, re-queue them */ ++ if (ifm->ifs_next != ifm) { ++ struct mbuf *next = ifm->ifs_next; ++ ++ insque(next, ifqt); ++ ifs_remque(ifm); ++ if (!from_batchq) { ++ ifm_next = next; ++ } ++ } ++ ++ /* Update so_queued */ ++ if (ifm->ifq_so && --ifm->ifq_so->so_queued == 0) { ++ /* If there's no more queued, reset nqueued */ ++ ifm->ifq_so->so_nqueued = 0; ++ } ++ ++ m_free(ifm); ++ } ++ ++ slirp->if_start_busy = false; ++} +diff --git a/slirp/src/if.h b/slirp/src/if.h +new file mode 100644 +index 0000000000..7cf9d2750e +--- /dev/null ++++ b/slirp/src/if.h +@@ -0,0 +1,25 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef IF_H ++#define IF_H ++ ++#define IF_COMPRESS 0x01 /* We want compression */ ++#define IF_NOCOMPRESS 0x02 /* Do not do compression */ ++#define IF_AUTOCOMP 0x04 /* Autodetect (default) */ ++#define IF_NOCIDCOMP 0x08 /* CID compression */ ++ ++#define IF_MTU_DEFAULT 1500 ++#define IF_MTU_MIN 68 ++#define IF_MTU_MAX 65521 ++#define IF_MRU_DEFAULT 1500 ++#define IF_MRU_MIN 68 ++#define IF_MRU_MAX 65521 ++#define IF_COMP IF_AUTOCOMP /* Flags for compression */ ++ ++/* 2 for alignment, 14 for ethernet */ ++#define IF_MAXLINKHDR (2 + ETH_HLEN) ++ ++#endif +diff --git a/slirp/src/ip.h b/slirp/src/ip.h +new file mode 100644 +index 0000000000..e5d4aa8a6d +--- /dev/null ++++ b/slirp/src/ip.h +@@ -0,0 +1,242 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. 
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. 
++ * ++ * @(#)ip.h 8.1 (Berkeley) 6/10/93 ++ * ip.h,v 1.3 1994/08/21 05:27:30 paul Exp ++ */ ++ ++#ifndef IP_H ++#define IP_H ++ ++#include <glib.h> ++ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++#undef NTOHL ++#undef NTOHS ++#undef HTONL ++#undef HTONS ++#define NTOHL(d) ++#define NTOHS(d) ++#define HTONL(d) ++#define HTONS(d) ++#else ++#ifndef NTOHL ++#define NTOHL(d) ((d) = ntohl((d))) ++#endif ++#ifndef NTOHS ++#define NTOHS(d) ((d) = ntohs((uint16_t)(d))) ++#endif ++#ifndef HTONL ++#define HTONL(d) ((d) = htonl((d))) ++#endif ++#ifndef HTONS ++#define HTONS(d) ((d) = htons((uint16_t)(d))) ++#endif ++#endif ++ ++typedef uint32_t n_long; /* long as received from the net */ ++ ++/* ++ * Definitions for internet protocol version 4. ++ * Per RFC 791, September 1981. ++ */ ++#define IPVERSION 4 ++ ++/* ++ * Structure of an internet header, naked of options. ++ */ ++struct ip { ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t ip_v : 4, /* version */ ++ ip_hl : 4; /* header length */ ++#else ++ uint8_t ip_hl : 4, /* header length */ ++ ip_v : 4; /* version */ ++#endif ++ uint8_t ip_tos; /* type of service */ ++ uint16_t ip_len; /* total length */ ++ uint16_t ip_id; /* identification */ ++ uint16_t ip_off; /* fragment offset field */ ++#define IP_DF 0x4000 /* don't fragment flag */ ++#define IP_MF 0x2000 /* more fragments flag */ ++#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ ++ uint8_t ip_ttl; /* time to live */ ++ uint8_t ip_p; /* protocol */ ++ uint16_t ip_sum; /* checksum */ ++ struct in_addr ip_src, ip_dst; /* source and dest address */ ++} SLIRP_PACKED; ++ ++#define IP_MAXPACKET 65535 /* maximum packet size */ ++ ++/* ++ * Definitions for IP type of service (ip_tos) ++ */ ++#define IPTOS_LOWDELAY 0x10 ++#define IPTOS_THROUGHPUT 0x08 ++#define IPTOS_RELIABILITY 0x04 ++ ++/* ++ * Definitions for options.
++ */ ++#define IPOPT_COPIED(o) ((o)&0x80) ++#define IPOPT_CLASS(o) ((o)&0x60) ++#define IPOPT_NUMBER(o) ((o)&0x1f) ++ ++#define IPOPT_CONTROL 0x00 ++#define IPOPT_RESERVED1 0x20 ++#define IPOPT_DEBMEAS 0x40 ++#define IPOPT_RESERVED2 0x60 ++ ++#define IPOPT_EOL 0 /* end of option list */ ++#define IPOPT_NOP 1 /* no operation */ ++ ++#define IPOPT_RR 7 /* record packet route */ ++#define IPOPT_TS 68 /* timestamp */ ++#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */ ++#define IPOPT_LSRR 131 /* loose source route */ ++#define IPOPT_SATID 136 /* satnet id */ ++#define IPOPT_SSRR 137 /* strict source route */ ++ ++/* ++ * Offsets to fields in options other than EOL and NOP. ++ */ ++#define IPOPT_OPTVAL 0 /* option ID */ ++#define IPOPT_OLEN 1 /* option length */ ++#define IPOPT_OFFSET 2 /* offset within option */ ++#define IPOPT_MINOFF 4 /* min value of above */ ++ ++/* ++ * Time stamp option structure. ++ */ ++struct ip_timestamp { ++ uint8_t ipt_code; /* IPOPT_TS */ ++ uint8_t ipt_len; /* size of structure (variable) */ ++ uint8_t ipt_ptr; /* index of current entry */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t ipt_oflw : 4, /* overflow counter */ ++ ipt_flg : 4; /* flags, see below */ ++#else ++ uint8_t ipt_flg : 4, /* flags, see below */ ++ ipt_oflw : 4; /* overflow counter */ ++#endif ++ union ipt_timestamp { ++ n_long ipt_time[1]; ++ struct ipt_ta { ++ struct in_addr ipt_addr; ++ n_long ipt_time; ++ } ipt_ta[1]; ++ } ipt_timestamp; ++} SLIRP_PACKED; ++ ++/* flag bits for ipt_flg */ ++#define IPOPT_TS_TSONLY 0 /* timestamps only */ ++#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ ++#define IPOPT_TS_PRESPEC 3 /* specified modules only */ ++ ++/* bits for security (not byte swapped) */ ++#define IPOPT_SECUR_UNCLASS 0x0000 ++#define IPOPT_SECUR_CONFID 0xf135 ++#define IPOPT_SECUR_EFTO 0x789a ++#define IPOPT_SECUR_MMMM 0xbc4d ++#define IPOPT_SECUR_RESTR 0xaf13 ++#define IPOPT_SECUR_SECRET 0xd788 ++#define IPOPT_SECUR_TOPSECRET 0x6bc5 ++ ++/* ++ * 
Internet implementation parameters. ++ */ ++#define MAXTTL 255 /* maximum time to live (seconds) */ ++#define IPDEFTTL 64 /* default ttl, from RFC 1340 */ ++#define IPFRAGTTL 60 /* time to live for frags, slowhz */ ++#define IPTTLDEC 1 /* subtracted when forwarding */ ++ ++#define IP_MSS 576 /* default maximum segment size */ ++ ++#if GLIB_SIZEOF_VOID_P == 4 ++struct mbuf_ptr { ++ struct mbuf *mptr; ++ uint32_t dummy; ++} SLIRP_PACKED; ++#else ++struct mbuf_ptr { ++ struct mbuf *mptr; ++} SLIRP_PACKED; ++#endif ++struct qlink { ++ void *next, *prev; ++}; ++ ++/* ++ * Overlay for ip header used by other protocols (tcp, udp). ++ */ ++struct ipovly { ++ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ ++ uint8_t ih_x1; /* (unused) */ ++ uint8_t ih_pr; /* protocol */ ++ uint16_t ih_len; /* protocol length */ ++ struct in_addr ih_src; /* source internet address */ ++ struct in_addr ih_dst; /* destination internet address */ ++} SLIRP_PACKED; ++ ++/* ++ * Ip reassembly queue structure. Each fragment ++ * being reassembled is attached to one of these structures. ++ * They are timed out after ipq_ttl drops to 0, and may also ++ * be reclaimed if memory becomes tight. ++ * size 28 bytes ++ */ ++struct ipq { ++ struct qlink frag_link; /* to ip headers of fragments */ ++ struct qlink ip_link; /* to other reass headers */ ++ uint8_t ipq_ttl; /* time for reass q to live */ ++ uint8_t ipq_p; /* protocol of this fragment */ ++ uint16_t ipq_id; /* sequence id for reassembly */ ++ struct in_addr ipq_src, ipq_dst; ++}; ++ ++/* ++ * Ip header, when holding a fragment. 
++ * ++ * Note: ipf_link must be at same offset as frag_link above ++ */ ++struct ipasfrag { ++ struct qlink ipf_link; ++ struct ip ipf_ip; ++}; ++ ++G_STATIC_ASSERT(offsetof(struct ipq, frag_link) == ++ offsetof(struct ipasfrag, ipf_link)); ++ ++#define ipf_off ipf_ip.ip_off ++#define ipf_tos ipf_ip.ip_tos ++#define ipf_len ipf_ip.ip_len ++#define ipf_next ipf_link.next ++#define ipf_prev ipf_link.prev ++ ++#endif +diff --git a/slirp/src/ip6.h b/slirp/src/ip6.h +new file mode 100644 +index 0000000000..0630309d29 +--- /dev/null ++++ b/slirp/src/ip6.h +@@ -0,0 +1,214 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#ifndef SLIRP_IP6_H ++#define SLIRP_IP6_H ++ ++#include <glib.h> ++#include <string.h> ++ ++#define ALLNODES_MULTICAST \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01 \ ++ } \ ++ } ++ ++#define SOLICITED_NODE_PREFIX \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01, \ ++ 0xff, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00 \ ++ } \ ++ } ++ ++#define LINKLOCAL_ADDR \ ++ { \ ++ .s6_addr = { \ ++ 0xfe, \ ++ 0x80, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x02 \ ++ } \ ++ } ++ ++#define ZERO_ADDR \ ++ { \ ++ .s6_addr = { \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00 \ ++ } \ ++ } ++ ++static inline bool in6_equal(const struct in6_addr *a, const struct in6_addr *b) ++{ ++ return memcmp(a, b, sizeof(*a)) == 0; ++} ++ ++static inline bool in6_equal_net(const struct in6_addr *a, ++ const struct in6_addr *b, int
prefix_len) ++{ ++ if (memcmp(a, b, prefix_len / 8) != 0) { ++ return 0; ++ } ++ ++ if (prefix_len % 8 == 0) { ++ return 1; ++ } ++ ++ return a->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)) == ++ b->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)); ++} ++ ++static inline bool in6_equal_mach(const struct in6_addr *a, ++ const struct in6_addr *b, int prefix_len) ++{ ++ if (memcmp(&(a->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), ++ &(b->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), ++ 16 - DIV_ROUND_UP(prefix_len, 8)) != 0) { ++ return 0; ++ } ++ ++ if (prefix_len % 8 == 0) { ++ return 1; ++ } ++ ++ return (a->s6_addr[prefix_len / 8] & ++ ((1U << (8 - (prefix_len % 8))) - 1)) == ++ (b->s6_addr[prefix_len / 8] & ((1U << (8 - (prefix_len % 8))) - 1)); ++} ++ ++ ++#define in6_equal_router(a) \ ++ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ ++ in6_equal_mach(a, &slirp->vhost_addr6, slirp->vprefix_len)) || \ ++ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ ++ in6_equal_mach(a, &slirp->vhost_addr6, 64))) ++ ++#define in6_equal_dns(a) \ ++ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ ++ in6_equal_mach(a, &slirp->vnameserver_addr6, slirp->vprefix_len)) || \ ++ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ ++ in6_equal_mach(a, &slirp->vnameserver_addr6, 64))) ++ ++#define in6_equal_host(a) (in6_equal_router(a) || in6_equal_dns(a)) ++ ++#define in6_solicitednode_multicast(a) \ ++ (in6_equal_net(a, &(struct in6_addr)SOLICITED_NODE_PREFIX, 104)) ++ ++#define in6_zero(a) (in6_equal(a, &(struct in6_addr)ZERO_ADDR)) ++ ++/* Compute emulated host MAC address from its ipv6 address */ ++static inline void in6_compute_ethaddr(struct in6_addr ip, ++ uint8_t eth[ETH_ALEN]) ++{ ++ eth[0] = 0x52; ++ eth[1] = 0x56; ++ memcpy(&eth[2], &ip.s6_addr[16 - (ETH_ALEN - 2)], ETH_ALEN - 2); ++} ++ ++/* ++ * Definitions for internet protocol version 6. ++ * Per RFC 2460, December 1998.
++ */ ++#define IP6VERSION 6 ++#define IP6_HOP_LIMIT 255 ++ ++/* ++ * Structure of an internet header, naked of options. ++ */ ++struct ip6 { ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint32_t ip_v : 4, /* version */ ++ ip_tc_hi : 4, /* traffic class */ ++ ip_tc_lo : 4, ip_fl_hi : 4, /* flow label */ ++ ip_fl_lo : 16; ++#else ++ uint32_t ip_tc_hi : 4, ip_v : 4, ip_fl_hi : 4, ip_tc_lo : 4, ip_fl_lo : 16; ++#endif ++ uint16_t ip_pl; /* payload length */ ++ uint8_t ip_nh; /* next header */ ++ uint8_t ip_hl; /* hop limit */ ++ struct in6_addr ip_src, ip_dst; /* source and dest address */ ++}; ++ ++/* ++ * IPv6 pseudo-header used by upper-layer protocols ++ */ ++struct ip6_pseudohdr { ++ struct in6_addr ih_src; /* source internet address */ ++ struct in6_addr ih_dst; /* destination internet address */ ++ uint32_t ih_pl; /* upper-layer packet length */ ++ uint16_t ih_zero_hi; /* zero */ ++ uint8_t ih_zero_lo; /* zero */ ++ uint8_t ih_nh; /* next header */ ++}; ++ ++/* ++ * We don't want to mark these ip6 structs as packed as they are naturally ++ * correctly aligned; instead assert that there is no stray padding. ++ * If we marked the struct as packed then we would be unable to take ++ * the address of any of the fields in it. ++ */ ++G_STATIC_ASSERT(sizeof(struct ip6) == 40); ++G_STATIC_ASSERT(sizeof(struct ip6_pseudohdr) == 40); ++ ++#endif +diff --git a/slirp/src/ip6_icmp.c b/slirp/src/ip6_icmp.c +new file mode 100644 +index 0000000000..738b40f725 +--- /dev/null ++++ b/slirp/src/ip6_icmp.c +@@ -0,0 +1,444 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. 
++ */ ++ ++#include "slirp.h" ++#include "ip6_icmp.h" ++ ++#define NDP_Interval \ ++ g_rand_int_range(slirp->grand, NDP_MinRtrAdvInterval, NDP_MaxRtrAdvInterval) ++ ++static void ra_timer_handler(void *opaque) ++{ ++ Slirp *slirp = opaque; ++ ++ slirp->cb->timer_mod(slirp->ra_timer, ++ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + ++ NDP_Interval, ++ slirp->opaque); ++ ndp_send_ra(slirp); ++} ++ ++void icmp6_init(Slirp *slirp) ++{ ++ if (!slirp->in6_enabled) { ++ return; ++ } ++ ++ slirp->ra_timer = ++ slirp->cb->timer_new(ra_timer_handler, slirp, slirp->opaque); ++ slirp->cb->timer_mod(slirp->ra_timer, ++ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + ++ NDP_Interval, ++ slirp->opaque); ++} ++ ++void icmp6_cleanup(Slirp *slirp) ++{ ++ if (!slirp->in6_enabled) { ++ return; ++ } ++ ++ slirp->cb->timer_free(slirp->ra_timer, slirp->opaque); ++} ++ ++static void icmp6_send_echoreply(struct mbuf *m, Slirp *slirp, struct ip6 *ip, ++ struct icmp6 *icmp) ++{ ++ struct mbuf *t = m_get(slirp); ++ t->m_len = sizeof(struct ip6) + ntohs(ip->ip_pl); ++ memcpy(t->m_data, m->m_data, t->m_len); ++ ++ /* IPv6 Packet */ ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_dst = ip->ip_src; ++ rip->ip_src = ip->ip_dst; ++ ++ /* ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_ECHO_REPLY; ++ ricmp->icmp6_cksum = 0; ++ ++ /* Checksum */ ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++void icmp6_forward_error(struct mbuf *m, uint8_t type, uint8_t code, struct in6_addr *src) ++{ ++ Slirp *slirp = m->slirp; ++ struct mbuf *t; ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ char addrstr[INET6_ADDRSTRLEN]; ++ ++ DEBUG_CALL("icmp6_send_error"); ++ DEBUG_ARG("type = %d, code = %d", type, code); ++ ++ if (IN6_IS_ADDR_MULTICAST(&ip->ip_src) || in6_zero(&ip->ip_src)) { ++ /* TODO icmp error? 
*/ ++ return; ++ } ++ ++ t = m_get(slirp); ++ ++ /* IPv6 packet */ ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = *src; ++ rip->ip_dst = ip->ip_src; ++ inet_ntop(AF_INET6, &rip->ip_dst, addrstr, INET6_ADDRSTRLEN); ++ DEBUG_ARG("target = %s", addrstr); ++ ++ rip->ip_nh = IPPROTO_ICMPV6; ++ const int error_data_len = MIN( ++ m->m_len, slirp->if_mtu - (sizeof(struct ip6) + ICMP6_ERROR_MINLEN)); ++ rip->ip_pl = htons(ICMP6_ERROR_MINLEN + error_data_len); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = type; ++ ricmp->icmp6_code = code; ++ ricmp->icmp6_cksum = 0; ++ ++ switch (type) { ++ case ICMP6_UNREACH: ++ case ICMP6_TIMXCEED: ++ ricmp->icmp6_err.unused = 0; ++ break; ++ case ICMP6_TOOBIG: ++ ricmp->icmp6_err.mtu = htonl(slirp->if_mtu); ++ break; ++ case ICMP6_PARAMPROB: ++ /* TODO: Handle this case */ ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ t->m_data += ICMP6_ERROR_MINLEN; ++ memcpy(t->m_data, m->m_data, error_data_len); ++ ++ /* Checksum */ ++ t->m_data -= ICMP6_ERROR_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code) ++{ ++ struct in6_addr src = LINKLOCAL_ADDR; ++ icmp6_forward_error(m, type, code, &src); ++} ++ ++/* ++ * Send NDP Router Advertisement ++ */ ++void ndp_send_ra(Slirp *slirp) ++{ ++ DEBUG_CALL("ndp_send_ra"); ++ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ size_t pl_size = 0; ++ struct in6_addr addr; ++ uint32_t scope_id; ++ ++ rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; ++ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; ++ rip->ip_nh = IPPROTO_ICMPV6; ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ 
ricmp->icmp6_type = ICMP6_NDP_RA; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nra.chl = NDP_AdvCurHopLimit; ++ ricmp->icmp6_nra.M = NDP_AdvManagedFlag; ++ ricmp->icmp6_nra.O = NDP_AdvOtherConfigFlag; ++ ricmp->icmp6_nra.reserved = 0; ++ ricmp->icmp6_nra.lifetime = htons(NDP_AdvDefaultLifetime); ++ ricmp->icmp6_nra.reach_time = htonl(NDP_AdvReachableTime); ++ ricmp->icmp6_nra.retrans_time = htonl(NDP_AdvRetransTime); ++ t->m_data += ICMP6_NDP_RA_MINLEN; ++ pl_size += ICMP6_NDP_RA_MINLEN; ++ ++ /* Source link-layer address (NDP option) */ ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(rip->ip_src, opt->ndpopt_linklayer); ++ t->m_data += NDPOPT_LINKLAYER_LEN; ++ pl_size += NDPOPT_LINKLAYER_LEN; ++ ++ /* Prefix information (NDP option) */ ++ struct ndpopt *opt2 = mtod(t, struct ndpopt *); ++ opt2->ndpopt_type = NDPOPT_PREFIX_INFO; ++ opt2->ndpopt_len = NDPOPT_PREFIXINFO_LEN / 8; ++ opt2->ndpopt_prefixinfo.prefix_length = slirp->vprefix_len; ++ opt2->ndpopt_prefixinfo.L = 1; ++ opt2->ndpopt_prefixinfo.A = 1; ++ opt2->ndpopt_prefixinfo.reserved1 = 0; ++ opt2->ndpopt_prefixinfo.valid_lt = htonl(NDP_AdvValidLifetime); ++ opt2->ndpopt_prefixinfo.pref_lt = htonl(NDP_AdvPrefLifetime); ++ opt2->ndpopt_prefixinfo.reserved2 = 0; ++ opt2->ndpopt_prefixinfo.prefix = slirp->vprefix_addr6; ++ t->m_data += NDPOPT_PREFIXINFO_LEN; ++ pl_size += NDPOPT_PREFIXINFO_LEN; ++ ++ /* Recursive DNS Server (NDP option) */ ++ if (get_dns6_addr(&addr, &scope_id) >= 0) { ++ /* Host system does have an IPv6 DNS server, announce our proxy.
*/ ++ struct ndpopt *opt3 = mtod(t, struct ndpopt *); ++ opt3->ndpopt_type = NDPOPT_RDNSS; ++ opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8; ++ opt3->ndpopt_rdnss.reserved = 0; ++ opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval); ++ opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6; ++ t->m_data += NDPOPT_RDNSS_LEN; ++ pl_size += NDPOPT_RDNSS_LEN; ++ } ++ ++ rip->ip_pl = htons(pl_size); ++ t->m_data -= sizeof(struct ip6) + pl_size; ++ t->m_len = sizeof(struct ip6) + pl_size; ++ ++ /* ICMPv6 Checksum */ ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Send NDP Neighbor Solicitation ++ */ ++void ndp_send_ns(Slirp *slirp, struct in6_addr addr) ++{ ++ char addrstr[INET6_ADDRSTRLEN]; ++ ++ inet_ntop(AF_INET6, &addr, addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_send_ns"); ++ DEBUG_ARG("target = %s", addrstr); ++ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = slirp->vhost_addr6; ++ rip->ip_dst = (struct in6_addr)SOLICITED_NODE_PREFIX; ++ memcpy(&rip->ip_dst.s6_addr[13], &addr.s6_addr[13], 3); /* low 24 bits of the target complete the solicited-node address */ ++ rip->ip_nh = IPPROTO_ICMPV6; ++ rip->ip_pl = htons(ICMP6_NDP_NS_MINLEN + NDPOPT_LINKLAYER_LEN); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_NS; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nns.reserved = 0; ++ ricmp->icmp6_nns.target = addr; ++ ++ /* Build NDP option */ ++ t->m_data += ICMP6_NDP_NS_MINLEN; ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(slirp->vhost_addr6, opt->ndpopt_linklayer); ++ ++ /* ICMPv6 Checksum */ ++ t->m_data -= ICMP6_NDP_NS_MINLEN; /* rewind by the same NS header length that was skipped above */ ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++
ip6_output(NULL, t, 1); ++} ++ ++/* ++ * Send NDP Neighbor Advertisement ++ */ ++static void ndp_send_na(Slirp *slirp, struct ip6 *ip, struct icmp6 *icmp) ++{ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = icmp->icmp6_nns.target; ++ if (in6_zero(&ip->ip_src)) { ++ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; ++ } else { ++ rip->ip_dst = ip->ip_src; ++ } ++ rip->ip_nh = IPPROTO_ICMPV6; ++ rip->ip_pl = htons(ICMP6_NDP_NA_MINLEN + NDPOPT_LINKLAYER_LEN); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_NA; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nna.R = NDP_IsRouter; ++ ricmp->icmp6_nna.S = !IN6_IS_ADDR_MULTICAST(&rip->ip_dst); ++ ricmp->icmp6_nna.O = 1; ++ ricmp->icmp6_nna.reserved_hi = 0; ++ ricmp->icmp6_nna.reserved_lo = 0; ++ ricmp->icmp6_nna.target = icmp->icmp6_nns.target; ++ ++ /* Build NDP option */ ++ t->m_data += ICMP6_NDP_NA_MINLEN; ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_TARGET; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(ricmp->icmp6_nna.target, opt->ndpopt_linklayer); ++ ++ /* ICMPv6 Checksum */ ++ t->m_data -= ICMP6_NDP_NA_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Process a NDP message ++ */ ++static void ndp_input(struct mbuf *m, Slirp *slirp, struct ip6 *ip, ++ struct icmp6 *icmp) ++{ ++ g_assert(M_ROOMBEFORE(m) >= ETH_HLEN); ++ ++ m->m_len += ETH_HLEN; ++ m->m_data -= ETH_HLEN; ++ struct ethhdr *eth = mtod(m, struct ethhdr *); ++ m->m_len -= ETH_HLEN; ++ m->m_data += ETH_HLEN; ++ ++ switch (icmp->icmp6_type) { ++ case ICMP6_NDP_RS: ++ DEBUG_CALL(" type = Router Solicitation"); ++ if (ip->ip_hl == 255 && 
icmp->icmp6_code == 0 && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_RS_MINLEN) { ++ /* Gratuitous NDP */ ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ ++ ndp_send_ra(slirp); ++ } ++ break; ++ ++ case ICMP6_NDP_RA: ++ DEBUG_CALL(" type = Router Advertisement"); ++ slirp->cb->guest_error("Warning: guest sent NDP RA, but shouldn't", ++ slirp->opaque); ++ break; ++ ++ case ICMP6_NDP_NS: ++ DEBUG_CALL(" type = Neighbor Solicitation"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nns.target) && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_NS_MINLEN && ++ (!in6_zero(&ip->ip_src) || ++ in6_solicitednode_multicast(&ip->ip_dst))) { ++ if (in6_equal_host(&icmp->icmp6_nns.target)) { ++ /* Gratuitous NDP */ ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ ndp_send_na(slirp, ip, icmp); ++ } ++ } ++ break; ++ ++ case ICMP6_NDP_NA: ++ DEBUG_CALL(" type = Neighbor Advertisement"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_NA_MINLEN && ++ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nna.target) && ++ (!IN6_IS_ADDR_MULTICAST(&ip->ip_dst) || icmp->icmp6_nna.S == 0)) { ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ } ++ break; ++ ++ case ICMP6_NDP_REDIRECT: ++ DEBUG_CALL(" type = Redirect"); ++ slirp->cb->guest_error( ++ "Warning: guest sent NDP REDIRECT, but shouldn't", slirp->opaque); ++ break; ++ } ++} ++ ++/* ++ * Process a received ICMPv6 message. 
++ */ ++void icmp6_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ /* NDP reads the ethernet header for gratuitous NDP */ ++ M_DUP_DEBUG(slirp, m, 1, ETH_HLEN); ++ ++ struct icmp6 *icmp; ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ int hlen = sizeof(struct ip6); ++ ++ DEBUG_CALL("icmp6_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (ntohs(ip->ip_pl) < ICMP6_MINLEN) { /* payload too short to hold an ICMPv6 header */ ++ goto end; ++ } ++ ++ if (ip6_cksum(m)) { /* non-zero result: drop the packet */ ++ goto end; ++ } ++ ++ m->m_len -= hlen; /* step past the IPv6 header just long enough to take the ICMPv6 pointer */ ++ m->m_data += hlen; ++ icmp = mtod(m, struct icmp6 *); ++ m->m_len += hlen; ++ m->m_data -= hlen; ++ ++ DEBUG_ARG("icmp6_type = %d", icmp->icmp6_type); ++ switch (icmp->icmp6_type) { ++ case ICMP6_ECHO_REQUEST: ++ if (in6_equal_host(&ip->ip_dst)) { ++ icmp6_send_echoreply(m, slirp, ip, icmp); ++ } else { ++ /* TODO */ ++ g_critical("external icmpv6 not supported yet"); ++ } ++ break; ++ ++ case ICMP6_NDP_RS: ++ case ICMP6_NDP_RA: ++ case ICMP6_NDP_NS: ++ case ICMP6_NDP_NA: ++ case ICMP6_NDP_REDIRECT: ++ ndp_input(m, slirp, ip, icmp); ++ break; ++ ++ case ICMP6_UNREACH: ++ case ICMP6_TOOBIG: ++ case ICMP6_TIMXCEED: ++ case ICMP6_PARAMPROB: ++ /* XXX? report error? close socket? */ ++ default: ++ break; ++ } ++ ++end: ++ m_free(m); /* every path releases the mbuf here */ ++} +diff --git a/slirp/src/ip6_icmp.h b/slirp/src/ip6_icmp.h +new file mode 100644 +index 0000000000..9070999cfc +--- /dev/null ++++ b/slirp/src/ip6_icmp.h +@@ -0,0 +1,220 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#ifndef SLIRP_IP6_ICMP_H ++#define SLIRP_IP6_ICMP_H ++ ++/* ++ * Internet Control Message Protocol version 6 Definitions. ++ * Per RFC 4443, March 2006. ++ * ++ * Neighbor Discovery Protocol Definitions. ++ * Per RFC 4861, September 2007.
++ */ ++ ++struct icmp6_echo { /* Echo Messages */ ++ uint16_t id; ++ uint16_t seq_num; ++}; ++ ++union icmp6_error_body { ++ uint32_t unused; ++ uint32_t pointer; ++ uint32_t mtu; ++}; ++ ++/* ++ * NDP Messages ++ */ ++struct ndp_rs { /* Router Solicitation Message */ ++ uint32_t reserved; ++}; ++ ++struct ndp_ra { /* Router Advertisement Message */ ++ uint8_t chl; /* Cur Hop Limit */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t M : 1, O : 1, reserved : 6; ++#else ++ uint8_t reserved : 6, O : 1, M : 1; ++#endif ++ uint16_t lifetime; /* Router Lifetime */ ++ uint32_t reach_time; /* Reachable Time */ ++ uint32_t retrans_time; /* Retrans Timer */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_ra) == 12); ++ ++struct ndp_ns { /* Neighbor Solicitation Message */ ++ uint32_t reserved; ++ struct in6_addr target; /* Target Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_ns) == 20); ++ ++struct ndp_na { /* Neighbor Advertisement Message */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint32_t R : 1, /* Router Flag */ ++ S : 1, /* Solicited Flag */ ++ O : 1, /* Override Flag */ ++ reserved_hi : 5, reserved_lo : 24; ++#else ++ uint32_t reserved_hi : 5, O : 1, S : 1, R : 1, reserved_lo : 24; ++#endif ++ struct in6_addr target; /* Target Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_na) == 20); ++ ++struct ndp_redirect { ++ uint32_t reserved; ++ struct in6_addr target; /* Target Address */ ++ struct in6_addr dest; /* Destination Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_redirect) == 36); ++ ++/* ++ * Structure of an icmpv6 header. 
++ */ ++struct icmp6 { ++ uint8_t icmp6_type; /* type of message, see below */ ++ uint8_t icmp6_code; /* type sub code */ ++ uint16_t icmp6_cksum; /* ones complement cksum of struct */ ++ union { ++ union icmp6_error_body error_body; ++ struct icmp6_echo echo; ++ struct ndp_rs ndp_rs; ++ struct ndp_ra ndp_ra; ++ struct ndp_ns ndp_ns; ++ struct ndp_na ndp_na; ++ struct ndp_redirect ndp_redirect; ++ } icmp6_body; ++#define icmp6_err icmp6_body.error_body ++#define icmp6_echo icmp6_body.echo ++#define icmp6_nrs icmp6_body.ndp_rs ++#define icmp6_nra icmp6_body.ndp_ra ++#define icmp6_nns icmp6_body.ndp_ns ++#define icmp6_nna icmp6_body.ndp_na ++#define icmp6_redirect icmp6_body.ndp_redirect ++}; ++ ++G_STATIC_ASSERT(sizeof(struct icmp6) == 40); ++ ++#define ICMP6_MINLEN 4 ++#define ICMP6_ERROR_MINLEN 8 ++#define ICMP6_ECHO_MINLEN 8 ++#define ICMP6_NDP_RS_MINLEN 8 ++#define ICMP6_NDP_RA_MINLEN 16 ++#define ICMP6_NDP_NS_MINLEN 24 ++#define ICMP6_NDP_NA_MINLEN 24 ++#define ICMP6_NDP_REDIRECT_MINLEN 40 ++ ++/* ++ * NDP Options ++ */ ++struct ndpopt { ++ uint8_t ndpopt_type; /* Option type */ ++ uint8_t ndpopt_len; /* /!\ In units of 8 octets */ ++ union { ++ unsigned char linklayer_addr[6]; /* Source/Target Link-layer */ ++#define ndpopt_linklayer ndpopt_body.linklayer_addr ++ struct prefixinfo { /* Prefix Information */ ++ uint8_t prefix_length; ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t L : 1, A : 1, reserved1 : 6; ++#else ++ uint8_t reserved1 : 6, A : 1, L : 1; ++#endif ++ uint32_t valid_lt; /* Valid Lifetime */ ++ uint32_t pref_lt; /* Preferred Lifetime */ ++ uint32_t reserved2; ++ struct in6_addr prefix; ++ } SLIRP_PACKED prefixinfo; ++#define ndpopt_prefixinfo ndpopt_body.prefixinfo ++ struct rdnss { ++ uint16_t reserved; ++ uint32_t lifetime; ++ struct in6_addr addr; ++ } SLIRP_PACKED rdnss; ++#define ndpopt_rdnss ndpopt_body.rdnss ++ } ndpopt_body; ++} SLIRP_PACKED; ++ ++/* NDP options type */ ++#define NDPOPT_LINKLAYER_SOURCE 1 /* Source Link-Layer Address */ 
++#define NDPOPT_LINKLAYER_TARGET 2 /* Target Link-Layer Address */ ++#define NDPOPT_PREFIX_INFO 3 /* Prefix Information */ ++#define NDPOPT_RDNSS 25 /* Recursive DNS Server Address */ ++ ++/* NDP options size, in octets. */ ++#define NDPOPT_LINKLAYER_LEN 8 ++#define NDPOPT_PREFIXINFO_LEN 32 ++#define NDPOPT_RDNSS_LEN 24 ++ ++/* ++ * Definition of type and code field values. ++ * Per https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xml ++ * Last Updated 2012-11-12 ++ */ ++ ++/* Errors */ ++#define ICMP6_UNREACH 1 /* Destination Unreachable */ ++#define ICMP6_UNREACH_NO_ROUTE 0 /* no route to dest */ ++#define ICMP6_UNREACH_DEST_PROHIB 1 /* com with dest prohibited */ ++#define ICMP6_UNREACH_SCOPE 2 /* beyond scope of src addr */ ++#define ICMP6_UNREACH_ADDRESS 3 /* address unreachable */ ++#define ICMP6_UNREACH_PORT 4 /* port unreachable */ ++#define ICMP6_UNREACH_SRC_FAIL 5 /* src addr failed */ ++#define ICMP6_UNREACH_REJECT_ROUTE 6 /* reject route to dest */ ++#define ICMP6_UNREACH_SRC_HDR_ERROR 7 /* error in src routing header */ ++#define ICMP6_TOOBIG 2 /* Packet Too Big */ ++#define ICMP6_TIMXCEED 3 /* Time Exceeded */ ++#define ICMP6_TIMXCEED_INTRANS 0 /* hop limit exceeded in transit */ ++#define ICMP6_TIMXCEED_REASS 1 /* ttl=0 in reass */ ++#define ICMP6_PARAMPROB 4 /* Parameter Problem */ ++#define ICMP6_PARAMPROB_HDR_FIELD 0 /* err header field */ ++#define ICMP6_PARAMPROB_NXTHDR_TYPE 1 /* unrecognized Next Header type */ ++#define ICMP6_PARAMPROB_IPV6_OPT 2 /* unrecognized IPv6 option */ ++ ++/* Informational Messages */ ++#define ICMP6_ECHO_REQUEST 128 /* Echo Request */ ++#define ICMP6_ECHO_REPLY 129 /* Echo Reply */ ++#define ICMP6_NDP_RS 133 /* Router Solicitation (NDP) */ ++#define ICMP6_NDP_RA 134 /* Router Advertisement (NDP) */ ++#define ICMP6_NDP_NS 135 /* Neighbor Solicitation (NDP) */ ++#define ICMP6_NDP_NA 136 /* Neighbor Advertisement (NDP) */ ++#define ICMP6_NDP_REDIRECT 137 /* Redirect Message (NDP) */ ++ ++/* ++ * 
Router Configuration Variables (rfc4861#section-6) ++ */ ++#define NDP_IsRouter 1 ++#define NDP_AdvSendAdvertisements 1 ++#define NDP_MaxRtrAdvInterval 600000 ++#define NDP_MinRtrAdvInterval \ ++ ((NDP_MaxRtrAdvInterval >= 9) ? NDP_MaxRtrAdvInterval / 3 : \ ++ NDP_MaxRtrAdvInterval) ++#define NDP_AdvManagedFlag 0 ++#define NDP_AdvOtherConfigFlag 0 ++#define NDP_AdvLinkMTU 0 ++#define NDP_AdvReachableTime 0 ++#define NDP_AdvRetransTime 0 ++#define NDP_AdvCurHopLimit 64 ++#define NDP_AdvDefaultLifetime ((3 * NDP_MaxRtrAdvInterval) / 1000) ++#define NDP_AdvValidLifetime 86400 ++#define NDP_AdvOnLinkFlag 1 ++#define NDP_AdvPrefLifetime 14400 ++#define NDP_AdvAutonomousFlag 1 ++ ++void icmp6_init(Slirp *slirp); ++void icmp6_cleanup(Slirp *slirp); ++void icmp6_input(struct mbuf *); ++void icmp6_forward_error(struct mbuf *m, uint8_t type, uint8_t code, struct in6_addr *src); ++void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code); ++void ndp_send_ra(Slirp *slirp); ++void ndp_send_ns(Slirp *slirp, struct in6_addr addr); ++ ++#endif +diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c +new file mode 100644 +index 0000000000..b3d98653df +--- /dev/null ++++ b/slirp/src/ip6_input.c +@@ -0,0 +1,88 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++#include "ip6_icmp.h" ++ ++/* ++ * IP initialization: fill in IP protocol switch table. ++ * All protocols not implemented in kernel go to raw IP protocol handler. 
++ */ ++void ip6_init(Slirp *slirp) ++{ ++ icmp6_init(slirp); ++} ++ ++void ip6_cleanup(Slirp *slirp) ++{ ++ icmp6_cleanup(slirp); ++} ++ ++void ip6_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ /* NDP reads the ethernet header for gratuitous NDP */ ++ M_DUP_DEBUG(slirp, m, 1, TCPIPHDR_DELTA + 2 + ETH_HLEN); ++ ++ struct ip6 *ip6; ++ ++ if (!slirp->in6_enabled) { ++ goto bad; ++ } ++ ++ DEBUG_CALL("ip6_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (m->m_len < sizeof(struct ip6)) { ++ goto bad; ++ } ++ ++ ip6 = mtod(m, struct ip6 *); ++ ++ if (ip6->ip_v != IP6VERSION) { ++ goto bad; ++ } ++ ++ if (ntohs(ip6->ip_pl) + sizeof(struct ip6) > slirp->if_mtu) { ++ icmp6_send_error(m, ICMP6_TOOBIG, 0); ++ goto bad; ++ } ++ ++ // Check if the message size is big enough to hold what's ++ // set in the payload length header. If not this is an invalid ++ // packet ++ if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { ++ goto bad; ++ } ++ ++ /* check ip_ttl for a correct ICMP reply */ ++ if (ip6->ip_hl == 0) { ++ icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); ++ goto bad; ++ } ++ ++ /* ++ * Switch out to protocol's input routine. ++ */ ++ switch (ip6->ip_nh) { ++ case IPPROTO_TCP: ++ NTOHS(ip6->ip_pl); ++ tcp_input(m, sizeof(struct ip6), (struct socket *)NULL, AF_INET6); ++ break; ++ case IPPROTO_UDP: ++ udp6_input(m); ++ break; ++ case IPPROTO_ICMPV6: ++ icmp6_input(m); ++ break; ++ default: ++ m_free(m); ++ } ++ return; ++bad: ++ m_free(m); ++} +diff --git a/slirp/src/ip6_output.c b/slirp/src/ip6_output.c +new file mode 100644 +index 0000000000..834f1c0a32 +--- /dev/null ++++ b/slirp/src/ip6_output.c +@@ -0,0 +1,45 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. 
++ */ ++ ++#include "slirp.h" ++ ++/* Number of packets queued before we start sending ++ * (to prevent allocing too many mbufs) */ ++#define IF6_THRESH 10 ++ ++/* ++ * IPv6 output. The packet in mbuf chain m contains a IP header ++ */ ++int ip6_output(struct socket *so, struct mbuf *m, int fast) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ ++ DEBUG_CALL("ip6_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ /* Fill IPv6 header */ ++ ip->ip_v = IP6VERSION; ++ ip->ip_hl = IP6_HOP_LIMIT; ++ ip->ip_tc_hi = 0; ++ ip->ip_tc_lo = 0; ++ ip->ip_fl_hi = 0; ++ ip->ip_fl_lo = 0; ++ ++ if (fast) { ++ /* We cannot fast-send non-multicast, we'd need a NDP NS */ ++ assert(IN6_IS_ADDR_MULTICAST(&ip->ip_dst)); ++ if_encap(m->slirp, m); ++ m_free(m); ++ } else { ++ if_output(so, m); ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/ip_icmp.c b/slirp/src/ip_icmp.c +new file mode 100644 +index 0000000000..9fba653a46 +--- /dev/null ++++ b/slirp/src/ip_icmp.c +@@ -0,0 +1,524 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 ++ * ip_icmp.c,v 1.7 1995/05/30 08:09:42 rgrimes Exp ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++#ifndef WITH_ICMP_ERROR_MSG ++#define WITH_ICMP_ERROR_MSG 0 ++#endif ++ ++/* The message sent when emulating PING */ ++/* Be nice and tell them it's just a pseudo-ping packet */ ++static const char icmp_ping_msg[] = ++ "This is a pseudo-PING packet used by Slirp to emulate ICMP ECHO-REQUEST " ++ "packets.\n"; ++ ++/* list of actions for icmp_send_error() on RX of an icmp message */ ++static const int icmp_flush[19] = { ++ /* ECHO REPLY (0) */ 0, ++ 1, ++ 1, ++ /* DEST UNREACH (3) */ 1, ++ /* SOURCE QUENCH (4)*/ 1, ++ /* REDIRECT (5) */ 1, ++ 1, ++ 1, ++ /* ECHO (8) */ 0, ++ /* ROUTERADVERT (9) */ 1, ++ /* ROUTERSOLICIT (10) */ 1, ++ /* TIME EXCEEDED (11) */ 1, ++ /* PARAMETER PROBLEM (12) */ 1, ++ /* TIMESTAMP (13) */ 0, ++ /* TIMESTAMP REPLY (14) */ 0, ++ /* INFO (15) */ 0, ++ /* INFO REPLY (16) */ 0, ++ /* ADDR MASK (17) */ 0, ++ /* ADDR MASK REPLY (18) */ 0 ++}; ++ ++void icmp_init(Slirp *slirp) ++{ ++ slirp->icmp.so_next = slirp->icmp.so_prev = &slirp->icmp; ++ slirp->icmp_last_so = &slirp->icmp; ++} ++ ++void 
icmp_cleanup(Slirp *slirp) ++{ ++ struct socket *so, *so_next; ++ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ so_next = so->so_next; ++ icmp_detach(so); ++ } ++} ++ ++static int icmp_send(struct socket *so, struct mbuf *m, int hlen) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ struct ip *ip = mtod(m, struct ip *); ++ struct sockaddr_in addr; ++ ++ so->s = slirp_socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); ++ if (so->s == -1) { ++ return -1; ++ } ++ ++ if (slirp_bind_outbound(so, AF_INET) != 0) { ++ // bind failed - close socket ++ closesocket(so->s); ++ so->s = -1; ++ return -1; ++ } ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); /* registered only after bind succeeds, so the failure path above never leaves a closed fd registered */ ++ ++ so->so_m = m; ++ so->so_faddr = ip->ip_dst; ++ so->so_laddr = ip->ip_src; ++ so->so_iptos = ip->ip_tos; ++ so->so_type = IPPROTO_ICMP; ++ so->so_state = SS_ISFCONNECTED; ++ so->so_expire = curtime + SO_EXPIRE; ++ ++ addr.sin_family = AF_INET; ++ addr.sin_addr = so->so_faddr; ++ ++ insque(so, &so->slirp->icmp); ++ ++ if (sendto(so->s, m->m_data + hlen, m->m_len - hlen, 0, ++ (struct sockaddr *)&addr, sizeof(addr)) == -1) { ++ DEBUG_MISC("icmp_input icmp sendto tx errno = %d-%s", errno, ++ strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); ++ icmp_detach(so); ++ } ++ ++ return 0; ++} ++ ++void icmp_detach(struct socket *so) ++{ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++} ++ ++/* ++ * Process a received ICMP message. ++ */ ++void icmp_input(struct mbuf *m, int hlen) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ register struct icmp *icp; ++ register struct ip *ip = mtod(m, struct ip *); ++ int icmplen = ip->ip_len; ++ ++ DEBUG_CALL("icmp_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ /* ++ * Locate icmp structure in mbuf, and check ++ * that its not corrupted and of at least minimum length.
++ */ ++ if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ ++ freeit: ++ m_free(m); ++ goto end_error; ++ } ++ ++ m->m_len -= hlen; ++ m->m_data += hlen; ++ icp = mtod(m, struct icmp *); ++ if (cksum(m, icmplen)) { ++ goto freeit; ++ } ++ m->m_len += hlen; ++ m->m_data -= hlen; ++ ++ DEBUG_ARG("icmp_type = %d", icp->icmp_type); ++ switch (icp->icmp_type) { ++ case ICMP_ECHO: ++ ip->ip_len += hlen; /* since ip_input subtracts this */ ++ if (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || ++ ip->ip_dst.s_addr == slirp->vnameserver_addr.s_addr) { ++ icmp_reflect(m); ++ } else if (slirp->restricted) { ++ goto freeit; ++ } else { ++ struct socket *so; ++ struct sockaddr_storage addr; ++ int ttl; ++ ++ so = socreate(slirp); ++ if (icmp_send(so, m, hlen) == 0) { ++ /* We could send this as ICMP, good! */ ++ return; ++ } ++ ++ /* We could not send this as ICMP, try to send it on UDP echo ++ * service (7), wishfully hoping that it is open there. */ ++ ++ if (udp_attach(so, AF_INET) == -1) { ++ DEBUG_MISC("icmp_input udp_attach errno = %d-%s", errno, ++ strerror(errno)); ++ sofree(so); ++ m_free(m); ++ goto end_error; ++ } ++ so->so_m = m; ++ so->so_ffamily = AF_INET; ++ so->so_faddr = ip->ip_dst; ++ so->so_fport = htons(7); ++ so->so_lfamily = AF_INET; ++ so->so_laddr = ip->ip_src; ++ so->so_lport = htons(9); ++ so->so_iptos = ip->ip_tos; ++ so->so_type = IPPROTO_ICMP; ++ so->so_state = SS_ISFCONNECTED; ++ ++ /* Send the packet */ ++ addr = so->fhost.ss; ++ if (sotranslate_out(so, &addr) < 0) { ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, ++ strerror(errno)); ++ udp_detach(so); ++ return; ++ } ++ ++ /* ++ * Check for TTL ++ */ ++ ttl = ip->ip_ttl-1; ++ if (ttl <= 0) { ++ DEBUG_MISC("udp ttl exceeded"); ++ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, ++ NULL); ++ udp_detach(so); ++ break; ++ } ++ setsockopt(so->s, IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)); ++ ++ if (sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), 0, ++ (struct sockaddr 
*)&addr, sockaddr_size(&addr)) == -1) { ++ DEBUG_MISC("icmp_input udp sendto tx errno = %d-%s", errno, ++ strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, ++ strerror(errno)); ++ udp_detach(so); ++ } ++ } /* if ip->ip_dst.s_addr == alias_addr.s_addr */ ++ break; ++ case ICMP_UNREACH: ++ /* XXX? report error? close socket? */ ++ case ICMP_TIMXCEED: ++ case ICMP_PARAMPROB: ++ case ICMP_SOURCEQUENCH: ++ case ICMP_TSTAMP: ++ case ICMP_MASKREQ: ++ case ICMP_REDIRECT: ++ m_free(m); ++ break; ++ ++ default: ++ m_free(m); ++ } /* switch */ ++ ++end_error: ++ /* m is m_free()'d xor put in a socket xor or given to ip_send */ ++ return; ++} ++ ++ ++/* ++ * Send an ICMP message in response to a situation ++ * ++ * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. ++ *MAY send more (we do). MUST NOT change this header information. MUST NOT reply ++ *to a multicast/broadcast IP address. MUST NOT reply to a multicast/broadcast ++ *MAC address. MUST reply to only the first fragment. ++ */ ++/* ++ * Send ICMP_UNREACH back to the source regarding msrc. ++ * mbuf *msrc is used as a template, but is NOT m_free()'d. ++ * It is reported as the bad ip packet. The header should ++ * be fully correct and in host byte order. ++ * ICMP fragmentation is illegal. All machines must accept 576 bytes in one ++ * packet. 
The maximum payload is 576-20(ip hdr)-8(icmp hdr)=548 ++ */ ++ ++#define ICMP_MAXDATALEN (IP_MSS - 28) ++void icmp_forward_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message, struct in_addr *src) ++{ ++ unsigned hlen, shlen, s_ip_len; ++ register struct ip *ip; ++ register struct icmp *icp; ++ register struct mbuf *m; ++ ++ DEBUG_CALL("icmp_send_error"); ++ DEBUG_ARG("msrc = %p", msrc); ++ DEBUG_ARG("msrc_len = %d", msrc ? msrc->m_len : 0); /* msrc is only NULL-checked further down */ ++ ++ if (type != ICMP_UNREACH && type != ICMP_TIMXCEED) ++ goto end_error; /* only these two error types are ever generated */ ++ ++ /* check msrc */ ++ if (!msrc) ++ goto end_error; ++ ip = mtod(msrc, struct ip *); ++ if (slirp_debug & DBG_MISC) { ++ char bufa[20], bufb[20]; ++ slirp_pstrcpy(bufa, sizeof(bufa), inet_ntoa(ip->ip_src)); ++ slirp_pstrcpy(bufb, sizeof(bufb), inet_ntoa(ip->ip_dst)); ++ DEBUG_MISC(" %.16s to %.16s", bufa, bufb); ++ } ++ if (ip->ip_off & IP_OFFMASK) ++ goto end_error; /* Only reply to fragment 0 */ ++ ++ /* Do not reply to source-only IPs */ ++ if ((ip->ip_src.s_addr & htonl(~(0xfU << 28))) == 0) { /* unsigned literal: 0xf << 28 does not fit in a signed int */ ++ goto end_error; ++ } ++ ++ shlen = ip->ip_hl << 2; ++ s_ip_len = ip->ip_len; ++ if (ip->ip_p == IPPROTO_ICMP) { ++ icp = (struct icmp *)((char *)ip + shlen); ++ /* ++ * Assume any unknown ICMP type is an error. This isn't ++ * specified by the RFC, but think about it..
++ */ ++ if (icp->icmp_type > 18 || icmp_flush[icp->icmp_type]) ++ goto end_error; ++ } ++ ++ /* make a copy */ ++ m = m_get(msrc->slirp); ++ if (!m) { ++ goto end_error; ++ } ++ ++ { ++ int new_m_size; ++ new_m_size = ++ sizeof(struct ip) + ICMP_MINLEN + msrc->m_len + ICMP_MAXDATALEN; ++ if (new_m_size > m->m_size) ++ m_inc(m, new_m_size); ++ } ++ memcpy(m->m_data, msrc->m_data, msrc->m_len); ++ m->m_len = msrc->m_len; /* copy msrc to m */ ++ ++ /* make the header of the reply packet */ ++ ip = mtod(m, struct ip *); ++ hlen = sizeof(struct ip); /* no options in reply */ ++ ++ /* fill in icmp */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ ++ icp = mtod(m, struct icmp *); ++ ++ if (minsize) ++ s_ip_len = shlen + ICMP_MINLEN; /* return header+8b only */ ++ else if (s_ip_len > ICMP_MAXDATALEN) /* maximum size */ ++ s_ip_len = ICMP_MAXDATALEN; ++ ++ m->m_len = ICMP_MINLEN + s_ip_len; /* 8 bytes ICMP header */ ++ ++ /* min. size = 8+sizeof(struct ip)+8 */ ++ ++ icp->icmp_type = type; ++ icp->icmp_code = code; ++ icp->icmp_id = 0; ++ icp->icmp_seq = 0; ++ ++ memcpy(&icp->icmp_ip, msrc->m_data, s_ip_len); /* report the ip packet */ ++ HTONS(icp->icmp_ip.ip_len); ++ HTONS(icp->icmp_ip.ip_id); ++ HTONS(icp->icmp_ip.ip_off); ++ ++ if (message && WITH_ICMP_ERROR_MSG) { /* append message to ICMP packet */ ++ int message_len; ++ char *cpnt; ++ message_len = strlen(message); ++ if (message_len > ICMP_MAXDATALEN) ++ message_len = ICMP_MAXDATALEN; ++ cpnt = (char *)m->m_data + m->m_len; ++ memcpy(cpnt, message, message_len); ++ m->m_len += message_len; ++ } ++ ++ icp->icmp_cksum = 0; ++ icp->icmp_cksum = cksum(m, m->m_len); ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ /* fill in ip */ ++ ip->ip_hl = hlen >> 2; ++ ip->ip_len = m->m_len; ++ ++ ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */ ++ ++ ip->ip_ttl = MAXTTL; ++ ip->ip_p = IPPROTO_ICMP; ++ ip->ip_dst = ip->ip_src; /* ip addresses */ ++ ip->ip_src = *src; ++ ++ ip_output((struct socket 
*)NULL, m); ++ ++end_error: ++ return; ++} ++#undef ICMP_MAXDATALEN ++ ++void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message) ++{ ++ icmp_forward_error(msrc, type, code, minsize, message, &msrc->slirp->vhost_addr); ++} ++ ++/* ++ * Reflect the ip packet back to the source ++ */ ++void icmp_reflect(struct mbuf *m) ++{ ++ register struct ip *ip = mtod(m, struct ip *); ++ int hlen = ip->ip_hl << 2; ++ int optlen = hlen - sizeof(struct ip); ++ register struct icmp *icp; ++ ++ /* ++ * Send an icmp packet back to the ip level, ++ * after supplying a checksum. ++ */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ icp = mtod(m, struct icmp *); ++ ++ icp->icmp_type = ICMP_ECHOREPLY; ++ icp->icmp_cksum = 0; ++ icp->icmp_cksum = cksum(m, ip->ip_len - hlen); ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ /* fill in ip */ ++ if (optlen > 0) { ++ /* ++ * Strip out original options by copying rest of first ++ * mbuf's data back, and adjust the IP length. ++ */ ++ memmove((char *)(ip + 1), (char *)ip + hlen, ++ (unsigned)(m->m_len - hlen)); ++ hlen -= optlen; ++ ip->ip_hl = hlen >> 2; ++ ip->ip_len -= optlen; ++ m->m_len -= optlen; ++ } ++ ++ ip->ip_ttl = MAXTTL; ++ { /* swap */ ++ struct in_addr icmp_dst; ++ icmp_dst = ip->ip_dst; ++ ip->ip_dst = ip->ip_src; ++ ip->ip_src = icmp_dst; ++ } ++ ++ ip_output((struct socket *)NULL, m); ++} ++ ++void icmp_receive(struct socket *so) ++{ ++ struct mbuf *m = so->so_m; ++ struct ip *ip = mtod(m, struct ip *); ++ int hlen = ip->ip_hl << 2; ++ uint8_t error_code; ++ struct icmp *icp; ++ int id, len; ++ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ icp = mtod(m, struct icmp *); ++ ++ id = icp->icmp_id; ++ len = recv(so->s, icp, M_ROOM(m), 0); ++ /* ++ * The behavior of reading SOCK_DGRAM+IPPROTO_ICMP sockets is inconsistent ++ * between host OSes. On Linux, only the ICMP header and payload is ++ * included. 
On macOS/Darwin, the socket acts like a raw socket and ++ * includes the IP header as well. On other BSDs, SOCK_DGRAM+IPPROTO_ICMP ++ * sockets aren't supported at all, so we treat them like raw sockets. It ++ * isn't possible to detect this difference at runtime, so we must use an ++ * #ifdef to determine if we need to remove the IP header. ++ */ ++#ifdef CONFIG_BSD ++ if (len >= (int)sizeof(struct ip)) { /* signed compare: a failed recv() (-1) must not be promoted to size_t */ ++ struct ip *inner_ip = mtod(m, struct ip *); ++ int inner_hlen = inner_ip->ip_hl << 2; ++ if (inner_hlen > len) { ++ len = -1; ++ errno = EINVAL; ++ } else { ++ len -= inner_hlen; ++ memmove(icp, (unsigned char *)icp + inner_hlen, len); ++ } ++ } else if (len >= 0) { /* datagram too short for an IP header; on recv() failure keep the errno recv() set */ ++ len = -1; ++ errno = EINVAL; ++ } ++#endif ++ icp->icmp_id = id; ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ if (len == -1 || len == 0) { ++ if (errno == ENETUNREACH) { ++ error_code = ICMP_UNREACH_NET; ++ } else { ++ error_code = ICMP_UNREACH_HOST; ++ } ++ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(so->so_m, ICMP_UNREACH, error_code, 0, strerror(errno)); ++ } else { ++ icmp_reflect(so->so_m); ++ so->so_m = NULL; /* Don't m_free() it again! */ ++ } ++ icmp_detach(so); ++} +diff --git a/slirp/src/ip_icmp.h b/slirp/src/ip_icmp.h +new file mode 100644 +index 0000000000..569a083061 +--- /dev/null ++++ b/slirp/src/ip_icmp.h +@@ -0,0 +1,168 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2.
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93 ++ * ip_icmp.h,v 1.4 1995/05/30 08:09:43 rgrimes Exp ++ */ ++ ++#ifndef NETINET_IP_ICMP_H ++#define NETINET_IP_ICMP_H ++ ++/* ++ * Internet Control Message Protocol Definitions. ++ * Per RFC 792, September 1981. ++ */ ++ ++typedef uint32_t n_time; /* type of the its_otime/its_rtime/its_ttime timestamp fields */ ++ ++/* ++ * Structure of an icmp header. 
++ */ ++struct icmp { ++ uint8_t icmp_type; /* type of message, see below */ ++ uint8_t icmp_code; /* type sub code */ ++ uint16_t icmp_cksum; /* ones complement cksum of struct */ ++ union { ++ uint8_t ih_pptr; /* ICMP_PARAMPROB */ ++ struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ ++ struct ih_idseq { ++ uint16_t icd_id; ++ uint16_t icd_seq; ++ } ih_idseq; ++ int ih_void; ++ ++ /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */ ++ struct ih_pmtu { ++ uint16_t ipm_void; ++ uint16_t ipm_nextmtu; ++ } ih_pmtu; ++ } icmp_hun; ++#define icmp_pptr icmp_hun.ih_pptr ++#define icmp_gwaddr icmp_hun.ih_gwaddr ++#define icmp_id icmp_hun.ih_idseq.icd_id ++#define icmp_seq icmp_hun.ih_idseq.icd_seq ++#define icmp_void icmp_hun.ih_void ++#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void ++#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu ++ union { ++ struct id_ts { ++ n_time its_otime; ++ n_time its_rtime; ++ n_time its_ttime; ++ } id_ts; ++ struct id_ip { ++ struct ip idi_ip; ++ /* options and then 64 bits of data */ ++ } id_ip; ++ uint32_t id_mask; ++ char id_data[1]; ++ } icmp_dun; ++#define icmp_otime icmp_dun.id_ts.its_otime ++#define icmp_rtime icmp_dun.id_ts.its_rtime ++#define icmp_ttime icmp_dun.id_ts.its_ttime ++#define icmp_ip icmp_dun.id_ip.idi_ip ++#define icmp_mask icmp_dun.id_mask ++#define icmp_data icmp_dun.id_data ++}; ++ ++/* ++ * Lower bounds on packet lengths for various types. ++ * For the error advice packets must first ensure that the ++ * packet is large enough to contain the returned ip header. ++ * Only then can we do the check to see if 64 bits of packet ++ * data have been returned, since we need to check the returned ++ * ip header length. 
++ */ ++#define ICMP_MINLEN 8 /* abs minimum */ ++#define ICMP_TSLEN (8 + 3 * sizeof(n_time)) /* timestamp */ ++#define ICMP_MASKLEN 12 /* address mask */ ++#define ICMP_ADVLENMIN (8 + sizeof(struct ip) + 8) /* min */ ++#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8) ++/* N.B.: must separately check that ip_hl >= 5 */ ++ ++/* ++ * Definition of type and code field values. ++ */ ++#define ICMP_ECHOREPLY 0 /* echo reply */ ++#define ICMP_UNREACH 3 /* dest unreachable, codes: */ ++#define ICMP_UNREACH_NET 0 /* bad net */ ++#define ICMP_UNREACH_HOST 1 /* bad host */ ++#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */ ++#define ICMP_UNREACH_PORT 3 /* bad port */ ++#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ ++#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */ ++#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ ++#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ ++#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */ ++#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */ ++#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ ++#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */ ++#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ ++#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */ ++#define ICMP_REDIRECT 5 /* shorter route, codes: */ ++#define ICMP_REDIRECT_NET 0 /* for network */ ++#define ICMP_REDIRECT_HOST 1 /* for host */ ++#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */ ++#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ ++#define ICMP_ECHO 8 /* echo service */ ++#define ICMP_ROUTERADVERT 9 /* router advertisement */ ++#define ICMP_ROUTERSOLICIT 10 /* router solicitation */ ++#define ICMP_TIMXCEED 11 /* time exceeded, code: */ ++#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ ++#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */ ++#define ICMP_PARAMPROB 12 /* ip header bad */ ++#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. 
absent */ ++#define ICMP_TSTAMP 13 /* timestamp request */ ++#define ICMP_TSTAMPREPLY 14 /* timestamp reply */ ++#define ICMP_IREQ 15 /* information request */ ++#define ICMP_IREQREPLY 16 /* information reply */ ++#define ICMP_MASKREQ 17 /* address mask request */ ++#define ICMP_MASKREPLY 18 /* address mask reply */ ++ ++#define ICMP_MAXTYPE 18 ++ ++#define ICMP_INFOTYPE(type) \ ++ ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ ++ (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \ ++ (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \ ++ (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ ++ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) ++ ++void icmp_init(Slirp *slirp); ++void icmp_cleanup(Slirp *slirp); ++void icmp_input(struct mbuf *, int); ++void icmp_forward_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message, struct in_addr *src); ++void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message); ++void icmp_reflect(struct mbuf *); ++void icmp_receive(struct socket *so); ++void icmp_detach(struct socket *so); ++ ++#endif +diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c +new file mode 100644 +index 0000000000..a29c324cce +--- /dev/null ++++ b/slirp/src/ip_input.c +@@ -0,0 +1,463 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. 
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 ++ * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP are ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp); ++static void ip_freef(Slirp *slirp, struct ipq *fp); ++static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev); ++static void ip_deq(register struct ipasfrag *p); ++ ++/* ++ * IP initialization: fill in IP protocol switch table. ++ * All protocols not implemented in kernel go to raw IP protocol handler. 
++ */ ++void ip_init(Slirp *slirp) ++{ ++ slirp->ipq.ip_link.next = slirp->ipq.ip_link.prev = &slirp->ipq.ip_link; /* empty reassembly queue: head links to itself */ ++ udp_init(slirp); ++ tcp_init(slirp); ++ icmp_init(slirp); ++} ++ ++void ip_cleanup(Slirp *slirp) ++{ ++ udp_cleanup(slirp); ++ tcp_cleanup(slirp); ++ icmp_cleanup(slirp); ++} ++ ++/* ++ * IP input routine. Validate the header (length, version, checksum) and ++ * byte-swap its fields. If fragmented, try to reassemble. Pass to next level. ++ */ ++void ip_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, TCPIPHDR_DELTA); ++ ++ register struct ip *ip; ++ int hlen; ++ ++ if (!slirp->in_enabled) { ++ goto bad; ++ } ++ ++ DEBUG_CALL("ip_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (m->m_len < sizeof(struct ip)) { ++ goto bad; ++ } ++ ++ ip = mtod(m, struct ip *); ++ ++ if (ip->ip_v != IPVERSION) { ++ goto bad; ++ } ++ ++ hlen = ip->ip_hl << 2; ++ if (hlen < sizeof(struct ip) || hlen > m->m_len) { /* min header length */ ++ goto bad; /* or packet too short */ ++ } ++ ++ /* keep ip header intact for ICMP reply ++ * ip->ip_sum = cksum(m, hlen); ++ * if (ip->ip_sum) { ++ */ ++ if (cksum(m, hlen)) { ++ goto bad; ++ } ++ ++ /* ++ * Convert fields to host representation. ++ */ ++ NTOHS(ip->ip_len); ++ if (ip->ip_len < hlen) { ++ goto bad; ++ } ++ NTOHS(ip->ip_id); ++ NTOHS(ip->ip_off); ++ ++ /* ++ * Check that the amount of data in the buffers ++ * is at least as much as the IP header would have us expect. ++ * Trim mbufs if longer than we expect. ++ * Drop packet if shorter than we expect. ++ */ ++ if (m->m_len < ip->ip_len) { ++ goto bad; ++ } ++ ++ /* Should drop packet if mbuf too long? hmmm... */ ++ if (m->m_len > ip->ip_len) ++ m_adj(m, ip->ip_len - m->m_len); ++ ++ /* check ip_ttl for a correct ICMP reply */ ++ if (ip->ip_ttl == 0) { ++ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, "ttl"); ++ goto bad; ++ } ++ ++ /* ++ * If offset or IP_MF are set, must reassemble. 
++ * Otherwise, nothing need be done. ++ * (We could look in the reassembly queue to see ++ * if the packet was previously fragmented, ++ * but it's not worth the time; just let them time out.) ++ * ++ * XXX This should fail, don't fragment yet ++ */ ++ if (ip->ip_off & ~IP_DF) { ++ register struct ipq *fp; ++ struct qlink *l; ++ /* ++ * Look for queue of fragments ++ * of this datagram. ++ */ ++ for (l = slirp->ipq.ip_link.next; l != &slirp->ipq.ip_link; ++ l = l->next) { ++ fp = container_of(l, struct ipq, ip_link); ++ if (ip->ip_id == fp->ipq_id && ++ ip->ip_src.s_addr == fp->ipq_src.s_addr && ++ ip->ip_dst.s_addr == fp->ipq_dst.s_addr && ++ ip->ip_p == fp->ipq_p) ++ goto found; ++ } ++ fp = NULL; ++ found: ++ ++ /* ++ * Adjust ip_len to not reflect header, ++ * record in the low bit of ip_tos whether more fragments are expected, ++ * convert offset of this to bytes. ++ */ ++ ip->ip_len -= hlen; ++ if (ip->ip_off & IP_MF) ++ ip->ip_tos |= 1; ++ else ++ ip->ip_tos &= ~1; ++ ++ ip->ip_off <<= 3; ++ ++ /* ++ * If datagram marked as having more fragments ++ * or if this is not the first fragment, ++ * attempt reassembly; if it succeeds, proceed. ++ */ ++ if (ip->ip_tos & 1 || ip->ip_off) { ++ ip = ip_reass(slirp, ip, fp); ++ if (ip == NULL) ++ return; ++ m = dtom(slirp, ip); ++ } else if (fp) ++ ip_freef(slirp, fp); ++ ++ } else ++ ip->ip_len -= hlen; ++ ++ /* ++ * Switch out to protocol's input routine. ++ */ ++ switch (ip->ip_p) { ++ case IPPROTO_TCP: ++ tcp_input(m, hlen, (struct socket *)NULL, AF_INET); ++ break; ++ case IPPROTO_UDP: ++ udp_input(m, hlen); ++ break; ++ case IPPROTO_ICMP: ++ icmp_input(m, hlen); ++ break; ++ default: ++ m_free(m); ++ } ++ return; ++bad: ++ m_free(m); ++} ++ ++#define iptofrag(P) ((struct ipasfrag *)(((char *)(P)) - sizeof(struct qlink))) ++#define fragtoip(P) ((struct ip *)(((char *)(P)) + sizeof(struct qlink))) ++/* ++ * Take incoming datagram fragment and try to ++ * reassemble it into whole datagram. 
If a chain for ++ * reassembly of this datagram already exists, then it ++ * is given as fp; otherwise have to make a chain. ++ */ ++static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) ++{ ++ register struct mbuf *m = dtom(slirp, ip); ++ register struct ipasfrag *q; ++ int hlen = ip->ip_hl << 2; ++ int i, next; ++ ++ DEBUG_CALL("ip_reass"); ++ DEBUG_ARG("ip = %p", ip); ++ DEBUG_ARG("fp = %p", fp); ++ DEBUG_ARG("m = %p", m); ++ ++ /* ++ * Presence of header sizes in mbufs ++ * would confuse code below. ++ * Fragment m_data is concatenated. ++ */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ ++ /* ++ * If first fragment to arrive, create a reassembly queue. ++ */ ++ if (fp == NULL) { ++ struct mbuf *t = m_get(slirp); ++ ++ if (t == NULL) { ++ goto dropfrag; ++ } ++ fp = mtod(t, struct ipq *); ++ insque(&fp->ip_link, &slirp->ipq.ip_link); ++ fp->ipq_ttl = IPFRAGTTL; ++ fp->ipq_p = ip->ip_p; ++ fp->ipq_id = ip->ip_id; ++ fp->frag_link.next = fp->frag_link.prev = &fp->frag_link; ++ fp->ipq_src = ip->ip_src; ++ fp->ipq_dst = ip->ip_dst; ++ q = (struct ipasfrag *)fp; ++ goto insert; ++ } ++ ++ /* ++ * Find a segment which begins after this one does. ++ */ ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = q->ipf_next) ++ if (q->ipf_off > ip->ip_off) ++ break; ++ ++ /* ++ * If there is a preceding segment, it may provide some of ++ * our data already. If so, drop the data from the incoming ++ * segment. If it provides all of our data, drop us. ++ */ ++ if (q->ipf_prev != &fp->frag_link) { ++ struct ipasfrag *pq = q->ipf_prev; ++ i = pq->ipf_off + pq->ipf_len - ip->ip_off; ++ if (i > 0) { ++ if (i >= ip->ip_len) ++ goto dropfrag; ++ m_adj(dtom(slirp, ip), i); ++ ip->ip_off += i; ++ ip->ip_len -= i; ++ } ++ } ++ ++ /* ++ * While we overlap succeeding segments trim them or, ++ * if they are completely covered, dequeue them. 
++ */ ++ while (q != (struct ipasfrag *)&fp->frag_link && ++ ip->ip_off + ip->ip_len > q->ipf_off) { ++ struct ipasfrag *prev; ++ i = (ip->ip_off + ip->ip_len) - q->ipf_off; ++ if (i < q->ipf_len) { ++ q->ipf_len -= i; ++ q->ipf_off += i; ++ m_adj(dtom(slirp, q), i); ++ break; ++ } ++ prev = q; ++ q = q->ipf_next; ++ ip_deq(prev); ++ m_free(dtom(slirp, prev)); ++ } ++ ++insert: ++ /* ++ * Stick new segment in its place; ++ * check for complete reassembly. ++ */ ++ ip_enq(iptofrag(ip), q->ipf_prev); ++ next = 0; ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = q->ipf_next) { ++ if (q->ipf_off != next) ++ return NULL; ++ next += q->ipf_len; ++ } ++ if (((struct ipasfrag *)(q->ipf_prev))->ipf_tos & 1) ++ return NULL; ++ ++ /* ++ * Reassembly is complete; concatenate fragments. ++ */ ++ q = fp->frag_link.next; ++ m = dtom(slirp, q); ++ int delta = (char *)q - (m->m_flags & M_EXT ? m->m_ext : m->m_dat); /* byte offset of the first fragment header from the start of m's buffer */ ++ ++ q = (struct ipasfrag *)q->ipf_next; ++ while (q != (struct ipasfrag *)&fp->frag_link) { ++ struct mbuf *t = dtom(slirp, q); ++ q = (struct ipasfrag *)q->ipf_next; ++ m_cat(m, t); ++ } ++ ++ /* ++ * Create header for new ip packet by ++ * modifying header of first packet; ++ * dequeue and discard fragment reassembly header. ++ * Make header visible. ++ */ ++ q = fp->frag_link.next; ++ ++ /* ++ * If the fragments concatenated to an mbuf that's bigger than the total ++ * size of the fragment and the mbuf was not already using an m_ext buffer, ++ * then an m_ext buffer was allocated. But fp->frag_link.next points to the old ++ * buffer (in the mbuf), so we must point ip into the new buffer. 
++ */ ++ if (m->m_flags & M_EXT) { ++ q = (struct ipasfrag *)(m->m_ext + delta); ++ } ++ ++ ip = fragtoip(q); ++ ip->ip_len = next; ++ ip->ip_tos &= ~1; ++ ip->ip_src = fp->ipq_src; ++ ip->ip_dst = fp->ipq_dst; ++ remque(&fp->ip_link); ++ m_free(dtom(slirp, fp)); ++ m->m_len += (ip->ip_hl << 2); ++ m->m_data -= (ip->ip_hl << 2); ++ ++ return ip; ++ ++dropfrag: ++ m_free(m); ++ return NULL; ++} ++ ++/* ++ * Free a fragment reassembly header and all ++ * associated datagrams. ++ */ ++static void ip_freef(Slirp *slirp, struct ipq *fp) ++{ ++ register struct ipasfrag *q, *p; ++ ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = p) { ++ p = q->ipf_next; ++ ip_deq(q); ++ m_free(dtom(slirp, q)); ++ } ++ remque(&fp->ip_link); ++ m_free(dtom(slirp, fp)); ++} ++ ++/* ++ * Put an ip fragment on a reassembly chain. ++ * Like insque, but pointers in middle of structure. ++ */ ++static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev) ++{ ++ DEBUG_CALL("ip_enq"); ++ DEBUG_ARG("prev = %p", prev); ++ p->ipf_prev = prev; ++ p->ipf_next = prev->ipf_next; ++ ((struct ipasfrag *)(prev->ipf_next))->ipf_prev = p; ++ prev->ipf_next = p; ++} ++ ++/* ++ * To ip_enq as remque is to insque. ++ */ ++static void ip_deq(register struct ipasfrag *p) ++{ ++ ((struct ipasfrag *)(p->ipf_prev))->ipf_next = p->ipf_next; ++ ((struct ipasfrag *)(p->ipf_next))->ipf_prev = p->ipf_prev; ++} ++ ++/* ++ * IP timer processing; ++ * if a timer expires on a reassembly ++ * queue, discard it. ++ */ ++void ip_slowtimo(Slirp *slirp) ++{ ++ struct qlink *l; ++ ++ DEBUG_CALL("ip_slowtimo"); ++ ++ l = slirp->ipq.ip_link.next; ++ ++ if (l == NULL) ++ return; ++ ++ while (l != &slirp->ipq.ip_link) { ++ struct ipq *fp = container_of(l, struct ipq, ip_link); ++ l = l->next; ++ if (--fp->ipq_ttl == 0) { ++ ip_freef(slirp, fp); ++ } ++ } ++} ++ ++/* ++ * Strip out IP options, at higher ++ * level protocol in the kernel. 
++ * Second argument is buffer to which options ++ * will be moved, and return value is their length. ++ * (XXX) should be deleted; last arg currently ignored. ++ */ ++void ip_stripoptions(register struct mbuf *m, struct mbuf *mopt) ++{ ++ register int i; ++ struct ip *ip = mtod(m, struct ip *); ++ register char *opts; ++ int olen; ++ ++ olen = (ip->ip_hl << 2) - sizeof(struct ip); ++ opts = (char *)(ip + 1); ++ i = m->m_len - (sizeof(struct ip) + olen); ++ memmove(opts, opts + olen, (unsigned)i); ++ m->m_len -= olen; ++ ++ ip->ip_hl = sizeof(struct ip) >> 2; ++} +diff --git a/slirp/src/ip_output.c b/slirp/src/ip_output.c +new file mode 100644 +index 0000000000..4f62605915 +--- /dev/null ++++ b/slirp/src/ip_output.c +@@ -0,0 +1,171 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 ++ * ip_output.c,v 1.9 1994/11/16 10:17:10 jkh Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP are ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++/* Number of packets queued before we start sending ++ * (to prevent allocing too many mbufs) */ ++#define IF_THRESH 10 ++ ++/* ++ * IP output. The packet in mbuf chain m contains a skeletal IP ++ * header (with len, off, ttl, proto, tos, src, dst). ++ * The mbuf chain containing the packet will be freed. ++ * The mbuf opt, if present, will not be freed. ++ */ ++int ip_output(struct socket *so, struct mbuf *m0) ++{ ++ Slirp *slirp = m0->slirp; ++ M_DUP_DEBUG(slirp, m0, 0, 0); ++ ++ register struct ip *ip; ++ register struct mbuf *m = m0; ++ register int hlen = sizeof(struct ip); ++ int len, off, error = 0; ++ ++ DEBUG_CALL("ip_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m0 = %p", m0); ++ ++ ip = mtod(m, struct ip *); ++ /* ++ * Fill in IP header. ++ */ ++ ip->ip_v = IPVERSION; ++ ip->ip_off &= IP_DF; ++ ip->ip_id = htons(slirp->ip_id++); ++ ip->ip_hl = hlen >> 2; ++ ++ /* ++ * If small enough for interface, can just send directly. 
++ */ ++ if ((uint16_t)ip->ip_len <= slirp->if_mtu) { ++ ip->ip_len = htons((uint16_t)ip->ip_len); ++ ip->ip_off = htons((uint16_t)ip->ip_off); ++ ip->ip_sum = 0; ++ ip->ip_sum = cksum(m, hlen); ++ ++ if_output(so, m); ++ goto done; ++ } ++ ++ /* ++ * Too large for interface; fragment if possible. ++ * Must be able to put at least 8 bytes per fragment. ++ */ ++ if (ip->ip_off & IP_DF) { ++ error = -1; ++ goto bad; ++ } ++ ++ len = (slirp->if_mtu - hlen) & ~7; /* ip databytes per packet */ ++ if (len < 8) { ++ error = -1; ++ goto bad; ++ } ++ ++ { ++ int mhlen, firstlen = len; ++ struct mbuf **mnext = &m->m_nextpkt; ++ ++ /* ++ * Loop through length of segment after first fragment, ++ * make new header and copy data of each part and link onto chain. ++ */ ++ m0 = m; ++ mhlen = sizeof(struct ip); ++ for (off = hlen + len; off < (uint16_t)ip->ip_len; off += len) { ++ register struct ip *mhip; ++ m = m_get(slirp); ++ if (m == NULL) { ++ error = -1; ++ goto sendorfree; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ mhip = mtod(m, struct ip *); ++ *mhip = *ip; ++ ++ m->m_len = mhlen; ++ mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); ++ if (ip->ip_off & IP_MF) ++ mhip->ip_off |= IP_MF; ++ if (off + len >= (uint16_t)ip->ip_len) ++ len = (uint16_t)ip->ip_len - off; ++ else ++ mhip->ip_off |= IP_MF; ++ mhip->ip_len = htons((uint16_t)(len + mhlen)); ++ ++ if (m_copy(m, m0, off, len) < 0) { ++ error = -1; ++ goto sendorfree; ++ } ++ ++ mhip->ip_off = htons((uint16_t)mhip->ip_off); ++ mhip->ip_sum = 0; ++ mhip->ip_sum = cksum(m, mhlen); ++ *mnext = m; ++ mnext = &m->m_nextpkt; ++ } ++ /* ++ * Update first fragment by trimming what's been copied out ++ * and updating header, then send each fragment (in order). 
++ */ ++ m = m0; ++ m_adj(m, hlen + firstlen - (uint16_t)ip->ip_len); ++ ip->ip_len = htons((uint16_t)m->m_len); ++ ip->ip_off = htons((uint16_t)(ip->ip_off | IP_MF)); ++ ip->ip_sum = 0; ++ ip->ip_sum = cksum(m, hlen); ++ sendorfree: ++ for (m = m0; m; m = m0) { ++ m0 = m->m_nextpkt; ++ m->m_nextpkt = NULL; ++ if (error == 0) ++ if_output(so, m); ++ else ++ m_free(m); ++ } ++ } ++ ++done: ++ return (error); ++ ++bad: ++ m_free(m0); ++ goto done; ++} +diff --git a/slirp/src/libslirp-version.h.in b/slirp/src/libslirp-version.h.in +new file mode 100644 +index 0000000000..faa6c85952 +--- /dev/null ++++ b/slirp/src/libslirp-version.h.in +@@ -0,0 +1,24 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef LIBSLIRP_VERSION_H_ ++#define LIBSLIRP_VERSION_H_ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define SLIRP_MAJOR_VERSION @SLIRP_MAJOR_VERSION@ ++#define SLIRP_MINOR_VERSION @SLIRP_MINOR_VERSION@ ++#define SLIRP_MICRO_VERSION @SLIRP_MICRO_VERSION@ ++#define SLIRP_VERSION_STRING @SLIRP_VERSION_STRING@ ++ ++#define SLIRP_CHECK_VERSION(major,minor,micro) \ ++ (SLIRP_MAJOR_VERSION > (major) || \ ++ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION > (minor)) || \ ++ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION == (minor) && \ ++ SLIRP_MICRO_VERSION >= (micro))) ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* LIBSLIRP_VERSION_H_ */ +diff --git a/slirp/src/libslirp.h b/slirp/src/libslirp.h +new file mode 100644 +index 0000000000..5760d53cea +--- /dev/null ++++ b/slirp/src/libslirp.h +@@ -0,0 +1,236 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef LIBSLIRP_H ++#define LIBSLIRP_H ++ ++#include ++#include ++#include ++ ++#ifdef _WIN32 ++#include ++#include ++#include ++#else ++#include ++#include ++#endif ++ ++#include "libslirp-version.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* Opaque structure containing the slirp state */ ++typedef struct Slirp Slirp; ++ ++/* Flags passed to SlirpAddPollCb and to be 
returned by SlirpGetREventsCb. */ ++enum { ++ SLIRP_POLL_IN = 1 << 0, ++ SLIRP_POLL_OUT = 1 << 1, ++ SLIRP_POLL_PRI = 1 << 2, ++ SLIRP_POLL_ERR = 1 << 3, ++ SLIRP_POLL_HUP = 1 << 4, ++}; ++ ++typedef ssize_t (*SlirpReadCb)(void *buf, size_t len, void *opaque); ++typedef ssize_t (*SlirpWriteCb)(const void *buf, size_t len, void *opaque); ++typedef void (*SlirpTimerCb)(void *opaque); ++typedef int (*SlirpAddPollCb)(int fd, int events, void *opaque); ++typedef int (*SlirpGetREventsCb)(int idx, void *opaque); ++ ++/* ++ * Callbacks from slirp, to be set by the application. ++ * ++ * The opaque parameter is set to the opaque pointer given in the slirp_new / ++ * slirp_init call. ++ */ ++typedef struct SlirpCb { ++ /* ++ * Send an ethernet frame to the guest network. The opaque parameter is the ++ * one given to slirp_init(). If the guest is not ready to receive a frame, ++ * the function can just drop the data. TCP will then handle retransmissions ++ * at a lower pace. ++ * <0 reports an IO error. ++ */ ++ SlirpWriteCb send_packet; ++ /* Print a message for an error due to guest misbehavior. 
*/ ++ void (*guest_error)(const char *msg, void *opaque); ++ /* Return the virtual clock value in nanoseconds */ ++ int64_t (*clock_get_ns)(void *opaque); ++ /* Create a new timer with the given callback and opaque data */ ++ void *(*timer_new)(SlirpTimerCb cb, void *cb_opaque, void *opaque); ++ /* Remove and free a timer */ ++ void (*timer_free)(void *timer, void *opaque); ++ /* Modify a timer to expire at @expire_time (ms) */ ++ void (*timer_mod)(void *timer, int64_t expire_time, void *opaque); ++ /* Register a fd for future polling */ ++ void (*register_poll_fd)(int fd, void *opaque); ++ /* Unregister a fd */ ++ void (*unregister_poll_fd)(int fd, void *opaque); ++ /* Kick the io-thread, to signal that new events may be processed */ ++ void (*notify)(void *opaque); ++} SlirpCb; ++ ++#define SLIRP_CONFIG_VERSION_MIN 1 ++#define SLIRP_CONFIG_VERSION_MAX 3 ++ ++typedef struct SlirpConfig { ++ /* Version must be provided */ ++ uint32_t version; ++ /* ++ * Fields introduced in SlirpConfig version 1 begin ++ */ ++ int restricted; ++ bool in_enabled; ++ struct in_addr vnetwork; ++ struct in_addr vnetmask; ++ struct in_addr vhost; ++ bool in6_enabled; ++ struct in6_addr vprefix_addr6; ++ uint8_t vprefix_len; ++ struct in6_addr vhost6; ++ const char *vhostname; ++ const char *tftp_server_name; ++ const char *tftp_path; ++ const char *bootfile; ++ struct in_addr vdhcp_start; ++ struct in_addr vnameserver; ++ struct in6_addr vnameserver6; ++ const char **vdnssearch; ++ const char *vdomainname; ++ /* Default: IF_MTU_DEFAULT */ ++ size_t if_mtu; ++ /* Default: IF_MRU_DEFAULT */ ++ size_t if_mru; ++ /* Prohibit connecting to 127.0.0.1:* */ ++ bool disable_host_loopback; ++ /* ++ * Enable emulation code (*warning*: this code isn't safe, it is not ++ * recommended to enable it) ++ */ ++ bool enable_emu; ++ /* ++ * Fields introduced in SlirpConfig version 2 begin ++ */ ++ struct sockaddr_in *outbound_addr; ++ struct sockaddr_in6 *outbound_addr6; ++ /* ++ * Fields introduced in 
SlirpConfig version 3 begin ++ */ ++ bool disable_dns; /* slirp will not redirect/serve any DNS packet */ ++} SlirpConfig; ++ ++/* Create a new instance of a slirp stack */ ++Slirp *slirp_new(const SlirpConfig *cfg, const SlirpCb *callbacks, ++ void *opaque); ++/* slirp_init is deprecated in favor of slirp_new */ ++Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, ++ struct in_addr vnetmask, struct in_addr vhost, ++ bool in6_enabled, struct in6_addr vprefix_addr6, ++ uint8_t vprefix_len, struct in6_addr vhost6, ++ const char *vhostname, const char *tftp_server_name, ++ const char *tftp_path, const char *bootfile, ++ struct in_addr vdhcp_start, struct in_addr vnameserver, ++ struct in6_addr vnameserver6, const char **vdnssearch, ++ const char *vdomainname, const SlirpCb *callbacks, ++ void *opaque); ++/* Shut down an instance of a slirp stack */ ++void slirp_cleanup(Slirp *slirp); ++ ++/* This is called by the application when it is about to sleep through poll(). ++ * *timeout is set to the amount of virtual time (in ms) that the application intends to ++ * wait (UINT32_MAX if infinite). slirp_pollfds_fill updates it according to ++ * e.g. TCP timers, so the application knows it should sleep a smaller amount of ++ * time. slirp_pollfds_fill calls add_poll for each file descriptor ++ * that should be monitored along the sleep. The opaque pointer is passed as ++ * such to add_poll, and add_poll returns an index. */ ++void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, ++ SlirpAddPollCb add_poll, void *opaque); ++ ++/* This is called by the application after sleeping, to report which file ++ * descriptors are available. slirp_pollfds_poll calls get_revents on each file ++ * descriptor, giving it the index that add_poll returned during the ++ * slirp_pollfds_fill call, to know whether the descriptor is available for ++ * read/write/etc. (SLIRP_POLL_*) ++ * select_error should be passed 1 if poll() returned an error. 
*/ ++void slirp_pollfds_poll(Slirp *slirp, int select_error, ++ SlirpGetREventsCb get_revents, void *opaque); ++ ++/* This is called by the application when the guest emits a packet on the ++ * guest network, to be interpreted by slirp. */ ++void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); ++ ++/* These set up / remove port forwarding between a host port in the real world ++ * and the guest network. */ ++int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port, struct in_addr guest_addr, int guest_port); ++int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port); ++ ++#define SLIRP_HOSTFWD_UDP 1 ++#define SLIRP_HOSTFWD_V6ONLY 2 ++int slirp_add_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *gaddr, socklen_t gaddrlen, ++ int flags); ++int slirp_remove_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ int flags); ++ ++/* Set up port forwarding between a port in the guest network and a ++ * command running on the host */ ++int slirp_add_exec(Slirp *slirp, const char *cmdline, ++ struct in_addr *guest_addr, int guest_port); ++/* Set up port forwarding between a port in the guest network and a ++ * Unix port on the host */ ++int slirp_add_unix(Slirp *slirp, const char *unixsock, ++ struct in_addr *guest_addr, int guest_port); ++/* Set up port forwarding between a port in the guest network and a ++ * callback that will receive the data coming from the port */ ++int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, ++ struct in_addr *guest_addr, int guest_port); ++ ++/* TODO: rather identify a guestfwd through an opaque pointer instead of through ++ * the guest_addr */ ++ ++/* This is called by the application for a guestfwd, to determine how much data ++ * can be received by the forwarded port through a call to slirp_socket_recv. 
*/ ++size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++/* This is called by the application for a guestfwd, to provide the data to be ++ * sent on the forwarded port */ ++void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, ++ const uint8_t *buf, int size); ++ ++/* Remove entries added by slirp_add_exec, slirp_add_unix or slirp_add_guestfwd */ ++int slirp_remove_guestfwd(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++ ++/* Return a human-readable state of the slirp stack */ ++char *slirp_connection_info(Slirp *slirp); ++ ++/* Return a human-readable state of the NDP/ARP tables */ ++char *slirp_neighbor_info(Slirp *slirp); ++ ++/* Save the slirp state through the write_cb. The opaque pointer is passed as ++ * such to the write_cb. */ ++void slirp_state_save(Slirp *s, SlirpWriteCb write_cb, void *opaque); ++ ++/* Returns the version of the slirp state, to be saved along the state */ ++int slirp_state_version(void); ++ ++/* Load the slirp state through the read_cb. The opaque pointer is passed as ++ * such to the read_cb. The version should be given as it was obtained from ++ * slirp_state_version when slirp_state_save was called. 
*/ ++int slirp_state_load(Slirp *s, int version_id, SlirpReadCb read_cb, ++ void *opaque); ++ ++/* Return the version of the slirp implementation */ ++const char *slirp_version_string(void); ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* LIBSLIRP_H */ +diff --git a/slirp/src/libslirp.map b/slirp/src/libslirp.map +new file mode 100644 +index 0000000000..792b0a94ab +--- /dev/null ++++ b/slirp/src/libslirp.map +@@ -0,0 +1,36 @@ ++SLIRP_4.0 { ++global: ++ slirp_add_exec; ++ slirp_add_guestfwd; ++ slirp_add_hostfwd; ++ slirp_cleanup; ++ slirp_connection_info; ++ slirp_init; ++ slirp_input; ++ slirp_pollfds_fill; ++ slirp_pollfds_poll; ++ slirp_remove_hostfwd; ++ slirp_socket_can_recv; ++ slirp_socket_recv; ++ slirp_state_load; ++ slirp_state_save; ++ slirp_state_version; ++ slirp_version_string; ++local: ++ *; ++}; ++ ++SLIRP_4.1 { ++ slirp_new; ++} SLIRP_4.0; ++ ++SLIRP_4.2 { ++ slirp_add_unix; ++ slirp_remove_guestfwd; ++} SLIRP_4.1; ++ ++SLIRP_4.5 { ++ slirp_add_hostxfwd; ++ slirp_remove_hostxfwd; ++ slirp_neighbor_info; ++} SLIRP_4.2; +diff --git a/slirp/src/main.h b/slirp/src/main.h +new file mode 100644 +index 0000000000..3b3f883703 +--- /dev/null ++++ b/slirp/src/main.h +@@ -0,0 +1,16 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef SLIRP_MAIN_H ++#define SLIRP_MAIN_H ++ ++extern unsigned curtime; ++extern struct in_addr loopback_addr; ++extern unsigned long loopback_mask; ++ ++int if_encap(Slirp *slirp, struct mbuf *ifm); ++ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags); ++ ++#endif +diff --git a/slirp/src/mbuf.c b/slirp/src/mbuf.c +new file mode 100644 +index 0000000000..36864a401f +--- /dev/null ++++ b/slirp/src/mbuf.c +@@ -0,0 +1,281 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski ++ */ ++ ++/* ++ * mbuf's in SLiRP are much simpler than the real mbufs in ++ * FreeBSD. 
They are fixed size, determined by the MTU, ++ * so that one whole packet can fit. Mbuf's cannot be ++ * chained together. If there's more data than the mbuf ++ * could hold, an external g_malloced buffer is pointed to ++ * by m_ext (and the data pointers) and M_EXT is set in ++ * the flags ++ */ ++ ++#include "slirp.h" ++ ++#define MBUF_THRESH 30 ++ ++/* ++ * Find a nice value for msize ++ */ ++#define SLIRP_MSIZE(mtu) \ ++ (offsetof(struct mbuf, m_dat) + IF_MAXLINKHDR + TCPIPHDR_DELTA + (mtu)) ++ ++void m_init(Slirp *slirp) ++{ ++ slirp->m_freelist.qh_link = slirp->m_freelist.qh_rlink = &slirp->m_freelist; ++ slirp->m_usedlist.qh_link = slirp->m_usedlist.qh_rlink = &slirp->m_usedlist; ++} ++ ++static void m_cleanup_list(struct quehead *list_head) ++{ ++ struct mbuf *m, *next; ++ ++ m = (struct mbuf *)list_head->qh_link; ++ while ((struct quehead *)m != list_head) { ++ next = m->m_next; ++ if (m->m_flags & M_EXT) { ++ g_free(m->m_ext); ++ } ++ g_free(m); ++ m = next; ++ } ++ list_head->qh_link = list_head; ++ list_head->qh_rlink = list_head; ++} ++ ++void m_cleanup(Slirp *slirp) ++{ ++ m_cleanup_list(&slirp->m_usedlist); ++ m_cleanup_list(&slirp->m_freelist); ++ m_cleanup_list(&slirp->if_batchq); ++ m_cleanup_list(&slirp->if_fastq); ++} ++ ++/* ++ * Get an mbuf from the free list, if there are none ++ * allocate one ++ * ++ * Because fragmentation can occur if we alloc new mbufs and ++ * free old mbufs, we mark all mbufs above mbuf_thresh as M_DOFREE, ++ * which tells m_free to actually g_free() it ++ */ ++struct mbuf *m_get(Slirp *slirp) ++{ ++ register struct mbuf *m; ++ int flags = 0; ++ ++ DEBUG_CALL("m_get"); ++ ++ if (MBUF_DEBUG || slirp->m_freelist.qh_link == &slirp->m_freelist) { ++ m = g_malloc(SLIRP_MSIZE(slirp->if_mtu)); ++ slirp->mbuf_alloced++; ++ if (MBUF_DEBUG || slirp->mbuf_alloced > MBUF_THRESH) ++ flags = M_DOFREE; ++ m->slirp = slirp; ++ } else { ++ m = (struct mbuf *)slirp->m_freelist.qh_link; ++ remque(m); ++ } ++ ++ /* Insert it in the used 
list */ ++ insque(m, &slirp->m_usedlist); ++ m->m_flags = (flags | M_USEDLIST); ++ ++ /* Initialise it */ ++ m->m_size = SLIRP_MSIZE(slirp->if_mtu) - offsetof(struct mbuf, m_dat); ++ m->m_data = m->m_dat; ++ m->m_len = 0; ++ m->m_nextpkt = NULL; ++ m->m_prevpkt = NULL; ++ m->resolution_requested = false; ++ m->expiration_date = (uint64_t)-1; ++ DEBUG_ARG("m = %p", m); ++ return m; ++} ++ ++void m_free(struct mbuf *m) ++{ ++ DEBUG_CALL("m_free"); ++ DEBUG_ARG("m = %p", m); ++ ++ if (m) { ++ /* Remove from m_usedlist */ ++ if (m->m_flags & M_USEDLIST) ++ remque(m); ++ ++ /* If it's M_EXT, free() it */ ++ if (m->m_flags & M_EXT) { ++ g_free(m->m_ext); ++ m->m_flags &= ~M_EXT; ++ } ++ /* ++ * Either free() it or put it on the free list ++ */ ++ if (m->m_flags & M_DOFREE) { ++ m->slirp->mbuf_alloced--; ++ g_free(m); ++ } else if ((m->m_flags & M_FREELIST) == 0) { ++ insque(m, &m->slirp->m_freelist); ++ m->m_flags = M_FREELIST; /* Clobber other flags */ ++ } ++ } /* if(m) */ ++} ++ ++/* ++ * Copy data from one mbuf to the end of ++ * the other.. if result is too big for one mbuf, allocate ++ * an M_EXT data segment ++ */ ++void m_cat(struct mbuf *m, struct mbuf *n) ++{ ++ /* ++ * If there's no room, realloc ++ */ ++ if (M_FREEROOM(m) < n->m_len) ++ m_inc(m, m->m_len + n->m_len); ++ ++ memcpy(m->m_data + m->m_len, n->m_data, n->m_len); ++ m->m_len += n->m_len; ++ ++ m_free(n); ++} ++ ++ ++/* make m 'size' bytes large from m_data */ ++void m_inc(struct mbuf *m, int size) ++{ ++ int gapsize; ++ ++ /* some compilers throw up on gotos. This one we can fake. 
*/ ++ if (M_ROOM(m) > size) { ++ return; ++ } ++ ++ if (m->m_flags & M_EXT) { ++ gapsize = m->m_data - m->m_ext; ++ m->m_ext = g_realloc(m->m_ext, size + gapsize); ++ } else { ++ gapsize = m->m_data - m->m_dat; ++ m->m_ext = g_malloc(size + gapsize); ++ memcpy(m->m_ext, m->m_dat, m->m_size); ++ m->m_flags |= M_EXT; ++ } ++ ++ m->m_data = m->m_ext + gapsize; ++ m->m_size = size + gapsize; ++} ++ ++ ++void m_adj(struct mbuf *m, int len) ++{ ++ if (m == NULL) ++ return; ++ if (len >= 0) { ++ /* Trim from head */ ++ m->m_data += len; ++ m->m_len -= len; ++ } else { ++ /* Trim from tail */ ++ len = -len; ++ m->m_len -= len; ++ } ++} ++ ++ ++/* ++ * Copy len bytes from m, starting off bytes into n ++ */ ++int m_copy(struct mbuf *n, struct mbuf *m, int off, int len) ++{ ++ if (len > M_FREEROOM(n)) ++ return -1; ++ ++ memcpy((n->m_data + n->m_len), (m->m_data + off), len); ++ n->m_len += len; ++ return 0; ++} ++ ++ ++/* ++ * Given a pointer into an mbuf, return the mbuf ++ * XXX This is a kludge, I should eliminate the need for it ++ * Fortunately, it's not used often ++ */ ++struct mbuf *dtom(Slirp *slirp, void *dat) ++{ ++ struct mbuf *m; ++ ++ DEBUG_CALL("dtom"); ++ DEBUG_ARG("dat = %p", dat); ++ ++ /* bug corrected for M_EXT buffers */ ++ for (m = (struct mbuf *)slirp->m_usedlist.qh_link; ++ (struct quehead *)m != &slirp->m_usedlist; m = m->m_next) { ++ if (m->m_flags & M_EXT) { ++ if ((char *)dat >= m->m_ext && (char *)dat < (m->m_ext + m->m_size)) ++ return m; ++ } else { ++ if ((char *)dat >= m->m_dat && (char *)dat < (m->m_dat + m->m_size)) ++ return m; ++ } ++ } ++ ++ DEBUG_ERROR("dtom failed"); ++ ++ return (struct mbuf *)0; ++} ++ ++/* ++ * Duplicate the mbuf ++ * ++ * copy_header specifies whether the bytes before m_data should also be copied. ++ * header_size specifies how many bytes are to be reserved before m_data. 
++ */ ++struct mbuf *m_dup(Slirp *slirp, struct mbuf *m, ++ bool copy_header, ++ size_t header_size) ++{ ++ struct mbuf *n; ++ int mcopy_result; ++ ++ /* The previous mbuf was supposed to have it already, we can check it along ++ * the way */ ++ assert(M_ROOMBEFORE(m) >= header_size); ++ ++ n = m_get(slirp); ++ m_inc(n, m->m_len + header_size); ++ ++ if (copy_header) { ++ m->m_len += header_size; ++ m->m_data -= header_size; ++ mcopy_result = m_copy(n, m, 0, m->m_len + header_size); ++ n->m_data += header_size; ++ m->m_len -= header_size; ++ m->m_data += header_size; ++ } else { ++ n->m_data += header_size; ++ mcopy_result = m_copy(n, m, 0, m->m_len); ++ } ++ g_assert(mcopy_result == 0); ++ ++ return n; ++} ++ ++void *mtod_check(struct mbuf *m, size_t len) ++{ ++ if (m->m_len >= len) { ++ return m->m_data; ++ } ++ ++ DEBUG_ERROR("mtod failed"); ++ ++ return NULL; ++} ++ ++void *m_end(struct mbuf *m) ++{ ++ return m->m_data + m->m_len; ++} +diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h +new file mode 100644 +index 0000000000..34e697a914 +--- /dev/null ++++ b/slirp/src/mbuf.h +@@ -0,0 +1,192 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)mbuf.h 8.3 (Berkeley) 1/21/94 ++ * mbuf.h,v 1.9 1994/11/14 13:54:20 bde Exp ++ */ ++ ++#ifndef MBUF_H ++#define MBUF_H ++ ++/* ++ * Macros for type conversion ++ * mtod(m,t) - convert mbuf pointer to data pointer of correct type ++ */ ++#define mtod(m, t) ((t)(m)->m_data) ++ ++/* XXX About mbufs for slirp: ++ * Only one mbuf is ever used in a chain, for each "cell" of data. ++ * m_nextpkt points to the next packet, if fragmented. ++ * If the data is too large, the M_EXT is used, and a larger block ++ * is alloced. Therefore, m_free[m] must check for M_EXT and if set ++ * free the m_ext. This is inefficient memory-wise, but who cares. ++ */ ++ ++/* ++ * mbufs allow to have a gap between the start of the allocated buffer (m_ext if ++ * M_EXT is set, m_dat otherwise) and the in-use data: ++ * ++ * |--gapsize----->|---m_len-------> ++ * |----------m_size------------------------------> ++ * |----M_ROOM--------------------> ++ * |-M_FREEROOM--> ++ * ++ * ^ ^ ^ ++ * m_dat/m_ext m_data end of buffer ++ */ ++ ++/* ++ * How much room is in the mbuf, from m_data to the end of the mbuf ++ */ ++#define M_ROOM(m) \ ++ ((m->m_flags & M_EXT) ? 
(((m)->m_ext + (m)->m_size) - (m)->m_data) : \ ++ (((m)->m_dat + (m)->m_size) - (m)->m_data)) ++ ++/* ++ * How much free room there is ++ */ ++#define M_FREEROOM(m) (M_ROOM(m) - (m)->m_len) ++ ++/* ++ * How much free room there is before m_data ++ */ ++#define M_ROOMBEFORE(m) \ ++ (((m)->m_flags & M_EXT) ? (m)->m_data - (m)->m_ext \ ++ : (m)->m_data - (m)->m_dat) ++ ++struct mbuf { ++ /* XXX should union some of these! */ ++ /* header at beginning of each mbuf: */ ++ struct mbuf *m_next; /* Linked list of mbufs */ ++ struct mbuf *m_prev; ++ struct mbuf *m_nextpkt; /* Next packet in queue/record */ ++ struct mbuf *m_prevpkt; /* Flags aren't used in the output queue */ ++ int m_flags; /* Misc flags */ ++ ++ int m_size; /* Size of mbuf, from m_dat or m_ext */ ++ struct socket *m_so; ++ ++ char *m_data; /* Current location of data */ ++ int m_len; /* Amount of data in this mbuf, from m_data */ ++ ++ Slirp *slirp; ++ bool resolution_requested; ++ uint64_t expiration_date; ++ char *m_ext; ++ /* start of dynamic buffer area, must be last element */ ++ char m_dat[]; ++}; ++ ++#define ifq_prev m_prev ++#define ifq_next m_next ++#define ifs_prev m_prevpkt ++#define ifs_next m_nextpkt ++#define ifq_so m_so ++ ++#define M_EXT 0x01 /* m_ext points to more (malloced) data */ ++#define M_FREELIST 0x02 /* mbuf is on free list */ ++#define M_USEDLIST 0x04 /* XXX mbuf is on used list (for dtom()) */ ++#define M_DOFREE \ ++ 0x08 /* when m_free is called on the mbuf, free() \ ++ * it rather than putting it on the free list */ ++ ++void m_init(Slirp *); ++void m_cleanup(Slirp *slirp); ++struct mbuf *m_get(Slirp *); ++void m_free(struct mbuf *); ++void m_cat(register struct mbuf *, register struct mbuf *); ++void m_inc(struct mbuf *, int); ++void m_adj(struct mbuf *, int); ++int m_copy(struct mbuf *, struct mbuf *, int, int); ++struct mbuf *m_dup(Slirp *slirp, struct mbuf *m, bool copy_header, size_t header_size); ++struct mbuf *dtom(Slirp *, void *); ++void *mtod_check(struct mbuf *, 
size_t len); ++void *m_end(struct mbuf *); ++ ++static inline void ifs_init(struct mbuf *ifm) ++{ ++ ifm->ifs_next = ifm->ifs_prev = ifm; ++} ++ ++#ifdef DEBUG ++# define MBUF_DEBUG 1 ++#else ++# ifdef HAVE_VALGRIND ++# include ++# define MBUF_DEBUG RUNNING_ON_VALGRIND ++# else ++# define MBUF_DEBUG 0 ++# endif ++#endif ++ ++/* ++ * When a function is given an mbuf as well as the responsibility to free it, we ++ * want valgrind etc. to properly identify the new responsible for the ++ * free. Achieve this by making a new copy. For instance: ++ * ++ * f0(void) { ++ * struct mbuf *m = m_get(slirp); ++ * [...] ++ * switch (something) { ++ * case 1: ++ * f1(m); ++ * break; ++ * case 2: ++ * f2(m); ++ * break; ++ * [...] ++ * } ++ * } ++ * ++ * f1(struct mbuf *m) { ++ * M_DUP_DEBUG(m->slirp, m); ++ * [...] ++ * m_free(m); // but author of f1 might be forgetting this ++ * } ++ * ++ * f0 transfers the freeing responsibility to f1, f2, etc. Without the ++ * M_DUP_DEBUG call in f1, valgrind would tell us that it is f0 where the buffer ++ * was allocated, but it's difficult to know whether a leak is actually in f0, ++ * or in f1, or in f2, etc. Duplicating the mbuf in M_DUP_DEBUG each time the ++ * responsibility is transferred allows to immediately know where the leak ++ * actually is. ++ */ ++#define M_DUP_DEBUG(slirp, m, copy_header, header_size) do { \ ++ if (MBUF_DEBUG) { \ ++ struct mbuf *__n; \ ++ __n = m_dup((slirp), (m), (copy_header), (header_size)); \ ++ m_free(m); \ ++ (m) = __n; \ ++ } else { \ ++ (void) (slirp); (void) (copy_header); \ ++ g_assert(M_ROOMBEFORE(m) >= (header_size)); \ ++ } \ ++} while(0) ++ ++#endif +diff --git a/slirp/src/misc.c b/slirp/src/misc.c +new file mode 100644 +index 0000000000..48f180be43 +--- /dev/null ++++ b/slirp/src/misc.c +@@ -0,0 +1,440 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++#ifdef G_OS_UNIX ++#include ++#endif ++ ++inline void insque(void *a, void *b) ++{ ++ register struct quehead *element = (struct quehead *)a; ++ register struct quehead *head = (struct quehead *)b; ++ element->qh_link = head->qh_link; ++ head->qh_link = (struct quehead *)element; ++ element->qh_rlink = (struct quehead *)head; ++ ((struct quehead *)(element->qh_link))->qh_rlink = ++ (struct quehead *)element; ++} ++ ++inline void remque(void *a) ++{ ++ register struct quehead *element = (struct quehead *)a; ++ ((struct quehead *)(element->qh_link))->qh_rlink = element->qh_rlink; ++ ((struct quehead *)(element->qh_rlink))->qh_link = element->qh_link; ++ element->qh_rlink = NULL; ++} ++ ++/* TODO: IPv6 */ ++struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, ++ void *opaque, struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = g_new0(struct gfwd_list, 1); ++ ++ f->write_cb = write_cb; ++ f->opaque = opaque; ++ f->ex_fport = port; ++ f->ex_addr = addr; ++ f->ex_next = *ex_ptr; ++ *ex_ptr = f; ++ ++ return f; ++} ++ ++struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, ++ struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); ++ ++ f->ex_exec = g_strdup(cmdline); ++ ++ return f; ++} ++ ++struct gfwd_list *add_unix(struct gfwd_list **ex_ptr, const char *unixsock, ++ struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); ++ ++ f->ex_unix = g_strdup(unixsock); ++ ++ return f; ++} ++ ++int remove_guestfwd(struct gfwd_list **ex_ptr, struct in_addr addr, int port) ++{ ++ for (; *ex_ptr != NULL; ex_ptr = &((*ex_ptr)->ex_next)) { ++ struct gfwd_list *f = *ex_ptr; ++ if (f->ex_addr.s_addr == addr.s_addr && f->ex_fport == port) { ++ *ex_ptr = f->ex_next; ++ g_free(f->ex_exec); ++ g_free(f); ++ return 0; ++ } ++ } ++ return -1; ++} ++ ++static int slirp_socketpair_with_oob(int sv[2]) ++{ ++ 
struct sockaddr_in addr = { ++ .sin_family = AF_INET, ++ .sin_port = 0, ++ .sin_addr.s_addr = INADDR_ANY, ++ }; ++ socklen_t addrlen = sizeof(addr); ++ int ret, s; ++ ++ sv[1] = -1; ++ s = slirp_socket(AF_INET, SOCK_STREAM, 0); ++ if (s < 0 || bind(s, (struct sockaddr *)&addr, addrlen) < 0 || ++ listen(s, 1) < 0 || ++ getsockname(s, (struct sockaddr *)&addr, &addrlen) < 0) { ++ goto err; ++ } ++ ++ sv[1] = slirp_socket(AF_INET, SOCK_STREAM, 0); ++ if (sv[1] < 0) { ++ goto err; ++ } ++ /* ++ * This connect won't block because we've already listen()ed on ++ * the server end (even though we won't accept() the connection ++ * until later on). ++ */ ++ do { ++ ret = connect(sv[1], (struct sockaddr *)&addr, addrlen); ++ } while (ret < 0 && errno == EINTR); ++ if (ret < 0) { ++ goto err; ++ } ++ ++ do { ++ sv[0] = accept(s, (struct sockaddr *)&addr, &addrlen); ++ } while (sv[0] < 0 && errno == EINTR); ++ if (sv[0] < 0) { ++ goto err; ++ } ++ ++ closesocket(s); ++ return 0; ++ ++err: ++ g_critical("slirp_socketpair(): %s", strerror(errno)); ++ if (s >= 0) { ++ closesocket(s); ++ } ++ if (sv[1] >= 0) { ++ closesocket(sv[1]); ++ } ++ return -1; ++} ++ ++static void fork_exec_child_setup(gpointer data) ++{ ++#ifndef _WIN32 ++ setsid(); ++ ++ /* Unblock all signals and leave our exec()-ee to block what it wants */ ++ sigset_t ss; ++ sigemptyset(&ss); ++ sigprocmask(SIG_SETMASK, &ss, NULL); ++ ++ /* POSIX is obnoxious about SIGCHLD specifically across exec() */ ++ signal(SIGCHLD, SIG_DFL); ++#endif ++} ++ ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ ++#if !GLIB_CHECK_VERSION(2, 58, 0) ++typedef struct SlirpGSpawnFds { ++ GSpawnChildSetupFunc child_setup; ++ gpointer user_data; ++ gint stdin_fd; ++ gint stdout_fd; ++ gint stderr_fd; ++} SlirpGSpawnFds; ++ ++static inline void slirp_gspawn_fds_setup(gpointer user_data) ++{ ++ SlirpGSpawnFds *q = (SlirpGSpawnFds *)user_data; ++ ++ dup2(q->stdin_fd, 0); ++ dup2(q->stdout_fd, 1); ++ 
dup2(q->stderr_fd, 2); ++ q->child_setup(q->user_data); ++} ++#endif ++ ++static inline gboolean ++g_spawn_async_with_fds_slirp(const gchar *working_directory, gchar **argv, ++ gchar **envp, GSpawnFlags flags, ++ GSpawnChildSetupFunc child_setup, ++ gpointer user_data, GPid *child_pid, gint stdin_fd, ++ gint stdout_fd, gint stderr_fd, GError **error) ++{ ++#if GLIB_CHECK_VERSION(2, 58, 0) ++ return g_spawn_async_with_fds(working_directory, argv, envp, flags, ++ child_setup, user_data, child_pid, stdin_fd, ++ stdout_fd, stderr_fd, error); ++#else ++ SlirpGSpawnFds setup = { ++ .child_setup = child_setup, ++ .user_data = user_data, ++ .stdin_fd = stdin_fd, ++ .stdout_fd = stdout_fd, ++ .stderr_fd = stderr_fd, ++ }; ++ ++ return g_spawn_async(working_directory, argv, envp, flags, ++ slirp_gspawn_fds_setup, &setup, child_pid, error); ++#endif ++} ++ ++#define g_spawn_async_with_fds(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) \ ++ g_spawn_async_with_fds_slirp(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) ++ ++#pragma GCC diagnostic pop ++ ++int fork_exec(struct socket *so, const char *ex) ++{ ++ GError *err = NULL; ++ gint argc = 0; ++ gchar **argv = NULL; ++ int opt, sp[2]; ++ ++ DEBUG_CALL("fork_exec"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("ex = %p", ex); ++ ++ if (slirp_socketpair_with_oob(sp) < 0) { ++ return 0; ++ } ++ ++ if (!g_shell_parse_argv(ex, &argc, &argv, &err)) { ++ g_critical("fork_exec invalid command: %s\nerror: %s", ex, err->message); ++ g_error_free(err); ++ return 0; ++ } ++ ++ g_spawn_async_with_fds(NULL /* cwd */, argv, NULL /* env */, ++ G_SPAWN_SEARCH_PATH, fork_exec_child_setup, ++ NULL /* data */, NULL /* child_pid */, sp[1], sp[1], ++ sp[1], &err); ++ g_strfreev(argv); ++ ++ if (err) { ++ g_critical("fork_exec: %s", err->message); ++ g_error_free(err); ++ closesocket(sp[0]); ++ closesocket(sp[1]); ++ return 0; ++ } ++ ++ so->s = sp[0]; ++ closesocket(sp[1]); ++ slirp_socket_set_fast_reuse(so->s); ++ opt = 1; ++ setsockopt(so->s, 
SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_set_nonblock(so->s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ return 1; ++} ++ ++int open_unix(struct socket *so, const char *unixpath) ++{ ++#ifdef G_OS_UNIX ++ struct sockaddr_un sa; ++ int s; ++ ++ DEBUG_CALL("open_unix"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("unixpath = %s", unixpath); ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.sun_family = AF_UNIX; ++ if (g_strlcpy(sa.sun_path, unixpath, sizeof(sa.sun_path)) >= sizeof(sa.sun_path)) { ++ g_critical("Bad unix path: %s", unixpath); ++ return 0; ++ } ++ ++ s = slirp_socket(PF_UNIX, SOCK_STREAM, 0); ++ if (s < 0) { ++ g_critical("open_unix(): %s", strerror(errno)); ++ return 0; ++ } ++ ++ if (connect(s, (struct sockaddr *)&sa, sizeof(sa)) < 0) { ++ g_critical("open_unix(): %s", strerror(errno)); ++ closesocket(s); ++ return 0; ++ } ++ ++ so->s = s; ++ slirp_set_nonblock(so->s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ ++ return 1; ++#else ++ g_assert_not_reached(); ++#endif ++} ++ ++char *slirp_connection_info(Slirp *slirp) ++{ ++ GString *str = g_string_new(NULL); ++ const char *const tcpstates[] = { ++ [TCPS_CLOSED] = "CLOSED", [TCPS_LISTEN] = "LISTEN", ++ [TCPS_SYN_SENT] = "SYN_SENT", [TCPS_SYN_RECEIVED] = "SYN_RCVD", ++ [TCPS_ESTABLISHED] = "ESTABLISHED", [TCPS_CLOSE_WAIT] = "CLOSE_WAIT", ++ [TCPS_FIN_WAIT_1] = "FIN_WAIT_1", [TCPS_CLOSING] = "CLOSING", ++ [TCPS_LAST_ACK] = "LAST_ACK", [TCPS_FIN_WAIT_2] = "FIN_WAIT_2", ++ [TCPS_TIME_WAIT] = "TIME_WAIT", ++ }; ++ struct in_addr dst_addr; ++ struct sockaddr_in src; ++ socklen_t src_len; ++ uint16_t dst_port; ++ struct socket *so; ++ const char *state; ++ char buf[20]; ++ ++ g_string_append_printf(str, ++ " Protocol[State] FD Source Address Port " ++ "Dest. 
Address Port RecvQ SendQ\n"); ++ ++ /* TODO: IPv6 */ ++ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { ++ if (so->so_state & SS_HOSTFWD) { ++ state = "HOST_FORWARD"; ++ } else if (so->so_tcpcb) { ++ state = tcpstates[so->so_tcpcb->t_state]; ++ } else { ++ state = "NONE"; ++ } ++ if (so->so_state & (SS_HOSTFWD | SS_INCOMING)) { ++ src_len = sizeof(src); ++ getsockname(so->s, (struct sockaddr *)&src, &src_len); ++ dst_addr = so->so_laddr; ++ dst_port = so->so_lport; ++ } else { ++ src.sin_addr = so->so_laddr; ++ src.sin_port = so->so_lport; ++ dst_addr = so->so_faddr; ++ dst_port = so->so_fport; ++ } ++ slirp_fmt0(buf, sizeof(buf), " TCP[%s]", state); ++ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, ++ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : ++ "*", ++ ntohs(src.sin_port)); ++ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), ++ ntohs(dst_port), so->so_rcv.sb_cc, ++ so->so_snd.sb_cc); ++ } ++ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so->so_next) { ++ if (so->so_state & SS_HOSTFWD) { ++ slirp_fmt0(buf, sizeof(buf), " UDP[HOST_FORWARD]"); ++ src_len = sizeof(src); ++ getsockname(so->s, (struct sockaddr *)&src, &src_len); ++ dst_addr = so->so_laddr; ++ dst_port = so->so_lport; ++ } else { ++ slirp_fmt0(buf, sizeof(buf), " UDP[%d sec]", ++ (so->so_expire - curtime) / 1000); ++ src.sin_addr = so->so_laddr; ++ src.sin_port = so->so_lport; ++ dst_addr = so->so_faddr; ++ dst_port = so->so_fport; ++ } ++ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, ++ src.sin_addr.s_addr ? 
inet_ntoa(src.sin_addr) : ++ "*", ++ ntohs(src.sin_port)); ++ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), ++ ntohs(dst_port), so->so_rcv.sb_cc, ++ so->so_snd.sb_cc); ++ } ++ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so->so_next) { ++ slirp_fmt0(buf, sizeof(buf), " ICMP[%d sec]", ++ (so->so_expire - curtime) / 1000); ++ src.sin_addr = so->so_laddr; ++ dst_addr = so->so_faddr; ++ g_string_append_printf(str, "%-19s %3d %15s - ", buf, so->s, ++ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : ++ "*"); ++ g_string_append_printf(str, "%15s - %5d %5d\n", inet_ntoa(dst_addr), ++ so->so_rcv.sb_cc, so->so_snd.sb_cc); ++ } ++ ++ return g_string_free(str, FALSE); ++} ++ ++char *slirp_neighbor_info(Slirp *slirp) ++{ ++ GString *str = g_string_new(NULL); ++ ArpTable *arp_table = &slirp->arp_table; ++ NdpTable *ndp_table = &slirp->ndp_table; ++ char ip_addr[INET6_ADDRSTRLEN]; ++ char eth_addr[ETH_ADDRSTRLEN]; ++ const char *ip; ++ ++ g_string_append_printf(str, " %5s %-17s %s\n", ++ "Table", "MacAddr", "IP Address"); ++ ++ for (int i = 0; i < ARP_TABLE_SIZE; ++i) { ++ struct in_addr addr; ++ addr.s_addr = arp_table->table[i].ar_sip; ++ if (!addr.s_addr) { ++ continue; ++ } ++ ip = inet_ntop(AF_INET, &addr, ip_addr, sizeof(ip_addr)); ++ g_assert(ip != NULL); ++ g_string_append_printf(str, " %5s %-17s %s\n", "ARP", ++ slirp_ether_ntoa(arp_table->table[i].ar_sha, ++ eth_addr, sizeof(eth_addr)), ++ ip); ++ } ++ ++ for (int i = 0; i < NDP_TABLE_SIZE; ++i) { ++ if (in6_zero(&ndp_table->table[i].ip_addr)) { ++ continue; ++ } ++ ip = inet_ntop(AF_INET6, &ndp_table->table[i].ip_addr, ip_addr, ++ sizeof(ip_addr)); ++ g_assert(ip != NULL); ++ g_string_append_printf(str, " %5s %-17s %s\n", "NDP", ++ slirp_ether_ntoa(ndp_table->table[i].eth_addr, ++ eth_addr, sizeof(eth_addr)), ++ ip); ++ } ++ ++ return g_string_free(str, FALSE); ++} ++ ++int slirp_bind_outbound(struct socket *so, unsigned short af) ++{ ++ int ret = 0; ++ struct sockaddr *addr = 
NULL; ++ int addr_size = 0; ++ ++ if (af == AF_INET && so->slirp->outbound_addr != NULL) { ++ addr = (struct sockaddr *)so->slirp->outbound_addr; ++ addr_size = sizeof(struct sockaddr_in); ++ } else if (af == AF_INET6 && so->slirp->outbound_addr6 != NULL) { ++ addr = (struct sockaddr *)so->slirp->outbound_addr6; ++ addr_size = sizeof(struct sockaddr_in6); ++ } ++ ++ if (addr != NULL) { ++ ret = bind(so->s, addr, addr_size); ++ } ++ return ret; ++} +diff --git a/slirp/src/misc.h b/slirp/src/misc.h +new file mode 100644 +index 0000000000..81b370cfb1 +--- /dev/null ++++ b/slirp/src/misc.h +@@ -0,0 +1,72 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef MISC_H ++#define MISC_H ++ ++#include "libslirp.h" ++ ++struct gfwd_list { ++ SlirpWriteCb write_cb; ++ void *opaque; ++ struct in_addr ex_addr; /* Server address */ ++ int ex_fport; /* Port to telnet to */ ++ char *ex_exec; /* Command line of what to exec */ ++ char *ex_unix; /* unix socket */ ++ struct gfwd_list *ex_next; ++}; ++ ++#define EMU_NONE 0x0 ++ ++/* TCP emulations */ ++#define EMU_CTL 0x1 ++#define EMU_FTP 0x2 ++#define EMU_KSH 0x3 ++#define EMU_IRC 0x4 ++#define EMU_REALAUDIO 0x5 ++#define EMU_RLOGIN 0x6 ++#define EMU_IDENT 0x7 ++ ++#define EMU_NOCONNECT 0x10 /* Don't connect */ ++ ++struct tos_t { ++ uint16_t lport; ++ uint16_t fport; ++ uint8_t tos; ++ uint8_t emu; ++}; ++ ++struct emu_t { ++ uint16_t lport; ++ uint16_t fport; ++ uint8_t tos; ++ uint8_t emu; ++ struct emu_t *next; ++}; ++ ++struct slirp_quehead { ++ struct slirp_quehead *qh_link; ++ struct slirp_quehead *qh_rlink; ++}; ++ ++void slirp_insque(void *, void *); ++void slirp_remque(void *); ++int fork_exec(struct socket *so, const char *ex); ++int open_unix(struct socket *so, const char *unixsock); ++ ++struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, ++ void *opaque, struct in_addr addr, int port); ++ ++struct gfwd_list *add_exec(struct 
gfwd_list **ex_ptr, const char *cmdline, ++ struct in_addr addr, int port); ++ ++struct gfwd_list *add_unix(struct gfwd_list **ex_ptr, const char *unixsock, ++ struct in_addr addr, int port); ++ ++int remove_guestfwd(struct gfwd_list **ex_ptr, struct in_addr addr, int port); ++ ++int slirp_bind_outbound(struct socket *so, unsigned short af); ++ ++#endif +diff --git a/slirp/src/ncsi-pkt.h b/slirp/src/ncsi-pkt.h +new file mode 100644 +index 0000000000..39cf8446d6 +--- /dev/null ++++ b/slirp/src/ncsi-pkt.h +@@ -0,0 +1,445 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright Gavin Shan, IBM Corporation 2016. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
++ * OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#ifndef NCSI_PKT_H
++#define NCSI_PKT_H
++
++/* NC-SI packet layouts and type codes, from linux/net/ncsi/ncsi-pkt.h; __be16/__be32 mark multi-byte fields (presumably big-endian on the wire, as the kernel names suggest - confirm against users) */
++#define __be32 uint32_t
++#define __be16 uint16_t
++
++struct ncsi_pkt_hdr {
++    unsigned char mc_id; /* Management controller ID */
++    unsigned char revision; /* NCSI version - 0x01 */
++    unsigned char reserved; /* Reserved */
++    unsigned char id; /* Packet sequence number */
++    unsigned char type; /* Packet type */
++    unsigned char channel; /* Network controller ID */
++    __be16 length; /* Payload length */
++    __be32 reserved1[2]; /* Reserved */
++} SLIRP_PACKED;
++
++struct ncsi_cmd_pkt_hdr {
++    struct ncsi_pkt_hdr common; /* Common NCSI packet header */
++} SLIRP_PACKED;
++
++struct ncsi_rsp_pkt_hdr {
++    struct ncsi_pkt_hdr common; /* Common NCSI packet header */
++    __be16 code; /* Response code */
++    __be16 reason; /* Response reason */
++} SLIRP_PACKED;
++
++struct ncsi_aen_pkt_hdr {
++    struct ncsi_pkt_hdr common; /* Common NCSI packet header */
++    unsigned char reserved2[3]; /* Reserved */
++    unsigned char type; /* AEN packet type */
++} SLIRP_PACKED;
++
++/* NCSI common command packet */
++struct ncsi_cmd_pkt {
++    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[26]; /* NOTE(review): presumably frame padding - confirm */
++} SLIRP_PACKED;
++
++/* NCSI common response packet */
++struct ncsi_rsp_pkt {
++    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[22]; /* NOTE(review): presumably frame padding - confirm */
++} SLIRP_PACKED;
++
++/* Select Package */
++struct ncsi_cmd_sp_pkt {
++    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
++    unsigned char reserved[3]; /* Reserved */
++    unsigned char hw_arbitration; /* HW arbitration */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[22];
++} SLIRP_PACKED;
++
++/* Disable Channel */
++struct ncsi_cmd_dc_pkt {
++    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
++    unsigned char reserved[3]; /* Reserved */
++    unsigned char ald; /* Allow link down */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[22];
++} SLIRP_PACKED;
++
++/* Reset Channel */
++struct ncsi_cmd_rc_pkt {
++    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
++    __be32 reserved; /* Reserved */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[22];
++} SLIRP_PACKED;
++
++/* AEN Enable */
++struct ncsi_cmd_ae_pkt {
++    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
++    unsigned char reserved[3]; /* Reserved */
++    unsigned char mc_id; /* MC ID */
++    __be32 mode; /* AEN working mode */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[18];
++} SLIRP_PACKED;
++
++/* Set Link */
++struct ncsi_cmd_sl_pkt {
++    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
++    __be32 mode; /* Link working mode */
++    __be32 oem_mode; /* OEM link mode */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[18];
++} SLIRP_PACKED;
++
++/* Set VLAN Filter */
++struct ncsi_cmd_svf_pkt {
++    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
++    __be16 reserved; /* Reserved */
++    __be16 vlan; /* VLAN ID */
++    __be16 reserved1; /* Reserved */
++    unsigned char index; /* VLAN table index */
++    unsigned char enable; /* Enable or disable */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[14];
++} SLIRP_PACKED;
++
++/* Enable VLAN */
++struct ncsi_cmd_ev_pkt {
++    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
++    unsigned char reserved[3]; /* Reserved */
++    unsigned char mode; /* VLAN filter mode */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[22];
++} SLIRP_PACKED;
++
++/* Set MAC Address */
++struct ncsi_cmd_sma_pkt {
++    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
++    unsigned char mac[6]; /* MAC address */
++    unsigned char index; /* MAC table index */
++    unsigned char at_e; /* Addr type and operation */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[18];
++} SLIRP_PACKED;
++
++/* Enable Broadcast Filter */
++struct ncsi_cmd_ebf_pkt {
++    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
++    __be32 mode; /* Filter mode */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[22];
++} SLIRP_PACKED;
++
++/* Enable Global Multicast Filter */
++struct ncsi_cmd_egmf_pkt {
++    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
++    __be32 mode; /* Global MC mode */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[22];
++} SLIRP_PACKED;
++
++/* Set NCSI Flow Control */
++struct ncsi_cmd_snfc_pkt {
++    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
++    unsigned char reserved[3]; /* Reserved */
++    unsigned char mode; /* Flow control mode */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[22];
++} SLIRP_PACKED;
++
++/* Get Link Status */
++struct ncsi_rsp_gls_pkt {
++    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
++    __be32 status; /* Link status */
++    __be32 other; /* Other indications */
++    __be32 oem_status; /* OEM link status */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[10];
++} SLIRP_PACKED;
++
++/* Get Version ID */
++struct ncsi_rsp_gvi_pkt {
++    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
++    __be32 ncsi_version; /* NCSI version */
++    unsigned char reserved[3]; /* Reserved */
++    unsigned char alpha2; /* NCSI version */
++    unsigned char fw_name[12]; /* f/w name string */
++    __be32 fw_version; /* f/w version */
++    __be16 pci_ids[4]; /* PCI IDs */
++    __be32 mf_id; /* Manufacture ID */
++    __be32 checksum; /* Checksum */
++} SLIRP_PACKED;
++
++/* Get Capabilities */
++struct ncsi_rsp_gc_pkt {
++    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
++    __be32 cap; /* Capabilities */
++    __be32 bc_cap; /* Broadcast cap */
++    __be32 mc_cap; /* Multicast cap */
++    __be32 buf_cap; /* Buffering cap */
++    __be32 aen_cap; /* AEN cap */
++    unsigned char vlan_cnt; /* VLAN filter count */
++    unsigned char mixed_cnt; /* Mix filter count */
++    unsigned char mc_cnt; /* MC filter count */
++    unsigned char uc_cnt; /* UC filter count */
++    unsigned char reserved[2]; /* Reserved */
++    unsigned char vlan_mode; /* VLAN mode */
++    unsigned char channel_cnt; /* Channel count */
++    __be32 checksum; /* Checksum */
++} SLIRP_PACKED;
++
++/* Get Parameters */
++struct ncsi_rsp_gp_pkt {
++    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
++    unsigned char mac_cnt; /* Number of MAC addr */
++    unsigned char reserved[2]; /* Reserved */
++    unsigned char mac_enable; /* MAC addr enable flags */
++    unsigned char vlan_cnt; /* VLAN tag count */
++    unsigned char reserved1; /* Reserved */
++    __be16 vlan_enable; /* VLAN tag enable flags */
++    __be32 link_mode; /* Link setting */
++    __be32 bc_mode; /* BC filter mode */
++    __be32 valid_modes; /* Valid mode parameters */
++    unsigned char vlan_mode; /* VLAN mode */
++    unsigned char fc_mode; /* Flow control mode */
++    unsigned char reserved2[2]; /* Reserved */
++    __be32 aen_mode; /* AEN mode */
++    unsigned char mac[6]; /* Supported MAC addr */
++    __be16 vlan; /* Supported VLAN tags */
++    __be32 checksum; /* Checksum */
++} SLIRP_PACKED;
++
++/* Get Controller Packet Statistics */
++struct ncsi_rsp_gcps_pkt {
++    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
++    __be32 cnt_hi; /* Counter cleared */
++    __be32 cnt_lo; /* Counter cleared */
++    __be32 rx_bytes; /* Rx bytes */
++    __be32 tx_bytes; /* Tx bytes */
++    __be32 rx_uc_pkts; /* Rx UC packets */
++    __be32 rx_mc_pkts; /* Rx MC packets */
++    __be32 rx_bc_pkts; /* Rx BC packets */
++    __be32 tx_uc_pkts; /* Tx UC packets */
++    __be32 tx_mc_pkts; /* Tx MC packets */
++    __be32 tx_bc_pkts; /* Tx BC packets */
++    __be32 fcs_err; /* FCS errors */
++    __be32 align_err; /* Alignment errors */
++    __be32 false_carrier; /* False carrier detection */
++    __be32 runt_pkts; /* Rx runt packets */
++    __be32 jabber_pkts; /* Rx jabber packets */
++    __be32 rx_pause_xon; /* Rx pause XON frames */
++    __be32 rx_pause_xoff; /* Rx XOFF frames */
++    __be32 tx_pause_xon; /* Tx XON frames */
++    __be32 tx_pause_xoff; /* Tx XOFF frames */
++    __be32 tx_s_collision; /* Single collision frames */
++    __be32 tx_m_collision; /* Multiple collision frames */
++    __be32 l_collision; /* Late collision frames */
++    __be32 e_collision; /* Excessive collision frames */
++    __be32 rx_ctl_frames; /* Rx control frames */
++    __be32 rx_64_frames; /* Rx 64-bytes frames */
++    __be32 rx_127_frames; /* Rx 65-127 bytes frames */
++    __be32 rx_255_frames; /* Rx 128-255 bytes frames */
++    __be32 rx_511_frames; /* Rx 256-511 bytes frames */
++    __be32 rx_1023_frames; /* Rx 512-1023 bytes frames */
++    __be32 rx_1522_frames; /* Rx 1024-1522 bytes frames */
++    __be32 rx_9022_frames; /* Rx 1523-9022 bytes frames */
++    __be32 tx_64_frames; /* Tx 64-bytes frames */
++    __be32 tx_127_frames; /* Tx 65-127 bytes frames */
++    __be32 tx_255_frames; /* Tx 128-255 bytes frames */
++    __be32 tx_511_frames; /* Tx 256-511 bytes frames */
++    __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */
++    __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */
++    __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */
++    __be32 rx_valid_bytes; /* Rx valid bytes */
++    __be32 rx_runt_pkts; /* Rx error runt packets */
++    __be32 rx_jabber_pkts; /* Rx error jabber packets */
++    __be32 checksum; /* Checksum */
++} SLIRP_PACKED;
++
++/* Get NCSI Statistics */
++struct ncsi_rsp_gns_pkt {
++    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
++    __be32 rx_cmds; /* Rx NCSI commands */
++    __be32 dropped_cmds; /* Dropped commands */
++    __be32 cmd_type_errs; /* Command type errors */
++    __be32 cmd_csum_errs; /* Command checksum errors */
++    __be32 rx_pkts; /* Rx NCSI packets */
++    __be32 tx_pkts; /* Tx NCSI packets */
++    __be32 tx_aen_pkts; /* Tx AEN packets */
++    __be32 checksum; /* Checksum */
++} SLIRP_PACKED;
++
++/* Get NCSI Pass-through Statistics */
++struct ncsi_rsp_gnpts_pkt {
++    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
++    __be32 tx_pkts; /* Tx packets */
++    __be32 tx_dropped; /* Tx dropped packets */
++    __be32 tx_channel_err; /* Tx channel errors */
++    __be32 tx_us_err; /* Tx undersize errors */
++    __be32 rx_pkts; /* Rx packets */
++    __be32 rx_dropped; /* Rx dropped packets */
++    __be32 rx_channel_err; /* Rx channel errors */
++    __be32 rx_us_err; /* Rx undersize errors */
++    __be32 rx_os_err; /* Rx oversize errors */
++    __be32 checksum; /* Checksum */
++} SLIRP_PACKED;
++
++/* Get package status */
++struct ncsi_rsp_gps_pkt {
++    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
++    __be32 status; /* Hardware arbitration status */
++    __be32 checksum; /* Checksum */
++} SLIRP_PACKED;
++
++/* Get package UUID */
++struct ncsi_rsp_gpuuid_pkt {
++    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
++    unsigned char uuid[16]; /* UUID */
++    __be32 checksum; /* Checksum */
++} SLIRP_PACKED;
++
++/* AEN: Link State Change */
++struct ncsi_aen_lsc_pkt {
++    struct ncsi_aen_pkt_hdr aen; /* AEN header */
++    __be32 status; /* Link status */
++    __be32 oem_status; /* OEM link status */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[14];
++} SLIRP_PACKED;
++
++/* AEN: Configuration Required */
++struct ncsi_aen_cr_pkt {
++    struct ncsi_aen_pkt_hdr aen; /* AEN header */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[22];
++} SLIRP_PACKED;
++
++/* AEN: Host Network Controller Driver Status Change */
++struct ncsi_aen_hncdsc_pkt {
++    struct ncsi_aen_pkt_hdr aen; /* AEN header */
++    __be32 status; /* Status */
++    __be32 checksum; /* Checksum */
++    unsigned char pad[18];
++} SLIRP_PACKED;
++
++/* NCSI packet revision */
++#define NCSI_PKT_REVISION 0x01
++
++/* NCSI packet commands */
++#define NCSI_PKT_CMD_CIS 0x00 /* Clear Initial State */
++#define NCSI_PKT_CMD_SP 0x01 /* Select Package */
++#define NCSI_PKT_CMD_DP 0x02 /* Deselect Package */
++#define NCSI_PKT_CMD_EC 0x03 /* Enable Channel */
++#define NCSI_PKT_CMD_DC 0x04 /* Disable Channel */
++#define NCSI_PKT_CMD_RC 0x05 /* Reset Channel */
++#define NCSI_PKT_CMD_ECNT 0x06 /* Enable Channel Network Tx */
++#define NCSI_PKT_CMD_DCNT 0x07 /* Disable Channel Network Tx */
++#define NCSI_PKT_CMD_AE 0x08 /* AEN Enable */
++#define NCSI_PKT_CMD_SL 0x09 /* Set Link */
++#define NCSI_PKT_CMD_GLS 0x0a /* Get Link Status */
++#define NCSI_PKT_CMD_SVF 0x0b /* Set VLAN Filter */
++#define NCSI_PKT_CMD_EV 0x0c /* Enable VLAN */
++#define NCSI_PKT_CMD_DV 0x0d /* Disable VLAN */
++#define NCSI_PKT_CMD_SMA 0x0e /* Set MAC address */
++#define NCSI_PKT_CMD_EBF 0x10 /* Enable Broadcast Filter */
++#define NCSI_PKT_CMD_DBF 0x11 /* Disable Broadcast Filter */
++#define NCSI_PKT_CMD_EGMF 0x12 /* Enable Global Multicast Filter */
++#define NCSI_PKT_CMD_DGMF 0x13 /* Disable Global Multicast Filter */
++#define NCSI_PKT_CMD_SNFC 0x14 /* Set NCSI Flow Control */
++#define NCSI_PKT_CMD_GVI 0x15 /* Get Version ID */
++#define NCSI_PKT_CMD_GC 0x16 /* Get Capabilities */
++#define NCSI_PKT_CMD_GP 0x17 /* Get Parameters */
++#define NCSI_PKT_CMD_GCPS 0x18 /* Get Controller Packet Statistics */
++#define NCSI_PKT_CMD_GNS 0x19 /* Get NCSI Statistics */
++#define NCSI_PKT_CMD_GNPTS 0x1a /* Get NCSI Pass-through Statistics */
++#define NCSI_PKT_CMD_GPS 0x1b /* Get package status */
++#define NCSI_PKT_CMD_OEM 0x50 /* OEM */
++#define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */
++#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */
++
++/* NCSI packet responses: each response type is its command type + 0x80 */
++#define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80)
++#define NCSI_PKT_RSP_SP (NCSI_PKT_CMD_SP + 0x80)
++#define NCSI_PKT_RSP_DP (NCSI_PKT_CMD_DP + 0x80)
++#define NCSI_PKT_RSP_EC (NCSI_PKT_CMD_EC + 0x80)
++#define NCSI_PKT_RSP_DC (NCSI_PKT_CMD_DC + 0x80)
++#define NCSI_PKT_RSP_RC (NCSI_PKT_CMD_RC + 0x80)
++#define NCSI_PKT_RSP_ECNT (NCSI_PKT_CMD_ECNT + 0x80)
++#define NCSI_PKT_RSP_DCNT (NCSI_PKT_CMD_DCNT + 0x80)
++#define NCSI_PKT_RSP_AE (NCSI_PKT_CMD_AE + 0x80)
++#define NCSI_PKT_RSP_SL (NCSI_PKT_CMD_SL + 0x80)
++#define NCSI_PKT_RSP_GLS (NCSI_PKT_CMD_GLS + 0x80)
++#define NCSI_PKT_RSP_SVF (NCSI_PKT_CMD_SVF + 0x80)
++#define NCSI_PKT_RSP_EV (NCSI_PKT_CMD_EV + 0x80)
++#define NCSI_PKT_RSP_DV (NCSI_PKT_CMD_DV + 0x80)
++#define NCSI_PKT_RSP_SMA (NCSI_PKT_CMD_SMA + 0x80)
++#define NCSI_PKT_RSP_EBF (NCSI_PKT_CMD_EBF + 0x80)
++#define NCSI_PKT_RSP_DBF (NCSI_PKT_CMD_DBF + 0x80)
++#define NCSI_PKT_RSP_EGMF (NCSI_PKT_CMD_EGMF + 0x80)
++#define NCSI_PKT_RSP_DGMF (NCSI_PKT_CMD_DGMF + 0x80)
++#define NCSI_PKT_RSP_SNFC (NCSI_PKT_CMD_SNFC + 0x80)
++#define NCSI_PKT_RSP_GVI (NCSI_PKT_CMD_GVI + 0x80)
++#define NCSI_PKT_RSP_GC (NCSI_PKT_CMD_GC + 0x80)
++#define NCSI_PKT_RSP_GP (NCSI_PKT_CMD_GP + 0x80)
++#define NCSI_PKT_RSP_GCPS (NCSI_PKT_CMD_GCPS + 0x80)
++#define NCSI_PKT_RSP_GNS (NCSI_PKT_CMD_GNS + 0x80)
++#define NCSI_PKT_RSP_GNPTS (NCSI_PKT_CMD_GNPTS + 0x80)
++#define NCSI_PKT_RSP_GPS (NCSI_PKT_CMD_GPS + 0x80)
++#define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80)
++#define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80)
++#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80)
++
++/* NCSI response code/reason */
++#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */
++#define NCSI_PKT_RSP_C_FAILED 0x0001 /* Command Failed */
++#define NCSI_PKT_RSP_C_UNAVAILABLE 0x0002 /* Command Unavailable */
++#define NCSI_PKT_RSP_C_UNSUPPORTED 0x0003 /* Command Unsupported */
++#define NCSI_PKT_RSP_R_NO_ERROR 0x0000 /* No Error */
++#define NCSI_PKT_RSP_R_INTERFACE 0x0001 /* Interface not ready */
++#define NCSI_PKT_RSP_R_PARAM 0x0002 /* Invalid Parameter */
++#define NCSI_PKT_RSP_R_CHANNEL 0x0003 /* Channel not Ready */
++#define NCSI_PKT_RSP_R_PACKAGE 0x0004 /* Package not Ready */
++#define NCSI_PKT_RSP_R_LENGTH 0x0005 /* Invalid payload length */
++#define NCSI_PKT_RSP_R_UNKNOWN 0x7fff /* Command type unsupported */
++
++/* NCSI AEN packet type */
++#define NCSI_PKT_AEN 0xFF /* AEN Packet */
++#define NCSI_PKT_AEN_LSC 0x00 /* Link status change */
++#define NCSI_PKT_AEN_CR 0x01 /* Configuration required */
++#define NCSI_PKT_AEN_HNCDSC 0x02 /* HNC driver status change */
++
++#endif /* NCSI_PKT_H */
+diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c
+new file mode 100644
+index 0000000000..f3427bd66d
+--- /dev/null
++++ b/slirp/src/ncsi.c
+@@ -0,0 +1,197 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * NC-SI (Network Controller Sideband Interface) "echo" model
++ *
++ * Copyright (C) 2016-2018 IBM Corp.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * 1. Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * 2. Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials provided
++ * with the distribution.
++ *
++ * 3. Neither the name of the copyright holder nor the names of its
++ * contributors may be used to endorse or promote products derived
++ * from this software without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE
++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
++ * OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#include "slirp.h"
++
++#include "ncsi-pkt.h"
++
++/*
++ * Compute the NC-SI checksum of the first len bytes at data.
++ *
++ * Returns the two's complement of the 32-bit sum of the bytes taken as
++ * big-endian 16-bit words, in host byte order. A trailing odd byte is
++ * summed as the high byte of a zero-padded word instead of reading one
++ * byte past the end of the buffer.
++ */
++static uint32_t ncsi_calculate_checksum(const uint8_t *data, int len)
++{
++    uint32_t checksum = 0;
++    int i;
++
++    /*
++     * 32-bit unsigned sum of the NC-SI packet header and NC-SI packet
++     * payload interpreted as a series of 16-bit unsigned integer values.
++     */
++    for (i = 0; i + 1 < len; i += 2) {
++        checksum += (((uint16_t)data[i]) << 8) + data[i + 1];
++    }
++    if (i < len) {
++        /* odd length: never touch data[len] */
++        checksum += ((uint32_t)data[i]) << 8;
++    }
++
++    checksum = (~checksum + 1);
++    return checksum;
++}
++
++/* Get Capabilities: advertise every capability bit and two unicast filters */
++static int ncsi_rsp_handler_gc(struct ncsi_rsp_pkt_hdr *rnh)
++{
++    struct ncsi_rsp_gc_pkt *rsp = (struct ncsi_rsp_gc_pkt *)rnh;
++
++    rsp->cap = htonl(~0);
++    rsp->bc_cap = htonl(~0);
++    rsp->mc_cap = htonl(~0);
++    rsp->buf_cap = htonl(~0);
++    rsp->aen_cap = htonl(~0);
++    rsp->vlan_mode = 0xff;
++    rsp->uc_cnt = 2;
++    return 0;
++}
++
++/* Get Link status: only bit 0 of the status word is set */
++static int ncsi_rsp_handler_gls(struct ncsi_rsp_pkt_hdr *rnh)
++{
++    struct ncsi_rsp_gls_pkt *rsp = (struct ncsi_rsp_gls_pkt *)rnh;
++
++    rsp->status = htonl(0x1);
++    return 0;
++}
++
++/* Get Parameters */
++static int ncsi_rsp_handler_gp(struct ncsi_rsp_pkt_hdr *rnh)
++{
++    struct ncsi_rsp_gp_pkt *rsp = (struct ncsi_rsp_gp_pkt *)rnh;
++
++    /* no MAC address filters or VLAN filters on the channel */
++    rsp->mac_cnt = 0;
++    rsp->mac_enable = 0;
++    rsp->vlan_cnt = 0;
++    rsp->vlan_enable = 0;
++
++    return 0;
++}
++
++/*
++ * One entry per supported response type: the payload length to report
++ * and an optional callback that fills in the type-specific fields.
++ */
++static const struct ncsi_rsp_handler {
++    unsigned char type;
++    int payload;
++    int (*handler)(struct ncsi_rsp_pkt_hdr *rnh);
++} ncsi_rsp_handlers[] = {
++    { NCSI_PKT_RSP_CIS, 4, NULL },
++    { NCSI_PKT_RSP_SP, 4, NULL },
++    { NCSI_PKT_RSP_DP, 4, NULL },
++    { NCSI_PKT_RSP_EC, 4, NULL },
++    { NCSI_PKT_RSP_DC, 4, NULL },
++    { NCSI_PKT_RSP_RC, 4, NULL },
++    { NCSI_PKT_RSP_ECNT, 4, NULL },
++    { NCSI_PKT_RSP_DCNT, 4, NULL },
++    { NCSI_PKT_RSP_AE, 4, NULL },
++    { NCSI_PKT_RSP_SL, 4, NULL },
++    { NCSI_PKT_RSP_GLS, 16, ncsi_rsp_handler_gls },
++    { NCSI_PKT_RSP_SVF, 4, NULL },
++    { NCSI_PKT_RSP_EV, 4, NULL },
++    { NCSI_PKT_RSP_DV, 4, NULL },
++    { NCSI_PKT_RSP_SMA, 4, NULL },
++    { NCSI_PKT_RSP_EBF, 4, NULL },
++    { NCSI_PKT_RSP_DBF, 4, NULL },
++    { NCSI_PKT_RSP_EGMF, 4, NULL },
++    { NCSI_PKT_RSP_DGMF, 4, NULL },
++    { NCSI_PKT_RSP_SNFC, 4, NULL },
++    { NCSI_PKT_RSP_GVI, 40, NULL },
++    { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc },
++    { NCSI_PKT_RSP_GP, 40, ncsi_rsp_handler_gp },
++    { NCSI_PKT_RSP_GCPS, 172, NULL },
++    { NCSI_PKT_RSP_GNS, 172, NULL },
++    { NCSI_PKT_RSP_GNPTS, 172, NULL },
++    { NCSI_PKT_RSP_GPS, 8, NULL },
++    { NCSI_PKT_RSP_OEM, 0, NULL },
++    { NCSI_PKT_RSP_PLDM, 0, NULL },
++    { NCSI_PKT_RSP_GPUUID, 20, NULL }
++};
++
++/*
++ * packet format : ncsi header + payload + checksum
++ */
++#define NCSI_MAX_PAYLOAD 172
++#define NCSI_MAX_LEN (sizeof(struct ncsi_pkt_hdr) + NCSI_MAX_PAYLOAD + 4)
++/* the response code and reason fields are themselves 4 bytes of payload */
++#define NCSI_RSP_MIN_PAYLOAD 4
++
++/*
++ * Answer one NC-SI command frame.
++ *
++ * pkt points at the Ethernet header of the received frame and pkt_len
++ * is its total length; frames too short to hold an NC-SI header are
++ * dropped. The reply echoes the command's mc_id, id and channel, uses
++ * type + 0x80, and is sent with slirp_send_packet_all(). Commands with
++ * no table entry get code UNAVAILABLE and reason UNKNOWN.
++ */
++void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
++{
++    const struct ncsi_pkt_hdr *nh =
++        (const struct ncsi_pkt_hdr *)(pkt + ETH_HLEN);
++    uint8_t ncsi_reply[ETH_HLEN + NCSI_MAX_LEN];
++    struct ethhdr *reh = (struct ethhdr *)ncsi_reply;
++    struct ncsi_rsp_pkt_hdr *rnh =
++        (struct ncsi_rsp_pkt_hdr *)(ncsi_reply + ETH_HLEN);
++    const struct ncsi_rsp_handler *handler = NULL;
++    size_t i;
++    int payload = NCSI_RSP_MIN_PAYLOAD;
++    int ncsi_rsp_len = sizeof(*nh);
++    uint32_t checksum;
++
++    /* compare as int: a negative pkt_len must not wrap to a huge size_t */
++    if (pkt_len < (int)(ETH_HLEN + sizeof(struct ncsi_pkt_hdr))) {
++        return; /* packet too short */
++    }
++
++    memset(ncsi_reply, 0, sizeof(ncsi_reply));
++
++    memset(reh->h_dest, 0xff, ETH_ALEN);
++    memset(reh->h_source, 0xff, ETH_ALEN);
++    reh->h_proto = htons(ETH_P_NCSI);
++
++    for (i = 0; i < G_N_ELEMENTS(ncsi_rsp_handlers); i++) {
++        if (ncsi_rsp_handlers[i].type == nh->type + 0x80) {
++            handler = &ncsi_rsp_handlers[i];
++            break;
++        }
++    }
++
++    rnh->common.mc_id = nh->mc_id;
++    rnh->common.revision = NCSI_PKT_REVISION;
++    rnh->common.id = nh->id;
++    rnh->common.type = nh->type + 0x80;
++    rnh->common.channel = nh->channel;
++
++    if (handler) {
++        /*
++         * Table entries with payload 0 (OEM, PLDM) still carry the code
++         * and reason fields, so never report less than those 4 bytes.
++         */
++        if (handler->payload > payload) {
++            payload = handler->payload;
++        }
++        rnh->code = htons(NCSI_PKT_RSP_C_COMPLETED);
++        rnh->reason = htons(NCSI_PKT_RSP_R_NO_ERROR);
++
++        if (handler->handler) {
++            /* TODO: handle errors */
++            handler->handler(rnh);
++        }
++    } else {
++        rnh->code = htons(NCSI_PKT_RSP_C_UNAVAILABLE);
++        rnh->reason = htons(NCSI_PKT_RSP_R_UNKNOWN);
++    }
++    rnh->common.length = htons(payload);
++    /* keeps the checksum from landing on top of code/reason */
++    ncsi_rsp_len += payload;
++
++    /*
++     * Add the optional checksum at the end of the frame. The slot is not
++     * 4-byte aligned within ncsi_reply, so store it with memcpy rather
++     * than through a uint32_t pointer.
++     */
++    checksum = htonl(ncsi_calculate_checksum((uint8_t *)rnh, ncsi_rsp_len));
++    memcpy((uint8_t *)rnh + ncsi_rsp_len, &checksum, sizeof(checksum));
++    ncsi_rsp_len += 4;
++
++    slirp_send_packet_all(slirp, ncsi_reply, ETH_HLEN + ncsi_rsp_len);
++}
+diff --git a/slirp/src/ndp_table.c b/slirp/src/ndp_table.c
+new file mode 100644
+index 0000000000..fdb189d595
+--- /dev/null
++++ b/slirp/src/ndp_table.c
+@@ -0,0 +1,98 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 2013
++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne.
++ */
++
++#include "slirp.h"
++
++/*
++ * Remember the Ethernet address that belongs to an IPv6 address.
++ *
++ * Multicast and unspecified addresses are ignored. An address already in
++ * the table has its Ethernet address refreshed; otherwise the slot at
++ * next_victim is overwritten and next_victim advances round-robin. The
++ * first address ever stored is also kept as guest_in6_addr.
++ */
++void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr,
++                   uint8_t ethaddr[ETH_ALEN])
++{
++    NdpTable *tbl = &slirp->ndp_table;
++    char ip_text[INET6_ADDRSTRLEN];
++    char mac_text[ETH_ADDRSTRLEN];
++    int slot = 0;
++
++    inet_ntop(AF_INET6, &(ip_addr), ip_text, INET6_ADDRSTRLEN);
++
++    DEBUG_CALL("ndp_table_add");
++    DEBUG_ARG("ip = %s", ip_text);
++    DEBUG_ARG("hw addr = %s",
++              slirp_ether_ntoa(ethaddr, mac_text, sizeof(mac_text)));
++
++    /* Multicast and unspecified addresses never get an entry */
++    if (IN6_IS_ADDR_MULTICAST(&ip_addr) || in6_zero(&ip_addr)) {
++        DEBUG_CALL(" abort: do not register multicast or unspecified address");
++        return;
++    }
++
++    /* Refresh the entry in place when the address is already known */
++    while (slot < NDP_TABLE_SIZE) {
++        if (in6_equal(&tbl->table[slot].ip_addr, &ip_addr)) {
++            DEBUG_CALL(" already in table: update the entry");
++            memcpy(tbl->table[slot].eth_addr, ethaddr, ETH_ALEN);
++            return;
++        }
++        slot++;
++    }
++
++    /* Unknown address: take over the next victim slot */
++    DEBUG_CALL(" create new entry");
++    if (in6_zero(&tbl->guest_in6_addr)) {
++        /* The first address stored is taken to be the guest's */
++        tbl->guest_in6_addr = ip_addr;
++    }
++    slot = tbl->next_victim;
++    tbl->table[slot].ip_addr = ip_addr;
++    memcpy(tbl->table[slot].eth_addr, ethaddr, ETH_ALEN);
++    tbl->next_victim = (slot + 1) % NDP_TABLE_SIZE;
++}
++
++/*
++ * Resolve an IPv6 address to an Ethernet address.
++ *
++ * Writes ETH_ALEN bytes to out_ethaddr and returns true on success:
++ * the broadcast address for the unspecified address, 33:33 followed by
++ * the last four bytes of the IPv6 address for multicast, or the stored
++ * address of a matching table entry. Returns false, leaving out_ethaddr
++ * untouched, when the address is not in the table.
++ */
++bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr,
++                      uint8_t out_ethaddr[ETH_ALEN])
++{
++    NdpTable *tbl = &slirp->ndp_table;
++    char ip_text[INET6_ADDRSTRLEN];
++    char mac_text[ETH_ADDRSTRLEN];
++    int slot;
++
++    inet_ntop(AF_INET6, &(ip_addr), ip_text, INET6_ADDRSTRLEN);
++
++    DEBUG_CALL("ndp_table_search");
++    DEBUG_ARG("ip = %s", ip_text);
++
++    if (in6_zero(&ip_addr)) {
++        /* unspecified address: answer with Ethernet broadcast */
++        memset(out_ethaddr, 0xff, ETH_ALEN);
++        return true;
++    }
++
++    if (IN6_IS_ADDR_MULTICAST(&ip_addr)) {
++        /* 33:33 prefix, then bytes 12..15 of the IPv6 address */
++        out_ethaddr[0] = 0x33;
++        out_ethaddr[1] = 0x33;
++        memcpy(&out_ethaddr[2], &ip_addr.s6_addr[12], 4);
++        DEBUG_ARG("multicast addr = %s",
++                  slirp_ether_ntoa(out_ethaddr, mac_text, sizeof(mac_text)));
++        return true;
++    }
++
++    for (slot = 0; slot != NDP_TABLE_SIZE; ++slot) {
++        if (!in6_equal(&tbl->table[slot].ip_addr, &ip_addr)) {
++            continue;
++        }
++        memcpy(out_ethaddr, tbl->table[slot].eth_addr, ETH_ALEN);
++        DEBUG_ARG("found hw addr = %s",
++                  slirp_ether_ntoa(out_ethaddr, mac_text, sizeof(mac_text)));
++        return true;
++    }
++
++    DEBUG_CALL(" ip not found in table");
++    return false;
++}
+diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c
+new file mode 100644
+index 0000000000..b357091705
+--- /dev/null
++++ b/slirp/src/sbuf.c
+@@ -0,0 +1,168 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 1995 Danny Gasparovski.
++ */
++
++#include "slirp.h"
++
++static void sbappendsb(struct sbuf *sb, struct mbuf *m);
++
++/*
++ * Release the storage of sb and leave it empty and unallocated, so a
++ * later sbreserve() never hands a stale pointer to g_realloc().
++ */
++void sbfree(struct sbuf *sb)
++{
++    g_free(sb->sb_data);
++    sb->sb_data = NULL;
++    sb->sb_wptr = NULL;
++    sb->sb_rptr = NULL;
++    sb->sb_cc = 0;
++    sb->sb_datalen = 0;
++}
++
++/*
++ * Discard up to num bytes from the read side of sb (a warning is logged
++ * and num is clamped when it exceeds the bytes stored).
++ *
++ * Returns true when this drop takes the fill level from at least half
++ * of the buffer to below half, false otherwise.
++ */
++bool sbdrop(struct sbuf *sb, size_t num)
++{
++    int limit = sb->sb_datalen / 2;
++
++    g_warn_if_fail(num <= sb->sb_cc);
++    if (num > sb->sb_cc)
++        num = sb->sb_cc;
++
++    sb->sb_cc -= num;
++    sb->sb_rptr += num;
++    /* wrap the read pointer around the ring */
++    if (sb->sb_rptr >= sb->sb_data + sb->sb_datalen)
++        sb->sb_rptr -= sb->sb_datalen;
++
++    if (sb->sb_cc < limit && sb->sb_cc + num >= limit) {
++        return true;
++    }
++
++    return false;
++}
++
++/*
++ * (Re)allocate sb to hold size bytes. Any buffered data is discarded:
++ * both pointers are reset to the start and the byte count to zero.
++ */
++void sbreserve(struct sbuf *sb, size_t size)
++{
++    sb->sb_wptr = sb->sb_rptr = sb->sb_data = g_realloc(sb->sb_data, size);
++    sb->sb_cc = 0;
++    sb->sb_datalen = size;
++}
++
++/*
++ * Try and write() to the socket, whatever doesn't get written
++ * append to the buffer... for a host with a fast net connection,
++ * this prevents an unnecessary copy of the data
++ * (the socket is non-blocking, so we won't hang)
++ *
++ * The mbuf m is always freed before returning.
++ */
++void sbappend(struct socket *so, struct mbuf *m)
++{
++    int ret = 0;
++
++    DEBUG_CALL("sbappend");
++    DEBUG_ARG("so = %p", so);
++    DEBUG_ARG("m = %p", m);
++    DEBUG_ARG("m->m_len = %d", m->m_len);
++
++    /* Shouldn't happen, but... e.g. foreign host closes connection */
++    if (m->m_len <= 0) {
++        m_free(m);
++        return;
++    }
++
++    /*
++     * If there is urgent data, call sosendoob
++     * if not all was sent, sowrite will take care of the rest
++     * (The rest of this function is just an optimisation)
++     */
++    if (so->so_urgc) {
++        sbappendsb(&so->so_rcv, m);
++        m_free(m);
++        sosendoob(so);
++        return;
++    }
++
++    /*
++     * We only write if there's nothing in the buffer,
++     * otherwise it'll arrive out of order, and hence corrupt
++     */
++    if (!so->so_rcv.sb_cc)
++        ret = slirp_send(so, m->m_data, m->m_len, 0);
++
++    if (ret <= 0) {
++        /*
++         * Nothing was written
++         * It's possible that the socket has closed, but
++         * we don't need to check because if it has closed,
++         * it will be detected in the normal way by soread()
++         */
++        sbappendsb(&so->so_rcv, m);
++    } else if (ret != m->m_len) {
++        /*
++         * Something was written, but not everything..
++         * sbappendsb the rest
++         */
++        m->m_len -= ret;
++        m->m_data += ret;
++        sbappendsb(&so->so_rcv, m);
++    } /* else */
++    /* Whatever happened, we free the mbuf */
++    m_free(m);
++}
++
++/*
++ * Copy the data from m into sb
++ * The caller is responsible to make sure there's enough room; data that
++ * does not fit is silently dropped.
++ */
++static void sbappendsb(struct sbuf *sb, struct mbuf *m)
++{
++    int len, n, nn;
++
++    /*
++     * A full ring has wptr == rptr exactly like an empty one, so the
++     * pointer comparison below cannot tell them apart. Stop here instead
++     * of overwriting unread data and pushing sb_cc past sb_datalen.
++     * This also covers a buffer that was never reserved.
++     */
++    if (sb->sb_cc >= sb->sb_datalen) {
++        return;
++    }
++
++    len = m->m_len;
++
++    if (sb->sb_wptr < sb->sb_rptr) {
++        /* free space is one contiguous run up to the read pointer */
++        n = sb->sb_rptr - sb->sb_wptr;
++        if (n > len)
++            n = len;
++        memcpy(sb->sb_wptr, m->m_data, n);
++    } else {
++        /* Do the right edge first */
++        n = sb->sb_data + sb->sb_datalen - sb->sb_wptr;
++        if (n > len)
++            n = len;
++        memcpy(sb->sb_wptr, m->m_data, n);
++        len -= n;
++        if (len) {
++            /* Now the left edge */
++            nn = sb->sb_rptr - sb->sb_data;
++            if (nn > len)
++                nn = len;
++            memcpy(sb->sb_data, m->m_data + n, nn);
++            n += nn;
++        }
++    }
++
++    sb->sb_cc += n;
++    sb->sb_wptr += n;
++    if (sb->sb_wptr >= sb->sb_data + sb->sb_datalen)
++        sb->sb_wptr -= sb->sb_datalen;
++}
++
++/*
++ * Copy data from sbuf to a normal, straight buffer
++ * Don't update the sbuf rptr, this will be
++ * done in sbdrop when the data is acked
++ *
++ * Copies len bytes starting off bytes past the read pointer into to;
++ * asserts that off + len does not exceed the bytes stored.
++ */
++void sbcopy(struct sbuf *sb, size_t off, size_t len, char *to)
++{
++    char *from;
++
++    g_assert(len + off <= sb->sb_cc);
++
++    from = sb->sb_rptr + off;
++    if (from >= sb->sb_data + sb->sb_datalen)
++        from -= sb->sb_datalen;
++
++    if (from < sb->sb_wptr) {
++        /* the requested range does not wrap */
++        memcpy(to, from, len);
++    } else {
++        /* re-use off */
++        off = (sb->sb_data + sb->sb_datalen) - from;
++        if (off > len)
++            off = len;
++        memcpy(to, from, off);
++        len -= off;
++        if (len)
++            memcpy(to + off, sb->sb_data, len);
++    }
++}
+diff --git a/slirp/src/sbuf.h b/slirp/src/sbuf.h
+new file mode 100644
+index 0000000000..01886fbd01
+--- /dev/null
++++ b/slirp/src/sbuf.h
+@@ -0,0 +1,27 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 1995 Danny Gasparovski.
++ */
++
++#ifndef SBUF_H
++#define SBUF_H
++
++#define sbspace(sb) ((sb)->sb_datalen - (sb)->sb_cc)
++
++struct sbuf {
++    uint32_t sb_cc; /* actual chars in buffer */
++    uint32_t sb_datalen; /* Length of data */
++    char *sb_wptr; /* write pointer. points to where the next
++                    * bytes should be written in the sbuf */
++    char *sb_rptr; /* read pointer. points to where the next
++                    * byte should be read from the sbuf */
++    char *sb_data; /* Actual data */
++};
++
++void sbfree(struct sbuf *sb);
++bool sbdrop(struct sbuf *sb, size_t len);
++void sbreserve(struct sbuf *sb, size_t size);
++void sbappend(struct socket *so, struct mbuf *m);
++void sbcopy(struct sbuf *sb, size_t off, size_t len, char *p);
++
++#endif
+diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c
+new file mode 100644
+index 0000000000..9d3fee3e97
+--- /dev/null
++++ b/slirp/src/slirp.c
+@@ -0,0 +1,1387 @@
++/* SPDX-License-Identifier: MIT */
++/*
++ * libslirp glue
++ *
++ * Copyright (c) 2004-2008 Fabrice Bellard
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++ * copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
++ * THE SOFTWARE.
++ */
++#include "slirp.h"
++
++
++#ifndef _WIN32
++#include <net/if.h>
++#endif
++
++/* https://gitlab.freedesktop.org/slirp/libslirp/issues/18 */
++#if defined(__NetBSD__) && defined(if_mtu)
++#undef if_mtu
++#endif
++
++int slirp_debug;
++
++/* Define to 1 if you want KEEPALIVE timers */
++bool slirp_do_keepalive;
++
++/* host loopback address */
++struct in_addr loopback_addr;
++/* host loopback network mask */
++unsigned long loopback_mask;
++
++/* emulated hosts use the MAC addr 52:55:IP:IP:IP:IP */
++static const uint8_t special_ethaddr[ETH_ALEN] = { 0x52, 0x55, 0x00,
++                                                   0x00, 0x00, 0x00 };
++
++unsigned curtime;
++
++/* last DNS server addresses handed out, and the curtime they were read at */
++static struct in_addr dns_addr;
++#ifndef _WIN32
++static struct in6_addr dns6_addr;
++#endif
++static unsigned dns_addr_time;
++#ifndef _WIN32
++static unsigned dns6_addr_time;
++#endif
++
++#define TIMEOUT_FAST 2 /* milliseconds */
++#define TIMEOUT_SLOW 499 /* milliseconds */
++/* for the aging of certain requests like DNS */
++#define TIMEOUT_DEFAULT 1000 /* milliseconds */
++
++#if defined(_WIN32)
++
++/*
++ * Store the host's first DNS server (per GetNetworkParams) in
++ * *pdns_addr. A cached value younger than TIMEOUT_DEFAULT is reused.
++ * Returns 0 on success, -1 when GetNetworkParams fails.
++ */
++int get_dns_addr(struct in_addr *pdns_addr)
++{
++    FIXED_INFO *FixedInfo = NULL;
++    ULONG BufLen;
++    DWORD ret;
++    IP_ADDR_STRING *pIPAddr;
++    struct in_addr tmp_addr;
++
++    if (dns_addr.s_addr != 0 && (curtime - dns_addr_time) < TIMEOUT_DEFAULT) {
++        *pdns_addr = dns_addr;
++        return 0;
++    }
++
++    FixedInfo = (FIXED_INFO *)GlobalAlloc(GPTR, sizeof(FIXED_INFO));
++    BufLen = sizeof(FIXED_INFO);
++
++    /* first call only learns the required size when the buffer is too small */
++    if (ERROR_BUFFER_OVERFLOW == GetNetworkParams(FixedInfo, &BufLen)) {
++        if (FixedInfo) {
++            GlobalFree(FixedInfo);
++            FixedInfo = NULL;
++        }
++        FixedInfo = GlobalAlloc(GPTR, BufLen);
++    }
++
++    if ((ret = GetNetworkParams(FixedInfo, &BufLen)) != ERROR_SUCCESS) {
++        printf("GetNetworkParams failed. ret = %08x\n", (unsigned)ret);
++        if (FixedInfo) {
++            GlobalFree(FixedInfo);
++            FixedInfo = NULL;
++        }
++        return -1;
++    }
++
++    pIPAddr = &(FixedInfo->DnsServerList);
++    inet_aton(pIPAddr->IpAddress.String, &tmp_addr);
++    *pdns_addr = tmp_addr;
++    dns_addr = tmp_addr;
++    dns_addr_time = curtime;
++    if (FixedInfo) {
++        GlobalFree(FixedInfo);
++        FixedInfo = NULL;
++    }
++    return 0;
++}
++
++/* IPv6 DNS lookup is not implemented on Windows: always fails with -1 */
++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id)
++{
++    return -1;
++}
++
++static void winsock_cleanup(void)
++{
++    WSACleanup();
++}
++
++#elif defined(__APPLE__)
++
++#include <resolv.h>
++
++/*
++ * Copy the cached address to pdns_addr while the cache is younger than
++ * TIMEOUT_DEFAULT. Returns 0 when the cache was used, 1 when it is stale.
++ */
++static int get_dns_addr_cached(void *pdns_addr, void *cached_addr,
++                               socklen_t addrlen, unsigned *cached_time)
++{
++    if (curtime - *cached_time < TIMEOUT_DEFAULT) {
++        memcpy(pdns_addr, cached_addr, addrlen);
++        return 0;
++    }
++    return 1;
++}
++
++/*
++ * Ask libresolv for the configured name servers and return the first
++ * one of family af.
++ *
++ * On success the address is copied (addrlen bytes) to both pdns_addr
++ * and cached_addr, *scope_id (when non-NULL) is set to 0, *cached_time
++ * is set to curtime and 0 is returned. Returns -1 when the resolver
++ * cannot be initialised or no server of that family exists.
++ */
++static int get_dns_addr_libresolv(int af, void *pdns_addr, void *cached_addr,
++                                  socklen_t addrlen, uint32_t *scope_id,
++                                  unsigned *cached_time)
++{
++    struct __res_state state;
++    union res_sockaddr_union servers[NI_MAXSERV];
++    int count;
++    int found;
++
++    if (res_ninit(&state) != 0) {
++        return -1;
++    }
++
++    count = res_getservers(&state, servers, NI_MAXSERV);
++    found = 0;
++    DEBUG_MISC("IP address of your DNS(s):");
++    for (int i = 0; i < count; i++) {
++        int family = servers[i].sin.sin_family;
++        const void *addr;
++
++        /* the address sits at a different offset in the IPv6 variant */
++        if (family == AF_INET6) {
++            addr = &servers[i].sin6.sin6_addr;
++        } else {
++            addr = &servers[i].sin.sin_addr;
++        }
++
++        if (af == family) {
++            found++;
++            /*
++             * we use the first found entry; copying only on a match
++             * keeps later servers of another family from overwriting it
++             */
++            if (found == 1) {
++                memcpy(pdns_addr, addr, addrlen);
++                memcpy(cached_addr, addr, addrlen);
++                if (scope_id) {
++                    *scope_id = 0;
++                }
++                *cached_time = curtime;
++            }
++        }
++
++        if (found > 3) {
++            DEBUG_MISC(" (more)");
++            break;
++        } else if (slirp_debug & DBG_MISC) {
++            char s[INET6_ADDRSTRLEN];
++            const char *res = inet_ntop(family, addr, s, sizeof(s));
++            if (!res) {
++                res = " (string conversion error)";
++            }
++            DEBUG_MISC(" %s", res);
++        }
++    }
++
++    res_nclose(&state);
++    if (!found)
++        return -1;
++    return 0;
++}
++
++/* IPv4 DNS server: fresh cached value if any, else ask libresolv */
++int get_dns_addr(struct in_addr *pdns_addr)
++{
++    if (dns_addr.s_addr != 0) {
++        int ret;
++        ret = get_dns_addr_cached(pdns_addr, &dns_addr, sizeof(dns_addr),
++                                  &dns_addr_time);
++        if (ret <= 0) {
++            return ret;
++        }
++    }
++    return get_dns_addr_libresolv(AF_INET, pdns_addr, &dns_addr,
++                                  sizeof(dns_addr), NULL, &dns_addr_time);
++}
++
++/* IPv6 DNS server: fresh cached value if any, else ask libresolv */
++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id)
++{
++    if (!in6_zero(&dns6_addr)) {
++        int ret;
++        ret = get_dns_addr_cached(pdns6_addr, &dns6_addr, sizeof(dns6_addr),
++                                  &dns6_addr_time);
++        if (ret <= 0) {
++            return ret;
++        }
++    }
++    return get_dns_addr_libresolv(AF_INET6, pdns6_addr, &dns6_addr,
++                                  sizeof(dns6_addr), scope_id, &dns6_addr_time);
++}
++
++#else // !defined(_WIN32) && !defined(__APPLE__)
++
++#if defined(__HAIKU__)
++#define RESOLV_CONF_PATH "/boot/system/settings/network/resolv.conf"
++#else
++#define RESOLV_CONF_PATH "/etc/resolv.conf"
++#endif
++
++/*
++ * Reuse the cached address while it is younger than TIMEOUT_DEFAULT or
++ * while RESOLV_CONF_PATH still has the same device, inode, size and
++ * mtime as recorded in *cached_stat (which is refreshed by this call).
++ * Returns 0 when the cache was copied to pdns_addr, 1 when the file
++ * changed and must be re-read, -1 when stat() fails.
++ */
++static int get_dns_addr_cached(void *pdns_addr, void *cached_addr,
++                               socklen_t addrlen, struct stat *cached_stat,
++                               unsigned *cached_time)
++{
++    struct stat old_stat;
++    if (curtime - *cached_time < TIMEOUT_DEFAULT) {
++        memcpy(pdns_addr, cached_addr, addrlen);
++        return 0;
++    }
++    old_stat = *cached_stat;
++    if (stat(RESOLV_CONF_PATH, cached_stat) != 0) {
++        return -1;
++    }
++    if (cached_stat->st_dev == old_stat.st_dev &&
++        cached_stat->st_ino == old_stat.st_ino &&
++        cached_stat->st_size == old_stat.st_size &&
++        cached_stat->st_mtime == old_stat.st_mtime) {
++        memcpy(pdns_addr, cached_addr, addrlen);
++        return 0;
++    }
++    return 1;
++}
++
++static int get_dns_addr_resolv_conf(int af, void *pdns_addr, void *cached_addr,
++                                    socklen_t addrlen, uint32_t *scope_id,
++                                    unsigned *cached_time)
++{
++    char buff[512];
++    char buff2[257];
++    FILE *f;
++    int found = 0;
++    union {
++        struct in_addr dns_addr;
++        struct in6_addr dns6_addr;
++    } tmp_addr;
++    unsigned if_index;
++
++    assert(sizeof(tmp_addr) >= addrlen);
++    f =
fopen(RESOLV_CONF_PATH, "r"); ++ if (!f) ++ return -1; ++ ++ DEBUG_MISC("IP address of your DNS(s):"); ++ while (fgets(buff, 512, f) != NULL) { ++ if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1) { ++ char *c = strchr(buff2, '%'); ++ if (c) { ++ if_index = if_nametoindex(c + 1); ++ *c = '\0'; ++ } else { ++ if_index = 0; ++ } ++ ++ if (!inet_pton(af, buff2, &tmp_addr)) { ++ continue; ++ } ++ /* If it's the first one, set it to dns_addr */ ++ if (!found) { ++ memcpy(pdns_addr, &tmp_addr, addrlen); ++ memcpy(cached_addr, &tmp_addr, addrlen); ++ if (scope_id) { ++ *scope_id = if_index; ++ } ++ *cached_time = curtime; ++ } ++ ++ if (++found > 3) { ++ DEBUG_MISC(" (more)"); ++ break; ++ } else if (slirp_debug & DBG_MISC) { ++ char s[INET6_ADDRSTRLEN]; ++ const char *res = inet_ntop(af, &tmp_addr, s, sizeof(s)); ++ if (!res) { ++ res = " (string conversion error)"; ++ } ++ DEBUG_MISC(" %s", res); ++ } ++ } ++ } ++ fclose(f); ++ if (!found) ++ return -1; ++ return 0; ++} ++ ++int get_dns_addr(struct in_addr *pdns_addr) ++{ ++ static struct stat dns_addr_stat; ++ ++ if (dns_addr.s_addr != 0) { ++ int ret; ++ ret = get_dns_addr_cached(pdns_addr, &dns_addr, sizeof(dns_addr), ++ &dns_addr_stat, &dns_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_resolv_conf(AF_INET, pdns_addr, &dns_addr, ++ sizeof(dns_addr), NULL, &dns_addr_time); ++} ++ ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) ++{ ++ static struct stat dns6_addr_stat; ++ ++ if (!in6_zero(&dns6_addr)) { ++ int ret; ++ ret = get_dns_addr_cached(pdns6_addr, &dns6_addr, sizeof(dns6_addr), ++ &dns6_addr_stat, &dns6_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_resolv_conf(AF_INET6, pdns6_addr, &dns6_addr, ++ sizeof(dns6_addr), scope_id, ++ &dns6_addr_time); ++} ++ ++#endif ++ ++static void slirp_init_once(void) ++{ ++ static int initialized; ++ const char *debug; ++#ifdef _WIN32 ++ WSADATA Data; ++#endif ++ ++ if (initialized) { ++ 
return; ++ } ++ initialized = 1; ++ ++#ifdef _WIN32 ++ WSAStartup(MAKEWORD(2, 0), &Data); ++ atexit(winsock_cleanup); ++#endif ++ ++ loopback_addr.s_addr = htonl(INADDR_LOOPBACK); ++ loopback_mask = htonl(IN_CLASSA_NET); ++ ++ debug = g_getenv("SLIRP_DEBUG"); ++ if (debug) { ++ const GDebugKey keys[] = { ++ { "call", DBG_CALL }, ++ { "misc", DBG_MISC }, ++ { "error", DBG_ERROR }, ++ { "tftp", DBG_TFTP }, ++ { "verbose_call", DBG_VERBOSE_CALL }, ++ }; ++ slirp_debug = g_parse_debug_string(debug, keys, G_N_ELEMENTS(keys)); ++ } ++} ++ ++Slirp *slirp_new(const SlirpConfig *cfg, const SlirpCb *callbacks, void *opaque) ++{ ++ Slirp *slirp; ++ ++ g_return_val_if_fail(cfg != NULL, NULL); ++ g_return_val_if_fail(cfg->version >= SLIRP_CONFIG_VERSION_MIN, NULL); ++ g_return_val_if_fail(cfg->version <= SLIRP_CONFIG_VERSION_MAX, NULL); ++ g_return_val_if_fail(cfg->if_mtu >= IF_MTU_MIN || cfg->if_mtu == 0, NULL); ++ g_return_val_if_fail(cfg->if_mtu <= IF_MTU_MAX, NULL); ++ g_return_val_if_fail(cfg->if_mru >= IF_MRU_MIN || cfg->if_mru == 0, NULL); ++ g_return_val_if_fail(cfg->if_mru <= IF_MRU_MAX, NULL); ++ g_return_val_if_fail(!cfg->bootfile || ++ (strlen(cfg->bootfile) < ++ G_SIZEOF_MEMBER(struct bootp_t, bp_file)), NULL); ++ ++ slirp = g_malloc0(sizeof(Slirp)); ++ ++ slirp_init_once(); ++ ++ slirp->opaque = opaque; ++ slirp->cb = callbacks; ++ slirp->grand = g_rand_new(); ++ slirp->restricted = cfg->restricted; ++ ++ slirp->in_enabled = cfg->in_enabled; ++ slirp->in6_enabled = cfg->in6_enabled; ++ ++ if_init(slirp); ++ ip_init(slirp); ++ ip6_init(slirp); ++ ++ m_init(slirp); ++ ++ slirp->vnetwork_addr = cfg->vnetwork; ++ slirp->vnetwork_mask = cfg->vnetmask; ++ slirp->vhost_addr = cfg->vhost; ++ slirp->vprefix_addr6 = cfg->vprefix_addr6; ++ slirp->vprefix_len = cfg->vprefix_len; ++ slirp->vhost_addr6 = cfg->vhost6; ++ if (cfg->vhostname) { ++ slirp_pstrcpy(slirp->client_hostname, sizeof(slirp->client_hostname), ++ cfg->vhostname); ++ } ++ slirp->tftp_prefix = 
g_strdup(cfg->tftp_path); ++ slirp->bootp_filename = g_strdup(cfg->bootfile); ++ slirp->vdomainname = g_strdup(cfg->vdomainname); ++ slirp->vdhcp_startaddr = cfg->vdhcp_start; ++ slirp->vnameserver_addr = cfg->vnameserver; ++ slirp->vnameserver_addr6 = cfg->vnameserver6; ++ slirp->tftp_server_name = g_strdup(cfg->tftp_server_name); ++ ++ if (cfg->vdnssearch) { ++ translate_dnssearch(slirp, cfg->vdnssearch); ++ } ++ slirp->if_mtu = cfg->if_mtu == 0 ? IF_MTU_DEFAULT : cfg->if_mtu; ++ slirp->if_mru = cfg->if_mru == 0 ? IF_MRU_DEFAULT : cfg->if_mru; ++ slirp->disable_host_loopback = cfg->disable_host_loopback; ++ slirp->enable_emu = cfg->enable_emu; ++ ++ if (cfg->version >= 2) { ++ slirp->outbound_addr = cfg->outbound_addr; ++ slirp->outbound_addr6 = cfg->outbound_addr6; ++ } else { ++ slirp->outbound_addr = NULL; ++ slirp->outbound_addr6 = NULL; ++ } ++ ++ if (cfg->version >= 3) { ++ slirp->disable_dns = cfg->disable_dns; ++ } else { ++ slirp->disable_dns = false; ++ } ++ ++ return slirp; ++} ++ ++Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, ++ struct in_addr vnetmask, struct in_addr vhost, ++ bool in6_enabled, struct in6_addr vprefix_addr6, ++ uint8_t vprefix_len, struct in6_addr vhost6, ++ const char *vhostname, const char *tftp_server_name, ++ const char *tftp_path, const char *bootfile, ++ struct in_addr vdhcp_start, struct in_addr vnameserver, ++ struct in6_addr vnameserver6, const char **vdnssearch, ++ const char *vdomainname, const SlirpCb *callbacks, ++ void *opaque) ++{ ++ SlirpConfig cfg; ++ memset(&cfg, 0, sizeof(cfg)); ++ cfg.version = 1; ++ cfg.restricted = restricted; ++ cfg.in_enabled = in_enabled; ++ cfg.vnetwork = vnetwork; ++ cfg.vnetmask = vnetmask; ++ cfg.vhost = vhost; ++ cfg.in6_enabled = in6_enabled; ++ cfg.vprefix_addr6 = vprefix_addr6; ++ cfg.vprefix_len = vprefix_len; ++ cfg.vhost6 = vhost6; ++ cfg.vhostname = vhostname; ++ cfg.tftp_server_name = tftp_server_name; ++ cfg.tftp_path = tftp_path; ++ cfg.bootfile = 
bootfile; ++ cfg.vdhcp_start = vdhcp_start; ++ cfg.vnameserver = vnameserver; ++ cfg.vnameserver6 = vnameserver6; ++ cfg.vdnssearch = vdnssearch; ++ cfg.vdomainname = vdomainname; ++ return slirp_new(&cfg, callbacks, opaque); ++} ++ ++void slirp_cleanup(Slirp *slirp) ++{ ++ struct gfwd_list *e, *next; ++ ++ for (e = slirp->guestfwd_list; e; e = next) { ++ next = e->ex_next; ++ g_free(e->ex_exec); ++ g_free(e->ex_unix); ++ g_free(e); ++ } ++ ++ ip_cleanup(slirp); ++ ip6_cleanup(slirp); ++ m_cleanup(slirp); ++ ++ g_rand_free(slirp->grand); ++ ++ g_free(slirp->vdnssearch); ++ g_free(slirp->tftp_prefix); ++ g_free(slirp->bootp_filename); ++ g_free(slirp->vdomainname); ++ g_free(slirp); ++} ++ ++#define CONN_CANFSEND(so) \ ++ (((so)->so_state & (SS_FCANTSENDMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) ++#define CONN_CANFRCV(so) \ ++ (((so)->so_state & (SS_FCANTRCVMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) ++ ++static void slirp_update_timeout(Slirp *slirp, uint32_t *timeout) ++{ ++ uint32_t t; ++ ++ if (*timeout <= TIMEOUT_FAST) { ++ return; ++ } ++ ++ t = MIN(1000, *timeout); ++ ++ /* If we have tcp timeout with slirp, then we will fill @timeout with ++ * more precise value. 
++ */ ++ if (slirp->time_fasttimo) { ++ *timeout = TIMEOUT_FAST; ++ return; ++ } ++ if (slirp->do_slowtimo) { ++ t = MIN(TIMEOUT_SLOW, t); ++ } ++ *timeout = t; ++} ++ ++void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, ++ SlirpAddPollCb add_poll, void *opaque) ++{ ++ struct socket *so, *so_next; ++ ++ /* ++ * First, TCP sockets ++ */ ++ ++ /* ++ * *_slowtimo needs calling if there are IP fragments ++ * in the fragment queue, or there are TCP connections active ++ */ ++ slirp->do_slowtimo = ((slirp->tcb.so_next != &slirp->tcb) || ++ (&slirp->ipq.ip_link != slirp->ipq.ip_link.next)); ++ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { ++ int events = 0; ++ ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if we need a tcp_fasttimo ++ */ ++ if (slirp->time_fasttimo == 0 && so->so_tcpcb->t_flags & TF_DELACK) { ++ slirp->time_fasttimo = curtime; /* Flag when want a fasttimo */ ++ } ++ ++ /* ++ * NOFDREF can include still connecting to local-host, ++ * newly socreated() sockets etc. Don't want to select these. ++ */ ++ if (so->so_state & SS_NOFDREF || so->s == -1) { ++ continue; ++ } ++ ++ /* ++ * Set for reading sockets which are accepting ++ */ ++ if (so->so_state & SS_FACCEPTCONN) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ continue; ++ } ++ ++ /* ++ * Set for writing sockets which are connecting ++ */ ++ if (so->so_state & SS_ISFCONNECTING) { ++ so->pollfds_idx = ++ add_poll(so->s, SLIRP_POLL_OUT | SLIRP_POLL_ERR, opaque); ++ continue; ++ } ++ ++ /* ++ * Set for writing if we are connected, can send more, and ++ * we have something to send ++ */ ++ if (CONN_CANFSEND(so) && so->so_rcv.sb_cc) { ++ events |= SLIRP_POLL_OUT | SLIRP_POLL_ERR; ++ } ++ ++ /* ++ * Set for reading (and urgent data) if we are connected, can ++ * receive more, and we have room for it XXX /2 ? 
++ */ ++ if (CONN_CANFRCV(so) && ++ (so->so_snd.sb_cc < (so->so_snd.sb_datalen / 2))) { ++ events |= SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR | ++ SLIRP_POLL_PRI; ++ } ++ ++ if (events) { ++ so->pollfds_idx = add_poll(so->s, events, opaque); ++ } ++ } ++ ++ /* ++ * UDP sockets ++ */ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if it's timed out ++ */ ++ if (so->so_expire) { ++ if (so->so_expire <= curtime) { ++ udp_detach(so); ++ continue; ++ } else { ++ slirp->do_slowtimo = true; /* Let socket expire */ ++ } ++ } ++ ++ /* ++ * When UDP packets are received from over the ++ * link, they're sendto()'d straight away, so ++ * no need for setting for writing ++ * Limit the number of packets queued by this session ++ * to 4. Note that even though we try and limit this ++ * to 4 packets, the session could have more queued ++ * if the packets needed to be fragmented ++ * (XXX <= 4 ?) ++ */ ++ if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ } ++ } ++ ++ /* ++ * ICMP sockets ++ */ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if it's timed out ++ */ ++ if (so->so_expire) { ++ if (so->so_expire <= curtime) { ++ icmp_detach(so); ++ continue; ++ } else { ++ slirp->do_slowtimo = true; /* Let socket expire */ ++ } ++ } ++ ++ if (so->so_state & SS_ISFCONNECTED) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ } ++ } ++ ++ slirp_update_timeout(slirp, timeout); ++} ++ ++void slirp_pollfds_poll(Slirp *slirp, int select_error, ++ SlirpGetREventsCb get_revents, void *opaque) ++{ ++ struct socket *so, *so_next; ++ int ret; ++ ++ curtime = slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS; ++ ++ /* ++ * See if anything has 
timed out ++ */ ++ if (slirp->time_fasttimo && ++ ((curtime - slirp->time_fasttimo) >= TIMEOUT_FAST)) { ++ tcp_fasttimo(slirp); ++ slirp->time_fasttimo = 0; ++ } ++ if (slirp->do_slowtimo && ++ ((curtime - slirp->last_slowtimo) >= TIMEOUT_SLOW)) { ++ ip_slowtimo(slirp); ++ tcp_slowtimo(slirp); ++ slirp->last_slowtimo = curtime; ++ } ++ ++ /* ++ * Check sockets ++ */ ++ if (!select_error) { ++ /* ++ * Check TCP sockets ++ */ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->so_state & SS_NOFDREF || so->s == -1) { ++ continue; ++ } ++ ++#ifndef __APPLE__ ++ /* ++ * Check for URG data ++ * This will soread as well, so no need to ++ * test for SLIRP_POLL_IN below if this succeeds. ++ * ++ * This is however disabled on MacOS, which apparently always ++ * reports data as PRI when it is the last data of the ++ * connection. We would then report it out of band, which the guest ++ * would most probably not be ready for. ++ */ ++ if (revents & SLIRP_POLL_PRI) { ++ ret = sorecvoob(so); ++ if (ret < 0) { ++ /* Socket error might have resulted in the socket being ++ * removed, do not try to do anything more with it. */ ++ continue; ++ } ++ } ++ /* ++ * Check sockets for reading ++ */ ++ else ++#endif ++ if (revents & ++ (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR | SLIRP_POLL_PRI)) { ++ /* ++ * Check for incoming connections ++ */ ++ if (so->so_state & SS_FACCEPTCONN) { ++ tcp_connect(so); ++ continue; ++ } /* else */ ++ ret = soread(so); ++ ++ /* Output it if we read something */ ++ if (ret > 0) { ++ tcp_output(sototcpcb(so)); ++ } ++ if (ret < 0) { ++ /* Socket error might have resulted in the socket being ++ * removed, do not try to do anything more with it. 
*/ ++ continue; ++ } ++ } ++ ++ /* ++ * Check sockets for writing ++ */ ++ if (!(so->so_state & SS_NOFDREF) && ++ (revents & (SLIRP_POLL_OUT | SLIRP_POLL_ERR))) { ++ /* ++ * Check for non-blocking, still-connecting sockets ++ */ ++ if (so->so_state & SS_ISFCONNECTING) { ++ /* Connected */ ++ so->so_state &= ~SS_ISFCONNECTING; ++ ++ ret = send(so->s, (const void *)&ret, 0, 0); ++ if (ret < 0) { ++ /* XXXXX Must fix, zero bytes is a NOP */ ++ if (errno == EAGAIN || errno == EWOULDBLOCK || ++ errno == EINPROGRESS || errno == ENOTCONN) { ++ continue; ++ } ++ ++ /* else failed */ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; ++ } ++ /* else so->so_state &= ~SS_ISFCONNECTING; */ ++ ++ /* ++ * Continue tcp_input ++ */ ++ tcp_input((struct mbuf *)NULL, sizeof(struct ip), so, ++ so->so_ffamily); ++ /* continue; */ ++ } else { ++ ret = sowrite(so); ++ if (ret > 0) { ++ /* Call tcp_output in case we need to send a window ++ * update to the guest, otherwise it will be stuck ++ * until it sends a window probe. */ ++ tcp_output(sototcpcb(so)); ++ } ++ } ++ } ++ } ++ ++ /* ++ * Now UDP sockets. ++ * Incoming packets are sent straight away, they're not buffered. ++ * Incoming UDP data isn't buffered either. ++ */ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->s != -1 && ++ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { ++ sorecvfrom(so); ++ } ++ } ++ ++ /* ++ * Check incoming ICMP replies.
++ */ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->s != -1 && ++ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { ++ icmp_receive(so); ++ } ++ } ++ } ++ ++ if_start(slirp); ++} ++ ++static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ const struct slirp_arphdr *ah = ++ (const struct slirp_arphdr *)(pkt + ETH_HLEN); ++ uint8_t arp_reply[MAX(ETH_HLEN + sizeof(struct slirp_arphdr), 64)]; ++ struct ethhdr *reh = (struct ethhdr *)arp_reply; ++ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_reply + ETH_HLEN); ++ int ar_op; ++ struct gfwd_list *ex_ptr; ++ ++ if (!slirp->in_enabled) { ++ return; ++ } ++ ++ if (pkt_len < ETH_HLEN + sizeof(struct slirp_arphdr)) { ++ return; /* packet too short */ ++ } ++ ++ ar_op = ntohs(ah->ar_op); ++ switch (ar_op) { ++ case ARPOP_REQUEST: ++ if (ah->ar_tip == ah->ar_sip) { ++ /* Gratuitous ARP */ ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ return; ++ } ++ ++ if ((ah->ar_tip & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ if (ah->ar_tip == slirp->vnameserver_addr.s_addr || ++ ah->ar_tip == slirp->vhost_addr.s_addr) ++ goto arp_ok; ++ /* TODO: IPv6 */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_addr.s_addr == ah->ar_tip) ++ goto arp_ok; ++ } ++ return; ++ arp_ok: ++ memset(arp_reply, 0, sizeof(arp_reply)); ++ ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ ++ /* ARP request for alias/dns mac address */ ++ memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN); ++ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&reh->h_source[2], &ah->ar_tip, 4); ++ reh->h_proto = htons(ETH_P_ARP); ++ ++ rah->ar_hrd = htons(1); ++ rah->ar_pro = htons(ETH_P_IP); ++ rah->ar_hln = ETH_ALEN; ++ rah->ar_pln = 4; ++ rah->ar_op = 
htons(ARPOP_REPLY); ++ memcpy(rah->ar_sha, reh->h_source, ETH_ALEN); ++ rah->ar_sip = ah->ar_tip; ++ memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN); ++ rah->ar_tip = ah->ar_sip; ++ slirp_send_packet_all(slirp, arp_reply, sizeof(arp_reply)); ++ } ++ break; ++ case ARPOP_REPLY: ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ break; ++ default: ++ break; ++ } ++} ++ ++void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ struct mbuf *m; ++ int proto; ++ ++ if (pkt_len < ETH_HLEN) ++ return; ++ ++ proto = (((uint16_t)pkt[12]) << 8) + pkt[13]; ++ switch (proto) { ++ case ETH_P_ARP: ++ arp_input(slirp, pkt, pkt_len); ++ break; ++ case ETH_P_IP: ++ case ETH_P_IPV6: ++ m = m_get(slirp); ++ if (!m) ++ return; ++ /* Note: we add 2 to align the IP header on 4 bytes, ++ * and add the margin for the tcpiphdr overhead */ ++ if (M_FREEROOM(m) < pkt_len + TCPIPHDR_DELTA + 2) { ++ m_inc(m, pkt_len + TCPIPHDR_DELTA + 2); ++ } ++ m->m_len = pkt_len + TCPIPHDR_DELTA + 2; ++ memcpy(m->m_data + TCPIPHDR_DELTA + 2, pkt, pkt_len); ++ ++ m->m_data += TCPIPHDR_DELTA + 2 + ETH_HLEN; ++ m->m_len -= TCPIPHDR_DELTA + 2 + ETH_HLEN; ++ ++ if (proto == ETH_P_IP) { ++ ip_input(m); ++ } else if (proto == ETH_P_IPV6) { ++ ip6_input(m); ++ } ++ break; ++ ++ case ETH_P_NCSI: ++ ncsi_input(slirp, pkt, pkt_len); ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* Prepare the IPv4 packet to be sent to the ethernet device. Returns 1 if no ++ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet ++ * is ready to go. 
++ */ ++static int if_encap4(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ const struct ip *iph = (const struct ip *)ifm->m_data; ++ ++ if (!arp_table_search(slirp, iph->ip_dst.s_addr, ethaddr)) { ++ uint8_t arp_req[ETH_HLEN + sizeof(struct slirp_arphdr)]; ++ struct ethhdr *reh = (struct ethhdr *)arp_req; ++ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_req + ETH_HLEN); ++ ++ if (!ifm->resolution_requested) { ++ /* If the client addr is not known, send an ARP request */ ++ memset(reh->h_dest, 0xff, ETH_ALEN); ++ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&reh->h_source[2], &slirp->vhost_addr, 4); ++ reh->h_proto = htons(ETH_P_ARP); ++ rah->ar_hrd = htons(1); ++ rah->ar_pro = htons(ETH_P_IP); ++ rah->ar_hln = ETH_ALEN; ++ rah->ar_pln = 4; ++ rah->ar_op = htons(ARPOP_REQUEST); ++ ++ /* source hw addr */ ++ memcpy(rah->ar_sha, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&rah->ar_sha[2], &slirp->vhost_addr, 4); ++ ++ /* source IP */ ++ rah->ar_sip = slirp->vhost_addr.s_addr; ++ ++ /* target hw addr (none) */ ++ memset(rah->ar_tha, 0, ETH_ALEN); ++ ++ /* target IP */ ++ rah->ar_tip = iph->ip_dst.s_addr; ++ slirp->client_ipaddr = iph->ip_dst; ++ slirp_send_packet_all(slirp, arp_req, sizeof(arp_req)); ++ ifm->resolution_requested = true; ++ ++ /* Expire request and drop outgoing packet after 1 second */ ++ ifm->expiration_date = ++ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; ++ } ++ return 0; ++ } else { ++ memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 4); ++ /* XXX: not correct */ ++ memcpy(&eh->h_source[2], &slirp->vhost_addr, 4); ++ eh->h_proto = htons(ETH_P_IP); ++ ++ /* Send this */ ++ return 2; ++ } ++} ++ ++/* Prepare the IPv6 packet to be sent to the ethernet device. Returns 1 if no ++ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet ++ * is ready to go. 
++ */ ++static int if_encap6(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ const struct ip6 *ip6h = mtod(ifm, const struct ip6 *); ++ if (!ndp_table_search(slirp, ip6h->ip_dst, ethaddr)) { ++ if (!ifm->resolution_requested) { ++ ndp_send_ns(slirp, ip6h->ip_dst); ++ ifm->resolution_requested = true; ++ ifm->expiration_date = ++ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; ++ } ++ return 0; ++ } else { ++ eh->h_proto = htons(ETH_P_IPV6); ++ in6_compute_ethaddr(ip6h->ip_src, eh->h_source); ++ ++ /* Send this */ ++ return 2; ++ } ++} ++ ++/* Output the IP packet to the ethernet device. Returns 0 if the packet must be ++ * re-queued. ++ */ ++int if_encap(Slirp *slirp, struct mbuf *ifm) ++{ ++ uint8_t buf[IF_MTU_MAX + 100]; ++ struct ethhdr *eh = (struct ethhdr *)buf; ++ uint8_t ethaddr[ETH_ALEN]; ++ const struct ip *iph = (const struct ip *)ifm->m_data; ++ int ret; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ if (ifm->m_len + ETH_HLEN > sizeof(buf)) { ++ return 1; ++ } ++ ++ switch (iph->ip_v) { ++ case IPVERSION: ++ ret = if_encap4(slirp, ifm, eh, ethaddr); ++ if (ret < 2) { ++ return ret; ++ } ++ break; ++ ++ case IP6VERSION: ++ ret = if_encap6(slirp, ifm, eh, ethaddr); ++ if (ret < 2) { ++ return ret; ++ } ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ memcpy(eh->h_dest, ethaddr, ETH_ALEN); ++ DEBUG_ARG("src = %s", slirp_ether_ntoa(eh->h_source, ethaddr_str, ++ sizeof(ethaddr_str))); ++ DEBUG_ARG("dst = %s", slirp_ether_ntoa(eh->h_dest, ethaddr_str, ++ sizeof(ethaddr_str))); ++ memcpy(buf + sizeof(struct ethhdr), ifm->m_data, ifm->m_len); ++ slirp_send_packet_all(slirp, buf, ifm->m_len + ETH_HLEN); ++ return 1; ++} ++ ++/* Drop host forwarding rule, return 0 if found. */ ++int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port) ++{ ++ struct socket *so; ++ struct socket *head = (is_udp ? 
&slirp->udb : &slirp->tcb); ++ struct sockaddr_in addr; ++ int port = htons(host_port); ++ socklen_t addr_len; ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ addr_len = sizeof(addr); ++ if ((so->so_state & SS_HOSTFWD) && ++ getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && ++ addr_len == sizeof(addr) && ++ addr.sin_family == AF_INET && ++ addr.sin_addr.s_addr == host_addr.s_addr && ++ addr.sin_port == port) { ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++ return 0; ++ } ++ } ++ ++ return -1; ++} ++ ++int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port, struct in_addr guest_addr, int guest_port) ++{ ++ if (!guest_addr.s_addr) { ++ guest_addr = slirp->vdhcp_startaddr; ++ } ++ if (is_udp) { ++ if (!udp_listen(slirp, host_addr.s_addr, htons(host_port), ++ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) ++ return -1; ++ } else { ++ if (!tcp_listen(slirp, host_addr.s_addr, htons(host_port), ++ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) ++ return -1; ++ } ++ return 0; ++} ++ ++int slirp_remove_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ int flags) ++{ ++ struct socket *so; ++ struct socket *head = (flags & SLIRP_HOSTFWD_UDP ? 
&slirp->udb : &slirp->tcb); ++ struct sockaddr_storage addr; ++ socklen_t addr_len; ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ addr_len = sizeof(addr); ++ if ((so->so_state & SS_HOSTFWD) && ++ getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && ++ sockaddr_equal(&addr, (const struct sockaddr_storage *) haddr)) { ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++ return 0; ++ } ++ } ++ ++ return -1; ++} ++ ++int slirp_add_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *gaddr, socklen_t gaddrlen, ++ int flags) ++{ ++ struct sockaddr_in gdhcp_addr; ++ int fwd_flags = SS_HOSTFWD; ++ ++ if (flags & SLIRP_HOSTFWD_V6ONLY) ++ fwd_flags |= SS_HOSTFWD_V6ONLY; ++ ++ if (gaddr->sa_family == AF_INET) { ++ const struct sockaddr_in *gaddr_in = (const struct sockaddr_in *) gaddr; ++ ++ if (gaddrlen < sizeof(struct sockaddr_in)) { ++ errno = EINVAL; ++ return -1; ++ } ++ ++ if (!gaddr_in->sin_addr.s_addr) { ++ gdhcp_addr = *gaddr_in; ++ gdhcp_addr.sin_addr = slirp->vdhcp_startaddr; ++ gaddr = (struct sockaddr *) &gdhcp_addr; ++ gaddrlen = sizeof(gdhcp_addr); ++ } ++ } else { ++ if (gaddrlen < sizeof(struct sockaddr_in6)) { ++ errno = EINVAL; ++ return -1; ++ } ++ ++ /* ++ * Libslirp currently only provides a stateless DHCPv6 server, thus ++ * we can't translate "addr-any" to the guest here. Instead, we defer ++ * performing the translation to when it's needed. See ++ * soassign_guest_addr_if_needed(). 
++ */ ++ } ++ ++ if (flags & SLIRP_HOSTFWD_UDP) { ++ if (!udpx_listen(slirp, haddr, haddrlen, ++ gaddr, gaddrlen, ++ fwd_flags)) ++ return -1; ++ } else { ++ if (!tcpx_listen(slirp, haddr, haddrlen, ++ gaddr, gaddrlen, ++ fwd_flags)) ++ return -1; ++ } ++ return 0; ++} ++ ++/* TODO: IPv6 */ ++static bool check_guestfwd(Slirp *slirp, struct in_addr *guest_addr, ++ int guest_port) ++{ ++ struct gfwd_list *tmp_ptr; ++ ++ if (!guest_addr->s_addr) { ++ guest_addr->s_addr = slirp->vnetwork_addr.s_addr | ++ (htonl(0x0204) & ~slirp->vnetwork_mask.s_addr); ++ } ++ if ((guest_addr->s_addr & slirp->vnetwork_mask.s_addr) != ++ slirp->vnetwork_addr.s_addr || ++ guest_addr->s_addr == slirp->vhost_addr.s_addr || ++ guest_addr->s_addr == slirp->vnameserver_addr.s_addr) { ++ return false; ++ } ++ ++ /* check if the port is "bound" */ ++ for (tmp_ptr = slirp->guestfwd_list; tmp_ptr; tmp_ptr = tmp_ptr->ex_next) { ++ if (guest_port == tmp_ptr->ex_fport && ++ guest_addr->s_addr == tmp_ptr->ex_addr.s_addr) ++ return false; ++ } ++ ++ return true; ++} ++ ++int slirp_add_exec(Slirp *slirp, const char *cmdline, ++ struct in_addr *guest_addr, int guest_port) ++{ ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_exec(&slirp->guestfwd_list, cmdline, *guest_addr, htons(guest_port)); ++ return 0; ++} ++ ++int slirp_add_unix(Slirp *slirp, const char *unixsock, ++ struct in_addr *guest_addr, int guest_port) ++{ ++#ifdef G_OS_UNIX ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_unix(&slirp->guestfwd_list, unixsock, *guest_addr, htons(guest_port)); ++ return 0; ++#else ++ g_warn_if_reached(); ++ return -1; ++#endif ++} ++ ++int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, ++ struct in_addr *guest_addr, int guest_port) ++{ ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_guestfwd(&slirp->guestfwd_list, write_cb, opaque, *guest_addr, ++ htons(guest_port)); ++ return 
0; ++} ++ ++int slirp_remove_guestfwd(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ return remove_guestfwd(&slirp->guestfwd_list, guest_addr, ++ htons(guest_port)); ++} ++ ++ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags) ++{ ++ if (so->s == -1 && so->guestfwd) { ++ /* XXX this blocks entire thread. Rewrite to use ++ * qemu_chr_fe_write and background I/O callbacks */ ++ so->guestfwd->write_cb(buf, len, so->guestfwd->opaque); ++ return len; ++ } ++ ++ if (so->s == -1) { ++ /* ++ * This should in theory not happen but it is hard to be ++ * sure because some code paths will end up with so->s == -1 ++ * on a failure but don't dispose of the struct socket. ++ * Check specifically, so we don't pass -1 to send(). ++ */ ++ errno = EBADF; ++ return -1; ++ } ++ ++ return send(so->s, buf, len, flags); ++} ++ ++struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ struct socket *so; ++ ++ /* TODO: IPv6 */ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { ++ if (so->so_faddr.s_addr == guest_addr.s_addr && ++ htons(so->so_fport) == guest_port) { ++ return so; ++ } ++ } ++ return NULL; ++} ++ ++size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ struct iovec iov[2]; ++ struct socket *so; ++ ++ so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); ++ ++ if (!so || so->so_state & SS_NOFDREF) { ++ return 0; ++ } ++ ++ if (!CONN_CANFRCV(so) || so->so_snd.sb_cc >= (so->so_snd.sb_datalen / 2)) { ++ return 0; ++ } ++ ++ return sopreprbuf(so, iov, NULL); ++} ++ ++void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, ++ const uint8_t *buf, int size) ++{ ++ int ret; ++ struct socket *so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); ++ ++ if (!so) ++ return; ++ ++ ret = soreadbuf(so, (const char *)buf, size); ++ ++ if (ret > 0) ++ tcp_output(sototcpcb(so)); ++} ++ ++void 
slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len) ++{ ++ ssize_t ret = slirp->cb->send_packet(buf, len, slirp->opaque); ++ ++ if (ret < 0) { ++ g_critical("Failed to send packet, ret: %ld", (long)ret); ++ } else if (ret < len) { ++ DEBUG_ERROR("send_packet() didn't send all data: %ld < %lu", (long)ret, ++ (unsigned long)len); ++ } ++} +diff --git a/slirp/src/slirp.h b/slirp/src/slirp.h +new file mode 100644 +index 0000000000..89d79f3de5 +--- /dev/null ++++ b/slirp/src/slirp.h +@@ -0,0 +1,289 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef SLIRP_H ++#define SLIRP_H ++ ++#ifdef _WIN32 ++ ++/* as defined in sdkddkver.h */ ++#ifndef _WIN32_WINNT ++#define _WIN32_WINNT 0x0600 /* Vista */ ++#endif ++/* reduces the number of implicitly included headers */ ++#ifndef WIN32_LEAN_AND_MEAN ++#define WIN32_LEAN_AND_MEAN ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++ ++#else ++#define O_BINARY 0 ++#endif ++ ++#ifndef _WIN32 ++#include ++#include ++#include ++#include ++#include ++#endif ++ ++#ifdef __APPLE__ ++#include ++#endif ++ ++/* Avoid conflicting with the libc insque() and remque(), which ++ have different prototypes. 
*/ ++#define insque slirp_insque ++#define remque slirp_remque ++#define quehead slirp_quehead ++ ++#include "debug.h" ++#include "util.h" ++ ++#include "libslirp.h" ++#include "ip.h" ++#include "ip6.h" ++#include "tcp.h" ++#include "tcp_timer.h" ++#include "tcp_var.h" ++#include "tcpip.h" ++#include "udp.h" ++#include "ip_icmp.h" ++#include "ip6_icmp.h" ++#include "mbuf.h" ++#include "sbuf.h" ++#include "socket.h" ++#include "if.h" ++#include "main.h" ++#include "misc.h" ++ ++#include "bootp.h" ++#include "tftp.h" ++ ++#define ARPOP_REQUEST 1 /* ARP request */ ++#define ARPOP_REPLY 2 /* ARP reply */ ++ ++struct ethhdr { ++ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ ++ unsigned char h_source[ETH_ALEN]; /* source ether addr */ ++ unsigned short h_proto; /* packet type ID field */ ++}; ++ ++struct slirp_arphdr { ++ unsigned short ar_hrd; /* format of hardware address */ ++ unsigned short ar_pro; /* format of protocol address */ ++ unsigned char ar_hln; /* length of hardware address */ ++ unsigned char ar_pln; /* length of protocol address */ ++ unsigned short ar_op; /* ARP opcode (command) */ ++ ++ /* ++ * Ethernet looks like this : This bit is variable sized however... 
++ */ ++ uint8_t ar_sha[ETH_ALEN]; /* sender hardware address */ ++ uint32_t ar_sip; /* sender IP address */ ++ uint8_t ar_tha[ETH_ALEN]; /* target hardware address */ ++ uint32_t ar_tip; /* target IP address */ ++} SLIRP_PACKED; ++ ++#define ARP_TABLE_SIZE 16 ++ ++typedef struct ArpTable { ++ struct slirp_arphdr table[ARP_TABLE_SIZE]; ++ int next_victim; ++} ArpTable; ++ ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, ++ const uint8_t ethaddr[ETH_ALEN]); ++ ++bool arp_table_search(Slirp *slirp, uint32_t ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]); ++ ++struct ndpentry { ++ uint8_t eth_addr[ETH_ALEN]; /* sender hardware address */ ++ struct in6_addr ip_addr; /* sender IP address */ ++}; ++ ++#define NDP_TABLE_SIZE 16 ++ ++typedef struct NdpTable { ++ struct ndpentry table[NDP_TABLE_SIZE]; ++ /* ++ * The table is a cache with old entries overwritten when the table fills. ++ * Preserve the first entry: it is the guest, which is needed for lazy ++ * hostfwd guest address assignment. ++ */ ++ struct in6_addr guest_in6_addr; ++ int next_victim; ++} NdpTable; ++ ++void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t ethaddr[ETH_ALEN]); ++bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]); ++ ++struct Slirp { ++ unsigned time_fasttimo; ++ unsigned last_slowtimo; ++ bool do_slowtimo; ++ ++ bool in_enabled, in6_enabled; ++ ++ /* virtual network configuration */ ++ struct in_addr vnetwork_addr; ++ struct in_addr vnetwork_mask; ++ struct in_addr vhost_addr; ++ struct in6_addr vprefix_addr6; ++ uint8_t vprefix_len; ++ struct in6_addr vhost_addr6; ++ struct in_addr vdhcp_startaddr; ++ struct in_addr vnameserver_addr; ++ struct in6_addr vnameserver_addr6; ++ ++ struct in_addr client_ipaddr; ++ char client_hostname[33]; ++ ++ int restricted; ++ struct gfwd_list *guestfwd_list; ++ ++ int if_mtu; ++ int if_mru; ++ ++ bool disable_host_loopback; ++ ++ /* mbuf states */ ++ struct quehead m_freelist; ++ struct quehead 
m_usedlist; ++ int mbuf_alloced; ++ ++ /* if states */ ++ struct quehead if_fastq; /* fast queue (for interactive data) */ ++ struct quehead if_batchq; /* queue for non-interactive data */ ++ bool if_start_busy; /* avoid if_start recursion */ ++ ++ /* ip states */ ++ struct ipq ipq; /* ip reass. queue */ ++ uint16_t ip_id; /* ip packet ctr, for ids */ ++ ++ /* bootp/dhcp states */ ++ BOOTPClient bootp_clients[NB_BOOTP_CLIENTS]; ++ char *bootp_filename; ++ size_t vdnssearch_len; ++ uint8_t *vdnssearch; ++ char *vdomainname; ++ ++ /* tcp states */ ++ struct socket tcb; ++ struct socket *tcp_last_so; ++ tcp_seq tcp_iss; /* tcp initial send seq # */ ++ uint32_t tcp_now; /* for RFC 1323 timestamps */ ++ ++ /* udp states */ ++ struct socket udb; ++ struct socket *udp_last_so; ++ ++ /* icmp states */ ++ struct socket icmp; ++ struct socket *icmp_last_so; ++ ++ /* tftp states */ ++ char *tftp_prefix; ++ struct tftp_session tftp_sessions[TFTP_SESSIONS_MAX]; ++ char *tftp_server_name; ++ ++ ArpTable arp_table; ++ NdpTable ndp_table; ++ ++ GRand *grand; ++ void *ra_timer; ++ ++ bool enable_emu; ++ ++ const SlirpCb *cb; ++ void *opaque; ++ ++ struct sockaddr_in *outbound_addr; ++ struct sockaddr_in6 *outbound_addr6; ++ bool disable_dns; /* slirp will not redirect/serve any DNS packet */ ++}; ++ ++void if_start(Slirp *); ++ ++int get_dns_addr(struct in_addr *pdns_addr); ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id); ++ ++/* ncsi.c */ ++void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); ++ ++#ifndef _WIN32 ++#include ++#endif ++ ++ ++extern bool slirp_do_keepalive; ++ ++#define TCP_MAXIDLE (TCPTV_KEEPCNT * TCPTV_KEEPINTVL) ++ ++/* dnssearch.c */ ++int translate_dnssearch(Slirp *s, const char **names); ++ ++/* cksum.c */ ++int cksum(struct mbuf *m, int len); ++int ip6_cksum(struct mbuf *m); ++ ++/* if.c */ ++void if_init(Slirp *); ++void if_output(struct socket *, struct mbuf *); ++ ++/* ip_input.c */ ++void ip_init(Slirp *); ++void 
ip_cleanup(Slirp *); ++void ip_input(struct mbuf *); ++void ip_slowtimo(Slirp *); ++void ip_stripoptions(register struct mbuf *, struct mbuf *); ++ ++/* ip_output.c */ ++int ip_output(struct socket *, struct mbuf *); ++ ++/* ip6_input.c */ ++void ip6_init(Slirp *); ++void ip6_cleanup(Slirp *); ++void ip6_input(struct mbuf *); ++ ++/* ip6_output */ ++int ip6_output(struct socket *, struct mbuf *, int fast); ++ ++/* tcp_input.c */ ++void tcp_input(register struct mbuf *, int, struct socket *, unsigned short af); ++int tcp_mss(register struct tcpcb *, unsigned); ++ ++/* tcp_output.c */ ++int tcp_output(register struct tcpcb *); ++void tcp_setpersist(register struct tcpcb *); ++ ++/* tcp_subr.c */ ++void tcp_init(Slirp *); ++void tcp_cleanup(Slirp *); ++void tcp_template(struct tcpcb *); ++void tcp_respond(struct tcpcb *, register struct tcpiphdr *, ++ register struct mbuf *, tcp_seq, tcp_seq, int, unsigned short); ++struct tcpcb *tcp_newtcpcb(struct socket *); ++struct tcpcb *tcp_close(register struct tcpcb *); ++void tcp_sockclosed(struct tcpcb *); ++int tcp_fconnect(struct socket *, unsigned short af); ++void tcp_connect(struct socket *); ++void tcp_attach(struct socket *); ++uint8_t tcp_tos(struct socket *); ++int tcp_emu(struct socket *, struct mbuf *); ++int tcp_ctl(struct socket *); ++struct tcpcb *tcp_drop(struct tcpcb *tp, int err); ++ ++struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++ ++void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len); ++ ++#endif +diff --git a/slirp/src/socket.c b/slirp/src/socket.c +new file mode 100644 +index 0000000000..2c1b789d48 +--- /dev/null ++++ b/slirp/src/socket.c +@@ -0,0 +1,1104 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */
++
++#include "slirp.h"
++#include "ip_icmp.h"
++#ifdef __sun__
++#include <sys/filio.h> /* FIONREAD, used by sorecvfrom() */
++#endif
++#ifdef __linux__
++#include <linux/errqueue.h> /* struct sock_extended_err, SO_EE_* */
++#endif
++
++static void sofcantrcvmore(struct socket *so);
++static void sofcantsendmore(struct socket *so);
++
++/*
++ * Find the socket on the circular list anchored at head whose local
++ * address equals lhost and, when fhost is non-NULL, whose foreign address
++ * equals fhost.
++ *
++ * *last is a one-entry cache: it is tried first and is updated whenever
++ * the list walk finds a match.  Returns NULL when nothing matches.
++ */
++struct socket *solookup(struct socket **last, struct socket *head,
++                        struct sockaddr_storage *lhost,
++                        struct sockaddr_storage *fhost)
++{
++    struct socket *so = *last;
++
++    /* Optimisation: try the most recently matched socket first */
++    if (so != head && sockaddr_equal(&(so->lhost.ss), lhost) &&
++        (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) {
++        return so;
++    }
++
++    for (so = head->so_next; so != head; so = so->so_next) {
++        if (sockaddr_equal(&(so->lhost.ss), lhost) &&
++            (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) {
++            *last = so;
++            return so;
++        }
++    }
++
++    return NULL;
++}
++
++/*
++ * Create a new socket, initialise the fields
++ * It is the responsibility of the caller to
++ * insque() it into the correct linked-list
++ */
++struct socket *socreate(Slirp *slirp)
++{
++    /* g_new0() zero-fills, replacing the former g_new() + memset() pair */
++    struct socket *so = g_new0(struct socket, 1);
++
++    so->so_state = SS_NOFDREF;
++    so->s = -1; /* no host file descriptor yet */
++    so->slirp = slirp;
++    so->pollfds_idx = -1;
++
++    return so;
++}
++
++/*
++ * Remove references to so from the given message queue.
++ */ ++static void soqfree(struct socket *so, struct quehead *qh) ++{ ++ struct mbuf *ifq; ++ ++ for (ifq = (struct mbuf *)qh->qh_link; (struct quehead *)ifq != qh; ++ ifq = ifq->ifq_next) { ++ if (ifq->ifq_so == so) { ++ struct mbuf *ifm; ++ ifq->ifq_so = NULL; ++ for (ifm = ifq->ifs_next; ifm != ifq; ifm = ifm->ifs_next) { ++ ifm->ifq_so = NULL; ++ } ++ } ++ } ++} ++ ++/* ++ * remque and free a socket, clobber cache ++ */ ++void sofree(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ ++ soqfree(so, &slirp->if_fastq); ++ soqfree(so, &slirp->if_batchq); ++ ++ if (so == slirp->tcp_last_so) { ++ slirp->tcp_last_so = &slirp->tcb; ++ } else if (so == slirp->udp_last_so) { ++ slirp->udp_last_so = &slirp->udb; ++ } else if (so == slirp->icmp_last_so) { ++ slirp->icmp_last_so = &slirp->icmp; ++ } ++ m_free(so->so_m); ++ ++ if (so->so_next && so->so_prev) ++ remque(so); /* crashes if so is not in a queue */ ++ ++ if (so->so_tcpcb) { ++ g_free(so->so_tcpcb); ++ } ++ g_free(so); ++} ++ ++size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np) ++{ ++ int n, lss, total; ++ struct sbuf *sb = &so->so_snd; ++ int len = sb->sb_datalen - sb->sb_cc; ++ int mss = so->so_tcpcb->t_maxseg; ++ ++ DEBUG_CALL("sopreprbuf"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (len <= 0) ++ return 0; ++ ++ iov[0].iov_base = sb->sb_wptr; ++ iov[1].iov_base = NULL; ++ iov[1].iov_len = 0; ++ if (sb->sb_wptr < sb->sb_rptr) { ++ iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; ++ /* Should never succeed, but... */ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ if (iov[0].iov_len > mss) ++ iov[0].iov_len -= iov[0].iov_len % mss; ++ n = 1; ++ } else { ++ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; ++ /* Should never succeed, but... 
*/ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ len -= iov[0].iov_len; ++ if (len) { ++ iov[1].iov_base = sb->sb_data; ++ iov[1].iov_len = sb->sb_rptr - sb->sb_data; ++ if (iov[1].iov_len > len) ++ iov[1].iov_len = len; ++ total = iov[0].iov_len + iov[1].iov_len; ++ if (total > mss) { ++ lss = total % mss; ++ if (iov[1].iov_len > lss) { ++ iov[1].iov_len -= lss; ++ n = 2; ++ } else { ++ lss -= iov[1].iov_len; ++ iov[0].iov_len -= lss; ++ n = 1; ++ } ++ } else ++ n = 2; ++ } else { ++ if (iov[0].iov_len > mss) ++ iov[0].iov_len -= iov[0].iov_len % mss; ++ n = 1; ++ } ++ } ++ if (np) ++ *np = n; ++ ++ return iov[0].iov_len + (n - 1) * iov[1].iov_len; ++} ++ ++/* ++ * Read from so's socket into sb_snd, updating all relevant sbuf fields ++ * NOTE: This will only be called if it is select()ed for reading, so ++ * a read() of 0 (or less) means it's disconnected ++ */ ++int soread(struct socket *so) ++{ ++ int n, nn; ++ size_t buf_len; ++ struct sbuf *sb = &so->so_snd; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("soread"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * No need to check if there's enough room to read. 
++ * soread wouldn't have been called if there weren't ++ */ ++ buf_len = sopreprbuf(so, iov, &n); ++ assert(buf_len != 0); ++ ++ nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, 0); ++ if (nn <= 0) { ++ if (nn < 0 && (errno == EINTR || errno == EAGAIN)) ++ return 0; ++ else { ++ int err; ++ socklen_t elen = sizeof err; ++ struct sockaddr_storage addr; ++ struct sockaddr *paddr = (struct sockaddr *)&addr; ++ socklen_t alen = sizeof addr; ++ ++ err = errno; ++ if (nn == 0) { ++ int shutdown_wr = so->so_state & SS_FCANTSENDMORE; ++ ++ if (!shutdown_wr && getpeername(so->s, paddr, &alen) < 0) { ++ err = errno; ++ } else { ++ getsockopt(so->s, SOL_SOCKET, SO_ERROR, &err, &elen); ++ } ++ } ++ ++ DEBUG_MISC(" --- soread() disconnected, nn = %d, errno = %d-%s", nn, ++ errno, strerror(errno)); ++ sofcantrcvmore(so); ++ ++ if (err == ECONNRESET || err == ECONNREFUSED || err == ENOTCONN || ++ err == EPIPE) { ++ tcp_drop(sototcpcb(so), err); ++ } else { ++ tcp_sockclosed(sototcpcb(so)); ++ } ++ return -1; ++ } ++ } ++ ++ /* ++ * If there was no error, try and read the second time round ++ * We read again if n = 2 (ie, there's another part of the buffer) ++ * and we read as much as we could in the first read ++ * We don't test for <= 0 this time, because there legitimately ++ * might not be any more data (since the socket is non-blocking), ++ * a close will be detected on next iteration. ++ * A return of -1 won't (shouldn't) happen, since it didn't happen above ++ */ ++ if (n == 2 && nn == iov[0].iov_len) { ++ int ret; ++ ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0); ++ if (ret > 0) ++ nn += ret; ++ } ++ ++ DEBUG_MISC(" ... 
read nn = %d bytes", nn); ++ ++ /* Update fields */ ++ sb->sb_cc += nn; ++ sb->sb_wptr += nn; ++ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_wptr -= sb->sb_datalen; ++ return nn; ++} ++ ++int soreadbuf(struct socket *so, const char *buf, int size) ++{ ++ int n, nn, copy = size; ++ struct sbuf *sb = &so->so_snd; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("soreadbuf"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * No need to check if there's enough room to read. ++ * soread wouldn't have been called if there weren't ++ */ ++ assert(size > 0); ++ if (sopreprbuf(so, iov, &n) < size) ++ goto err; ++ ++ nn = MIN(iov[0].iov_len, copy); ++ memcpy(iov[0].iov_base, buf, nn); ++ ++ copy -= nn; ++ buf += nn; ++ ++ if (copy == 0) ++ goto done; ++ ++ memcpy(iov[1].iov_base, buf, copy); ++ ++done: ++ /* Update fields */ ++ sb->sb_cc += size; ++ sb->sb_wptr += size; ++ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_wptr -= sb->sb_datalen; ++ return size; ++err: ++ ++ sofcantrcvmore(so); ++ tcp_sockclosed(sototcpcb(so)); ++ g_critical("soreadbuf buffer too small"); ++ return -1; ++} ++ ++/* ++ * Get urgent data ++ * ++ * When the socket is created, we set it SO_OOBINLINE, ++ * so when OOB data arrives, we soread() it and everything ++ * in the send buffer is sent as urgent data ++ */ ++int sorecvoob(struct socket *so) ++{ ++ struct tcpcb *tp = sototcpcb(so); ++ int ret; ++ ++ DEBUG_CALL("sorecvoob"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * We take a guess at how much urgent data has arrived. ++ * In most situations, when urgent data arrives, the next ++ * read() should get all the urgent data. This guess will ++ * be wrong however if more data arrives just after the ++ * urgent data, or the read() doesn't return all the ++ * urgent data. 
++     */
++    ret = soread(so);
++    if (ret > 0) {
++        tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
++        tp->t_force = 1;
++        tcp_output(tp);
++        tp->t_force = 0;
++    }
++
++    return ret;
++}
++
++/*
++ * Send urgent data
++ * There's a lot duplicated code here, but...
++ */
++int sosendoob(struct socket *so)
++{
++    struct sbuf *sb = &so->so_rcv;
++    char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
++
++    int n;
++
++    DEBUG_CALL("sosendoob");
++    DEBUG_ARG("so = %p", so);
++    DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc);
++
++    if (so->so_urgc > sizeof(buff))
++        so->so_urgc = sizeof(buff); /* XXXX */
++
++    if (sb->sb_rptr < sb->sb_wptr) {
++        /* We can send it directly */
++        n = slirp_send(so, sb->sb_rptr, so->so_urgc,
++                       (MSG_OOB)); /* |MSG_DONTWAIT)); */
++    } else {
++        /*
++         * Since there's no sendv or sendtov like writev,
++         * we must copy all data to a linear buffer then
++         * send it all
++         */
++        uint32_t urgc = so->so_urgc; /* Amount of room left in buff */
++        int len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
++        if (len > urgc) {
++            len = urgc;
++        }
++        memcpy(buff, sb->sb_rptr, len);
++        urgc -= len;
++        if (urgc) {
++            /* We still have some room for the rest */
++            n = sb->sb_wptr - sb->sb_data;
++            if (n > urgc) {
++                n = urgc;
++            }
++            memcpy((buff + len), sb->sb_data, n);
++            len += n;
++        }
++        n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
++#ifdef DEBUG
++        if (n != len) {
++            DEBUG_ERROR("Didn't send all data urgently XXXXX");
++        }
++#endif
++    }
++
++    if (n < 0) {
++        return n;
++    }
++    so->so_urgc -= n;
++    DEBUG_MISC(" ---2 sent %d bytes urgent data, %d urgent bytes left", n,
++               so->so_urgc);
++
++    sb->sb_cc -= n;
++    sb->sb_rptr += n;
++    if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
++        sb->sb_rptr -= sb->sb_datalen;
++
++    return n;
++}
++
++/*
++ * Write data from so_rcv to so's socket,
++ * updating all sbuf fields as necessary.
++ *
++ * Pending urgent data (so->so_urgc != 0) is pushed out first through
++ * sosendoob(); a failed or short urgent send is treated as a disconnect.
++ *
++ * Returns the number of bytes written, 0 when the send would block or was
++ * interrupted (EAGAIN/EINTR) or when only urgent data was pending, and -1
++ * after the socket has been marked as unable to send and
++ * tcp_sockclosed() has been called.
++ */
++int sowrite(struct socket *so)
++{
++    int n, nn;
++    struct sbuf *sb = &so->so_rcv;
++    int len = sb->sb_cc;
++    struct iovec iov[2];
++
++    DEBUG_CALL("sowrite");
++    DEBUG_ARG("so = %p", so);
++
++    if (so->so_urgc) {
++        uint32_t expected = so->so_urgc;
++        int sent = sosendoob(so);
++
++        /*
++         * sosendoob() returns a negative int on failure.  Test the sign
++         * before the unsigned comparison: comparing the int directly with
++         * the uint32_t would convert -1 to UINT32_MAX and let the error
++         * slip through as "sent enough".
++         */
++        if (sent < 0 || (uint32_t)sent < expected) {
++            /* Treat a short write as a fatal error too,
++             * rather than continuing on and sending the urgent
++             * data as if it were non-urgent and leaving the
++             * so_urgc count wrong.
++             */
++            goto err_disconnected;
++        }
++        if (sb->sb_cc == 0)
++            return 0;
++    }
++
++    /*
++     * No need to check if there's something to write,
++     * sowrite wouldn't have been called otherwise
++     */
++
++    iov[0].iov_base = sb->sb_rptr;
++    iov[1].iov_base = NULL;
++    iov[1].iov_len = 0;
++    if (sb->sb_rptr < sb->sb_wptr) {
++        iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
++        /* Should never succeed, but... */
++        if (iov[0].iov_len > len)
++            iov[0].iov_len = len;
++        n = 1;
++    } else {
++        /* Data wraps around the end of the ring buffer: two segments */
++        iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
++        if (iov[0].iov_len > len)
++            iov[0].iov_len = len;
++        len -= iov[0].iov_len;
++        if (len) {
++            iov[1].iov_base = sb->sb_data;
++            iov[1].iov_len = sb->sb_wptr - sb->sb_data;
++            if (iov[1].iov_len > len)
++                iov[1].iov_len = len;
++            n = 2;
++        } else
++            n = 1;
++    }
++    /* Check if there's urgent data to send, and if so, send it */
++
++    nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len, 0);
++    /* This should never happen, but people tell me it does *shrug* */
++    if (nn < 0 && (errno == EAGAIN || errno == EINTR))
++        return 0;
++
++    if (nn <= 0) {
++        goto err_disconnected;
++    }
++
++    /* Only try the second segment when the first went out completely */
++    if (n == 2 && nn == iov[0].iov_len) {
++        int ret;
++        ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len, 0);
++        if (ret > 0)
++            nn += ret;
++    }
++    DEBUG_MISC(" ... wrote nn = %d bytes", nn);
++
++    /* Update sbuf */
++    sb->sb_cc -= nn;
++    sb->sb_rptr += nn;
++    if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
++        sb->sb_rptr -= sb->sb_datalen;
++
++    /*
++     * If in DRAIN mode, and there's no more data, set
++     * it CANTSENDMORE
++     */
++    if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
++        sofcantsendmore(so);
++
++    return nn;
++
++err_disconnected:
++    DEBUG_MISC(" --- sowrite disconnected, so->so_state = %x, errno = %d",
++               so->so_state, errno);
++    sofcantsendmore(so);
++    tcp_sockclosed(sototcpcb(so));
++    return -1;
++}
++
++/*
++ * recvfrom() a UDP socket
++ */
++void sorecvfrom(struct socket *so)
++{
++    struct sockaddr_storage addr;
++    struct sockaddr_storage saddr, daddr;
++    socklen_t addrlen = sizeof(struct sockaddr_storage);
++    char buff[256];
++
++#ifdef __linux__
++    ssize_t size;
++    struct msghdr msg;
++    struct iovec iov;
++    char control[1024];
++
++    /* First look for errors */
++    memset(&msg, 0, sizeof(msg));
++    msg.msg_name = &saddr;
++    msg.msg_namelen = sizeof(saddr);
++    msg.msg_control = control;
++    msg.msg_controllen = sizeof(control);
++    iov.iov_base = buff;
++    iov.iov_len = sizeof(buff);
++    msg.msg_iov = &iov;
++    msg.msg_iovlen = 1;
++
++    size = recvmsg(so->s, &msg, MSG_ERRQUEUE);
++    if (size >= 0) {
++        struct cmsghdr *cmsg;
++        for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
++
++            if (cmsg->cmsg_level == IPPROTO_IP &&
++                cmsg->cmsg_type == IP_RECVERR) {
++                struct sock_extended_err *ee =
++                    (struct sock_extended_err *) CMSG_DATA(cmsg);
++
++                if (ee->ee_origin == SO_EE_ORIGIN_ICMP) {
++                    /* Got an ICMP error, forward it */
++                    struct sockaddr_in *sin;
++
++                    sin = (struct sockaddr_in *) SO_EE_OFFENDER(ee);
++                    icmp_forward_error(so->so_m, ee->ee_type, ee->ee_code,
++                                       0, NULL, &sin->sin_addr);
++                }
++            }
++            else if (cmsg->cmsg_level == IPPROTO_IPV6 &&
++                     cmsg->cmsg_type == IPV6_RECVERR) {
++                struct sock_extended_err *ee =
++                    (struct sock_extended_err *) CMSG_DATA(cmsg);
++
++                if (ee->ee_origin ==
SO_EE_ORIGIN_ICMP6) { ++ /* Got an ICMPv6 error, forward it */ ++ struct sockaddr_in6 *sin6; ++ ++ sin6 = (struct sockaddr_in6 *) SO_EE_OFFENDER(ee); ++ icmp6_forward_error(so->so_m, ee->ee_type, ee->ee_code, ++ &sin6->sin6_addr); ++ } ++ } ++ } ++ return; ++ } ++#endif ++ ++ DEBUG_CALL("sorecvfrom"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (so->so_type == IPPROTO_ICMP) { /* This is a "ping" reply */ ++ int len; ++ ++ len = recvfrom(so->s, buff, 256, 0, (struct sockaddr *)&addr, &addrlen); ++ /* XXX Check if reply is "correct"? */ ++ ++ if (len == -1 || len == 0) { ++ uint8_t code = ICMP_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) ++ code = ICMP_UNREACH_HOST; ++ else if (errno == ENETUNREACH) ++ code = ICMP_UNREACH_NET; ++ ++ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno)); ++ } else { ++ icmp_reflect(so->so_m); ++ so->so_m = NULL; /* Don't m_free() it again! */ ++ } ++ /* No need for this socket anymore, udp_detach it */ ++ udp_detach(so); ++ } else { /* A "normal" UDP packet */ ++ struct mbuf *m; ++ int len; ++#ifdef _WIN32 ++ unsigned long n; ++#else ++ int n; ++#endif ++ ++ if (ioctlsocket(so->s, FIONREAD, &n) != 0) { ++ DEBUG_MISC(" ioctlsocket errno = %d-%s\n", errno, strerror(errno)); ++ return; ++ } ++ ++ m = m_get(so->slirp); ++ if (!m) { ++ return; ++ } ++ switch (so->so_ffamily) { ++ case AF_INET: ++ m->m_data += IF_MAXLINKHDR + sizeof(struct udpiphdr); ++ break; ++ case AF_INET6: ++ m->m_data += ++ IF_MAXLINKHDR + sizeof(struct ip6) + sizeof(struct udphdr); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ /* ++ * XXX Shouldn't FIONREAD packets destined for port 53, ++ * but I don't know the max packet size for DNS lookups ++ */ ++ len = M_FREEROOM(m); ++ /* if (so->so_fport != htons(53)) { */ ++ ++ if (n > len) { ++ n = (m->m_data - m->m_dat) + m->m_len + n + 1; ++ m_inc(m, n); ++ len = M_FREEROOM(m); ++ } ++ /* } */ ++ ++ m->m_len = recvfrom(so->s, 
m->m_data, len, 0, (struct sockaddr *)&addr, ++ &addrlen); ++ DEBUG_MISC(" did recvfrom %d, errno = %d-%s", m->m_len, errno, ++ strerror(errno)); ++ if (m->m_len < 0) { ++ /* Report error as ICMP */ ++ switch (so->so_lfamily) { ++ uint8_t code; ++ case AF_INET: ++ code = ICMP_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) { ++ code = ICMP_UNREACH_HOST; ++ } else if (errno == ENETUNREACH) { ++ code = ICMP_UNREACH_NET; ++ } ++ ++ DEBUG_MISC(" rx error, tx icmp ICMP_UNREACH:%i", code); ++ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, ++ strerror(errno)); ++ break; ++ case AF_INET6: ++ code = ICMP6_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) { ++ code = ICMP6_UNREACH_ADDRESS; ++ } else if (errno == ENETUNREACH) { ++ code = ICMP6_UNREACH_NO_ROUTE; ++ } ++ ++ DEBUG_MISC(" rx error, tx icmp6 ICMP_UNREACH:%i", code); ++ icmp6_send_error(so->so_m, ICMP6_UNREACH, code); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ m_free(m); ++ } else { ++ /* ++ * Hack: domain name lookup will be used the most for UDP, ++ * and since they'll only be used once there's no need ++ * for the 4 minute (or whatever) timeout... So we time them ++ * out much quicker (10 seconds for now...) ++ */ ++ if (so->so_expire) { ++ if (so->so_fport == htons(53)) ++ so->so_expire = curtime + SO_EXPIREFAST; ++ else ++ so->so_expire = curtime + SO_EXPIRE; ++ } ++ ++ /* ++ * If this packet was destined for CTL_ADDR, ++ * make it look like that's where it came from ++ */ ++ saddr = addr; ++ sotranslate_in(so, &saddr); ++ ++ /* Perform lazy guest IP address resolution if needed. 
*/ ++ if (so->so_state & SS_HOSTFWD) { ++ if (soassign_guest_addr_if_needed(so) < 0) { ++ DEBUG_MISC(" guest address not available yet"); ++ switch (so->so_lfamily) { ++ case AF_INET: ++ icmp_send_error(so->so_m, ICMP_UNREACH, ++ ICMP_UNREACH_HOST, 0, ++ "guest address not available yet"); ++ break; ++ case AF_INET6: ++ icmp6_send_error(so->so_m, ICMP6_UNREACH, ++ ICMP6_UNREACH_ADDRESS); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ m_free(m); ++ return; ++ } ++ } ++ daddr = so->lhost.ss; ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ udp_output(so, m, (struct sockaddr_in *)&saddr, ++ (struct sockaddr_in *)&daddr, so->so_iptos); ++ break; ++ case AF_INET6: ++ udp6_output(so, m, (struct sockaddr_in6 *)&saddr, ++ (struct sockaddr_in6 *)&daddr); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } /* rx error */ ++ } /* if ping packet */ ++} ++ ++/* ++ * sendto() a socket ++ */ ++int sosendto(struct socket *so, struct mbuf *m) ++{ ++ int ret; ++ struct sockaddr_storage addr; ++ ++ DEBUG_CALL("sosendto"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ addr = so->fhost.ss; ++ DEBUG_CALL(" sendto()ing)"); ++ if (sotranslate_out(so, &addr) < 0) { ++ return -1; ++ } ++ ++ /* Don't care what port we get */ ++ ret = sendto(so->s, m->m_data, m->m_len, 0, (struct sockaddr *)&addr, ++ sockaddr_size(&addr)); ++ if (ret < 0) ++ return -1; ++ ++ /* ++ * Kill the socket if there's no reply in 4 minutes, ++ * but only if it's an expirable socket ++ */ ++ if (so->so_expire) ++ so->so_expire = curtime + SO_EXPIRE; ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */ ++ return 0; ++} ++ ++/* ++ * Listen for incoming TCP connections ++ * On failure errno contains the reason. 
++ */ ++struct socket *tcpx_listen(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags) ++{ ++ struct socket *so; ++ int s, opt = 1; ++ socklen_t addrlen; ++ ++ DEBUG_CALL("tcpx_listen"); ++ /* AF_INET6 addresses are bigger than AF_INET, so this is big enough. */ ++ char addrstr[INET6_ADDRSTRLEN]; ++ char portstr[6]; ++ int ret; ++ ret = getnameinfo(haddr, haddrlen, addrstr, sizeof(addrstr), portstr, sizeof(portstr), NI_NUMERICHOST|NI_NUMERICSERV); ++ g_assert(ret == 0); ++ DEBUG_ARG("haddr = %s", addrstr); ++ DEBUG_ARG("hport = %s", portstr); ++ ret = getnameinfo(laddr, laddrlen, addrstr, sizeof(addrstr), portstr, sizeof(portstr), NI_NUMERICHOST|NI_NUMERICSERV); ++ g_assert(ret == 0); ++ DEBUG_ARG("laddr = %s", addrstr); ++ DEBUG_ARG("lport = %s", portstr); ++ DEBUG_ARG("flags = %x", flags); ++ ++ /* ++ * SS_HOSTFWD sockets can be accepted multiple times, so they can't be ++ * SS_FACCEPTONCE. Also, SS_HOSTFWD connections can be accepted and ++ * immediately closed if the guest address isn't available yet, which is ++ * incompatible with the "accept once" concept. Correct code will never ++ * request both, so disallow their combination by assertion. ++ */ ++ g_assert(!((flags & SS_HOSTFWD) && (flags & SS_FACCEPTONCE))); ++ ++ so = socreate(slirp); ++ ++ /* Don't tcp_attach... we don't need so_snd nor so_rcv */ ++ so->so_tcpcb = tcp_newtcpcb(so); ++ insque(so, &slirp->tcb); ++ ++ /* ++ * SS_FACCEPTONCE sockets must time out. 
++ */ ++ if (flags & SS_FACCEPTONCE) ++ so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT * 2; ++ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= (SS_FACCEPTCONN | flags); ++ ++ sockaddr_copy(&so->lhost.sa, sizeof(so->lhost), laddr, laddrlen); ++ ++ s = slirp_socket(haddr->sa_family, SOCK_STREAM, 0); ++ if ((s < 0) || ++ (haddr->sa_family == AF_INET6 && slirp_socket_set_v6only(s, (flags & SS_HOSTFWD_V6ONLY) != 0) < 0) || ++ (slirp_socket_set_fast_reuse(s) < 0) || ++ (bind(s, haddr, haddrlen) < 0) || ++ (listen(s, 1) < 0)) { ++ int tmperrno = errno; /* Don't clobber the real reason we failed */ ++ if (s >= 0) { ++ closesocket(s); ++ } ++ sofree(so); ++ /* Restore the real errno */ ++#ifdef _WIN32 ++ WSASetLastError(tmperrno); ++#else ++ errno = tmperrno; ++#endif ++ return NULL; ++ } ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_socket_set_nodelay(s); ++ ++ addrlen = sizeof(so->fhost); ++ getsockname(s, &so->fhost.sa, &addrlen); ++ sotranslate_accept(so); ++ ++ so->s = s; ++ return so; ++} ++ ++struct socket *tcp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, ++ uint32_t laddr, unsigned lport, int flags) ++{ ++ struct sockaddr_in hsa, lsa; ++ ++ memset(&hsa, 0, sizeof(hsa)); ++ hsa.sin_family = AF_INET; ++ hsa.sin_addr.s_addr = haddr; ++ hsa.sin_port = hport; ++ ++ memset(&lsa, 0, sizeof(lsa)); ++ lsa.sin_family = AF_INET; ++ lsa.sin_addr.s_addr = laddr; ++ lsa.sin_port = lport; ++ ++ return tcpx_listen(slirp, (const struct sockaddr *) &hsa, sizeof(hsa), (struct sockaddr *) &lsa, sizeof(lsa), flags); ++} ++ ++/* ++ * Various session state calls ++ * XXX Should be #define's ++ * The socket state stuff needs work, these often get call 2 or 3 ++ * times each when only 1 was needed ++ */ ++void soisfconnecting(struct socket *so) ++{ ++ so->so_state &= ~(SS_NOFDREF | SS_ISFCONNECTED | SS_FCANTRCVMORE | ++ SS_FCANTSENDMORE | SS_FWDRAIN); ++ so->so_state |= SS_ISFCONNECTING; /* Clobber other states */ ++} ++ ++void 
soisfconnected(struct socket *so) ++{ ++ so->so_state &= ~(SS_ISFCONNECTING | SS_FWDRAIN | SS_NOFDREF); ++ so->so_state |= SS_ISFCONNECTED; /* Clobber other states */ ++} ++ ++static void sofcantrcvmore(struct socket *so) ++{ ++ if ((so->so_state & SS_NOFDREF) == 0) { ++ shutdown(so->s, 0); ++ } ++ so->so_state &= ~(SS_ISFCONNECTING); ++ if (so->so_state & SS_FCANTSENDMORE) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* Don't select it */ ++ } else { ++ so->so_state |= SS_FCANTRCVMORE; ++ } ++} ++ ++static void sofcantsendmore(struct socket *so) ++{ ++ if ((so->so_state & SS_NOFDREF) == 0) { ++ shutdown(so->s, 1); /* send FIN to fhost */ ++ } ++ so->so_state &= ~(SS_ISFCONNECTING); ++ if (so->so_state & SS_FCANTRCVMORE) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* as above */ ++ } else { ++ so->so_state |= SS_FCANTSENDMORE; ++ } ++} ++ ++/* ++ * Set write drain mode ++ * Set CANTSENDMORE once all data has been write()n ++ */ ++void sofwdrain(struct socket *so) ++{ ++ if (so->so_rcv.sb_cc) ++ so->so_state |= SS_FWDRAIN; ++ else ++ sofcantsendmore(so); ++} ++ ++static bool sotranslate_out4(Slirp *s, struct socket *so, struct sockaddr_in *sin) ++{ ++ if (!s->disable_dns && so->so_faddr.s_addr == s->vnameserver_addr.s_addr) { ++ return so->so_fport == htons(53) && get_dns_addr(&sin->sin_addr) >= 0; ++ } ++ ++ if (so->so_faddr.s_addr == s->vhost_addr.s_addr || ++ so->so_faddr.s_addr == 0xffffffff) { ++ if (s->disable_host_loopback) { ++ return false; ++ } ++ ++ sin->sin_addr = loopback_addr; ++ } ++ ++ return true; ++} ++ ++static bool sotranslate_out6(Slirp *s, struct socket *so, struct sockaddr_in6 *sin) ++{ ++ if (!s->disable_dns && in6_equal(&so->so_faddr6, &s->vnameserver_addr6)) { ++ uint32_t scope_id; ++ if (so->so_fport == htons(53) && get_dns6_addr(&sin->sin6_addr, &scope_id) >= 0) { ++ sin->sin6_scope_id = scope_id; ++ return true; ++ } ++ return false; ++ } ++ ++ if (in6_equal_net(&so->so_faddr6, 
&s->vprefix_addr6, s->vprefix_len) || ++ in6_equal(&so->so_faddr6, &(struct in6_addr)ALLNODES_MULTICAST)) { ++ if (s->disable_host_loopback) { ++ return false; ++ } ++ ++ sin->sin6_addr = in6addr_loopback; ++ } ++ ++ return true; ++} ++ ++ ++/* ++ * Translate addr in host addr when it is a virtual address ++ */ ++int sotranslate_out(struct socket *so, struct sockaddr_storage *addr) ++{ ++ bool ok = true; ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ ok = sotranslate_out4(so->slirp, so, (struct sockaddr_in *)addr); ++ break; ++ case AF_INET6: ++ ok = sotranslate_out6(so->slirp, so, (struct sockaddr_in6 *)addr); ++ break; ++ } ++ ++ if (!ok) { ++ errno = EPERM; ++ return -1; ++ } ++ ++ return 0; ++} ++ ++void sotranslate_in(struct socket *so, struct sockaddr_storage *addr) ++{ ++ Slirp *slirp = so->slirp; ++ struct sockaddr_in *sin = (struct sockaddr_in *)addr; ++ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr; ++ ++ if ((so->so_faddr.s_addr & inv_mask) == inv_mask) { ++ sin->sin_addr = slirp->vhost_addr; ++ } else if (sin->sin_addr.s_addr == loopback_addr.s_addr || ++ so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { ++ sin->sin_addr = so->so_faddr; ++ } ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6, ++ slirp->vprefix_len)) { ++ if (in6_equal(&sin6->sin6_addr, &in6addr_loopback) || ++ !in6_equal(&so->so_faddr6, &slirp->vhost_addr6)) { ++ sin6->sin6_addr = so->so_faddr6; ++ } ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* ++ * Translate connections from localhost to the real hostname ++ */ ++void sotranslate_accept(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ if (so->so_faddr.s_addr == INADDR_ANY || ++ (so->so_faddr.s_addr & loopback_mask) 
== ++ (loopback_addr.s_addr & loopback_mask)) { ++ so->so_faddr = slirp->vhost_addr; ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_equal(&so->so_faddr6, &in6addr_any) || ++ in6_equal(&so->so_faddr6, &in6addr_loopback)) { ++ so->so_faddr6 = slirp->vhost_addr6; ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++void sodrop(struct socket *s, int num) ++{ ++ if (sbdrop(&s->so_snd, num)) { ++ s->slirp->cb->notify(s->slirp->opaque); ++ } ++} ++ ++/* ++ * Translate "addr-any" in so->lhost to the guest's actual address. ++ * Returns 0 for success, or -1 if the guest doesn't have an address yet ++ * with errno set to EHOSTUNREACH. ++ * ++ * The guest address is taken from the first entry in the ARP table for IPv4 ++ * and the first entry in the NDP table for IPv6. ++ * Note: The IPv4 path isn't exercised yet as all hostfwd "" guest translations ++ * are handled immediately by using slirp->vdhcp_startaddr. ++ */ ++int soassign_guest_addr_if_needed(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ /* AF_INET6 addresses are bigger than AF_INET, so this is big enough. 
*/ ++ char addrstr[INET6_ADDRSTRLEN]; ++ char portstr[6]; ++ ++ g_assert(so->so_state & SS_HOSTFWD); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ if (so->so_laddr.s_addr == INADDR_ANY) { ++ g_assert_not_reached(); ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_zero(&so->so_laddr6)) { ++ int ret; ++ if (in6_zero(&slirp->ndp_table.guest_in6_addr)) { ++ errno = EHOSTUNREACH; ++ return -1; ++ } ++ so->so_laddr6 = slirp->ndp_table.guest_in6_addr; ++ ret = getnameinfo((const struct sockaddr *) &so->lhost.ss, ++ sizeof(so->lhost.ss), addrstr, sizeof(addrstr), ++ portstr, sizeof(portstr), ++ NI_NUMERICHOST|NI_NUMERICSERV); ++ g_assert(ret == 0); ++ DEBUG_MISC("%s: new ip = [%s]:%s", __func__, addrstr, portstr); ++ } ++ break; ++ ++ default: ++ break; ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/socket.h b/slirp/src/socket.h +new file mode 100644 +index 0000000000..a73175dc29 +--- /dev/null ++++ b/slirp/src/socket.h +@@ -0,0 +1,186 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef SLIRP_SOCKET_H ++#define SLIRP_SOCKET_H ++ ++#include "misc.h" ++#include "sbuf.h" ++ ++#define SO_EXPIRE 240000 ++#define SO_EXPIREFAST 10000 ++ ++/* Helps unify some in/in6 routines. 
*/ ++union in4or6_addr { ++ struct in_addr addr4; ++ struct in6_addr addr6; ++}; ++typedef union in4or6_addr in4or6_addr; ++ ++/* ++ * Our socket structure ++ */ ++ ++union slirp_sockaddr { ++ struct sockaddr sa; ++ struct sockaddr_storage ss; ++ struct sockaddr_in sin; ++ struct sockaddr_in6 sin6; ++}; ++ ++struct socket { ++ struct socket *so_next, *so_prev; /* For a linked list of sockets */ ++ ++ int s; /* The actual socket */ ++ struct gfwd_list *guestfwd; ++ ++ int pollfds_idx; /* GPollFD GArray index */ ++ ++ Slirp *slirp; /* managing slirp instance */ ++ ++ /* XXX union these with not-yet-used sbuf params */ ++ struct mbuf *so_m; /* Pointer to the original SYN packet, ++ * for non-blocking connect()'s, and ++ * PING reply's */ ++ struct tcpiphdr *so_ti; /* Pointer to the original ti within ++ * so_mconn, for non-blocking connections */ ++ uint32_t so_urgc; ++ union slirp_sockaddr fhost; /* Foreign host */ ++#define so_faddr fhost.sin.sin_addr ++#define so_fport fhost.sin.sin_port ++#define so_faddr6 fhost.sin6.sin6_addr ++#define so_fport6 fhost.sin6.sin6_port ++#define so_ffamily fhost.ss.ss_family ++ ++ union slirp_sockaddr lhost; /* Local host */ ++#define so_laddr lhost.sin.sin_addr ++#define so_lport lhost.sin.sin_port ++#define so_laddr6 lhost.sin6.sin6_addr ++#define so_lport6 lhost.sin6.sin6_port ++#define so_lfamily lhost.ss.ss_family ++ ++ uint8_t so_iptos; /* Type of service */ ++ uint8_t so_emu; /* Is the socket emulated? 
*/ ++ ++ uint8_t so_type; /* Type of socket, UDP or TCP */ ++ int32_t so_state; /* internal state flags SS_*, below */ ++ ++ struct tcpcb *so_tcpcb; /* pointer to TCP protocol control block */ ++ unsigned so_expire; /* When the socket will expire */ ++ ++ int so_queued; /* Number of packets queued from this socket */ ++ int so_nqueued; /* Number of packets queued in a row ++ * Used to determine when to "downgrade" a session ++ * from fastq to batchq */ ++ ++ struct sbuf so_rcv; /* Receive buffer */ ++ struct sbuf so_snd; /* Send buffer */ ++}; ++ ++ ++/* ++ * Socket state bits. (peer means the host on the Internet, ++ * local host means the host on the other end of the modem) ++ */ ++#define SS_NOFDREF 0x001 /* No fd reference */ ++ ++#define SS_ISFCONNECTING \ ++ 0x002 /* Socket is connecting to peer (non-blocking connect()'s) */ ++#define SS_ISFCONNECTED 0x004 /* Socket is connected to peer */ ++#define SS_FCANTRCVMORE \ ++ 0x008 /* Socket can't receive more from peer (for half-closes) */ ++#define SS_FCANTSENDMORE \ ++ 0x010 /* Socket can't send more to peer (for half-closes) */ ++#define SS_FWDRAIN \ ++ 0x040 /* We received a FIN, drain data and set SS_FCANTSENDMORE */ ++ ++#define SS_CTL 0x080 ++#define SS_FACCEPTCONN \ ++ 0x100 /* Socket is accepting connections from a host on the internet */ ++#define SS_FACCEPTONCE \ ++ 0x200 /* If set, the SS_FACCEPTCONN socket will die after one accept */ ++ ++#define SS_PERSISTENT_MASK 0xf000 /* Unremovable state bits */ ++#define SS_HOSTFWD 0x1000 /* Socket describes host->guest forwarding */ ++#define SS_INCOMING \ ++ 0x2000 /* Connection was initiated by a host on the internet */ ++#define SS_HOSTFWD_V6ONLY 0x4000 /* Only bind on v6 addresses */ ++ ++static inline int sockaddr_equal(const struct sockaddr_storage *a, ++ const struct sockaddr_storage *b) ++{ ++ if (a->ss_family != b->ss_family) { ++ return 0; ++ } ++ ++ switch (a->ss_family) { ++ case AF_INET: { ++ const struct sockaddr_in *a4 = (const struct 
sockaddr_in *)a; ++ const struct sockaddr_in *b4 = (const struct sockaddr_in *)b; ++ return a4->sin_addr.s_addr == b4->sin_addr.s_addr && ++ a4->sin_port == b4->sin_port; ++ } ++ case AF_INET6: { ++ const struct sockaddr_in6 *a6 = (const struct sockaddr_in6 *)a; ++ const struct sockaddr_in6 *b6 = (const struct sockaddr_in6 *)b; ++ return (in6_equal(&a6->sin6_addr, &b6->sin6_addr) && ++ a6->sin6_port == b6->sin6_port); ++ } ++ default: ++ g_assert_not_reached(); ++ } ++ ++ return 0; ++} ++ ++static inline socklen_t sockaddr_size(const struct sockaddr_storage *a) ++{ ++ switch (a->ss_family) { ++ case AF_INET: ++ return sizeof(struct sockaddr_in); ++ case AF_INET6: ++ return sizeof(struct sockaddr_in6); ++ default: ++ g_assert_not_reached(); ++ } ++} ++ ++static inline void sockaddr_copy(struct sockaddr *dst, socklen_t dstlen, const struct sockaddr *src, socklen_t srclen) ++{ ++ socklen_t len = sockaddr_size((const struct sockaddr_storage *) src); ++ g_assert(len <= srclen); ++ g_assert(len <= dstlen); ++ memcpy(dst, src, len); ++} ++ ++struct socket *solookup(struct socket **, struct socket *, ++ struct sockaddr_storage *, struct sockaddr_storage *); ++struct socket *socreate(Slirp *); ++void sofree(struct socket *); ++int soread(struct socket *); ++int sorecvoob(struct socket *); ++int sosendoob(struct socket *); ++int sowrite(struct socket *); ++void sorecvfrom(struct socket *); ++int sosendto(struct socket *, struct mbuf *); ++struct socket *tcp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); ++struct socket *tcpx_listen(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags); ++void soisfconnecting(register struct socket *); ++void soisfconnected(register struct socket *); ++void sofwdrain(struct socket *); ++struct iovec; /* For win32 */ ++size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np); ++int soreadbuf(struct socket *so, const char *buf, int size); ++ 
++int sotranslate_out(struct socket *, struct sockaddr_storage *); ++void sotranslate_in(struct socket *, struct sockaddr_storage *); ++void sotranslate_accept(struct socket *); ++void sodrop(struct socket *, int num); ++int soassign_guest_addr_if_needed(struct socket *so); ++ ++#endif /* SLIRP_SOCKET_H */ +diff --git a/slirp/src/state.c b/slirp/src/state.c +new file mode 100644 +index 0000000000..22af77b256 +--- /dev/null ++++ b/slirp/src/state.c +@@ -0,0 +1,379 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp ++ * ++ * Copyright (c) 2004-2008 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "slirp.h" ++#include "vmstate.h" ++#include "stream.h" ++ ++static int slirp_tcp_post_load(void *opaque, int version) ++{ ++ tcp_template((struct tcpcb *)opaque); ++ ++ return 0; ++} ++ ++static const VMStateDescription vmstate_slirp_tcp = { ++ .name = "slirp-tcp", ++ .version_id = 0, ++ .post_load = slirp_tcp_post_load, ++ .fields = (VMStateField[]){ VMSTATE_INT16(t_state, struct tcpcb), ++ VMSTATE_INT16_ARRAY(t_timer, struct tcpcb, ++ TCPT_NTIMERS), ++ VMSTATE_INT16(t_rxtshift, struct tcpcb), ++ VMSTATE_INT16(t_rxtcur, struct tcpcb), ++ VMSTATE_INT16(t_dupacks, struct tcpcb), ++ VMSTATE_UINT16(t_maxseg, struct tcpcb), ++ VMSTATE_UINT8(t_force, struct tcpcb), ++ VMSTATE_UINT16(t_flags, struct tcpcb), ++ VMSTATE_UINT32(snd_una, struct tcpcb), ++ VMSTATE_UINT32(snd_nxt, struct tcpcb), ++ VMSTATE_UINT32(snd_up, struct tcpcb), ++ VMSTATE_UINT32(snd_wl1, struct tcpcb), ++ VMSTATE_UINT32(snd_wl2, struct tcpcb), ++ VMSTATE_UINT32(iss, struct tcpcb), ++ VMSTATE_UINT32(snd_wnd, struct tcpcb), ++ VMSTATE_UINT32(rcv_wnd, struct tcpcb), ++ VMSTATE_UINT32(rcv_nxt, struct tcpcb), ++ VMSTATE_UINT32(rcv_up, struct tcpcb), ++ VMSTATE_UINT32(irs, struct tcpcb), ++ VMSTATE_UINT32(rcv_adv, struct tcpcb), ++ VMSTATE_UINT32(snd_max, struct tcpcb), ++ VMSTATE_UINT32(snd_cwnd, struct tcpcb), ++ VMSTATE_UINT32(snd_ssthresh, struct tcpcb), ++ VMSTATE_INT16(t_idle, struct tcpcb), ++ VMSTATE_INT16(t_rtt, struct tcpcb), ++ VMSTATE_UINT32(t_rtseq, struct tcpcb), ++ VMSTATE_INT16(t_srtt, struct tcpcb), ++ VMSTATE_INT16(t_rttvar, struct tcpcb), ++ VMSTATE_UINT16(t_rttmin, struct tcpcb), ++ VMSTATE_UINT32(max_sndwnd, struct tcpcb), ++ VMSTATE_UINT8(t_oobflags, struct tcpcb), ++ VMSTATE_UINT8(t_iobc, struct tcpcb), ++ VMSTATE_INT16(t_softerror, struct tcpcb), ++ VMSTATE_UINT8(snd_scale, struct tcpcb), ++ VMSTATE_UINT8(rcv_scale, struct tcpcb), ++ VMSTATE_UINT8(request_r_scale, struct tcpcb), ++ VMSTATE_UINT8(requested_s_scale, struct tcpcb), ++ VMSTATE_UINT32(ts_recent, struct 
tcpcb), ++ VMSTATE_UINT32(ts_recent_age, struct tcpcb), ++ VMSTATE_UINT32(last_ack_sent, struct tcpcb), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++/* The sbuf has a pair of pointers that are migrated as offsets; ++ * we calculate the offsets and restore the pointers using ++ * pre_save/post_load on a tmp structure. ++ */ ++struct sbuf_tmp { ++ struct sbuf *parent; ++ uint32_t roff, woff; ++}; ++ ++static int sbuf_tmp_pre_save(void *opaque) ++{ ++ struct sbuf_tmp *tmp = opaque; ++ tmp->woff = tmp->parent->sb_wptr - tmp->parent->sb_data; ++ tmp->roff = tmp->parent->sb_rptr - tmp->parent->sb_data; ++ ++ return 0; ++} ++ ++static int sbuf_tmp_post_load(void *opaque, int version) ++{ ++ struct sbuf_tmp *tmp = opaque; ++ uint32_t requested_len = tmp->parent->sb_datalen; ++ ++ /* Allocate the buffer space used by the field after the tmp */ ++ sbreserve(tmp->parent, tmp->parent->sb_datalen); ++ ++ if (tmp->woff >= requested_len || tmp->roff >= requested_len) { ++ g_critical("invalid sbuf offsets r/w=%u/%u len=%u", tmp->roff, ++ tmp->woff, requested_len); ++ return -EINVAL; ++ } ++ ++ tmp->parent->sb_wptr = tmp->parent->sb_data + tmp->woff; ++ tmp->parent->sb_rptr = tmp->parent->sb_data + tmp->roff; ++ ++ return 0; ++} ++ ++ ++static const VMStateDescription vmstate_slirp_sbuf_tmp = { ++ .name = "slirp-sbuf-tmp", ++ .post_load = sbuf_tmp_post_load, ++ .pre_save = sbuf_tmp_pre_save, ++ .version_id = 0, ++ .fields = (VMStateField[]){ VMSTATE_UINT32(woff, struct sbuf_tmp), ++ VMSTATE_UINT32(roff, struct sbuf_tmp), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_sbuf = { ++ .name = "slirp-sbuf", ++ .version_id = 0, ++ .fields = (VMStateField[]){ VMSTATE_UINT32(sb_cc, struct sbuf), ++ VMSTATE_UINT32(sb_datalen, struct sbuf), ++ VMSTATE_WITH_TMP(struct sbuf, struct sbuf_tmp, ++ vmstate_slirp_sbuf_tmp), ++ VMSTATE_VBUFFER_UINT32(sb_data, struct sbuf, 0, ++ NULL, sb_datalen), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static bool slirp_older_than_v4(void 
*opaque, int version_id) ++{ ++ return version_id < 4; ++} ++ ++static bool slirp_family_inet(void *opaque, int version_id) ++{ ++ union slirp_sockaddr *ssa = (union slirp_sockaddr *)opaque; ++ return ssa->ss.ss_family == AF_INET; ++} ++ ++static int slirp_socket_pre_load(void *opaque) ++{ ++ struct socket *so = opaque; ++ ++ tcp_attach(so); ++ /* Older versions don't load these fields */ ++ so->so_ffamily = AF_INET; ++ so->so_lfamily = AF_INET; ++ return 0; ++} ++ ++#ifndef _WIN32 ++#define VMSTATE_SIN4_ADDR(f, s, t) VMSTATE_UINT32_TEST(f, s, t) ++#else ++/* Win uses u_long rather than uint32_t - but it's still 32bits long */ ++#define VMSTATE_SIN4_ADDR(f, s, t) \ ++ VMSTATE_SINGLE_TEST(f, s, t, 0, slirp_vmstate_info_uint32, u_long) ++#endif ++ ++/* The OS provided ss_family field isn't that portable; it's size ++ * and type varies (16/8 bit, signed, unsigned) ++ * and the values it contains aren't fully portable. ++ */ ++typedef struct SS_FamilyTmpStruct { ++ union slirp_sockaddr *parent; ++ uint16_t portable_family; ++} SS_FamilyTmpStruct; ++ ++#define SS_FAMILY_MIG_IPV4 2 /* Linux, BSD, Win... 
*/ ++#define SS_FAMILY_MIG_IPV6 10 /* Linux */ ++#define SS_FAMILY_MIG_OTHER 0xffff ++ ++static int ss_family_pre_save(void *opaque) ++{ ++ SS_FamilyTmpStruct *tss = opaque; ++ ++ tss->portable_family = SS_FAMILY_MIG_OTHER; ++ ++ if (tss->parent->ss.ss_family == AF_INET) { ++ tss->portable_family = SS_FAMILY_MIG_IPV4; ++ } else if (tss->parent->ss.ss_family == AF_INET6) { ++ tss->portable_family = SS_FAMILY_MIG_IPV6; ++ } ++ ++ return 0; ++} ++ ++static int ss_family_post_load(void *opaque, int version_id) ++{ ++ SS_FamilyTmpStruct *tss = opaque; ++ ++ switch (tss->portable_family) { ++ case SS_FAMILY_MIG_IPV4: ++ tss->parent->ss.ss_family = AF_INET; ++ break; ++ case SS_FAMILY_MIG_IPV6: ++ case 23: /* compatibility: AF_INET6 from mingw */ ++ case 28: /* compatibility: AF_INET6 from FreeBSD sys/socket.h */ ++ tss->parent->ss.ss_family = AF_INET6; ++ break; ++ default: ++ g_critical("invalid ss_family type %x", tss->portable_family); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static const VMStateDescription vmstate_slirp_ss_family = { ++ .name = "slirp-socket-addr/ss_family", ++ .pre_save = ss_family_pre_save, ++ .post_load = ss_family_post_load, ++ .fields = ++ (VMStateField[]){ VMSTATE_UINT16(portable_family, SS_FamilyTmpStruct), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_socket_addr = { ++ .name = "slirp-socket-addr", ++ .version_id = 4, ++ .fields = ++ (VMStateField[]){ ++ VMSTATE_WITH_TMP(union slirp_sockaddr, SS_FamilyTmpStruct, ++ vmstate_slirp_ss_family), ++ VMSTATE_SIN4_ADDR(sin.sin_addr.s_addr, union slirp_sockaddr, ++ slirp_family_inet), ++ VMSTATE_UINT16_TEST(sin.sin_port, union slirp_sockaddr, ++ slirp_family_inet), ++ ++#if 0 ++ /* Untested: Needs checking by someone with IPv6 test */ ++ VMSTATE_BUFFER_TEST(sin6.sin6_addr, union slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT16_TEST(sin6.sin6_port, union slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT32_TEST(sin6.sin6_flowinfo, union 
slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT32_TEST(sin6.sin6_scope_id, union slirp_sockaddr, ++ slirp_family_inet6), ++#endif ++ ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_socket = { ++ .name = "slirp-socket", ++ .version_id = 4, ++ .pre_load = slirp_socket_pre_load, ++ .fields = ++ (VMStateField[]){ ++ VMSTATE_UINT32(so_urgc, struct socket), ++ /* Pre-v4 versions */ ++ VMSTATE_SIN4_ADDR(so_faddr.s_addr, struct socket, ++ slirp_older_than_v4), ++ VMSTATE_SIN4_ADDR(so_laddr.s_addr, struct socket, ++ slirp_older_than_v4), ++ VMSTATE_UINT16_TEST(so_fport, struct socket, slirp_older_than_v4), ++ VMSTATE_UINT16_TEST(so_lport, struct socket, slirp_older_than_v4), ++ /* v4 and newer */ ++ VMSTATE_STRUCT(fhost, struct socket, 4, vmstate_slirp_socket_addr, ++ union slirp_sockaddr), ++ VMSTATE_STRUCT(lhost, struct socket, 4, vmstate_slirp_socket_addr, ++ union slirp_sockaddr), ++ ++ VMSTATE_UINT8(so_iptos, struct socket), ++ VMSTATE_UINT8(so_emu, struct socket), ++ VMSTATE_UINT8(so_type, struct socket), ++ VMSTATE_INT32(so_state, struct socket), ++ VMSTATE_STRUCT(so_rcv, struct socket, 0, vmstate_slirp_sbuf, ++ struct sbuf), ++ VMSTATE_STRUCT(so_snd, struct socket, 0, vmstate_slirp_sbuf, ++ struct sbuf), ++ VMSTATE_STRUCT_POINTER(so_tcpcb, struct socket, vmstate_slirp_tcp, ++ struct tcpcb), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_bootp_client = { ++ .name = "slirp_bootpclient", ++ .fields = (VMStateField[]){ VMSTATE_UINT16(allocated, BOOTPClient), ++ VMSTATE_BUFFER(macaddr, BOOTPClient), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp = { ++ .name = "slirp", ++ .version_id = 4, ++ .fields = (VMStateField[]){ VMSTATE_UINT16_V(ip_id, Slirp, 2), ++ VMSTATE_STRUCT_ARRAY( ++ bootp_clients, Slirp, NB_BOOTP_CLIENTS, 3, ++ vmstate_slirp_bootp_client, BOOTPClient), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++void slirp_state_save(Slirp *slirp, SlirpWriteCb 
write_cb, void *opaque) ++{ ++ struct gfwd_list *ex_ptr; ++ SlirpOStream f = { ++ .write_cb = write_cb, ++ .opaque = opaque, ++ }; ++ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) ++ if (ex_ptr->write_cb) { ++ struct socket *so; ++ so = slirp_find_ctl_socket(slirp, ex_ptr->ex_addr, ++ ntohs(ex_ptr->ex_fport)); ++ if (!so) { ++ continue; ++ } ++ ++ slirp_ostream_write_u8(&f, 42); ++ slirp_vmstate_save_state(&f, &vmstate_slirp_socket, so); ++ } ++ slirp_ostream_write_u8(&f, 0); ++ ++ slirp_vmstate_save_state(&f, &vmstate_slirp, slirp); ++} ++ ++ ++int slirp_state_load(Slirp *slirp, int version_id, SlirpReadCb read_cb, ++ void *opaque) ++{ ++ struct gfwd_list *ex_ptr; ++ SlirpIStream f = { ++ .read_cb = read_cb, ++ .opaque = opaque, ++ }; ++ ++ while (slirp_istream_read_u8(&f)) { ++ int ret; ++ struct socket *so = socreate(slirp); ++ ++ ret = ++ slirp_vmstate_load_state(&f, &vmstate_slirp_socket, so, version_id); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) != ++ slirp->vnetwork_addr.s_addr) { ++ return -EINVAL; ++ } ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->write_cb && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr && ++ so->so_fport == ex_ptr->ex_fport) { ++ break; ++ } ++ } ++ if (!ex_ptr) { ++ return -EINVAL; ++ } ++ ++ so->guestfwd = ex_ptr; ++ } ++ ++ return slirp_vmstate_load_state(&f, &vmstate_slirp, slirp, version_id); ++} ++ ++int slirp_state_version(void) ++{ ++ return 4; ++} +diff --git a/slirp/src/stream.c b/slirp/src/stream.c +new file mode 100644 +index 0000000000..6cf326f669 +--- /dev/null ++++ b/slirp/src/stream.c +@@ -0,0 +1,120 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp io streams ++ * ++ * Copyright (c) 2018 Red Hat, Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "stream.h" ++#include ++ ++bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size) ++{ ++ return f->read_cb(buf, size, f->opaque) == size; ++} ++ ++bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size) ++{ ++ return f->write_cb(buf, size, f->opaque) == size; ++} ++ ++uint8_t slirp_istream_read_u8(SlirpIStream *f) ++{ ++ uint8_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return b; ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b) ++{ ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++uint16_t slirp_istream_read_u16(SlirpIStream *f) ++{ ++ uint16_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GUINT16_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b) ++{ ++ b = GUINT16_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++uint32_t slirp_istream_read_u32(SlirpIStream *f) ++{ ++ uint32_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GUINT32_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b) ++{ ++ b = GUINT32_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++int16_t slirp_istream_read_i16(SlirpIStream *f) ++{ ++ int16_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GINT16_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b) ++{ ++ b = GINT16_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++int32_t slirp_istream_read_i32(SlirpIStream *f) ++{ ++ int32_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GINT32_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b) ++{ ++ b = GINT32_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} +diff --git a/slirp/src/stream.h b/slirp/src/stream.h +new file mode 100644 +index 0000000000..08bb5b6610 +--- /dev/null ++++ 
b/slirp/src/stream.h +@@ -0,0 +1,35 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef STREAM_H_ ++#define STREAM_H_ ++ ++#include "libslirp.h" ++ ++typedef struct SlirpIStream { ++ SlirpReadCb read_cb; ++ void *opaque; ++} SlirpIStream; ++ ++typedef struct SlirpOStream { ++ SlirpWriteCb write_cb; ++ void *opaque; ++} SlirpOStream; ++ ++bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size); ++bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size); ++ ++uint8_t slirp_istream_read_u8(SlirpIStream *f); ++bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b); ++ ++uint16_t slirp_istream_read_u16(SlirpIStream *f); ++bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b); ++ ++uint32_t slirp_istream_read_u32(SlirpIStream *f); ++bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b); ++ ++int16_t slirp_istream_read_i16(SlirpIStream *f); ++bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b); ++ ++int32_t slirp_istream_read_i32(SlirpIStream *f); ++bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b); ++ ++#endif /* STREAM_H_ */ +diff --git a/slirp/src/tcp.h b/slirp/src/tcp.h +new file mode 100644 +index 0000000000..70a9760664 +--- /dev/null ++++ b/slirp/src/tcp.h +@@ -0,0 +1,169 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp.h 8.1 (Berkeley) 6/10/93 ++ * tcp.h,v 1.3 1994/08/21 05:27:34 paul Exp ++ */ ++ ++#ifndef TCP_H ++#define TCP_H ++ ++#include ++ ++typedef uint32_t tcp_seq; ++ ++#define PR_SLOWHZ 2 /* 2 slow timeouts per second (approx) */ ++#define PR_FASTHZ 5 /* 5 fast timeouts per second (not important) */ ++ ++#define TCP_SNDSPACE 1024 * 128 ++#define TCP_RCVSPACE 1024 * 128 ++#define TCP_MAXSEG_MAX 32768 ++ ++/* ++ * TCP header. ++ * Per RFC 793, September, 1981. 
++ */ ++#define tcphdr slirp_tcphdr /* from here on 'struct tcphdr' names struct slirp_tcphdr; presumably avoids clashing with a system definition */ ++struct tcphdr { ++ uint16_t th_sport; /* source port */ ++ uint16_t th_dport; /* destination port */ ++ tcp_seq th_seq; /* sequence number */ ++ tcp_seq th_ack; /* acknowledgement number */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t th_off : 4, /* data offset */ ++ th_x2 : 4; /* (unused) */ ++#else ++ uint8_t th_x2 : 4, /* (unused) */ ++ th_off : 4; /* data offset */ ++#endif ++ uint8_t th_flags; /* control flags; presumably the TH_* bits defined below -- confirm */ ++ uint16_t th_win; /* window */ ++ uint16_t th_sum; /* checksum */ ++ uint16_t th_urp; /* urgent pointer */ ++}; ++ ++#include "tcp_var.h" ++ ++#ifndef TH_FIN ++#define TH_FIN 0x01 ++#define TH_SYN 0x02 ++#define TH_RST 0x04 ++#define TH_PUSH 0x08 ++#define TH_ACK 0x10 ++#define TH_URG 0x20 ++#endif ++ ++#ifndef TCPOPT_EOL ++#define TCPOPT_EOL 0 ++#define TCPOPT_NOP 1 ++#define TCPOPT_MAXSEG 2 ++#define TCPOPT_WINDOW 3 ++#define TCPOPT_SACK_PERMITTED 4 /* Experimental */ ++#define TCPOPT_SACK 5 /* Experimental */ ++#define TCPOPT_TIMESTAMP 8 ++ ++#define TCPOPT_TSTAMP_HDR \ ++ (TCPOPT_NOP << 24 | TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 | \ ++ TCPOLEN_TIMESTAMP) /* TCPOLEN_TIMESTAMP is defined further down; that works because macros expand at the point of use */ ++#endif ++ ++#ifndef TCPOLEN_MAXSEG ++#define TCPOLEN_MAXSEG 4 ++#define TCPOLEN_WINDOW 3 ++#define TCPOLEN_SACK_PERMITTED 2 ++#define TCPOLEN_TIMESTAMP 10 ++#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP + 2) /* appendix A */ ++#endif ++ ++#undef TCP_MAXWIN ++#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */ ++ ++#undef TCP_MAX_WINSHIFT ++#define TCP_MAX_WINSHIFT 14 /* maximum window shift */ ++ ++/* ++ * User-settable options (used with setsockopt). ++ * ++ * We don't use the system headers on unix because we have conflicting ++ * local structures. We can't avoid the system definitions on Windows, ++ * so we undefine them. ++ */ ++#undef TCP_NODELAY ++#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ ++#undef TCP_MAXSEG /* only undefined; no replacement is defined in the rest of this header */ ++ ++/* ++ * TCP FSM state definitions. ++ * Per RFC793, September, 1981.
++ */ ++ ++#define TCP_NSTATES 11 /* number of TCPS_* states below (values 0..10) */ ++ ++#define TCPS_CLOSED 0 /* closed */ ++#define TCPS_LISTEN 1 /* listening for connection */ ++#define TCPS_SYN_SENT 2 /* active, have sent syn */ ++#define TCPS_SYN_RECEIVED 3 /* have sent and received syn */ ++/* states < TCPS_ESTABLISHED are those where the connection is not yet established */ ++#define TCPS_ESTABLISHED 4 /* established */ ++#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ ++/* states > TCPS_CLOSE_WAIT are those where user has closed */ ++#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ ++#define TCPS_CLOSING 7 /* closed, exchanged FIN; await FIN ACK */ ++#define TCPS_LAST_ACK 8 /* had FIN and close; await FIN ACK */ ++/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */ ++#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ ++#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ ++ ++#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED) ++#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) ++#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) /* as written, true only for TCPS_TIME_WAIT, the highest-numbered state */ ++ ++/* ++ * TCP sequence numbers are 32 bit integers operated ++ * on with modular arithmetic. These macros can be ++ * used to compare such integers: the difference is cast to int and its sign is tested. ++ */ ++#define SEQ_LT(a, b) ((int)((a) - (b)) < 0) ++#define SEQ_LEQ(a, b) ((int)((a) - (b)) <= 0) ++#define SEQ_GT(a, b) ((int)((a) - (b)) > 0) ++#define SEQ_GEQ(a, b) ((int)((a) - (b)) >= 0) ++ ++/* ++ * Macros to initialize tcp sequence numbers for ++ * send and receive from initial send and receive ++ * sequence numbers.
++ */ ++#define tcp_rcvseqinit(tp) (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 /* expands to a bare assignment expression: use it as a full statement */ ++ ++#define tcp_sendseqinit(tp) \ ++ (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = (tp)->iss /* likewise a bare assignment chain */ ++ ++#define TCP_ISSINCR (125 * 1024) /* increment for tcp_iss each second */ ++ ++#endif +diff --git a/slirp/src/tcp_input.c b/slirp/src/tcp_input.c +new file mode 100644 +index 0000000000..36a4844a7d +--- /dev/null ++++ b/slirp/src/tcp_input.c +@@ -0,0 +1,1552 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 ++ * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++#define TCPREXMTTHRESH 3 ++ ++#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) ++ ++/* for modulo comparisons of timestamps */ ++#define TSTMP_LT(a, b) ((int)((a) - (b)) < 0) ++#define TSTMP_GEQ(a, b) ((int)((a) - (b)) >= 0) ++ ++/* ++ * Insert segment ti into reassembly queue of tcp with ++ * control block tp. Return TH_FIN if reassembly now includes ++ * a segment with FIN. The macro form does the common case inline ++ * (segment is the next to be received on an established connection, ++ * and the queue is empty), avoiding linkage into and removal ++ * from the queue and repetition of various conversions. ++ * Set DELACK for segments received in order, but ack immediately ++ * when segments are out of order (so fast retransmit can work). 
++ */ ++#define TCP_REASS(tp, ti, m, so, flags) \ ++ { \ ++ if ((ti)->ti_seq == (tp)->rcv_nxt && tcpfrag_list_empty(tp) && \ ++ (tp)->t_state == TCPS_ESTABLISHED) { \ ++ tp->t_flags |= TF_DELACK; \ ++ (tp)->rcv_nxt += (ti)->ti_len; \ ++ flags = (ti)->ti_flags & TH_FIN; \ ++ if (so->so_emu) { \ ++ if (tcp_emu((so), (m))) \ ++ sbappend(so, (m)); \ ++ } else \ ++ sbappend((so), (m)); \ ++ } else { \ ++ (flags) = tcp_reass((tp), (ti), (m)); \ ++ tp->t_flags |= TF_ACKNOW; \ ++ } \ ++ } /* NOTE(review): tp, so and flags appear unparenthesized in parts of the expansion, so pass plain identifiers */ ++ ++static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, ++ struct tcpiphdr *ti); ++static void tcp_xmit_timer(register struct tcpcb *tp, int rtt); /* forward declarations: both helpers are called from tcp_input() below */ ++ ++static int tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti, ++ struct mbuf *m) ++{ /* Queue segment ti (payload in mbuf m) on tp's reassembly list, then hand any in-order data to the socket. Returns the TH_FIN bit of the last segment delivered, or 0 if nothing was delivered. */ ++ if (m) /* m is NULL when tcp_input() calls this with ti == NULL */ ++ M_DUP_DEBUG(m->slirp, m, 0, 0); ++ ++ register struct tcpiphdr *q; /* cursor over the queued segments */ ++ struct socket *so = tp->t_socket; ++ int flags; /* assigned only inside the delivery loop after 'present' */ ++ ++ /* ++ * Call with ti == NULL once the connection becomes established to ++ * force queued pre-ESTABLISHED data up to the user socket. ++ */ ++ if (ti == NULL) ++ goto present; ++ ++ /* ++ * Find a segment which begins after this one does. ++ */ ++ for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp); ++ q = tcpiphdr_next(q)) ++ if (SEQ_GT(q->ti_seq, ti->ti_seq)) ++ break; ++ ++ /* ++ * If there is a preceding segment, it may provide some of ++ * our data already. If so, drop the data from the incoming ++ * segment. If it provides all of our data, drop us. ++ */ ++ if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) { ++ register int i; ++ q = tcpiphdr_prev(q); ++ /* conversion to int (in i) handles seq wraparound */ ++ i = q->ti_seq + q->ti_len - ti->ti_seq; ++ if (i > 0) { ++ if (i >= ti->ti_len) { ++ m_free(m); ++ /* ++ * Try to present any queued data ++ * at the left window edge to the user. ++ * This is needed after the three-way handshake ++ * (3-WHS) completes. ++ */ ++ goto present; /* ???
*/ ++ } ++ m_adj(m, i); ++ ti->ti_len -= i; ++ ti->ti_seq += i; ++ } ++ q = tcpiphdr_next(q); ++ } ++ ti->ti_mbuf = m; /* remember the payload mbuf; m is reused as a scratch pointer below */ ++ ++ /* ++ * While we overlap succeeding segments trim them or, ++ * if they are completely covered, dequeue them. ++ */ ++ while (!tcpfrag_list_end(q, tp)) { ++ register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; ++ if (i <= 0) ++ break; ++ if (i < q->ti_len) { ++ q->ti_seq += i; ++ q->ti_len -= i; ++ m_adj(q->ti_mbuf, i); ++ break; ++ } ++ q = tcpiphdr_next(q); /* step past the fully covered segment first, then unlink and free it */ ++ m = tcpiphdr_prev(q)->ti_mbuf; ++ remque(tcpiphdr2qlink(tcpiphdr_prev(q))); ++ m_free(m); ++ } ++ ++ /* ++ * Stick new segment in its place. ++ */ ++ insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q))); ++ ++present: ++ /* ++ * Present data to user, advancing rcv_nxt through ++ * completed sequence space. ++ */ ++ if (!TCPS_HAVEESTABLISHED(tp->t_state)) ++ return (0); ++ ti = tcpfrag_list_first(tp); ++ if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt) ++ return (0); ++ if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) /* NOTE(review): looks unreachable -- TCPS_SYN_RECEIVED (3) already fails the TCPS_HAVEESTABLISHED (>= 4) check above */ ++ return (0); ++ do { ++ tp->rcv_nxt += ti->ti_len; ++ flags = ti->ti_flags & TH_FIN; ++ remque(tcpiphdr2qlink(ti)); ++ m = ti->ti_mbuf; ++ ti = tcpiphdr_next(ti); ++ if (so->so_state & SS_FCANTSENDMORE) /* socket flagged SS_FCANTSENDMORE: free the data instead of queueing it */ ++ m_free(m); ++ else { ++ if (so->so_emu) { ++ if (tcp_emu(so, m)) ++ sbappend(so, m); ++ } else ++ sbappend(so, m); ++ } ++ } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); /* stop at the first gap in sequence space, or when ti reaches tp itself (presumably the list sentinel) */ ++ return (flags); ++} ++ ++/* ++ * TCP input routine, follows pages 65-76 of the ++ * protocol specification dated September, 1981 very closely.
++ */ ++void tcp_input(struct mbuf *m, int iphlen, struct socket *inso, ++ unsigned short af) ++{ ++ struct ip save_ip, *ip; ++ struct ip6 save_ip6, *ip6; ++ register struct tcpiphdr *ti; ++ char *optp = NULL; ++ int optlen = 0; ++ int len, tlen, off; ++ register struct tcpcb *tp = NULL; ++ register int tiflags; ++ struct socket *so = NULL; ++ int todrop, acked, ourfinisacked, needoutput = 0; ++ int iss = 0; ++ uint32_t tiwin; ++ int ret; ++ struct sockaddr_storage lhost, fhost; ++ struct sockaddr_in *lhost4, *fhost4; ++ struct sockaddr_in6 *lhost6, *fhost6; ++ struct gfwd_list *ex_ptr; ++ Slirp *slirp; ++ ++ DEBUG_CALL("tcp_input"); ++ DEBUG_ARG("m = %p iphlen = %2d inso = %p", m, iphlen, inso); ++ ++ /* ++ * If called with m == 0, then we're continuing the connect ++ */ ++ if (m == NULL) { ++ so = inso; ++ slirp = so->slirp; ++ ++ /* Re-set a few variables */ ++ tp = sototcpcb(so); ++ m = so->so_m; ++ so->so_m = NULL; ++ ti = so->so_ti; ++ tiwin = ti->ti_win; ++ tiflags = ti->ti_flags; ++ ++ goto cont_conn; ++ } ++ slirp = m->slirp; ++ switch (af) { ++ case AF_INET: ++ M_DUP_DEBUG(slirp, m, 0, ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr)); ++ break; ++ case AF_INET6: ++ M_DUP_DEBUG(slirp, m, 0, ++ sizeof(struct tcpiphdr) - sizeof(struct ip6) - sizeof(struct tcphdr)); ++ break; ++ } ++ ++ ip = mtod(m, struct ip *); ++ ip6 = mtod(m, struct ip6 *); ++ ++ switch (af) { ++ case AF_INET: ++ if (iphlen > sizeof(struct ip)) { ++ ip_stripoptions(m, (struct mbuf *)0); ++ iphlen = sizeof(struct ip); ++ } ++ /* XXX Check if too short */ ++ ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ save_ip.ip_len += iphlen; ++ ++ /* ++ * Get IP and TCP header together in first mbuf. ++ * Note: IP leaves IP header in first mbuf. 
++ */ ++ m->m_data -= ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); ++ m->m_len += ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); ++ ti = mtod(m, struct tcpiphdr *); ++ ++ /* ++ * Checksum extended TCP header and data. ++ */ ++ tlen = ip->ip_len; ++ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; ++ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ memset(&ti->ti, 0, sizeof(ti->ti)); ++ ti->ti_x0 = 0; ++ ti->ti_src = save_ip.ip_src; ++ ti->ti_dst = save_ip.ip_dst; ++ ti->ti_pr = save_ip.ip_p; ++ ti->ti_len = htons((uint16_t)tlen); ++ break; ++ ++ case AF_INET6: ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip6 = *ip6; ++ /* ++ * Get IP and TCP header together in first mbuf. ++ * Note: IP leaves IP header in first mbuf. ++ */ ++ m->m_data -= sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ m->m_len += sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ ti = mtod(m, struct tcpiphdr *); ++ ++ tlen = ip6->ip_pl; ++ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; ++ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ memset(&ti->ti, 0, sizeof(ti->ti)); ++ ti->ti_x0 = 0; ++ ti->ti_src6 = save_ip6.ip_src; ++ ti->ti_dst6 = save_ip6.ip_dst; ++ ti->ti_nh6 = save_ip6.ip_nh; ++ ti->ti_len = htons((uint16_t)tlen); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ len = ((sizeof(struct tcpiphdr) - sizeof(struct tcphdr)) + tlen); ++ if (cksum(m, len)) { ++ goto drop; ++ } ++ ++ /* ++ * Check that TCP offset makes sense, ++ * pull out TCP options and adjust length. 
XXX ++ */ ++ off = ti->ti_off << 2; ++ if (off < sizeof(struct tcphdr) || off > tlen) { ++ goto drop; ++ } ++ tlen -= off; ++ ti->ti_len = tlen; ++ if (off > sizeof(struct tcphdr)) { ++ optlen = off - sizeof(struct tcphdr); ++ optp = mtod(m, char *) + sizeof(struct tcpiphdr); ++ } ++ tiflags = ti->ti_flags; ++ ++ /* ++ * Convert TCP protocol specific fields to host format. ++ */ ++ NTOHL(ti->ti_seq); ++ NTOHL(ti->ti_ack); ++ NTOHS(ti->ti_win); ++ NTOHS(ti->ti_urp); ++ ++ /* ++ * Drop TCP, IP headers and TCP options. ++ */ ++ m->m_data += sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ m->m_len -= sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ ++ /* ++ * Locate pcb for segment. ++ */ ++findso: ++ lhost.ss_family = af; ++ fhost.ss_family = af; ++ switch (af) { ++ case AF_INET: ++ lhost4 = (struct sockaddr_in *)&lhost; ++ lhost4->sin_addr = ti->ti_src; ++ lhost4->sin_port = ti->ti_sport; ++ fhost4 = (struct sockaddr_in *)&fhost; ++ fhost4->sin_addr = ti->ti_dst; ++ fhost4->sin_port = ti->ti_dport; ++ break; ++ case AF_INET6: ++ lhost6 = (struct sockaddr_in6 *)&lhost; ++ lhost6->sin6_addr = ti->ti_src6; ++ lhost6->sin6_port = ti->ti_sport; ++ fhost6 = (struct sockaddr_in6 *)&fhost; ++ fhost6->sin6_addr = ti->ti_dst6; ++ fhost6->sin6_port = ti->ti_dport; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ so = solookup(&slirp->tcp_last_so, &slirp->tcb, &lhost, &fhost); ++ ++ /* ++ * If the state is CLOSED (i.e., TCB does not exist) then ++ * all data in the incoming segment is discarded. ++ * If the TCB exists but is in CLOSED state, it is embryonic, ++ * but should either do a listen or a connect soon. ++ * ++ * state == CLOSED means we've done socreate() but haven't ++ * attached it to a protocol yet... ++ * ++ * XXX If a TCB does not exist, and the TH_SYN flag is ++ * the only flag set, then create a session, mark it ++ * as if it was LISTENING, and continue... 
++ */ ++ if (so == NULL) { ++ /* TODO: IPv6 */ ++ if (slirp->restricted) { ++ /* Any hostfwds will have an existing socket, so we only get here ++ * for non-hostfwd connections. These should be dropped, unless it ++ * happens to be a guestfwd. ++ */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == ti->ti_dport && ++ ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) { ++ break; ++ } ++ } ++ if (!ex_ptr) { ++ goto dropwithreset; ++ } ++ } ++ ++ if ((tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) != TH_SYN) ++ goto dropwithreset; ++ ++ so = socreate(slirp); ++ tcp_attach(so); ++ ++ sbreserve(&so->so_snd, TCP_SNDSPACE); ++ sbreserve(&so->so_rcv, TCP_RCVSPACE); ++ ++ so->lhost.ss = lhost; ++ so->fhost.ss = fhost; ++ ++ so->so_iptos = tcp_tos(so); ++ if (so->so_iptos == 0) { ++ switch (af) { ++ case AF_INET: ++ so->so_iptos = ((struct ip *)ti)->ip_tos; ++ break; ++ case AF_INET6: ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++ ++ tp = sototcpcb(so); ++ tp->t_state = TCPS_LISTEN; ++ } ++ ++ /* ++ * If this is a still-connecting socket, this probably ++ * a retransmit of the SYN. Whether it's a retransmit SYN ++ * or something else, we nuke it. ++ */ ++ if (so->so_state & SS_ISFCONNECTING) ++ goto drop; ++ ++ tp = sototcpcb(so); ++ ++ /* XXX Should never fail */ ++ if (tp == NULL) ++ goto dropwithreset; ++ if (tp->t_state == TCPS_CLOSED) ++ goto drop; ++ ++ tiwin = ti->ti_win; ++ ++ /* ++ * Segment received on connection. ++ * Reset idle time and keep-alive timer. ++ */ ++ tp->t_idle = 0; ++ if (slirp_do_keepalive) ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; ++ else ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; ++ ++ /* ++ * Process options if not in LISTEN state, ++ * else do it below (after getting remote address). 
++ */ ++ if (optp && tp->t_state != TCPS_LISTEN) ++ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); ++ ++ /* ++ * Header prediction: check for the two common cases ++ * of a uni-directional data xfer. If the packet has ++ * no control flags, is in-sequence, the window didn't ++ * change and we're not retransmitting, it's a ++ * candidate. If the length is zero and the ack moved ++ * forward, we're the sender side of the xfer. Just ++ * free the data acked & wake any higher level process ++ * that was blocked waiting for space. If the length ++ * is non-zero and the ack didn't move, we're the ++ * receiver side. If we're getting packets in-order ++ * (the reassembly queue is empty), add the data to ++ * the socket buffer and note that we need a delayed ack. ++ * ++ * XXX Some of these tests are not needed ++ * eg: the tiwin == tp->snd_wnd prevents many more ++ * predictions.. with no *real* advantage.. ++ */ ++ if (tp->t_state == TCPS_ESTABLISHED && ++ (tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK && ++ ti->ti_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd && ++ tp->snd_nxt == tp->snd_max) { ++ if (ti->ti_len == 0) { ++ if (SEQ_GT(ti->ti_ack, tp->snd_una) && ++ SEQ_LEQ(ti->ti_ack, tp->snd_max) && ++ tp->snd_cwnd >= tp->snd_wnd) { ++ /* ++ * this is a pure ack for outstanding data. ++ */ ++ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ acked = ti->ti_ack - tp->snd_una; ++ sodrop(so, acked); ++ tp->snd_una = ti->ti_ack; ++ m_free(m); ++ ++ /* ++ * If all outstanding data are acked, stop ++ * retransmit timer, otherwise restart timer ++ * using current (possibly backed-off) value. ++ * If process is waiting for space, ++ * wakeup/selwakeup/signal. If data ++ * are ready to send, let tcp_output ++ * decide between more output or persist. 
++ */ ++ if (tp->snd_una == tp->snd_max) ++ tp->t_timer[TCPT_REXMT] = 0; ++ else if (tp->t_timer[TCPT_PERSIST] == 0) ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ ++ /* ++ * This is called because sowwakeup might have ++ * put data into so_snd. Since we don't so sowwakeup, ++ * we don't need this.. XXX??? ++ */ ++ if (so->so_snd.sb_cc) ++ tcp_output(tp); ++ ++ return; ++ } ++ } else if (ti->ti_ack == tp->snd_una && tcpfrag_list_empty(tp) && ++ ti->ti_len <= sbspace(&so->so_rcv)) { ++ /* ++ * this is a pure, in-sequence data packet ++ * with nothing on the reassembly queue and ++ * we have enough buffer space to take it. ++ */ ++ tp->rcv_nxt += ti->ti_len; ++ /* ++ * Add data to socket buffer. ++ */ ++ if (so->so_emu) { ++ if (tcp_emu(so, m)) ++ sbappend(so, m); ++ } else ++ sbappend(so, m); ++ ++ /* ++ * If this is a short packet, then ACK now - with Nagel ++ * congestion avoidance sender won't send more until ++ * he gets an ACK. ++ * ++ * It is better to not delay acks at all to maximize ++ * TCP throughput. See RFC 2581. ++ */ ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ return; ++ } ++ } /* header prediction */ ++ /* ++ * Calculate amount of space in receive window, ++ * and then do TCP input processing. ++ * Receive window is amount of space in rcv queue, ++ * but not less than advertised window. ++ */ ++ { ++ int win; ++ win = sbspace(&so->so_rcv); ++ if (win < 0) ++ win = 0; ++ tp->rcv_wnd = MAX(win, (int)(tp->rcv_adv - tp->rcv_nxt)); ++ } ++ ++ switch (tp->t_state) { ++ /* ++ * If the state is LISTEN then ignore segment if it contains an RST. ++ * If the segment contains an ACK then it is bad and send a RST. ++ * If it does not contain a SYN then it is not interesting; drop it. ++ * Don't bother responding if the destination was a broadcast. ++ * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial ++ * tp->iss, and send a segment: ++ * ++ * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. 
++ * Fill in remote peer address fields if not previously specified. ++ * Enter SYN_RECEIVED state, and process any other fields of this ++ * segment in this state. ++ */ ++ case TCPS_LISTEN: { ++ if (tiflags & TH_RST) ++ goto drop; ++ if (tiflags & TH_ACK) ++ goto dropwithreset; ++ if ((tiflags & TH_SYN) == 0) ++ goto drop; ++ ++ /* ++ * This has way too many gotos... ++ * But a bit of spaghetti code never hurt anybody :) ++ */ ++ ++ /* ++ * If this is destined for the control address, then flag to ++ * tcp_ctl once connected, otherwise connect ++ */ ++ /* TODO: IPv6 */ ++ if (af == AF_INET && ++ (so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr && ++ so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) { ++ /* May be an add exec */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == so->so_fport && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { ++ so->so_state |= SS_CTL; ++ break; ++ } ++ } ++ if (so->so_state & SS_CTL) { ++ goto cont_input; ++ } ++ } ++ /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */ ++ } ++ ++ if (so->so_emu & EMU_NOCONNECT) { ++ so->so_emu &= ~EMU_NOCONNECT; ++ goto cont_input; ++ } ++ ++ if ((tcp_fconnect(so, so->so_ffamily) == -1) && (errno != EAGAIN) && ++ (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) { ++ uint8_t code; ++ DEBUG_MISC(" tcp fconnect errno = %d-%s", errno, strerror(errno)); ++ if (errno == ECONNREFUSED) { ++ /* ACK the SYN, send RST to refuse the connection */ ++ tcp_respond(tp, ti, m, ti->ti_seq + 1, (tcp_seq)0, ++ TH_RST | TH_ACK, af); ++ } else { ++ switch (af) { ++ case AF_INET: ++ code = ICMP_UNREACH_NET; ++ if (errno == EHOSTUNREACH) { ++ code = ICMP_UNREACH_HOST; ++ } ++ break; ++ case AF_INET6: ++ code = ICMP6_UNREACH_NO_ROUTE; ++ if (errno == EHOSTUNREACH) { ++ code = ICMP6_UNREACH_ADDRESS; ++ } ++ break; ++ default: ++ g_assert_not_reached(); ++ 
} ++ HTONL(ti->ti_seq); /* restore tcp header */ ++ HTONL(ti->ti_ack); ++ HTONS(ti->ti_win); ++ HTONS(ti->ti_urp); ++ m->m_data -= ++ sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ m->m_len += ++ sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ switch (af) { ++ case AF_INET: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct ip) - ++ sizeof(struct tcphdr); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct ip) - ++ sizeof(struct tcphdr); ++ *ip = save_ip; ++ icmp_send_error(m, ICMP_UNREACH, code, 0, strerror(errno)); ++ break; ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ m->m_len -= sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ *ip6 = save_ip6; ++ icmp6_send_error(m, ICMP6_UNREACH, code); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++ tcp_close(tp); ++ m_free(m); ++ } else { ++ /* ++ * Haven't connected yet, save the current mbuf ++ * and ti, and return ++ * XXX Some OS's don't tell us whether the connect() ++ * succeeded or not. So we must time it out. ++ */ ++ so->so_m = m; ++ so->so_ti = ti; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ tp->t_state = TCPS_SYN_RECEIVED; ++ /* ++ * Initialize receive sequence numbers now so that we can send a ++ * valid RST if the remote end rejects our connection. 
++ */ ++ tp->irs = ti->ti_seq; ++ tcp_rcvseqinit(tp); ++ tcp_template(tp); ++ } ++ return; ++ ++ cont_conn: ++ /* m==NULL ++ * Check if the connect succeeded ++ */ ++ if (so->so_state & SS_NOFDREF) { ++ tp = tcp_close(tp); ++ goto dropwithreset; ++ } ++ cont_input: ++ tcp_template(tp); ++ ++ if (optp) ++ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); ++ ++ if (iss) ++ tp->iss = iss; ++ else ++ tp->iss = slirp->tcp_iss; ++ slirp->tcp_iss += TCP_ISSINCR / 2; ++ tp->irs = ti->ti_seq; ++ tcp_sendseqinit(tp); ++ tcp_rcvseqinit(tp); ++ tp->t_flags |= TF_ACKNOW; ++ tp->t_state = TCPS_SYN_RECEIVED; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ goto trimthenstep6; ++ } /* case TCPS_LISTEN */ ++ ++ /* ++ * If the state is SYN_SENT: ++ * if seg contains an ACK, but not for our SYN, drop the input. ++ * if seg contains a RST, then drop the connection. ++ * if seg does not contain SYN, then drop it. ++ * Otherwise this is an acceptable SYN segment ++ * initialize tp->rcv_nxt and tp->irs ++ * if seg contains ack then advance tp->snd_una ++ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state ++ * arrange for segment to be acked (eventually) ++ * continue processing rest of data/controls, beginning with URG ++ */ ++ case TCPS_SYN_SENT: ++ if ((tiflags & TH_ACK) && ++ (SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max))) ++ goto dropwithreset; ++ ++ if (tiflags & TH_RST) { ++ if (tiflags & TH_ACK) { ++ tcp_drop(tp, 0); /* XXX Check t_softerror! 
*/ ++ } ++ goto drop; ++ } ++ ++ if ((tiflags & TH_SYN) == 0) ++ goto drop; ++ if (tiflags & TH_ACK) { ++ tp->snd_una = ti->ti_ack; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ++ tp->snd_nxt = tp->snd_una; ++ } ++ ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->irs = ti->ti_seq; ++ tcp_rcvseqinit(tp); ++ tp->t_flags |= TF_ACKNOW; ++ if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { ++ soisfconnected(so); ++ tp->t_state = TCPS_ESTABLISHED; ++ ++ tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); ++ /* ++ * if we didn't have to retransmit the SYN, ++ * use its rtt as our initial srtt & rtt var. ++ */ ++ if (tp->t_rtt) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ } else ++ tp->t_state = TCPS_SYN_RECEIVED; ++ ++ trimthenstep6: ++ /* ++ * Advance ti->ti_seq to correspond to first data byte. ++ * If data, trim to stay within window, ++ * dropping FIN if necessary. ++ */ ++ ti->ti_seq++; ++ if (ti->ti_len > tp->rcv_wnd) { ++ todrop = ti->ti_len - tp->rcv_wnd; ++ m_adj(m, -todrop); ++ ti->ti_len = tp->rcv_wnd; ++ tiflags &= ~TH_FIN; ++ } ++ tp->snd_wl1 = ti->ti_seq - 1; ++ tp->rcv_up = ti->ti_seq; ++ goto step6; ++ } /* switch tp->t_state */ ++ /* ++ * States other than LISTEN or SYN_SENT. ++ * Check that at least some bytes of segment are within ++ * receive window. If segment begins before rcv_nxt, ++ * drop leading data (and SYN); if nothing left, just ack. ++ */ ++ todrop = tp->rcv_nxt - ti->ti_seq; ++ if (todrop > 0) { ++ if (tiflags & TH_SYN) { ++ tiflags &= ~TH_SYN; ++ ti->ti_seq++; ++ if (ti->ti_urp > 1) ++ ti->ti_urp--; ++ else ++ tiflags &= ~TH_URG; ++ todrop--; ++ } ++ /* ++ * Following if statement from Stevens, vol. 2, p. 960. ++ */ ++ if (todrop > ti->ti_len || ++ (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) { ++ /* ++ * Any valid FIN must be to the left of the window. ++ * At this point the FIN must be a duplicate or out ++ * of sequence; drop it. ++ */ ++ tiflags &= ~TH_FIN; ++ ++ /* ++ * Send an ACK to resynchronize and drop any data. 
++ * But keep on processing for RST or ACK. ++ */ ++ tp->t_flags |= TF_ACKNOW; ++ todrop = ti->ti_len; ++ } ++ m_adj(m, todrop); ++ ti->ti_seq += todrop; ++ ti->ti_len -= todrop; ++ if (ti->ti_urp > todrop) ++ ti->ti_urp -= todrop; ++ else { ++ tiflags &= ~TH_URG; ++ ti->ti_urp = 0; ++ } ++ } ++ /* ++ * If new data are received on a connection after the ++ * user processes are gone, then RST the other end. ++ */ ++ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && ++ ti->ti_len) { ++ tp = tcp_close(tp); ++ goto dropwithreset; ++ } ++ ++ /* ++ * If segment ends after window, drop trailing data ++ * (and PUSH and FIN); if nothing left, just ACK. ++ */ ++ todrop = (ti->ti_seq + ti->ti_len) - (tp->rcv_nxt + tp->rcv_wnd); ++ if (todrop > 0) { ++ if (todrop >= ti->ti_len) { ++ /* ++ * If a new connection request is received ++ * while in TIME_WAIT, drop the old connection ++ * and start over if the sequence numbers ++ * are above the previous ones. ++ */ ++ if (tiflags & TH_SYN && tp->t_state == TCPS_TIME_WAIT && ++ SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { ++ iss = tp->rcv_nxt + TCP_ISSINCR; ++ tp = tcp_close(tp); ++ goto findso; ++ } ++ /* ++ * If window is closed can only take segments at ++ * window edge, and have to drop data and PUSH from ++ * incoming segments. Continue processing, but ++ * remember to ack. Otherwise, drop segment ++ * and ack. ++ */ ++ if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { ++ tp->t_flags |= TF_ACKNOW; ++ } else { ++ goto dropafterack; ++ } ++ } ++ m_adj(m, -todrop); ++ ti->ti_len -= todrop; ++ tiflags &= ~(TH_PUSH | TH_FIN); ++ } ++ ++ /* ++ * If the RST bit is set examine the state: ++ * SYN_RECEIVED STATE: ++ * If passive open, return to LISTEN state. ++ * If active open, inform user that connection was refused. ++ * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: ++ * Inform user that connection was reset, and close tcb. ++ * CLOSING, LAST_ACK, TIME_WAIT STATES ++ * Close the tcb. 
++ */ ++ if (tiflags & TH_RST) ++ switch (tp->t_state) { ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ case TCPS_FIN_WAIT_1: ++ case TCPS_FIN_WAIT_2: ++ case TCPS_CLOSE_WAIT: ++ tp->t_state = TCPS_CLOSED; ++ tcp_close(tp); ++ goto drop; ++ ++ case TCPS_CLOSING: ++ case TCPS_LAST_ACK: ++ case TCPS_TIME_WAIT: ++ tcp_close(tp); ++ goto drop; ++ } ++ ++ /* ++ * If a SYN is in the window, then this is an ++ * error and we send an RST and drop the connection. ++ */ ++ if (tiflags & TH_SYN) { ++ tp = tcp_drop(tp, 0); ++ goto dropwithreset; ++ } ++ ++ /* ++ * If the ACK bit is off we drop the segment and return. ++ */ ++ if ((tiflags & TH_ACK) == 0) ++ goto drop; ++ ++ /* ++ * Ack processing. ++ */ ++ switch (tp->t_state) { ++ /* ++ * In SYN_RECEIVED state if the ack ACKs our SYN then enter ++ * ESTABLISHED state and continue processing, otherwise ++ * send an RST. una<=ack<=max ++ */ ++ case TCPS_SYN_RECEIVED: ++ ++ if (SEQ_GT(tp->snd_una, ti->ti_ack) || SEQ_GT(ti->ti_ack, tp->snd_max)) ++ goto dropwithreset; ++ tp->t_state = TCPS_ESTABLISHED; ++ /* ++ * The sent SYN is ack'ed with our sequence number +1 ++ * The first data byte already in the buffer will get ++ * lost if no correction is made. This is only needed for ++ * SS_CTL since the buffer is empty otherwise. ++ * tp->snd_una++; or: ++ */ ++ tp->snd_una = ti->ti_ack; ++ if (so->so_state & SS_CTL) { ++ /* So tcp_ctl reports the right state */ ++ ret = tcp_ctl(so); ++ if (ret == 1) { ++ soisfconnected(so); ++ so->so_state &= ~SS_CTL; /* success XXX */ ++ } else if (ret == 2) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* CTL_CMD */ ++ } else { ++ needoutput = 1; ++ tp->t_state = TCPS_FIN_WAIT_1; ++ } ++ } else { ++ soisfconnected(so); ++ } ++ ++ tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); ++ tp->snd_wl1 = ti->ti_seq - 1; ++ /* Avoid ack processing; snd_una==ti_ack => dup ack */ ++ goto synrx_to_est; ++ /* fall into ... 
*/ ++ ++ /* ++ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range ++ * ACKs. If the ack is in the range ++ * tp->snd_una < ti->ti_ack <= tp->snd_max ++ * then advance tp->snd_una to ti->ti_ack and drop ++ * data from the retransmission queue. If this ACK reflects ++ * more up to date window information we update our window information. ++ */ ++ case TCPS_ESTABLISHED: ++ case TCPS_FIN_WAIT_1: ++ case TCPS_FIN_WAIT_2: ++ case TCPS_CLOSE_WAIT: ++ case TCPS_CLOSING: ++ case TCPS_LAST_ACK: ++ case TCPS_TIME_WAIT: ++ ++ if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { ++ if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { ++ DEBUG_MISC(" dup ack m = %p so = %p", m, so); ++ /* ++ * If we have outstanding data (other than ++ * a window probe), this is a completely ++ * duplicate ack (ie, window info didn't ++ * change), the ack is the biggest we've ++ * seen and we've seen exactly our rexmt ++ * threshold of them, assume a packet ++ * has been dropped and retransmit it. ++ * Kludge snd_nxt & the congestion ++ * window so we send only this one ++ * packet. ++ * ++ * We know we're losing at the current ++ * window size so do congestion avoidance ++ * (set ssthresh to half the current window ++ * and pull our congestion window back to ++ * the new ssthresh). ++ * ++ * Dup acks mean that packets have left the ++ * network (they're now cached at the receiver) ++ * so bump cwnd by the amount in the receiver ++ * to keep a constant cwnd packets in the ++ * network. 
++ */ ++ if (tp->t_timer[TCPT_REXMT] == 0 || ti->ti_ack != tp->snd_una) ++ tp->t_dupacks = 0; ++ else if (++tp->t_dupacks == TCPREXMTTHRESH) { ++ tcp_seq onxt = tp->snd_nxt; ++ unsigned win = ++ MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; ++ ++ if (win < 2) ++ win = 2; ++ tp->snd_ssthresh = win * tp->t_maxseg; ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->t_rtt = 0; ++ tp->snd_nxt = ti->ti_ack; ++ tp->snd_cwnd = tp->t_maxseg; ++ tcp_output(tp); ++ tp->snd_cwnd = ++ tp->snd_ssthresh + tp->t_maxseg * tp->t_dupacks; ++ if (SEQ_GT(onxt, tp->snd_nxt)) ++ tp->snd_nxt = onxt; ++ goto drop; ++ } else if (tp->t_dupacks > TCPREXMTTHRESH) { ++ tp->snd_cwnd += tp->t_maxseg; ++ tcp_output(tp); ++ goto drop; ++ } ++ } else ++ tp->t_dupacks = 0; ++ break; ++ } ++ synrx_to_est: ++ /* ++ * If the congestion window was inflated to account ++ * for the other side's cached packets, retract it. ++ */ ++ if (tp->t_dupacks > TCPREXMTTHRESH && tp->snd_cwnd > tp->snd_ssthresh) ++ tp->snd_cwnd = tp->snd_ssthresh; ++ tp->t_dupacks = 0; ++ if (SEQ_GT(ti->ti_ack, tp->snd_max)) { ++ goto dropafterack; ++ } ++ acked = ti->ti_ack - tp->snd_una; ++ ++ /* ++ * If transmit timer is running and timed sequence ++ * number was acked, update smoothed round trip time. ++ * Since we now have an rtt measurement, cancel the ++ * timer backoff (cf., Phil Karn's retransmit alg.). ++ * Recompute the initial retransmit timer. ++ */ ++ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ ++ /* ++ * If all outstanding data is acked, stop retransmit ++ * timer and remember to restart (more output or persist). ++ * If there is more data to be acked, restart retransmit ++ * timer, using current (possibly backed-off) value. ++ */ ++ if (ti->ti_ack == tp->snd_max) { ++ tp->t_timer[TCPT_REXMT] = 0; ++ needoutput = 1; ++ } else if (tp->t_timer[TCPT_PERSIST] == 0) ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ /* ++ * When new data is acked, open the congestion window. 
++ * If the window gives us less than ssthresh packets ++ * in flight, open exponentially (maxseg per packet). ++ * Otherwise open linearly: maxseg per window ++ * (maxseg^2 / cwnd per packet). ++ */ ++ { ++ register unsigned cw = tp->snd_cwnd; ++ register unsigned incr = tp->t_maxseg; ++ ++ if (cw > tp->snd_ssthresh) ++ incr = incr * incr / cw; ++ tp->snd_cwnd = MIN(cw + incr, TCP_MAXWIN << tp->snd_scale); ++ } ++ if (acked > so->so_snd.sb_cc) { ++ tp->snd_wnd -= so->so_snd.sb_cc; ++ sodrop(so, (int)so->so_snd.sb_cc); ++ ourfinisacked = 1; ++ } else { ++ sodrop(so, acked); ++ tp->snd_wnd -= acked; ++ ourfinisacked = 0; ++ } ++ tp->snd_una = ti->ti_ack; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ++ tp->snd_nxt = tp->snd_una; ++ ++ switch (tp->t_state) { ++ /* ++ * In FIN_WAIT_1 STATE in addition to the processing ++ * for the ESTABLISHED state if our FIN is now acknowledged ++ * then enter FIN_WAIT_2. ++ */ ++ case TCPS_FIN_WAIT_1: ++ if (ourfinisacked) { ++ /* ++ * If we can't receive any more ++ * data, then closing user can proceed. ++ * Starting the timer is contrary to the ++ * specification, but if we don't get a FIN ++ * we'll hang forever. ++ */ ++ if (so->so_state & SS_FCANTRCVMORE) { ++ tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE; ++ } ++ tp->t_state = TCPS_FIN_WAIT_2; ++ } ++ break; ++ ++ /* ++ * In CLOSING STATE in addition to the processing for ++ * the ESTABLISHED state if the ACK acknowledges our FIN ++ * then enter the TIME-WAIT state, otherwise ignore ++ * the segment. ++ */ ++ case TCPS_CLOSING: ++ if (ourfinisacked) { ++ tp->t_state = TCPS_TIME_WAIT; ++ tcp_canceltimers(tp); ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ } ++ break; ++ ++ /* ++ * In LAST_ACK, we may still be waiting for data to drain ++ * and/or to be acked, as well as for the ack of our FIN. ++ * If our FIN is now acknowledged, delete the TCB, ++ * enter the closed state and return. 
++ */ ++ case TCPS_LAST_ACK: ++ if (ourfinisacked) { ++ tcp_close(tp); ++ goto drop; ++ } ++ break; ++ ++ /* ++ * In TIME_WAIT state the only thing that should arrive ++ * is a retransmission of the remote FIN. Acknowledge ++ * it and restart the finack timer. ++ */ ++ case TCPS_TIME_WAIT: ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ goto dropafterack; ++ } ++ } /* switch(tp->t_state) */ ++ ++step6: ++ /* ++ * Update window information. ++ * Don't look at window if no ACK: TAC's send garbage on first SYN. ++ */ ++ if ((tiflags & TH_ACK) && ++ (SEQ_LT(tp->snd_wl1, ti->ti_seq) || ++ (tp->snd_wl1 == ti->ti_seq && ++ (SEQ_LT(tp->snd_wl2, ti->ti_ack) || ++ (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { ++ tp->snd_wnd = tiwin; ++ tp->snd_wl1 = ti->ti_seq; ++ tp->snd_wl2 = ti->ti_ack; ++ if (tp->snd_wnd > tp->max_sndwnd) ++ tp->max_sndwnd = tp->snd_wnd; ++ needoutput = 1; ++ } ++ ++ /* ++ * Process segments with URG. ++ */ ++ if ((tiflags & TH_URG) && ti->ti_urp && ++ TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ /* ++ * This is a kludge, but if we receive and accept ++ * random urgent pointers, we'll crash in ++ * soreceive. It's hard to imagine someone ++ * actually wanting to send this much urgent data. ++ */ ++ if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) { ++ ti->ti_urp = 0; ++ tiflags &= ~TH_URG; ++ goto dodata; ++ } ++ /* ++ * If this segment advances the known urgent pointer, ++ * then mark the data stream. This should not happen ++ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since ++ * a FIN has been received from the remote side. ++ * In these states we ignore the URG. ++ * ++ * According to RFC961 (Assigned Protocols), ++ * the urgent pointer points to the last octet ++ * of urgent data. We continue, however, ++ * to consider it to indicate the first octet ++ * of data past the urgent section as the original ++ * spec states (in one of two places). 
++ */ ++ if (SEQ_GT(ti->ti_seq + ti->ti_urp, tp->rcv_up)) { ++ tp->rcv_up = ti->ti_seq + ti->ti_urp; ++ so->so_urgc = ++ so->so_rcv.sb_cc + (tp->rcv_up - tp->rcv_nxt); /* -1; */ ++ tp->rcv_up = ti->ti_seq + ti->ti_urp; ++ } ++ } else ++ /* ++ * If no out of band data is expected, ++ * pull receive urgent pointer along ++ * with the receive window. ++ */ ++ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) ++ tp->rcv_up = tp->rcv_nxt; ++dodata: ++ ++ /* ++ * If this is a small packet, then ACK now - with Nagel ++ * congestion avoidance sender won't send more until ++ * he gets an ACK. ++ */ ++ if (ti->ti_len && (unsigned)ti->ti_len <= 5 && ++ ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { ++ tp->t_flags |= TF_ACKNOW; ++ } ++ ++ /* ++ * Process the segment text, merging it into the TCP sequencing queue, ++ * and arranging for acknowledgment of receipt if necessary. ++ * This process logically involves adjusting tp->rcv_wnd as data ++ * is presented to the user (this happens in tcp_usrreq.c, ++ * case PRU_RCVD). If a FIN has already been received on this ++ * connection then we just ignore the text. ++ */ ++ if ((ti->ti_len || (tiflags & TH_FIN)) && ++ TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ TCP_REASS(tp, ti, m, so, tiflags); ++ } else { ++ m_free(m); ++ tiflags &= ~TH_FIN; ++ } ++ ++ /* ++ * If FIN is received ACK the FIN and let the user know ++ * that the connection is closing. ++ */ ++ if (tiflags & TH_FIN) { ++ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ /* ++ * If we receive a FIN we can't send more data, ++ * set it SS_FDRAIN ++ * Shutdown the socket if there is no rx data in the ++ * buffer. ++ * soread() is called on completion of shutdown() and ++ * will got to TCPS_LAST_ACK, and use tcp_output() ++ * to send the FIN. ++ */ ++ sofwdrain(so); ++ ++ tp->t_flags |= TF_ACKNOW; ++ tp->rcv_nxt++; ++ } ++ switch (tp->t_state) { ++ /* ++ * In SYN_RECEIVED and ESTABLISHED STATES ++ * enter the CLOSE_WAIT state. 
++ */ ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ if (so->so_emu == EMU_CTL) /* no shutdown on socket */ ++ tp->t_state = TCPS_LAST_ACK; ++ else ++ tp->t_state = TCPS_CLOSE_WAIT; ++ break; ++ ++ /* ++ * If still in FIN_WAIT_1 STATE FIN has not been acked so ++ * enter the CLOSING state. ++ */ ++ case TCPS_FIN_WAIT_1: ++ tp->t_state = TCPS_CLOSING; ++ break; ++ ++ /* ++ * In FIN_WAIT_2 state enter the TIME_WAIT state, ++ * starting the time-wait timer, turning off the other ++ * standard timers. ++ */ ++ case TCPS_FIN_WAIT_2: ++ tp->t_state = TCPS_TIME_WAIT; ++ tcp_canceltimers(tp); ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ break; ++ ++ /* ++ * In TIME_WAIT state restart the 2 MSL time_wait timer. ++ */ ++ case TCPS_TIME_WAIT: ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ break; ++ } ++ } ++ ++ /* ++ * Return any desired output. ++ */ ++ if (needoutput || (tp->t_flags & TF_ACKNOW)) { ++ tcp_output(tp); ++ } ++ return; ++ ++dropafterack: ++ /* ++ * Generate an ACK dropping incoming segment if it occupies ++ * sequence space, where the ACK reflects our state. ++ */ ++ if (tiflags & TH_RST) ++ goto drop; ++ m_free(m); ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ return; ++ ++dropwithreset: ++ /* reuses m if m!=NULL, m_free() unnecessary */ ++ if (tiflags & TH_ACK) ++ tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST, af); ++ else { ++ if (tiflags & TH_SYN) ++ ti->ti_len++; ++ tcp_respond(tp, ti, m, ti->ti_seq + ti->ti_len, (tcp_seq)0, ++ TH_RST | TH_ACK, af); ++ } ++ ++ return; ++ ++drop: ++ /* ++ * Drop space held by incoming segment and return. 
++ */ ++ m_free(m); ++} ++ ++static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, ++ struct tcpiphdr *ti) ++{ /* Walk the cnt option bytes at cp; only an MSS option carried on a SYN is acted on. */ ++ uint16_t mss; ++ int opt, optlen; ++ ++ DEBUG_CALL("tcp_dooptions"); ++ DEBUG_ARG("tp = %p cnt=%i", tp, cnt); ++ ++ for (; cnt > 0; cnt -= optlen, cp += optlen) { ++ opt = cp[0]; ++ if (opt == TCPOPT_EOL) ++ break; ++ if (opt == TCPOPT_NOP) ++ optlen = 1; ++ else { ++ optlen = (cnt < 2) ? 0 : cp[1]; /* no length byte left: treat as malformed */ ++ if (optlen < 2 || optlen > cnt) /* must cover kind+length and fit in the bytes left */ ++ break; ++ } ++ switch (opt) { ++ default: ++ continue; ++ ++ case TCPOPT_MAXSEG: ++ if (optlen != TCPOLEN_MAXSEG) ++ continue; ++ if (!(ti->ti_flags & TH_SYN)) ++ continue; ++ memcpy((char *)&mss, (char *)cp + 2, sizeof(mss)); ++ NTOHS(mss); ++ tcp_mss(tp, mss); /* sets t_maxseg */ ++ break; ++ } ++ } ++} ++ ++/* ++ * Collect new round-trip time estimate ++ * and update averages and current timeout. ++ */ ++ ++static void tcp_xmit_timer(register struct tcpcb *tp, int rtt) ++{ ++ register short delta; ++ ++ DEBUG_CALL("tcp_xmit_timer"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("rtt = %d", rtt); ++ ++ if (tp->t_srtt != 0) { ++ /* ++ * srtt is stored as fixed point with 3 bits after the ++ * binary point (i.e., scaled by 8). The following magic ++ * is equivalent to the smoothing algorithm in rfc793 with ++ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed ++ * point). Adjust rtt to origin 0. ++ */ ++ delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); ++ if ((tp->t_srtt += delta) <= 0) ++ tp->t_srtt = 1; ++ /* ++ * We accumulate a smoothed rtt variance (actually, a ++ * smoothed mean difference), then set the retransmit ++ * timer to smoothed rtt + 4 times the smoothed variance. ++ * rttvar is stored as fixed point with 2 bits after the ++ * binary point (scaled by 4). The following is ++ * equivalent to rfc793 smoothing with an alpha of .75 ++ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces ++ * rfc793's wired-in beta. 
++ */ ++ if (delta < 0) ++ delta = -delta; ++ delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); ++ if ((tp->t_rttvar += delta) <= 0) ++ tp->t_rttvar = 1; ++ } else { ++ /* ++ * No rtt measurement yet - use the unsmoothed rtt. ++ * Set the variance to half the rtt (so our first ++ * retransmit happens at 3*rtt). ++ */ ++ tp->t_srtt = rtt << TCP_RTT_SHIFT; ++ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); ++ } ++ tp->t_rtt = 0; ++ tp->t_rxtshift = 0; ++ ++ /* ++ * the retransmit should happen at rtt + 4 * rttvar. ++ * Because of the way we do the smoothing, srtt and rttvar ++ * will each average +1/2 tick of bias. When we compute ++ * the retransmit timer, we want 1/2 tick of rounding and ++ * 1 extra tick because of +-1/2 tick uncertainty in the ++ * firing of the timer. The bias will give us exactly the ++ * 1.5 tick we need. But, because the bias is ++ * statistical, we have to test that we don't drop below ++ * the minimum feasible timer (which is 2 ticks). ++ */ ++ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), (short)tp->t_rttmin, ++ TCPTV_REXMTMAX); /* XXX */ ++ ++ /* ++ * We received an ack for a packet that wasn't retransmitted; ++ * it is probably safe to discard any error indications we've ++ * received recently. This isn't quite right, but close enough ++ * for now (a route might have failed after we sent a segment, ++ * and the return path might not be symmetrical). ++ */ ++ tp->t_softerror = 0; ++} ++ ++/* ++ * Determine a reasonable value for maxseg size. ++ * If the route is known, check route for mtu. ++ * If none, use an mss that can be handled on the outgoing ++ * interface without forcing IP to fragment; if bigger than ++ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES ++ * to utilize large mbufs. 
If no route is found, route has no mtu, ++ * or the destination isn't local, use a default, hopefully conservative ++ * size (usually 512 or the default IP max size, but no more than the mtu ++ * of the interface), as we can't discover anything about intervening ++ * gateways or networks. We also initialize the congestion/slow start ++ * window to be a single segment if the destination isn't local. ++ * While looking at the routing entry, we also initialize other path-dependent ++ * parameters from pre-set or cached values in the routing entry. ++ * NOTE: this port consults no routing entry; the size is derived from the slirp if_mtu/if_mru and the peer's offer (0 means none), and the computed mss is returned. */ ++ ++int tcp_mss(struct tcpcb *tp, unsigned offer) ++{ ++ struct socket *so = tp->t_socket; ++ int mss; ++ ++ DEBUG_CALL("tcp_mss"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("offer = %d", offer); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - ++ sizeof(struct tcphdr) - sizeof(struct ip); /* smaller of MTU/MRU minus TCP and IPv4 headers */ ++ break; ++ case AF_INET6: ++ mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - ++ sizeof(struct tcphdr) - sizeof(struct ip6); /* same, with the IPv6 header */ ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (offer) ++ mss = MIN(mss, offer); /* never exceed what the peer offered */ ++ mss = MAX(mss, 32); /* floor of 32 bytes */ ++ if (mss < tp->t_maxseg || offer != 0) ++ tp->t_maxseg = MIN(mss, TCP_MAXSEG_MAX); ++ ++ tp->snd_cwnd = mss; /* congestion window restarts at one segment */ ++ ++ sbreserve(&so->so_snd, ++ TCP_SNDSPACE + ++ ((TCP_SNDSPACE % mss) ? (mss - (TCP_SNDSPACE % mss)) : 0)); /* TCP_SNDSPACE rounded up to a multiple of mss */ ++ sbreserve(&so->so_rcv, ++ TCP_RCVSPACE + ++ ((TCP_RCVSPACE % mss) ? (mss - (TCP_RCVSPACE % mss)) : 0)); /* TCP_RCVSPACE rounded up likewise */ ++ ++ DEBUG_MISC(" returning mss = %d", mss); ++ ++ return mss; ++} +diff --git a/slirp/src/tcp_output.c b/slirp/src/tcp_output.c +new file mode 100644 +index 0000000000..383fe31dcf +--- /dev/null ++++ b/slirp/src/tcp_output.c +@@ -0,0 +1,516 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. 
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 ++ * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++ ++static const uint8_t tcp_outflags[TCP_NSTATES] = { /* header flags to send, indexed by tp->t_state */ ++ TH_RST | TH_ACK, 0, TH_SYN, TH_SYN | TH_ACK, ++ TH_ACK, TH_ACK, TH_FIN | TH_ACK, TH_FIN | TH_ACK, ++ TH_FIN | TH_ACK, TH_ACK, TH_ACK, ++}; ++ ++ ++#undef MAX_TCPOPTLEN ++#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ ++ ++/* ++ * Tcp output routine: figure out what should be sent and send it. Returns 0, or a nonzero error (1 if no mbuf could be obtained, else the ip_output()/ip6_output() result). ++ */ ++int tcp_output(struct tcpcb *tp) ++{ ++ register struct socket *so = tp->t_socket; ++ register long len, win; ++ int off, flags, error; ++ register struct mbuf *m; ++ register struct tcpiphdr *ti, tcpiph_save; ++ struct ip *ip; ++ struct ip6 *ip6; ++ uint8_t opt[MAX_TCPOPTLEN]; ++ unsigned optlen, hdrlen; ++ int idle, sendalot; ++ ++ DEBUG_CALL("tcp_output"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ /* ++ * Determine length of data that should be transmitted, ++ * and flags that will be used. ++ * If there is some data or critical controls (SYN, RST) ++ * to send, then transmit; otherwise, investigate further. ++ */ ++ idle = (tp->snd_max == tp->snd_una); ++ if (idle && tp->t_idle >= tp->t_rxtcur) ++ /* ++ * We have been idle for "a while" and no acks are ++ * expected to clock out any data we send -- ++ * slow start to get ack "clock" running again. ++ */ ++ tp->snd_cwnd = tp->t_maxseg; ++again: ++ sendalot = 0; ++ off = tp->snd_nxt - tp->snd_una; ++ win = MIN(tp->snd_wnd, tp->snd_cwnd); ++ ++ flags = tcp_outflags[tp->t_state]; ++ ++ DEBUG_MISC(" --- tcp_output flags = 0x%x", flags); ++ ++ /* ++ * If in persist timeout with window of 0, send 1 byte. ++ * Otherwise, if window is small but nonzero ++ * and timer expired, we will send what we can ++ * and go to transmit state. ++ */ ++ if (tp->t_force) { ++ if (win == 0) { ++ /* ++ * If we still have some data to send, then ++ * clear the FIN bit. Usually this would ++ * happen below when it realizes that we ++ * aren't sending all the data. 
However, ++ * if we have exactly 1 byte of unsent data, ++ * then it won't clear the FIN bit below, ++ * and if we are in persist state, we wind ++ * up sending the packet without recording ++ * that we sent the FIN bit. ++ * ++ * We can't just blindly clear the FIN bit, ++ * because if we don't have any more data ++ * to send then the probe will be the FIN ++ * itself. ++ */ ++ if (off < so->so_snd.sb_cc) ++ flags &= ~TH_FIN; ++ win = 1; ++ } else { ++ tp->t_timer[TCPT_PERSIST] = 0; ++ tp->t_rxtshift = 0; ++ } ++ } ++ ++ len = MIN(so->so_snd.sb_cc, win) - off; ++ ++ if (len < 0) { ++ /* ++ * If FIN has been sent but not acked, ++ * but we haven't been called to retransmit, ++ * len will be -1. Otherwise, window shrank ++ * after we sent into it. If window shrank to 0, ++ * cancel pending retransmit and pull snd_nxt ++ * back to (closed) window. We will enter persist ++ * state below. If the window didn't close completely, ++ * just wait for an ACK. ++ */ ++ len = 0; ++ if (win == 0) { ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->snd_nxt = tp->snd_una; ++ } ++ } ++ ++ if (len > tp->t_maxseg) { ++ len = tp->t_maxseg; ++ sendalot = 1; ++ } ++ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) ++ flags &= ~TH_FIN; ++ ++ win = sbspace(&so->so_rcv); ++ ++ /* ++ * Sender silly window avoidance. If connection is idle ++ * and can send all data, a maximum segment, ++ * at least a maximum default-size segment do it, ++ * or are forced, do it; otherwise don't bother. ++ * If peer's buffer is tiny, then send ++ * when window is at least half open. ++ * If retransmitting (possibly after persist timer forced us ++ * to send into a small window), then must resend. 
++ */ ++ if (len) { ++ if (len == tp->t_maxseg) ++ goto send; ++ if ((1 || idle || tp->t_flags & TF_NODELAY) && ++ len + off >= so->so_snd.sb_cc) /* the literal 1 short-circuits the idle/TF_NODELAY test, so only the length check decides */ ++ goto send; ++ if (tp->t_force) ++ goto send; ++ if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) ++ goto send; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_max)) ++ goto send; ++ } ++ ++ /* ++ * Compare available window to amount of window ++ * known to peer (as advertised window less ++ * next expected input). If the difference is at least two ++ * max size segments, or at least 50% of the maximum possible ++ * window, then want to send a window update to peer. ++ */ ++ if (win > 0) { ++ /* ++ * "adv" is the amount we can increase the window, ++ * taking into account that we are limited by ++ * TCP_MAXWIN << tp->rcv_scale. ++ */ ++ long adv = MIN(win, (long)TCP_MAXWIN << tp->rcv_scale) - ++ (tp->rcv_adv - tp->rcv_nxt); ++ ++ if (adv >= (long)(2 * tp->t_maxseg)) ++ goto send; ++ if (2 * adv >= (long)so->so_rcv.sb_datalen) ++ goto send; ++ } ++ ++ /* ++ * Send if we owe peer an ACK. ++ */ ++ if (tp->t_flags & TF_ACKNOW) ++ goto send; ++ if (flags & (TH_SYN | TH_RST)) ++ goto send; ++ if (SEQ_GT(tp->snd_up, tp->snd_una)) ++ goto send; ++ /* ++ * If our state indicates that FIN should be sent ++ * and we have not yet done so, or we're retransmitting the FIN, ++ * then we need to send. ++ */ ++ if (flags & TH_FIN && ++ ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) ++ goto send; ++ ++ /* ++ * TCP window updates are not reliable, rather a polling protocol ++ * using ``persist'' packets is used to ensure receipt of window ++ * updates. The three ``states'' for the output side are: ++ * idle not doing retransmits or persists ++ * persisting to move a small or zero window ++ * (re)transmitting and thereby not persisting ++ * ++ * tp->t_timer[TCPT_PERSIST] ++ * is set when we are in persist state. 
++ * tp->t_force ++ * is set when we are called to send a persist packet. 
++ * tp->t_timer[TCPT_REXMT] ++ * is set when we are retransmitting ++ * The output side is idle when both timers are zero. ++ * ++ * If send window is too small, there is data to transmit, and no ++ * retransmit or persist is pending, then go to persist state. ++ * If nothing happens soon, send when timer expires: ++ * if window is nonzero, transmit what we can, ++ * otherwise force out a byte. ++ */ ++ if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && ++ tp->t_timer[TCPT_PERSIST] == 0) { ++ tp->t_rxtshift = 0; ++ tcp_setpersist(tp); ++ } ++ ++ /* ++ * No reason to send a segment, just return. ++ */ ++ return (0); ++ ++send: ++ /* ++ * Before ESTABLISHED, force sending of initial options ++ * unless TCP set not to do any options. ++ * NOTE: we assume that the IP/TCP header plus TCP options ++ * always fit in a single mbuf, leaving room for a maximum ++ * link header, i.e. ++ * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN ++ */ ++ optlen = 0; ++ hdrlen = sizeof(struct tcpiphdr); ++ if (flags & TH_SYN) { ++ tp->snd_nxt = tp->iss; ++ if ((tp->t_flags & TF_NOOPT) == 0) { ++ uint16_t mss; ++ ++ opt[0] = TCPOPT_MAXSEG; ++ opt[1] = 4; /* option length: kind, length and the 16-bit MSS value */ ++ mss = htons((uint16_t)tcp_mss(tp, 0)); ++ memcpy((char *)(opt + 2), (char *)&mss, sizeof(mss)); ++ optlen = 4; ++ } ++ } ++ ++ hdrlen += optlen; ++ ++ /* ++ * Adjust data length if insertion of options will ++ * bump the packet length beyond the t_maxseg length. ++ */ ++ if (len > tp->t_maxseg - optlen) { ++ len = tp->t_maxseg - optlen; ++ sendalot = 1; ++ } ++ ++ /* ++ * Grab a header mbuf, attaching a copy of data to ++ * be transmitted, and initialize the header from ++ * the template for sends on this connection. ++ */ ++ if (len) { ++ m = m_get(so->slirp); ++ if (m == NULL) { ++ error = 1; ++ goto out; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m->m_len = hdrlen; ++ ++ sbcopy(&so->so_snd, off, (int)len, mtod(m, char *) + hdrlen); ++ m->m_len += len; ++ ++ /* ++ * If we're sending everything we've got, set PUSH. 
++ * (This will keep happy those implementations which only ++ * give data to the user when a buffer fills or ++ * a PUSH comes in.) ++ */ ++ if (off + len == so->so_snd.sb_cc) ++ flags |= TH_PUSH; ++ } else { ++ m = m_get(so->slirp); ++ if (m == NULL) { ++ error = 1; ++ goto out; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m->m_len = hdrlen; ++ } ++ ++ ti = mtod(m, struct tcpiphdr *); ++ ++ memcpy((char *)ti, &tp->t_template, sizeof(struct tcpiphdr)); ++ ++ /* ++ * Fill in fields, remembering maximum advertised ++ * window for use in delaying messages about window sizes. ++ * If resending a FIN, be sure not to use a new sequence number. ++ */ ++ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && ++ tp->snd_nxt == tp->snd_max) ++ tp->snd_nxt--; ++ /* ++ * If we are doing retransmissions, then snd_nxt will ++ * not reflect the first unsent octet. For ACK only ++ * packets, we do not want the sequence number of the ++ * retransmitted packet, we want the sequence number ++ * of the next unsent octet. So, if there is no data ++ * (and no SYN or FIN), use snd_max instead of snd_nxt ++ * when filling in ti_seq. But if we are in persist ++ * state, snd_max might reflect one byte beyond the ++ * right edge of the window, so use snd_nxt in that ++ * case, since we know we aren't doing a retransmission. ++ * (retransmit and persist are mutually exclusive...) ++ */ ++ if (len || (flags & (TH_SYN | TH_FIN)) || tp->t_timer[TCPT_PERSIST]) ++ ti->ti_seq = htonl(tp->snd_nxt); ++ else ++ ti->ti_seq = htonl(tp->snd_max); ++ ti->ti_ack = htonl(tp->rcv_nxt); ++ if (optlen) { ++ memcpy((char *)(ti + 1), (char *)opt, optlen); ++ ti->ti_off = (sizeof(struct tcphdr) + optlen) >> 2; ++ } ++ ti->ti_flags = flags; ++ /* ++ * Calculate receive window. Don't shrink window, ++ * but avoid silly window syndrome. 
++ */ ++ if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg) ++ win = 0; ++ if (win > (long)TCP_MAXWIN << tp->rcv_scale) ++ win = (long)TCP_MAXWIN << tp->rcv_scale; ++ if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) ++ win = (long)(tp->rcv_adv - tp->rcv_nxt); ++ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); ++ ++ if (SEQ_GT(tp->snd_up, tp->snd_una)) { ++ ti->ti_urp = htons((uint16_t)(tp->snd_up - ntohl(ti->ti_seq))); ++ ti->ti_flags |= TH_URG; ++ } else ++ /* ++ * If no urgent pointer to send, then we pull ++ * the urgent pointer to the left edge of the send window ++ * so that it doesn't drift into the send window on sequence ++ * number wraparound. ++ */ ++ tp->snd_up = tp->snd_una; /* drag it along */ ++ ++ /* ++ * Put TCP length in extended header, and then ++ * checksum extended header and data. ++ */ ++ if (len + optlen) ++ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + optlen + len)); ++ ti->ti_sum = cksum(m, (int)(hdrlen + len)); ++ ++ /* ++ * In transmit state, time the transmission and arrange for ++ * the retransmit. In persist state, just set snd_max. ++ */ ++ if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { ++ tcp_seq startseq = tp->snd_nxt; ++ ++ /* ++ * Advance snd_nxt over sequence space of this segment. ++ */ ++ if (flags & (TH_SYN | TH_FIN)) { ++ if (flags & TH_SYN) ++ tp->snd_nxt++; ++ if (flags & TH_FIN) { ++ tp->snd_nxt++; ++ tp->t_flags |= TF_SENTFIN; ++ } ++ } ++ tp->snd_nxt += len; ++ if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { ++ tp->snd_max = tp->snd_nxt; ++ /* ++ * Time this transmission if not a retransmission and ++ * not currently timing anything. ++ */ ++ if (tp->t_rtt == 0) { ++ tp->t_rtt = 1; ++ tp->t_rtseq = startseq; ++ } ++ } ++ ++ /* ++ * Set retransmit timer if not currently set, ++ * and not doing an ack or a keep-alive probe. ++ * Initial value for retransmit timer is smoothed ++ * round-trip time + 2 * round-trip time variance. 
++ * Initialize shift counter which is used for backoff ++ * of retransmit time. ++ */ ++ if (tp->t_timer[TCPT_REXMT] == 0 && tp->snd_nxt != tp->snd_una) { ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ if (tp->t_timer[TCPT_PERSIST]) { ++ tp->t_timer[TCPT_PERSIST] = 0; ++ tp->t_rxtshift = 0; ++ } ++ } ++ } else if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) ++ tp->snd_max = tp->snd_nxt + len; ++ ++ /* ++ * Fill in IP length and desired time to live and ++ * send to IP level. There should be a better way ++ * to handle ttl and tos; we could keep them in ++ * the template, but need a way to checksum without them. ++ */ ++ m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */ ++ tcpiph_save = *mtod(m, struct tcpiphdr *); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ m->m_data += ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ m->m_len -= ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ ip = mtod(m, struct ip *); ++ ++ ip->ip_len = m->m_len; ++ ip->ip_dst = tcpiph_save.ti_dst; ++ ip->ip_src = tcpiph_save.ti_src; ++ ip->ip_p = tcpiph_save.ti_pr; ++ ++ ip->ip_ttl = IPDEFTTL; ++ ip->ip_tos = so->so_iptos; ++ error = ip_output(so, m); ++ break; ++ ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ ip6 = mtod(m, struct ip6 *); ++ ++ ip6->ip_pl = tcpiph_save.ti_len; ++ ip6->ip_dst = tcpiph_save.ti_dst6; ++ ip6->ip_src = tcpiph_save.ti_src6; ++ ip6->ip_nh = tcpiph_save.ti_nh6; ++ ++ error = ip6_output(so, m, 0); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (error) { ++ out: ++ return (error); ++ } ++ ++ /* ++ * Data sent (as far as we can tell). ++ * If this advertises a larger window than any other segment, ++ * then remember the size of the advertised window. ++ * Any pending ACK has now been sent. 
++ */ ++ if (win > 0 && SEQ_GT(tp->rcv_nxt + win, tp->rcv_adv)) ++ tp->rcv_adv = tp->rcv_nxt + win; ++ tp->last_ack_sent = tp->rcv_nxt; ++ tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); ++ if (sendalot) ++ goto again; ++ ++ return (0); ++} ++ ++void tcp_setpersist(struct tcpcb *tp) ++{ ++ int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; ++ ++ /* ++ * Start/restart persistence timer. ++ */ ++ TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], t * tcp_backoff[tp->t_rxtshift], ++ TCPTV_PERSMIN, TCPTV_PERSMAX); ++ if (tp->t_rxtshift < TCP_MAXRXTSHIFT) ++ tp->t_rxtshift++; ++} +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +new file mode 100644 +index 0000000000..600cfa1456 +--- /dev/null ++++ b/slirp/src/tcp_subr.c +@@ -0,0 +1,1011 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93 ++ * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++/* patchable/settable parameters for tcp */ ++/* Don't do rfc1323 performance enhancements */ ++#define TCP_DO_RFC1323 0 ++ ++/* ++ * Tcp initialization: seed tcp_iss and make the tcb socket list an empty ring (head linked to itself), which is also the initial tcp_last_so cache entry. ++ */ ++void tcp_init(Slirp *slirp) ++{ ++ slirp->tcp_iss = 1; /* wrong */ ++ slirp->tcb.so_next = slirp->tcb.so_prev = &slirp->tcb; ++ slirp->tcp_last_so = &slirp->tcb; ++} ++ ++void tcp_cleanup(Slirp *slirp) ++{ /* Close every TCP socket still linked on slirp->tcb. */ ++ while (slirp->tcb.so_next != &slirp->tcb) { /* NOTE(review): terminates only if tcp_close() unlinks the socket (presumably via sofree()) - confirm */ ++ tcp_close(sototcpcb(slirp->tcb.so_next)); ++ } ++} ++ ++/* ++ * Create template to be used to send tcp packets on a connection. ++ * Call after host entry created, fills ++ * in a skeletal tcp/ip header, minimizing the amount of work ++ * necessary when the connection is used. 
++ */ ++void tcp_template(struct tcpcb *tp) ++{ ++ struct socket *so = tp->t_socket; ++ register struct tcpiphdr *n = &tp->t_template; ++ ++ n->ti_mbuf = NULL; ++ memset(&n->ti, 0, sizeof(n->ti)); ++ n->ti_x0 = 0; ++ switch (so->so_ffamily) { ++ case AF_INET: ++ n->ti_pr = IPPROTO_TCP; ++ n->ti_len = htons(sizeof(struct tcphdr)); ++ n->ti_src = so->so_faddr; /* source is the socket's foreign address, destination its local one */ ++ n->ti_dst = so->so_laddr; ++ n->ti_sport = so->so_fport; ++ n->ti_dport = so->so_lport; ++ break; ++ ++ case AF_INET6: ++ n->ti_nh6 = IPPROTO_TCP; ++ n->ti_len = htons(sizeof(struct tcphdr)); ++ n->ti_src6 = so->so_faddr6; /* same foreign-to-local mapping for IPv6 */ ++ n->ti_dst6 = so->so_laddr6; ++ n->ti_sport = so->so_fport6; ++ n->ti_dport = so->so_lport6; ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ n->ti_seq = 0; /* remaining TCP fields start cleared; tcp_output() fills them per segment */ ++ n->ti_ack = 0; ++ n->ti_x2 = 0; ++ n->ti_off = 5; /* data offset in 32-bit words: a 20-byte header without options */ ++ n->ti_flags = 0; ++ n->ti_win = 0; ++ n->ti_sum = 0; ++ n->ti_urp = 0; ++} ++ ++/* ++ * Send a single message to the TCP at address specified by ++ * the given TCP/IP header. If m == 0, then we make a copy ++ * of the tcpiphdr at ti and send directly to the addressed host. ++ * This is used to force keep alive messages out using the TCP ++ * template for a connection tp->t_template. If flags are given ++ * then we send a message back to the TCP which originated the ++ * segment ti, and discard the mbuf containing it and any other ++ * attached mbufs. ++ * ++ * In any case the ack and sequence number of the transmitted ++ * segment are as specified by the parameters. 
++ */ ++void tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, ++ tcp_seq ack, tcp_seq seq, int flags, unsigned short af) /* NB: ack precedes seq in the parameter list */ ++{ ++ register int tlen; ++ int win = 0; ++ ++ DEBUG_CALL("tcp_respond"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("ti = %p", ti); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("ack = %u", ack); ++ DEBUG_ARG("seq = %u", seq); ++ DEBUG_ARG("flags = %x", flags); ++ ++ if (tp) ++ win = sbspace(&tp->t_socket->so_rcv); ++ if (m == NULL) { ++ if (!tp || (m = m_get(tp->t_socket->slirp)) == NULL) /* without a tp there is no slirp to allocate from: nothing is sent */ ++ return; ++ tlen = 0; ++ m->m_data += IF_MAXLINKHDR; ++ *mtod(m, struct tcpiphdr *) = *ti; ++ ti = mtod(m, struct tcpiphdr *); ++ switch (af) { ++ case AF_INET: ++ ti->ti.ti_i4.ih_x1 = 0; ++ break; ++ case AF_INET6: ++ ti->ti.ti_i6.ih_x1 = 0; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ flags = TH_ACK; /* on this path the caller-supplied flags are replaced */ ++ } else { ++ /* ++ * ti points into m so the next line is just making ++ * the mbuf point to ti ++ */ ++ m->m_data = (char *)ti; ++ ++ m->m_len = sizeof(struct tcpiphdr); ++ tlen = 0; ++#define xchg(a, b, type) \ ++ { \ ++ type t; \ ++ t = a; \ ++ a = b; \ ++ b = t; \ ++ } ++ switch (af) { ++ case AF_INET: ++ xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, uint32_t); ++ xchg(ti->ti_dport, ti->ti_sport, uint16_t); ++ break; ++ case AF_INET6: ++ xchg(ti->ti_dst6, ti->ti_src6, struct in6_addr); ++ xchg(ti->ti_dport, ti->ti_sport, uint16_t); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++#undef xchg ++ } ++ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + tlen)); ++ tlen += sizeof(struct tcpiphdr); ++ m->m_len = tlen; ++ ++ ti->ti_mbuf = NULL; ++ ti->ti_x0 = 0; ++ ti->ti_seq = htonl(seq); ++ ti->ti_ack = htonl(ack); ++ ti->ti_x2 = 0; ++ ti->ti_off = sizeof(struct tcphdr) >> 2; ++ ti->ti_flags = flags; ++ if (tp) ++ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); ++ else ++ ti->ti_win = htons((uint16_t)win); ++ ti->ti_urp = 0; ++ ti->ti_sum = 0; ++ ti->ti_sum = cksum(m, tlen); ++ ++ struct tcpiphdr tcpiph_save = *(mtod(m, 
struct tcpiphdr *)); ++ struct ip *ip; ++ struct ip6 *ip6; ++ ++ switch (af) { ++ case AF_INET: ++ m->m_data += ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ m->m_len -= ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ ip = mtod(m, struct ip *); ++ ip->ip_len = m->m_len; ++ ip->ip_dst = tcpiph_save.ti_dst; ++ ip->ip_src = tcpiph_save.ti_src; ++ ip->ip_p = tcpiph_save.ti_pr; ++ ++ if (flags & TH_RST) { ++ ip->ip_ttl = MAXTTL; ++ } else { ++ ip->ip_ttl = IPDEFTTL; ++ } ++ ++ ip_output(NULL, m); /* return value is not checked */ ++ break; ++ ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ ip6 = mtod(m, struct ip6 *); ++ ip6->ip_pl = tcpiph_save.ti_len; ++ ip6->ip_dst = tcpiph_save.ti_dst6; ++ ip6->ip_src = tcpiph_save.ti_src6; ++ ip6->ip_nh = tcpiph_save.ti_nh6; ++ ++ ip6_output(NULL, m, 0); /* return value is not checked */ ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++} ++ ++/* ++ * Create a new TCP control block, making an ++ * empty reassembly queue and hooking it to the argument ++ * protocol control block. ++ */ ++struct tcpcb *tcp_newtcpcb(struct socket *so) ++{ ++ register struct tcpcb *tp; ++ ++ tp = g_new0(struct tcpcb, 1); ++ tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; ++ /* ++ * 40: length of IPv4 header (20) + TCP header (20) ++ * 60: length of IPv6 header (40) + TCP header (20) ++ */ ++ tp->t_maxseg = ++ MIN(so->slirp->if_mtu - ((so->so_ffamily == AF_INET) ? 40 : 60), ++ TCP_MAXSEG_MAX); ++ ++ tp->t_flags = TCP_DO_RFC1323 ? (TF_REQ_SCALE | TF_REQ_TSTMP) : 0; ++ tp->t_socket = so; ++ ++ /* ++ * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no ++ * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives ++ * reasonable initial retransmit time. 
++ */ ++ tp->t_srtt = TCPTV_SRTTBASE; ++ tp->t_rttvar = TCPTV_SRTTDFLT << 2; ++ tp->t_rttmin = TCPTV_MIN; ++ ++ TCPT_RANGESET(tp->t_rxtcur, ++ ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, ++ TCPTV_MIN, TCPTV_REXMTMAX); ++ ++ tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; ++ tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; ++ tp->t_state = TCPS_CLOSED; ++ ++ so->so_tcpcb = tp; ++ ++ return (tp); ++} ++ ++/* ++ * Drop a TCP connection, reporting ++ * the specified error. If connection is synchronized, ++ * then send a RST to peer. ++ */ ++struct tcpcb *tcp_drop(struct tcpcb *tp, int err) ++{ ++ DEBUG_CALL("tcp_drop"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("errno = %d", errno); ++ ++ if (TCPS_HAVERCVDSYN(tp->t_state)) { ++ tp->t_state = TCPS_CLOSED; ++ tcp_output(tp); ++ } ++ return (tcp_close(tp)); ++} ++ ++/* ++ * Close a TCP control block: ++ * discard all space held by the tcp ++ * discard internet protocol block ++ * wake up any sleepers ++ */ ++struct tcpcb *tcp_close(struct tcpcb *tp) ++{ ++ register struct tcpiphdr *t; ++ struct socket *so = tp->t_socket; ++ Slirp *slirp = so->slirp; ++ register struct mbuf *m; ++ ++ DEBUG_CALL("tcp_close"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ /* free the reassembly queue, if any */ ++ t = tcpfrag_list_first(tp); ++ while (!tcpfrag_list_end(t, tp)) { ++ t = tcpiphdr_next(t); ++ m = tcpiphdr_prev(t)->ti_mbuf; ++ remque(tcpiphdr2qlink(tcpiphdr_prev(t))); ++ m_free(m); ++ } ++ g_free(tp); ++ so->so_tcpcb = NULL; ++ /* clobber input socket cache if we're closing the cached connection */ ++ if (so == slirp->tcp_last_so) ++ slirp->tcp_last_so = &slirp->tcb; ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sbfree(&so->so_rcv); ++ sbfree(&so->so_snd); ++ sofree(so); ++ return ((struct tcpcb *)0); ++} ++ ++/* ++ * TCP protocol interface to socket abstraction. ++ */ ++ ++/* ++ * User issued close, and wish to trail through shutdown states: ++ * if never received SYN, just forget it. 
If got a SYN from peer, ++ * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. ++ * If already got a FIN from peer, then almost done; go to LAST_ACK ++ * state. In all other cases, have already sent FIN to peer (e.g. ++ * after PRU_SHUTDOWN), and just have to play tedious game waiting ++ * for peer to send FIN or not respond to keep-alives, etc. ++ * We can let the user exit from the close as soon as the FIN is acked. ++ */ ++void tcp_sockclosed(struct tcpcb *tp) ++{ ++ DEBUG_CALL("tcp_sockclosed"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ if (!tp) { ++ return; ++ } ++ ++ switch (tp->t_state) { ++ case TCPS_CLOSED: ++ case TCPS_LISTEN: ++ case TCPS_SYN_SENT: ++ tp->t_state = TCPS_CLOSED; ++ tcp_close(tp); ++ return; ++ ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ tp->t_state = TCPS_FIN_WAIT_1; ++ break; ++ ++ case TCPS_CLOSE_WAIT: ++ tp->t_state = TCPS_LAST_ACK; ++ break; ++ } ++ tcp_output(tp); ++} ++ ++/* ++ * Connect to a host on the Internet ++ * Called by tcp_input ++ * Only do a connect, the tcp fields will be set in tcp_input ++ * return 0 if there's a result of the connect, ++ * else return -1 means we're still connecting ++ * The return value is almost always -1 since the socket is ++ * nonblocking. Connect returns after the SYN is sent, and does ++ * not wait for ACK+SYN. 
++ */
++int tcp_fconnect(struct socket *so, unsigned short af)
++{
++    int ret = 0;
++
++    DEBUG_CALL("tcp_fconnect");
++    DEBUG_ARG("so = %p", so);
++
++    ret = so->s = slirp_socket(af, SOCK_STREAM, 0);
++    if (ret >= 0) {
++        ret = slirp_bind_outbound(so, af);
++        if (ret < 0) {
++            // bind failed - close socket
++            closesocket(so->s);
++            so->s = -1;
++            return (ret);
++        }
++    }
++
++    if (ret >= 0) {
++        int opt, s = so->s;
++        struct sockaddr_storage addr;
++
++        slirp_set_nonblock(s);
++        so->slirp->cb->register_poll_fd(s, so->slirp->opaque);
++        slirp_socket_set_fast_reuse(s);
++        opt = 1;
++        setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(opt));
++        opt = 1;
++        setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(opt));
++
++        /* Work on a copy so the translated address never touches so->fhost. */
++        addr = so->fhost.ss;
++        DEBUG_CALL(" connect()ing");
++        if (sotranslate_out(so, &addr) < 0) {
++            return -1;
++        }
++
++        /* We don't care what port we get */
++        ret = connect(s, (struct sockaddr *)&addr, sockaddr_size(&addr));
++
++        /*
++         * If it's not in progress, it failed, so we just return 0,
++         * without clearing SS_NOFDREF
++         */
++        soisfconnecting(so);
++    }
++
++    return (ret);
++}
++
++/*
++ * Accept the socket and connect to the local-host
++ *
++ * We have a problem. The correct thing to do would be
++ * to first connect to the local-host, and only if the
++ * connection is accepted, then do an accept() here.
++ * But, a) we need to know who's trying to connect
++ * to the socket to be able to SYN the local-host, and
++ * b) we are already connected to the foreign host by
++ * the time it gets to accept(), so... We simply accept
++ * here and SYN the local-host.
++ */
++void tcp_connect(struct socket *inso)
++{
++    Slirp *slirp = inso->slirp;
++    struct socket *so;
++    struct sockaddr_storage addr;
++    socklen_t addrlen = sizeof(struct sockaddr_storage);
++    struct tcpcb *tp;
++    int s, opt, ret;
++    /* AF_INET6 addresses are bigger than AF_INET, so this is big enough. */
++    char addrstr[INET6_ADDRSTRLEN];
++    char portstr[6];
++
++    DEBUG_CALL("tcp_connect");
++    DEBUG_ARG("inso = %p", inso);
++    /*
++     * Pass the length that matches the address family rather than the size
++     * of the whole sockaddr_storage: some getnameinfo() implementations
++     * reject an oversized salen, which would trip the assertion below.
++     */
++    ret = getnameinfo((const struct sockaddr *)&inso->lhost.ss,
++                      sockaddr_size(&inso->lhost.ss), addrstr,
++                      sizeof(addrstr), portstr, sizeof(portstr),
++                      NI_NUMERICHOST | NI_NUMERICSERV);
++    g_assert(ret == 0);
++    DEBUG_ARG("ip = [%s]:%s", addrstr, portstr);
++    DEBUG_ARG("so_state = 0x%x", inso->so_state);
++
++    /* Perform lazy guest IP address resolution if needed. */
++    if (inso->so_state & SS_HOSTFWD) {
++        /*
++         * We can only reject the connection request by accepting it and
++         * then immediately closing it. Note that SS_FACCEPTONCE sockets can't
++         * get here.
++         */
++        if (soassign_guest_addr_if_needed(inso) < 0) {
++            /*
++             * Guest address isn't available yet. We could either try to defer
++             * completing this connection request until the guest address is
++             * available, or punt. It's easier to punt. Otherwise we need to
++             * complicate the mechanism by which we're called to defer calling
++             * us again until the guest address is available.
++             */
++            DEBUG_MISC(" guest address not available yet");
++            s = accept(inso->s, (struct sockaddr *)&addr, &addrlen);
++            if (s >= 0) {
++                /*
++                 * s is a socket: release it with closesocket() like every
++                 * other socket in this file, not with plain close().
++                 */
++                closesocket(s);
++            }
++            return;
++        }
++    }
++
++    /*
++     * If it's an SS_ACCEPTONCE socket, no need to socreate()
++     * another socket, just use the accept() socket.
++     */
++    if (inso->so_state & SS_FACCEPTONCE) {
++        /* FACCEPTONCE already have a tcpcb */
++        so = inso;
++    } else {
++        so = socreate(slirp);
++        tcp_attach(so);
++        so->lhost = inso->lhost;
++        so->so_ffamily = inso->so_ffamily;
++    }
++
++    tcp_mss(sototcpcb(so), 0);
++
++    s = accept(inso->s, (struct sockaddr *)&addr, &addrlen);
++    if (s < 0) {
++        tcp_close(sototcpcb(so)); /* This will sofree() as well */
++        return;
++    }
++    slirp_set_nonblock(s);
++    so->slirp->cb->register_poll_fd(s, so->slirp->opaque);
++    slirp_socket_set_fast_reuse(s);
++    opt = 1;
++    setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int));
++    slirp_socket_set_nodelay(s);
++
++    so->fhost.ss = addr;
++    sotranslate_accept(so);
++
++    /* Close the accept() socket, set right state */
++    if (inso->so_state & SS_FACCEPTONCE) {
++        /* If we only accept once, close the accept() socket */
++        so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque);
++        closesocket(so->s);
++
++        /* Don't select it yet, even though we have an FD */
++        /* if it's not FACCEPTONCE, it's already NOFDREF */
++        so->so_state = SS_NOFDREF;
++    }
++    so->s = s;
++    so->so_state |= SS_INCOMING;
++
++    so->so_iptos = tcp_tos(so);
++    tp = sototcpcb(so);
++
++    tcp_template(tp);
++
++    /* Start the handshake towards the guest: SYN goes out via tcp_output(). */
++    tp->t_state = TCPS_SYN_SENT;
++    tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
++    tp->iss = slirp->tcp_iss;
++    slirp->tcp_iss += TCP_ISSINCR / 2;
++    tcp_sendseqinit(tp);
++    tcp_output(tp);
++}
++
++/*
++ * Attach a TCPCB to a socket.
++ */ ++void tcp_attach(struct socket *so) ++{ ++ so->so_tcpcb = tcp_newtcpcb(so); ++ insque(so, &so->slirp->tcb); ++} ++ ++/* ++ * Set the socket's type of service field ++ */ ++static const struct tos_t tcptos[] = { ++ { 0, 20, IPTOS_THROUGHPUT, 0 }, /* ftp data */ ++ { 21, 21, IPTOS_LOWDELAY, EMU_FTP }, /* ftp control */ ++ { 0, 23, IPTOS_LOWDELAY, 0 }, /* telnet */ ++ { 0, 80, IPTOS_THROUGHPUT, 0 }, /* WWW */ ++ { 0, 513, IPTOS_LOWDELAY, EMU_RLOGIN | EMU_NOCONNECT }, /* rlogin */ ++ { 0, 544, IPTOS_LOWDELAY, EMU_KSH }, /* kshell */ ++ { 0, 543, IPTOS_LOWDELAY, 0 }, /* klogin */ ++ { 0, 6667, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC */ ++ { 0, 6668, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC undernet */ ++ { 0, 7070, IPTOS_LOWDELAY, EMU_REALAUDIO }, /* RealAudio control */ ++ { 0, 113, IPTOS_LOWDELAY, EMU_IDENT }, /* identd protocol */ ++ { 0, 0, 0, 0 } ++}; ++ ++/* ++ * Return TOS according to the above table ++ */ ++uint8_t tcp_tos(struct socket *so) ++{ ++ int i = 0; ++ ++ while (tcptos[i].tos) { ++ if ((tcptos[i].fport && (ntohs(so->so_fport) == tcptos[i].fport)) || ++ (tcptos[i].lport && (ntohs(so->so_lport) == tcptos[i].lport))) { ++ if (so->slirp->enable_emu) ++ so->so_emu = tcptos[i].emu; ++ return tcptos[i].tos; ++ } ++ i++; ++ } ++ return 0; ++} ++ ++/* ++ * Emulate programs that try and connect to us ++ * This includes ftp (the data connection is ++ * initiated by the server) and IRC (DCC CHAT and ++ * DCC SEND) for now ++ * ++ * NOTE: It's possible to crash SLiRP by sending it ++ * unstandard strings to emulate... if this is a problem, ++ * more checks are needed here ++ * ++ * XXX Assumes the whole command came in one packet ++ * XXX If there is more than one command in the packet, the others may ++ * be truncated. ++ * XXX If the command is too long, it may be truncated. ++ * ++ * XXX Some ftp clients will have their TOS set to ++ * LOWDELAY and so Nagel will kick in. 
Because of this,
++ * we'll get the first letter, followed by the rest, so
++ * we simply scan for ORT instead of PORT...
++ * DCC doesn't have this problem because there's other stuff
++ * in the packet before the DCC command.
++ *
++ * Return 1 if the mbuf m is still valid and should be
++ * sbappend()ed
++ *
++ * NOTE: if you return 0 you MUST m_free() the mbuf!
++ */
++int tcp_emu(struct socket *so, struct mbuf *m)
++{
++    Slirp *slirp = so->slirp;
++    unsigned n1, n2, n3, n4, n5, n6;
++    char buff[257];
++    uint32_t laddr;
++    unsigned lport;
++    char *bptr;
++
++    DEBUG_CALL("tcp_emu");
++    DEBUG_ARG("so = %p", so);
++    DEBUG_ARG("m = %p", m);
++
++    /*
++     * Every branch below returns 1: the mbuf is rewritten in place (or
++     * left untouched) and is always handed back to the caller.
++     */
++    switch (so->so_emu) {
++        int x, i;
++
++        /* TODO: IPv6 */
++    case EMU_IDENT:
++        /*
++         * Identification protocol as per rfc-1413
++         */
++
++        {
++            struct socket *tmpso;
++            struct sockaddr_in addr;
++            socklen_t addrlen = sizeof(struct sockaddr_in);
++            char *eol = g_strstr_len(m->m_data, m->m_len, "\r\n");
++
++            /* No complete line yet: pass the data through unchanged. */
++            if (!eol) {
++                return 1;
++            }
++
++            /* Terminate at the CR so sscanf() stops at the end of the line. */
++            *eol = '\0';
++            if (sscanf(m->m_data, "%u%*[ ,]%u", &n1, &n2) == 2) {
++                /* Ports are byte-swapped before comparing with the socket fields. */
++                HTONS(n1);
++                HTONS(n2);
++                /* n2 is the one on our host */
++                for (tmpso = slirp->tcb.so_next; tmpso != &slirp->tcb;
++                     tmpso = tmpso->so_next) {
++                    if (tmpso->so_laddr.s_addr == so->so_laddr.s_addr &&
++                        tmpso->so_lport == n2 &&
++                        tmpso->so_faddr.s_addr == so->so_faddr.s_addr &&
++                        tmpso->so_fport == n1) {
++                        /* Substitute the port the host socket is bound to. */
++                        if (getsockname(tmpso->s, (struct sockaddr *)&addr,
++                                        &addrlen) == 0)
++                            n2 = addr.sin_port;
++                        break;
++                    }
++                }
++                NTOHS(n1);
++                NTOHS(n2);
++                /* Size the mbuf for the rewritten request before formatting it. */
++                m_inc(m, g_snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1);
++                m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2);
++            } else {
++                /* Not a parsable request: restore the CR that was overwritten. */
++                *eol = '\r';
++            }
++
++            return 1;
++        }
++
++    case EMU_FTP: /* ftp */
++        m_inc(m, m->m_len + 1);
++        *(m->m_data + m->m_len) = 0; /* NUL terminate for strstr */
++        if ((bptr = (char *)strstr(m->m_data, "ORT")) != NULL) {
++            /*
++             * Need to emulate the PORT command
++             */
++            /* x == 7 means text following the CRLF was captured in buff. */
++            x = sscanf(bptr, "ORT %u,%u,%u,%u,%u,%u\r\n%256[^\177]", &n1, &n2,
++                       &n3, &n4, &n5, &n6, buff);
++            if (x < 6)
++                return 1;
++
++            laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4));
++            lport = htons((n5 << 8) | (n6));
++
++            /* From here on, so refers to the socket returned by tcp_listen(). */
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport,
++                                 SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            n6 = ntohs(so->so_fport);
++
++            n5 = (n6 >> 8) & 0xff;
++            n6 &= 0xff;
++
++            laddr = ntohl(so->so_faddr.s_addr);
++
++            n1 = ((laddr >> 24) & 0xff);
++            n2 = ((laddr >> 16) & 0xff);
++            n3 = ((laddr >> 8) & 0xff);
++            n4 = (laddr & 0xff);
++
++            /* Keep what preceded the command, then rewrite from bptr onwards. */
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "ORT %d,%d,%d,%d,%d,%d\r\n%s",
++                                  n1, n2, n3, n4, n5, n6, x == 7 ? buff : "");
++            return 1;
++        } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) {
++            /*
++             * Need to emulate the PASV response
++             */
++            x = sscanf(
++                bptr,
++                "27 Entering Passive Mode (%u,%u,%u,%u,%u,%u)\r\n%256[^\177]",
++                &n1, &n2, &n3, &n4, &n5, &n6, buff);
++            if (x < 6)
++                return 1;
++
++            laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4));
++            lport = htons((n5 << 8) | (n6));
++
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport,
++                                 SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            n6 = ntohs(so->so_fport);
++
++            n5 = (n6 >> 8) & 0xff;
++            n6 &= 0xff;
++
++            laddr = ntohl(so->so_faddr.s_addr);
++
++            n1 = ((laddr >> 24) & 0xff);
++            n2 = ((laddr >> 16) & 0xff);
++            n3 = ((laddr >> 8) & 0xff);
++            n4 = (laddr & 0xff);
++
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s",
++                                  n1, n2, n3, n4, n5, n6, x == 7 ? buff : "");
++            return 1;
++        }
++
++        return 1;
++
++    case EMU_KSH:
++        /*
++         * The kshell (Kerberos rsh) and shell services both pass
++         * a local port port number to carry signals to the server
++         * and stderr to the client. It is passed at the beginning
++         * of the connection as a NUL-terminated decimal ASCII string.
++         */
++        /* Only this first packet is inspected; emulation is switched off. */
++        so->so_emu = 0;
++        for (lport = 0, i = 0; i < m->m_len - 1; ++i) {
++            if (m->m_data[i] < '0' || m->m_data[i] > '9')
++                return 1; /* invalid number */
++            lport *= 10;
++            lport += m->m_data[i] - '0';
++        }
++        if (m->m_data[m->m_len - 1] == '\0' && lport != 0 &&
++            (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr,
++                             htons(lport), SS_FACCEPTONCE)) != NULL)
++            m->m_len = slirp_fmt0(m->m_data, M_ROOM(m),
++                                  "%d", ntohs(so->so_fport));
++        return 1;
++
++    case EMU_IRC:
++        /*
++         * Need to emulate DCC CHAT, DCC SEND and DCC MOVE
++         */
++        m_inc(m, m->m_len + 1);
++        *(m->m_data + m->m_len) = 0; /* NULL terminate the string for strstr */
++        if ((bptr = (char *)strstr(m->m_data, "DCC")) == NULL)
++            return 1;
++
++        /* The %256s is for the broken mIRC */
++        if (sscanf(bptr, "DCC CHAT %256s %u %u", buff, &laddr, &lport) == 3) {
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr),
++                                 htons(lport), SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "DCC CHAT chat %lu %u%c\n",
++                                  (unsigned long)ntohl(so->so_faddr.s_addr),
++                                  ntohs(so->so_fport), 1);
++        } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport,
++                          &n1) == 4) {
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr),
++                                 htons(lport), SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "DCC SEND %s %lu %u %u%c\n", buff,
++                                  (unsigned long)ntohl(so->so_faddr.s_addr),
++                                  ntohs(so->so_fport), n1, 1);
++        } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport,
++                          &n1) == 4) {
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr),
++                                 htons(lport), SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "DCC MOVE %s %lu %u %u%c\n", buff,
++                                  (unsigned long)ntohl(so->so_faddr.s_addr),
++                                  ntohs(so->so_fport), n1, 1);
++        }
++        return 1;
++
++    case EMU_REALAUDIO:
++        /*
++         * RealAudio emulation - JP. We must try to parse the incoming
++         * data and try to find the two characters that contain the
++         * port number. Then we redirect an udp port and replace the
++         * number with the real port we got.
++         *
++         * The 1.0 beta versions of the player are not supported
++         * any more.
++         *
++         * A typical packet for player version 1.0 (release version):
++         *
++         * 0000:50 4E 41 00 05
++         * 0000:00 01 00 02 1B D7 00 00 67 E6 6C DC 63 00 12 50 ........g.l.c..P
++         * 0010:4E 43 4C 49 45 4E 54 20 31 30 31 20 41 4C 50 48 NCLIENT 101 ALPH
++         * 0020:41 6C 00 00 52 00 17 72 61 66 69 6C 65 73 2F 76 Al..R..rafiles/v
++         * 0030:6F 61 2F 65 6E 67 6C 69 73 68 5F 2E 72 61 79 42 oa/english_.rayB
++         *
++         * Now the port number 0x1BD7 is found at offset 0x04 of the
++         * second packet. This time we received five bytes first and
++         * then the rest. You never know how many bytes you get.
++         *
++         * A typical packet for player version 2.0 (beta):
++         *
++         * 0000:50 4E 41 00 06 00 02 00 00 00 01 00 02 1B C1 00 PNA.............
++         * 0010:00 67 75 78 F5 63 00 0A 57 69 6E 32 2E 30 2E 30 .gux.c..Win2.0.0
++         * 0020:2E 35 6C 00 00 52 00 1C 72 61 66 69 6C 65 73 2F .5l..R..rafiles/
++         * 0030:77 65 62 73 69 74 65 2F 32 30 72 65 6C 65 61 73 website/20releas
++         * 0040:65 2E 72 61 79 53 00 00 06 36 42 e.rayS...6B
++         *
++         * Port number 0x1BC1 is found at offset 0x0d.
++         *
++         * This is just a horrible switch statement. Variable ra tells
++         * us where we're going.
++         */
++
++        bptr = m->m_data;
++        while (bptr < m->m_data + m->m_len) {
++            uint16_t p;
++            /*
++             * NOTE: ra is a function-local static, so the parser position
++             * persists across calls and is shared by every socket that
++             * reaches this case.
++             */
++            static int ra = 0;
++            char ra_tbl[4];
++
++            /* Signature bytes 0x50 0x4e 0x41 0x00 ("PNA\0"), matched in states 0-3. */
++            ra_tbl[0] = 0x50;
++            ra_tbl[1] = 0x4e;
++            ra_tbl[2] = 0x41;
++            ra_tbl[3] = 0;
++
++            switch (ra) {
++            case 0:
++            case 2:
++            case 3:
++                if (*bptr++ != ra_tbl[ra]) {
++                    ra = 0;
++                    continue;
++                }
++                break;
++
++            case 1:
++                /*
++                 * We may get 0x50 several times, ignore them
++                 */
++                if (*bptr == 0x50) {
++                    ra = 1;
++                    bptr++;
++                    continue;
++                } else if (*bptr++ != ra_tbl[ra]) {
++                    ra = 0;
++                    continue;
++                }
++                break;
++
++            case 4:
++                /*
++                 * skip version number
++                 */
++                bptr++;
++                break;
++
++            case 5:
++                if (bptr == m->m_data + m->m_len - 1)
++                    return 1; /* We need two bytes */
++
++                /*
++                 * The difference between versions 1.0 and
++                 * 2.0 is here. For future versions of
++                 * the player this may need to be modified.
++                 */
++                if (*(bptr + 1) == 0x02)
++                    bptr += 8;
++                else
++                    bptr += 4;
++                break;
++
++            case 6:
++                /* This is the field containing the port
++                 * number that RA-player is listening to.
++                 */
++
++                if (bptr == m->m_data + m->m_len - 1)
++                    return 1; /* We need two bytes */
++
++                lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1];
++                if (lport < 6970)
++                    lport += 256; /* don't know why */
++                if (lport < 6970 || lport > 7170)
++                    return 1; /* failed */
++
++                /* try to get udp port between 6970 - 7170 */
++                /* NOTE(review): the loop below only probes 6970..7070. */
++                for (p = 6970; p < 7071; p++) {
++                    if (udp_listen(slirp, INADDR_ANY, htons(p),
++                                   so->so_laddr.s_addr, htons(lport),
++                                   SS_FACCEPTONCE)) {
++                        break;
++                    }
++                }
++                /* No port could be opened: port 0 is written into the packet. */
++                if (p == 7071)
++                    p = 0;
++                *(uint8_t *)bptr++ = (p >> 8) & 0xff;
++                *(uint8_t *)bptr = p & 0xff;
++                ra = 0;
++                return 1; /* port redirected, we're done */
++                break;
++
++            default:
++                ra = 0;
++            }
++            ra++;
++        }
++        return 1;
++
++    default:
++        /* Ooops, not emulated, won't call tcp_emu again */
++        so->so_emu = 0;
++        return 1;
++    }
++}
++
++/*
++ * Do misc. config of SLiRP while its running.
++ * Return 0 if this connections is to be closed, 1 otherwise, ++ * return 2 if this is a command-line connection ++ */ ++int tcp_ctl(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ struct sbuf *sb = &so->so_snd; ++ struct gfwd_list *ex_ptr; ++ ++ DEBUG_CALL("tcp_ctl"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* TODO: IPv6 */ ++ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { ++ /* Check if it's pty_exec */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == so->so_fport && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { ++ if (ex_ptr->write_cb) { ++ so->s = -1; ++ so->guestfwd = ex_ptr; ++ return 1; ++ } ++ DEBUG_MISC(" executing %s", ex_ptr->ex_exec); ++ if (ex_ptr->ex_unix) ++ return open_unix(so, ex_ptr->ex_unix); ++ else ++ return fork_exec(so, ex_ptr->ex_exec); ++ } ++ } ++ } ++ sb->sb_cc = slirp_fmt(sb->sb_wptr, sb->sb_datalen - (sb->sb_wptr - sb->sb_data), ++ "Error: No application configured.\r\n"); ++ sb->sb_wptr += sb->sb_cc; ++ return 0; ++} +diff --git a/slirp/src/tcp_timer.c b/slirp/src/tcp_timer.c +new file mode 100644 +index 0000000000..bc4db2d15e +--- /dev/null ++++ b/slirp/src/tcp_timer.c +@@ -0,0 +1,286 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 ++ * tcp_timer.c,v 1.2 1994/08/02 07:49:10 davidg Exp ++ */ ++ ++#include "slirp.h" ++ ++static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer); ++ ++/* ++ * Fast timeout routine for processing delayed acks ++ */ ++void tcp_fasttimo(Slirp *slirp) ++{ ++ register struct socket *so; ++ register struct tcpcb *tp; ++ ++ DEBUG_CALL("tcp_fasttimo"); ++ ++ so = slirp->tcb.so_next; ++ if (so) ++ for (; so != &slirp->tcb; so = so->so_next) ++ if ((tp = (struct tcpcb *)so->so_tcpcb) && ++ (tp->t_flags & TF_DELACK)) { ++ tp->t_flags &= ~TF_DELACK; ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ } ++} ++ ++/* ++ * Tcp protocol timeout routine called every 500 ms. ++ * Updates the timers in all active tcb's and ++ * causes finite state machine actions if timers expire. 
++ */
++void tcp_slowtimo(Slirp *slirp)
++{
++    register struct socket *ip, *ipnxt;
++    register struct tcpcb *tp;
++    register int i;
++
++    DEBUG_CALL("tcp_slowtimo");
++
++    /*
++     * Search through tcb's and update active timers.
++     */
++    ip = slirp->tcb.so_next;
++    if (ip == NULL) {
++        return;
++    }
++    for (; ip != &slirp->tcb; ip = ipnxt) {
++        /* Remember the successor first: handling a timer may unlink ip. */
++        ipnxt = ip->so_next;
++        tp = sototcpcb(ip);
++        if (tp == NULL) {
++            continue;
++        }
++        for (i = 0; i < TCPT_NTIMERS; i++) {
++            /* A timer fires on the tick that counts it down to zero. */
++            if (tp->t_timer[i] && --tp->t_timer[i] == 0) {
++                tcp_timers(tp, i);
++                /*
++                 * If the successor no longer points back at ip, ip was
++                 * removed from the list; do not touch tp any more.
++                 */
++                if (ipnxt->so_prev != ip)
++                    goto tpgone;
++            }
++        }
++        tp->t_idle++;
++        /* t_rtt only counts while it is non-zero. */
++        if (tp->t_rtt)
++            tp->t_rtt++;
++    tpgone:;
++    }
++    slirp->tcp_iss += TCP_ISSINCR / PR_SLOWHZ; /* increment iss */
++    slirp->tcp_now++; /* for timestamps */
++}
++
++/*
++ * Cancel all timers for TCP tp.
++ */
++void tcp_canceltimers(struct tcpcb *tp)
++{
++    register int i;
++
++    for (i = 0; i < TCPT_NTIMERS; i++)
++        tp->t_timer[i] = 0;
++}
++
++/*
++ * Retransmit backoff multipliers; tcp_timers() indexes this table with
++ * tp->t_rxtshift.
++ */
++const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64,
++                                               64, 64, 64, 64, 64, 64 };
++
++/*
++ * TCP timer processing.
++ *
++ * Handles expiry of the timer with index "timer" for tp. Returns tp, or
++ * whatever tcp_close()/tcp_drop() returned when the connection was torn
++ * down.
++ */
++static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer)
++{
++    register int rexmt;
++
++    DEBUG_CALL("tcp_timers");
++
++    switch (timer) {
++    /*
++     * 2 MSL timeout in shutdown went off. If we're closed but
++     * still waiting for peer to close and connection has been idle
++     * too long, or if 2MSL time is up from TIME_WAIT, delete connection
++     * control block. Otherwise, check again in a bit.
++     */
++    case TCPT_2MSL:
++        if (tp->t_state != TCPS_TIME_WAIT && tp->t_idle <= TCP_MAXIDLE)
++            tp->t_timer[TCPT_2MSL] = TCPTV_KEEPINTVL;
++        else
++            tp = tcp_close(tp);
++        break;
++
++    /*
++     * Retransmission timer went off. Message has not
++     * been acked within retransmit interval. Back off
++     * to a longer retransmit interval and retransmit one segment.
++     */
++    case TCPT_REXMT:
++
++        /*
++         * XXXXX If a packet has timed out, then remove all the queued
++         * packets for that session.
++         */
++
++        if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
++            /*
++             * This is a hack to suit our terminal server here at the uni of
++             * canberra since they have trouble with zeroes... It usually lets
++             * them through unharmed, but under some conditions, it'll eat the
++             * zeros. If we keep retransmitting it, it'll keep eating the
++             * zeroes, so we keep retransmitting, and eventually the connection
++             * dies... (this only happens on incoming data)
++             *
++             * So, if we were gonna drop the connection from too many
++             * retransmits, don't... instead halve the t_maxseg, which might
++             * break up the NULLs and let them through
++             *
++             * *sigh*
++             */
++
++            tp->t_maxseg >>= 1;
++            if (tp->t_maxseg < 32) {
++                /*
++                 * We tried our best, now the connection must die!
++                 */
++                tp->t_rxtshift = TCP_MAXRXTSHIFT;
++                tp = tcp_drop(tp, tp->t_softerror);
++                /* tp->t_softerror : ETIMEDOUT); */ /* XXX */
++                return (tp); /* XXX */
++            }
++
++            /*
++             * Set rxtshift to 6, which is still at the maximum
++             * backoff time
++             */
++            tp->t_rxtshift = 6;
++        }
++        rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
++        TCPT_RANGESET(tp->t_rxtcur, rexmt, (short)tp->t_rttmin,
++                      TCPTV_REXMTMAX); /* XXX */
++        tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
++        /*
++         * If losing, let the lower level know and try for
++         * a better route. Also, if we backed off this far,
++         * our srtt estimate is probably bogus. Clobber it
++         * so we'll take the next rtt measurement as our srtt;
++         * move the current srtt into rttvar to keep the current
++         * retransmit times until then.
++         */
++        if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
++            tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
++            tp->t_srtt = 0;
++        }
++        /* Resume sending from the oldest unacknowledged sequence number. */
++        tp->snd_nxt = tp->snd_una;
++        /*
++         * If timing a segment in this window, stop the timer.
++         */
++        tp->t_rtt = 0;
++        /*
++         * Close the congestion window down to one segment
++         * (we'll open it by one segment for each ack we get).
++         * Since we probably have a window's worth of unacked
++         * data accumulated, this "slow start" keeps us from
++         * dumping all that data as back-to-back packets (which
++         * might overwhelm an intermediate gateway).
++         *
++         * There are two phases to the opening: Initially we
++         * open by one mss on each ack. This makes the window
++         * size increase exponentially with time. If the
++         * window is larger than the path can handle, this
++         * exponential growth results in dropped packet(s)
++         * almost immediately. To get more time between
++         * drops but still "push" the network to take advantage
++         * of improving conditions, we switch from exponential
++         * to linear window opening at some threshold size.
++         * For a threshold, we use half the current window
++         * size, truncated to a multiple of the mss.
++         *
++         * (the minimum cwnd that will give us exponential
++         * growth is 2 mss. We don't allow the threshold
++         * to go below this.)
++         */
++        {
++            /* win is counted in segments of t_maxseg bytes, not in bytes. */
++            unsigned win = MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
++            if (win < 2)
++                win = 2;
++            tp->snd_cwnd = tp->t_maxseg;
++            tp->snd_ssthresh = win * tp->t_maxseg;
++            tp->t_dupacks = 0;
++        }
++        tcp_output(tp);
++        break;
++
++    /*
++     * Persistence timer into zero window.
++     * Force a byte to be output, if possible.
++     */
++    case TCPT_PERSIST:
++        tcp_setpersist(tp);
++        /* t_force is set only for the duration of this tcp_output() call. */
++        tp->t_force = 1;
++        tcp_output(tp);
++        tp->t_force = 0;
++        break;
++
++    /*
++     * Keep-alive timer went off; send something
++     * or drop connection if idle for too long.
++     */
++    case TCPT_KEEP:
++        /* Not yet established when the timer fired: give up on the connection. */
++        if (tp->t_state < TCPS_ESTABLISHED)
++            goto dropit;
++
++        if (slirp_do_keepalive && tp->t_state <= TCPS_CLOSE_WAIT) {
++            if (tp->t_idle >= TCPTV_KEEP_IDLE + TCP_MAXIDLE)
++                goto dropit;
++            /*
++             * Send a packet designed to force a response
++             * if the peer is up and reachable:
++             * either an ACK if the connection is still alive,
++             * or an RST if the peer has closed the connection
++             * due to timeout or reboot.
++             * Using sequence number tp->snd_una-1
++             * causes the transmitted zero-length segment
++             * to lie outside the receive window;
++             * by the protocol spec, this requires the
++             * correspondent TCP to respond.
++             */
++            tcp_respond(tp, &tp->t_template, (struct mbuf *)NULL, tp->rcv_nxt,
++                        tp->snd_una - 1, 0, tp->t_socket->so_ffamily);
++            tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL;
++        } else
++            tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE;
++        break;
++
++    /* Reached only through the gotos in the TCPT_KEEP case above. */
++    dropit:
++        tp = tcp_drop(tp, 0);
++        break;
++    }
++
++    return (tp);
++}
+diff --git a/slirp/src/tcp_timer.h b/slirp/src/tcp_timer.h
+new file mode 100644
+index 0000000000..584a5594e4
+--- /dev/null
++++ b/slirp/src/tcp_timer.h
+@@ -0,0 +1,130 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 1982, 1986, 1993
++ *	The Regents of the University of California. All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ * 1. Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer.
++ * 2. Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in the
++ *    documentation and/or other materials provided with the distribution.
++ * 3.
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 ++ * tcp_timer.h,v 1.4 1994/08/21 05:27:38 paul Exp ++ */ ++ ++#ifndef TCP_TIMER_H ++#define TCP_TIMER_H ++ ++/* ++ * Definitions of the TCP timers. These timers are counted ++ * down PR_SLOWHZ times a second. ++ */ ++#define TCPT_NTIMERS 4 ++ ++#define TCPT_REXMT 0 /* retransmit */ ++#define TCPT_PERSIST 1 /* retransmit persistence */ ++#define TCPT_KEEP 2 /* keep alive */ ++#define TCPT_2MSL 3 /* 2*msl quiet time timer */ ++ ++/* ++ * The TCPT_REXMT timer is used to force retransmissions. ++ * The TCP has the TCPT_REXMT timer set whenever segments ++ * have been sent for which ACKs are expected but not yet ++ * received. If an ACK is received which advances tp->snd_una, ++ * then the retransmit timer is cleared (if there are no more ++ * outstanding segments) or reset to the base value (if there ++ * are more ACKs expected). 
Whenever the retransmit timer goes off, ++ * we retransmit one unacknowledged segment, and do a backoff ++ * on the retransmit timer. ++ * ++ * The TCPT_PERSIST timer is used to keep window size information ++ * flowing even if the window goes shut. If all previous transmissions ++ * have been acknowledged (so that there are no retransmissions in progress), ++ * and the window is too small to bother sending anything, then we start ++ * the TCPT_PERSIST timer. When it expires, if the window is nonzero, ++ * we go to transmit state. Otherwise, at intervals send a single byte ++ * into the peer's window to force him to update our window information. ++ * We do this at most as often as TCPT_PERSMIN time intervals, ++ * but no more frequently than the current estimate of round-trip ++ * packet time. The TCPT_PERSIST timer is cleared whenever we receive ++ * a window update from the peer. ++ * ++ * The TCPT_KEEP timer is used to keep connections alive. If an ++ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, ++ * but not yet established, then we drop the connection. Once the connection ++ * is established, if the connection is idle for TCPTV_KEEP_IDLE time ++ * (and keepalives have been enabled on the socket), we begin to probe ++ * the connection. We force the peer to send us a segment by sending: ++ * ++ * This segment is (deliberately) outside the window, and should elicit ++ * an ack segment in response from the peer. If, despite the TCPT_KEEP ++ * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE ++ * amount of time probing, then we drop the connection. ++ */ ++ ++/* ++ * Time constants. ++ */ ++#define TCPTV_MSL (5 * PR_SLOWHZ) /* max seg lifetime (hah!) 
*/ ++ ++#define TCPTV_SRTTBASE \ ++ 0 /* base roundtrip time; \ ++ if 0, no idea yet */ ++#define TCPTV_SRTTDFLT (3 * PR_SLOWHZ) /* assumed RTT if no info */ ++ ++#define TCPTV_PERSMIN (5 * PR_SLOWHZ) /* retransmit persistence */ ++#define TCPTV_PERSMAX (60 * PR_SLOWHZ) /* maximum persist interval */ ++ ++#define TCPTV_KEEP_INIT (75 * PR_SLOWHZ) /* initial connect keep alive */ ++#define TCPTV_KEEP_IDLE (120 * 60 * PR_SLOWHZ) /* dflt time before probing */ ++#define TCPTV_KEEPINTVL (75 * PR_SLOWHZ) /* default probe interval */ ++#define TCPTV_KEEPCNT 8 /* max probes before drop */ ++ ++#define TCPTV_MIN (1 * PR_SLOWHZ) /* minimum allowable value */ ++#define TCPTV_REXMTMAX (12 * PR_SLOWHZ) /* max allowable REXMT value */ ++ ++#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ ++ ++#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ ++ ++ ++/* ++ * Force a time value to be in a certain range. ++ */ ++#define TCPT_RANGESET(tv, value, tvmin, tvmax) \ ++ { \ ++ (tv) = (value); \ ++ if ((tv) < (tvmin)) \ ++ (tv) = (tvmin); \ ++ else if ((tv) > (tvmax)) \ ++ (tv) = (tvmax); \ ++ } ++ ++extern const int tcp_backoff[]; ++ ++struct tcpcb; ++ ++void tcp_fasttimo(Slirp *); ++void tcp_slowtimo(Slirp *); ++void tcp_canceltimers(struct tcpcb *); ++ ++#endif +diff --git a/slirp/src/tcp_var.h b/slirp/src/tcp_var.h +new file mode 100644 +index 0000000000..c8da8cbd16 +--- /dev/null ++++ b/slirp/src/tcp_var.h +@@ -0,0 +1,161 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993, 1994 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. 
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_var.h 8.3 (Berkeley) 4/10/94 ++ * tcp_var.h,v 1.3 1994/08/21 05:27:39 paul Exp ++ */ ++ ++#ifndef TCP_VAR_H ++#define TCP_VAR_H ++ ++#include "tcpip.h" ++#include "tcp_timer.h" ++ ++/* ++ * Tcp control block, one per tcp; fields: ++ */ ++struct tcpcb { ++ struct tcpiphdr *seg_next; /* sequencing queue */ ++ struct tcpiphdr *seg_prev; ++ short t_state; /* state of this connection */ ++ short t_timer[TCPT_NTIMERS]; /* tcp timers */ ++ short t_rxtshift; /* log(2) of rexmt exp. 
backoff */ ++ short t_rxtcur; /* current retransmit value */ ++ short t_dupacks; /* consecutive dup acks recd */ ++ uint16_t t_maxseg; /* maximum segment size */ ++ uint8_t t_force; /* 1 if forcing out a byte */ ++ uint16_t t_flags; ++#define TF_ACKNOW 0x0001 /* ack peer immediately */ ++#define TF_DELACK 0x0002 /* ack, but try to delay it */ ++#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ ++#define TF_NOOPT 0x0008 /* don't use tcp options */ ++#define TF_SENTFIN 0x0010 /* have sent FIN */ ++#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ ++#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ ++#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ ++#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ ++#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ ++ ++ struct tcpiphdr t_template; /* static skeletal packet for xmit */ ++ ++ struct socket *t_socket; /* back pointer to socket */ ++ /* ++ * The following fields are used as in the protocol specification. ++ * See RFC783, Dec. 1981, page 21. ++ */ ++ /* send sequence variables */ ++ tcp_seq snd_una; /* send unacknowledged */ ++ tcp_seq snd_nxt; /* send next */ ++ tcp_seq snd_up; /* send urgent pointer */ ++ tcp_seq snd_wl1; /* window update seg seq number */ ++ tcp_seq snd_wl2; /* window update seg ack number */ ++ tcp_seq iss; /* initial send sequence number */ ++ uint32_t snd_wnd; /* send window */ ++ /* receive sequence variables */ ++ uint32_t rcv_wnd; /* receive window */ ++ tcp_seq rcv_nxt; /* receive next */ ++ tcp_seq rcv_up; /* receive urgent pointer */ ++ tcp_seq irs; /* initial receive sequence number */ ++ /* ++ * Additional variables for this implementation. 
++ */ ++ /* receive variables */ ++ tcp_seq rcv_adv; /* advertised window */ ++ /* retransmit variables */ ++ tcp_seq snd_max; /* highest sequence number sent; ++ * used to recognize retransmits ++ */ ++ /* congestion control (for slow start, source quench, retransmit after loss) ++ */ ++ uint32_t snd_cwnd; /* congestion-controlled window */ ++ uint32_t snd_ssthresh; /* snd_cwnd size threshold for ++ * for slow start exponential to ++ * linear switch ++ */ ++ /* ++ * transmit timing stuff. See below for scale of srtt and rttvar. ++ * "Variance" is actually smoothed difference. ++ */ ++ short t_idle; /* inactivity time */ ++ short t_rtt; /* round trip time */ ++ tcp_seq t_rtseq; /* sequence number being timed */ ++ short t_srtt; /* smoothed round-trip time */ ++ short t_rttvar; /* variance in round-trip time */ ++ uint16_t t_rttmin; /* minimum rtt allowed */ ++ uint32_t max_sndwnd; /* largest window peer has offered */ ++ ++ /* out-of-band data */ ++ uint8_t t_oobflags; /* have some */ ++ uint8_t t_iobc; /* input character */ ++#define TCPOOB_HAVEDATA 0x01 ++#define TCPOOB_HADDATA 0x02 ++ short t_softerror; /* possible error not yet reported */ ++ ++ /* RFC 1323 variables */ ++ uint8_t snd_scale; /* window scaling for send window */ ++ uint8_t rcv_scale; /* window scaling for recv window */ ++ uint8_t request_r_scale; /* pending window scaling */ ++ uint8_t requested_s_scale; ++ uint32_t ts_recent; /* timestamp echo data */ ++ uint32_t ts_recent_age; /* when last updated */ ++ tcp_seq last_ack_sent; ++}; ++ ++#define sototcpcb(so) ((so)->so_tcpcb) ++ ++/* ++ * The smoothed round-trip time and estimated variance ++ * are stored as fixed point numbers scaled by the values below. ++ * For convenience, these scales are also used in smoothing the average ++ * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). ++ * With these scales, srtt has 3 bits to the right of the binary point, ++ * and thus an "ALPHA" of 0.875. 
rttvar has 2 bits to the right of the ++ * binary point, and is smoothed with an ALPHA of 0.75. ++ */ ++#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */ ++#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */ ++#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */ ++#define TCP_RTTVAR_SHIFT 2 /* multiplier for rttvar; 2 bits */ ++ ++/* ++ * The initial retransmission should happen at rtt + 4 * rttvar. ++ * Because of the way we do the smoothing, srtt and rttvar ++ * will each average +1/2 tick of bias. When we compute ++ * the retransmit timer, we want 1/2 tick of rounding and ++ * 1 extra tick because of +-1/2 tick uncertainty in the ++ * firing of the timer. The bias will give us exactly the ++ * 1.5 tick we need. But, because the bias is ++ * statistical, we have to test that we don't drop below ++ * the minimum feasible timer (which is 2 ticks). ++ * This macro assumes that the value of TCP_RTTVAR_SCALE ++ * is the same as the multiplier for rttvar. ++ */ ++#define TCP_REXMTVAL(tp) (((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) ++ ++#endif +diff --git a/slirp/src/tcpip.h b/slirp/src/tcpip.h +new file mode 100644 +index 0000000000..a0fb2282f2 +--- /dev/null ++++ b/slirp/src/tcpip.h +@@ -0,0 +1,104 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcpip.h 8.1 (Berkeley) 6/10/93 ++ * tcpip.h,v 1.3 1994/08/21 05:27:40 paul Exp ++ */ ++ ++#ifndef TCPIP_H ++#define TCPIP_H ++ ++/* ++ * Tcp+ip header, after ip options removed. 
++ */ ++struct tcpiphdr { ++ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ ++ union { ++ struct { ++ struct in_addr ih_src; /* source internet address */ ++ struct in_addr ih_dst; /* destination internet address */ ++ uint8_t ih_x1; /* (unused) */ ++ uint8_t ih_pr; /* protocol */ ++ } ti_i4; ++ struct { ++ struct in6_addr ih_src; ++ struct in6_addr ih_dst; ++ uint8_t ih_x1; ++ uint8_t ih_nh; ++ } ti_i6; ++ } ti; ++ uint16_t ti_x0; ++ uint16_t ti_len; /* protocol length */ ++ struct tcphdr ti_t; /* tcp header */ ++}; ++#define ti_mbuf ih_mbuf.mptr ++#define ti_pr ti.ti_i4.ih_pr ++#define ti_src ti.ti_i4.ih_src ++#define ti_dst ti.ti_i4.ih_dst ++#define ti_src6 ti.ti_i6.ih_src ++#define ti_dst6 ti.ti_i6.ih_dst ++#define ti_nh6 ti.ti_i6.ih_nh ++#define ti_sport ti_t.th_sport ++#define ti_dport ti_t.th_dport ++#define ti_seq ti_t.th_seq ++#define ti_ack ti_t.th_ack ++#define ti_x2 ti_t.th_x2 ++#define ti_off ti_t.th_off ++#define ti_flags ti_t.th_flags ++#define ti_win ti_t.th_win ++#define ti_sum ti_t.th_sum ++#define ti_urp ti_t.th_urp ++ ++#define tcpiphdr2qlink(T) \ ++ ((struct qlink *)(((char *)(T)) - sizeof(struct qlink))) ++#define qlink2tcpiphdr(Q) \ ++ ((struct tcpiphdr *)(((char *)(Q)) + sizeof(struct qlink))) ++#define tcpiphdr_next(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->next) ++#define tcpiphdr_prev(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->prev) ++#define tcpfrag_list_first(T) qlink2tcpiphdr((T)->seg_next) ++#define tcpfrag_list_end(F, T) (tcpiphdr2qlink(F) == (struct qlink *)(T)) ++#define tcpfrag_list_empty(T) ((T)->seg_next == (struct tcpiphdr *)(T)) ++ ++/* This is the difference between the size of a tcpiphdr structure, and the ++ * size of actual ip+tcp headers, rounded up since we need to align data. 
*/ ++#define TCPIPHDR_DELTA \ ++ (MAX(0, ((int) sizeof(struct tcpiphdr) - (int) sizeof(struct ip) - \ ++ (int) sizeof(struct tcphdr) + 3) & \ ++ ~3)) ++ ++/* ++ * Just a clean way to get to the first byte ++ * of the packet ++ */ ++struct tcpiphdr_2 { ++ struct tcpiphdr dummy; ++ char first_char; ++}; ++ ++#endif +diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c +new file mode 100644 +index 0000000000..a19c889d34 +--- /dev/null ++++ b/slirp/src/tftp.c +@@ -0,0 +1,470 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * tftp.c - a simple, read-only tftp server for qemu ++ * ++ * Copyright (c) 2004 Magnus Damm ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++ ++#include "slirp.h" ++ ++#include ++#include ++#include ++ ++static inline int tftp_session_in_use(struct tftp_session *spt) ++{ ++ return (spt->slirp != NULL); ++} ++ ++static inline void tftp_session_update(struct tftp_session *spt) ++{ ++ spt->timestamp = curtime; ++} ++ ++static void tftp_session_terminate(struct tftp_session *spt) ++{ ++ if (spt->fd >= 0) { ++ close(spt->fd); ++ spt->fd = -1; ++ } ++ g_free(spt->filename); ++ spt->slirp = NULL; ++} ++ ++static int tftp_session_allocate(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ struct tftp_session *spt; ++ int k; ++ ++ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { ++ spt = &slirp->tftp_sessions[k]; ++ ++ if (!tftp_session_in_use(spt)) ++ goto found; ++ ++ /* sessions time out after 5 inactive seconds */ ++ if ((int)(curtime - spt->timestamp) > 5000) { ++ tftp_session_terminate(spt); ++ goto found; ++ } ++ } ++ ++ return -1; ++ ++found: ++ memset(spt, 0, sizeof(*spt)); ++ memcpy(&spt->client_addr, srcsas, sockaddr_size(srcsas)); ++ spt->fd = -1; ++ spt->block_size = 512; ++ spt->client_port = hdr->udp.uh_sport; ++ spt->slirp = slirp; ++ ++ tftp_session_update(spt); ++ ++ return k; ++} ++ ++static int tftp_session_find(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ struct tftp_session *spt; ++ int k; ++ ++ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { ++ spt = &slirp->tftp_sessions[k]; ++ ++ if (tftp_session_in_use(spt)) { ++ if (sockaddr_equal(&spt->client_addr, srcsas)) { ++ if (spt->client_port == hdr->udp.uh_sport) { ++ return k; ++ } ++ } ++ } ++ } ++ ++ return -1; ++} ++ ++static int tftp_read_data(struct tftp_session *spt, uint32_t block_nr, ++ uint8_t *buf, int len) ++{ ++ int bytes_read = 0; ++ ++ if (spt->fd < 0) { ++ spt->fd = open(spt->filename, O_RDONLY | O_BINARY); ++ } ++ ++ if (spt->fd < 0) { ++ return -1; ++ } ++ ++ if (len) { ++ if (lseek(spt->fd, block_nr * spt->block_size, SEEK_SET) == (off_t)-1) { ++ return -1; ++ } ++ ++ 
bytes_read = read(spt->fd, buf, len); ++ } ++ ++ return bytes_read; ++} ++ ++static struct tftp_t *tftp_prep_mbuf_data(struct tftp_session *spt, ++ struct mbuf *m) ++{ ++ struct tftp_t *tp; ++ ++ memset(m->m_data, 0, m->m_size); ++ ++ m->m_data += IF_MAXLINKHDR; ++ if (spt->client_addr.ss_family == AF_INET6) { ++ m->m_data += sizeof(struct ip6); ++ } else { ++ m->m_data += sizeof(struct ip); ++ } ++ tp = (void *)m->m_data; ++ m->m_data += sizeof(struct udphdr); ++ ++ return tp; ++} ++ ++static void tftp_udp_output(struct tftp_session *spt, struct mbuf *m, ++ struct tftphdr *hdr) ++{ ++ if (spt->client_addr.ss_family == AF_INET6) { ++ struct sockaddr_in6 sa6, da6; ++ ++ sa6.sin6_addr = spt->slirp->vhost_addr6; ++ sa6.sin6_port = hdr->udp.uh_dport; ++ da6.sin6_addr = ((struct sockaddr_in6 *)&spt->client_addr)->sin6_addr; ++ da6.sin6_port = spt->client_port; ++ ++ udp6_output(NULL, m, &sa6, &da6); ++ } else { ++ struct sockaddr_in sa4, da4; ++ ++ sa4.sin_addr = spt->slirp->vhost_addr; ++ sa4.sin_port = hdr->udp.uh_dport; ++ da4.sin_addr = ((struct sockaddr_in *)&spt->client_addr)->sin_addr; ++ da4.sin_port = spt->client_port; ++ ++ udp_output(NULL, m, &sa4, &da4, IPTOS_LOWDELAY); ++ } ++} ++ ++static int tftp_send_oack(struct tftp_session *spt, const char *keys[], ++ uint32_t values[], int nb, struct tftp_t *recv_tp) ++{ ++ struct mbuf *m; ++ struct tftp_t *tp; ++ int i, n = 0; ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) ++ return -1; ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->hdr.tp_op = htons(TFTP_OACK); ++ for (i = 0; i < nb; i++) { ++ n += slirp_fmt0(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", keys[i]); ++ n += slirp_fmt0(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u", values[i]); ++ } ++ ++ m->m_len = G_SIZEOF_MEMBER(struct tftp_t, hdr.tp_op) + n; ++ tftp_udp_output(spt, m, &recv_tp->hdr); ++ ++ return 0; ++} ++ ++static void tftp_send_error(struct tftp_session *spt, uint16_t errorcode, ++ const char *msg, struct tftp_t *recv_tp) ++{ ++ struct mbuf 
*m; ++ struct tftp_t *tp; ++ ++ DEBUG_TFTP("tftp error msg: %s", msg); ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) { ++ goto out; ++ } ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->hdr.tp_op = htons(TFTP_ERROR); ++ tp->x.tp_error.tp_error_code = htons(errorcode); ++ slirp_pstrcpy((char *)tp->x.tp_error.tp_msg, sizeof(tp->x.tp_error.tp_msg), ++ msg); ++ ++ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + 3 + ++ strlen(msg) - sizeof(struct udphdr); ++ tftp_udp_output(spt, m, &recv_tp->hdr); ++ ++out: ++ tftp_session_terminate(spt); ++} ++ ++static void tftp_send_next_block(struct tftp_session *spt, ++ struct tftphdr *hdr) ++{ ++ struct mbuf *m; ++ struct tftp_t *tp; ++ int nobytes; ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) { ++ return; ++ } ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->hdr.tp_op = htons(TFTP_DATA); ++ tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff); ++ ++ nobytes = tftp_read_data(spt, spt->block_nr, tp->x.tp_data.tp_buf, ++ spt->block_size); ++ ++ if (nobytes < 0) { ++ m_free(m); ++ ++ /* send "file not found" error back */ ++ ++ tftp_send_error(spt, 1, "File not found", tp); ++ ++ return; ++ } ++ ++ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX - nobytes) - ++ sizeof(struct udphdr); ++ tftp_udp_output(spt, m, hdr); ++ ++ if (nobytes == spt->block_size) { ++ tftp_session_update(spt); ++ } else { ++ tftp_session_terminate(spt); ++ } ++ ++ spt->block_nr++; ++} ++ ++static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftp_t *tp, int pktlen) ++{ ++ struct tftp_session *spt; ++ int s, k; ++ size_t prefix_len; ++ char *req_fname; ++ const char *option_name[2]; ++ uint32_t option_value[2]; ++ int nb_options = 0; ++ ++ /* check if a session already exists and if so terminate it */ ++ s = tftp_session_find(slirp, srcsas, &tp->hdr); ++ if (s >= 0) { ++ tftp_session_terminate(&slirp->tftp_sessions[s]); ++ } ++ ++ s = tftp_session_allocate(slirp, srcsas, &tp->hdr); ++ ++ if (s < 0) { 
++ return; ++ } ++ ++ spt = &slirp->tftp_sessions[s]; ++ ++ /* unspecified prefix means service disabled */ ++ if (!slirp->tftp_prefix) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ /* skip header fields */ ++ k = 0; ++ pktlen -= offsetof(struct tftp_t, x.tp_buf); ++ ++ /* prepend tftp_prefix */ ++ prefix_len = strlen(slirp->tftp_prefix); ++ spt->filename = g_malloc(prefix_len + TFTP_FILENAME_MAX + 2); ++ memcpy(spt->filename, slirp->tftp_prefix, prefix_len); ++ spt->filename[prefix_len] = '/'; ++ ++ /* get name */ ++ req_fname = spt->filename + prefix_len + 1; ++ ++ while (1) { ++ if (k >= TFTP_FILENAME_MAX || k >= pktlen) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ req_fname[k] = tp->x.tp_buf[k]; ++ if (req_fname[k++] == '\0') { ++ break; ++ } ++ } ++ ++ DEBUG_TFTP("tftp rrq file: %s", req_fname); ++ ++ /* check mode */ ++ if ((pktlen - k) < 6) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ if (strcasecmp(&tp->x.tp_buf[k], "octet") != 0) { ++ tftp_send_error(spt, 4, "Unsupported transfer mode", tp); ++ return; ++ } ++ ++ k += 6; /* skipping octet */ ++ ++ /* do sanity checks on the filename */ ++ if ( ++#ifdef G_OS_WIN32 ++ strstr(req_fname, "..\\") || ++ req_fname[strlen(req_fname) - 1] == '\\' || ++#endif ++ strstr(req_fname, "../") || ++ req_fname[strlen(req_fname) - 1] == '/') { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ /* check if the file exists */ ++ if (tftp_read_data(spt, 0, NULL, 0) < 0) { ++ tftp_send_error(spt, 1, "File not found", tp); ++ return; ++ } ++ ++ if (tp->x.tp_buf[pktlen - 1] != 0) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ while (k < pktlen && nb_options < G_N_ELEMENTS(option_name)) { ++ const char *key, *value; ++ ++ key = &tp->x.tp_buf[k]; ++ k += strlen(key) + 1; ++ ++ if (k >= pktlen) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ value = &tp->x.tp_buf[k]; 
++ k += strlen(value) + 1; ++ ++ if (strcasecmp(key, "tsize") == 0) { ++ int tsize = atoi(value); ++ struct stat stat_p; ++ ++ if (tsize == 0) { ++ if (stat(spt->filename, &stat_p) == 0) ++ tsize = stat_p.st_size; ++ else { ++ tftp_send_error(spt, 1, "File not found", tp); ++ return; ++ } ++ } ++ ++ option_name[nb_options] = "tsize"; ++ option_value[nb_options] = tsize; ++ nb_options++; ++ } else if (strcasecmp(key, "blksize") == 0) { ++ int blksize = atoi(value); ++ ++ /* Accept blksize up to our maximum size */ ++ if (blksize > 0) { ++ spt->block_size = MIN(blksize, TFTP_BLOCKSIZE_MAX); ++ option_name[nb_options] = "blksize"; ++ option_value[nb_options] = spt->block_size; ++ nb_options++; ++ } ++ } ++ } ++ ++ if (nb_options > 0) { ++ assert(nb_options <= G_N_ELEMENTS(option_name)); ++ tftp_send_oack(spt, option_name, option_value, nb_options, tp); ++ return; ++ } ++ ++ spt->block_nr = 0; ++ tftp_send_next_block(spt, &tp->hdr); ++} ++ ++static void tftp_handle_ack(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ int s; ++ ++ s = tftp_session_find(slirp, srcsas, hdr); ++ ++ if (s < 0) { ++ return; ++ } ++ ++ tftp_send_next_block(&slirp->tftp_sessions[s], hdr); ++} ++ ++static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ int s; ++ ++ s = tftp_session_find(slirp, srcsas, hdr); ++ ++ if (s < 0) { ++ return; ++ } ++ ++ tftp_session_terminate(&slirp->tftp_sessions[s]); ++} ++ ++void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m) ++{ ++ struct tftphdr *hdr = mtod_check(m, sizeof(struct tftphdr)); ++ ++ if (hdr == NULL) { ++ return; ++ } ++ ++ switch (ntohs(hdr->tp_op)) { ++ case TFTP_RRQ: ++ tftp_handle_rrq(m->slirp, srcsas, ++ mtod(m, struct tftp_t *), ++ m->m_len); ++ break; ++ ++ case TFTP_ACK: ++ tftp_handle_ack(m->slirp, srcsas, hdr); ++ break; ++ ++ case TFTP_ERROR: ++ tftp_handle_error(m->slirp, srcsas, hdr); ++ break; ++ } ++} +diff --git a/slirp/src/tftp.h 
b/slirp/src/tftp.h +new file mode 100644 +index 0000000000..cafab03f2f +--- /dev/null ++++ b/slirp/src/tftp.h +@@ -0,0 +1,58 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* tftp defines */ ++ ++#ifndef SLIRP_TFTP_H ++#define SLIRP_TFTP_H ++ ++#include "util.h" ++ ++#define TFTP_SESSIONS_MAX 20 ++ ++#define TFTP_SERVER 69 ++ ++#define TFTP_RRQ 1 ++#define TFTP_WRQ 2 ++#define TFTP_DATA 3 ++#define TFTP_ACK 4 ++#define TFTP_ERROR 5 ++#define TFTP_OACK 6 ++ ++#define TFTP_FILENAME_MAX 512 ++#define TFTP_BLOCKSIZE_MAX 1428 ++ ++struct tftphdr { ++ struct udphdr udp; ++ uint16_t tp_op; ++} SLIRP_PACKED; ++ ++struct tftp_t { ++ struct tftphdr hdr; ++ union { ++ struct { ++ uint16_t tp_block_nr; ++ uint8_t tp_buf[TFTP_BLOCKSIZE_MAX]; ++ } tp_data; ++ struct { ++ uint16_t tp_error_code; ++ uint8_t tp_msg[TFTP_BLOCKSIZE_MAX]; ++ } tp_error; ++ char tp_buf[TFTP_BLOCKSIZE_MAX + 2]; ++ } x; ++} SLIRP_PACKED; ++ ++struct tftp_session { ++ Slirp *slirp; ++ char *filename; ++ int fd; ++ uint16_t block_size; ++ ++ struct sockaddr_storage client_addr; ++ uint16_t client_port; ++ uint32_t block_nr; ++ ++ int timestamp; ++}; ++ ++void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/udp.c b/slirp/src/udp.c +new file mode 100644 +index 0000000000..06b7b7d032 +--- /dev/null ++++ b/slirp/src/udp.c +@@ -0,0 +1,425 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. 
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94 ++ * udp_usrreq.c,v 1.4 1994/10/02 17:48:45 phk Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ * ++ * Please read the file COPYRIGHT for the ++ * terms and conditions of the copyright. 
++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++static uint8_t udp_tos(struct socket *so); ++ ++void udp_init(Slirp *slirp) ++{ ++ slirp->udb.so_next = slirp->udb.so_prev = &slirp->udb; ++ slirp->udp_last_so = &slirp->udb; ++} ++ ++void udp_cleanup(Slirp *slirp) ++{ ++ struct socket *so, *so_next; ++ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ so_next = so->so_next; ++ udp_detach(slirp->udb.so_next); ++ } ++} ++ ++/* m->m_data points at ip packet header ++ * m->m_len length ip packet ++ * ip->ip_len length data (IPDU) ++ */ ++void udp_input(register struct mbuf *m, int iphlen) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ register struct ip *ip; ++ register struct udphdr *uh; ++ int len; ++ struct ip save_ip; ++ struct socket *so; ++ struct sockaddr_storage lhost; ++ struct sockaddr_in *lhost4; ++ int ttl; ++ ++ DEBUG_CALL("udp_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("iphlen = %d", iphlen); ++ ++ /* ++ * Strip IP options, if any; should skip this, ++ * make available to user, and use on returned packets, ++ * but we don't yet have a way to check the checksum ++ * with options still present. ++ */ ++ if (iphlen > sizeof(struct ip)) { ++ ip_stripoptions(m, (struct mbuf *)0); ++ iphlen = sizeof(struct ip); ++ } ++ ++ /* ++ * Get IP and UDP header together in first mbuf. ++ */ ++ ip = mtod_check(m, iphlen + sizeof(struct udphdr)); ++ if (ip == NULL) { ++ goto bad; ++ } ++ uh = (struct udphdr *)((char *)ip + iphlen); ++ ++ /* ++ * Make mbuf data length reflect UDP length. ++ * If not enough data to reflect UDP length, drop. ++ */ ++ len = ntohs((uint16_t)uh->uh_ulen); ++ ++ if (ip->ip_len != len) { ++ if (len > ip->ip_len) { ++ goto bad; ++ } ++ m_adj(m, len - ip->ip_len); ++ ip->ip_len = len; ++ } ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. 
++ */ ++ save_ip = *ip; ++ save_ip.ip_len += iphlen; /* tcp_input subtracts this */ ++ ++ /* ++ * Checksum extended UDP header and data. ++ */ ++ if (uh->uh_sum) { ++ memset(&((struct ipovly *)ip)->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ ((struct ipovly *)ip)->ih_x1 = 0; ++ ((struct ipovly *)ip)->ih_len = uh->uh_ulen; ++ if (cksum(m, len + sizeof(struct ip))) { ++ goto bad; ++ } ++ } ++ ++ lhost.ss_family = AF_INET; ++ lhost4 = (struct sockaddr_in *)&lhost; ++ lhost4->sin_addr = ip->ip_src; ++ lhost4->sin_port = uh->uh_sport; ++ ++ /* ++ * handle DHCP/BOOTP ++ */ ++ if (ntohs(uh->uh_dport) == BOOTP_SERVER && ++ (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || ++ ip->ip_dst.s_addr == 0xffffffff)) { ++ bootp_input(m); ++ goto bad; ++ } ++ ++ /* ++ * handle TFTP ++ */ ++ if (ntohs(uh->uh_dport) == TFTP_SERVER && ++ ip->ip_dst.s_addr == slirp->vhost_addr.s_addr) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ tftp_input(&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ if (slirp->restricted) { ++ goto bad; ++ } ++ ++ /* ++ * Locate pcb for datagram. ++ */ ++ so = solookup(&slirp->udp_last_so, &slirp->udb, &lhost, NULL); ++ ++ if (so == NULL) { ++ /* ++ * If there's no socket for this packet, ++ * create one ++ */ ++ so = socreate(slirp); ++ if (udp_attach(so, AF_INET) == -1) { ++ DEBUG_MISC(" udp_attach errno = %d-%s", errno, strerror(errno)); ++ sofree(so); ++ goto bad; ++ } ++ ++ /* ++ * Setup fields ++ */ ++ so->so_lfamily = AF_INET; ++ so->so_laddr = ip->ip_src; ++ so->so_lport = uh->uh_sport; ++ ++ if ((so->so_iptos = udp_tos(so)) == 0) ++ so->so_iptos = ip->ip_tos; ++ ++ /* ++ * XXXXX Here, check if it's in udpexec_list, ++ * and if it is, do the fork_exec() etc. 
++ */ ++ } ++ ++ so->so_ffamily = AF_INET; ++ so->so_faddr = ip->ip_dst; /* XXX */ ++ so->so_fport = uh->uh_dport; /* XXX */ ++ ++ iphlen += sizeof(struct udphdr); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ ++ /* ++ * Check for TTL ++ */ ++ ttl = save_ip.ip_ttl-1; ++ if (ttl <= 0) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp ttl exceeded"); ++ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, NULL); ++ goto bad; ++ } ++ setsockopt(so->s, IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)); ++ ++ /* ++ * Now we sendto() the packet. ++ */ ++ if (sosendto(so, m) == -1) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); ++ goto bad; ++ } ++ ++ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ ++ ++ /* restore the orig mbuf packet */ ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ so->so_m = m; /* ICMP backup */ ++ ++ return; ++bad: ++ m_free(m); ++} ++ ++int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, ++ struct sockaddr_in *daddr, int iptos) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, sizeof(struct udpiphdr)); ++ ++ register struct udpiphdr *ui; ++ int error = 0; ++ ++ DEBUG_CALL("udp_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("saddr = %s", inet_ntoa(saddr->sin_addr)); ++ DEBUG_ARG("daddr = %s", inet_ntoa(daddr->sin_addr)); ++ ++ /* ++ * Adjust for header ++ */ ++ m->m_data -= sizeof(struct udpiphdr); ++ m->m_len += sizeof(struct udpiphdr); ++ ++ /* ++ * Fill in mbuf with extended UDP header ++ * and addresses and length put into network format. 
++ */ ++ ui = mtod(m, struct udpiphdr *); ++ memset(&ui->ui_i.ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ ui->ui_x1 = 0; ++ ui->ui_pr = IPPROTO_UDP; ++ ui->ui_len = htons(m->m_len - sizeof(struct ip)); ++ /* XXXXX Check for from-one-location sockets, or from-any-location sockets ++ */ ++ ui->ui_src = saddr->sin_addr; ++ ui->ui_dst = daddr->sin_addr; ++ ui->ui_sport = saddr->sin_port; ++ ui->ui_dport = daddr->sin_port; ++ ui->ui_ulen = ui->ui_len; ++ ++ /* ++ * Stuff checksum and output datagram. ++ */ ++ ui->ui_sum = 0; ++ if ((ui->ui_sum = cksum(m, m->m_len)) == 0) ++ ui->ui_sum = 0xffff; ++ ((struct ip *)ui)->ip_len = m->m_len; ++ ++ ((struct ip *)ui)->ip_ttl = IPDEFTTL; ++ ((struct ip *)ui)->ip_tos = iptos; ++ ++ error = ip_output(so, m); ++ ++ return (error); ++} ++ ++int udp_attach(struct socket *so, unsigned short af) ++{ ++ so->s = slirp_socket(af, SOCK_DGRAM, 0); ++ if (so->s != -1) { ++ if (slirp_bind_outbound(so, af) != 0) { ++ // bind failed - close socket ++ closesocket(so->s); ++ so->s = -1; ++ return -1; ++ } ++ ++#ifdef __linux__ ++ { ++ int opt = 1; ++ switch (af) { ++ case AF_INET: ++ setsockopt(so->s, IPPROTO_IP, IP_RECVERR, &opt, sizeof(opt)); ++ break; ++ case AF_INET6: ++ setsockopt(so->s, IPPROTO_IPV6, IPV6_RECVERR, &opt, sizeof(opt)); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++#endif ++ ++ so->so_expire = curtime + SO_EXPIRE; ++ insque(so, &so->slirp->udb); ++ } ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ return (so->s); ++} ++ ++void udp_detach(struct socket *so) ++{ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++} ++ ++static const struct tos_t udptos[] = { { 0, 53, IPTOS_LOWDELAY, 0 }, /* DNS */ ++ { 0, 0, 0, 0 } }; ++ ++static uint8_t udp_tos(struct socket *so) ++{ ++ int i = 0; ++ ++ while (udptos[i].tos) { ++ if ((udptos[i].fport && ntohs(so->so_fport) == udptos[i].fport) || ++ (udptos[i].lport && ntohs(so->so_lport) == udptos[i].lport)) { ++ if 
(so->slirp->enable_emu) ++ so->so_emu = udptos[i].emu; ++ return udptos[i].tos; ++ } ++ i++; ++ } ++ ++ return 0; ++} ++ ++struct socket *udpx_listen(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags) ++{ ++ struct socket *so; ++ socklen_t addrlen; ++ int save_errno; ++ ++ so = socreate(slirp); ++ so->s = slirp_socket(haddr->sa_family, SOCK_DGRAM, 0); ++ if (so->s < 0) { ++ save_errno = errno; ++ sofree(so); ++ errno = save_errno; ++ return NULL; ++ } ++ if (haddr->sa_family == AF_INET6) ++ slirp_socket_set_v6only(so->s, (flags & SS_HOSTFWD_V6ONLY) != 0); ++ so->so_expire = curtime + SO_EXPIRE; ++ insque(so, &slirp->udb); ++ ++ if (bind(so->s, haddr, haddrlen) < 0) { ++ save_errno = errno; ++ udp_detach(so); ++ errno = save_errno; ++ return NULL; ++ } ++ slirp_socket_set_fast_reuse(so->s); ++ ++ addrlen = sizeof(so->fhost); ++ getsockname(so->s, &so->fhost.sa, &addrlen); ++ sotranslate_accept(so); ++ ++ sockaddr_copy(&so->lhost.sa, sizeof(so->lhost), laddr, laddrlen); ++ ++ if (flags != SS_FACCEPTONCE) ++ so->so_expire = 0; ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_ISFCONNECTED | flags; ++ ++ return so; ++} ++ ++struct socket *udp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, ++ uint32_t laddr, unsigned lport, int flags) ++{ ++ struct sockaddr_in hsa, lsa; ++ ++ memset(&hsa, 0, sizeof(hsa)); ++ hsa.sin_family = AF_INET; ++ hsa.sin_addr.s_addr = haddr; ++ hsa.sin_port = hport; ++ ++ memset(&lsa, 0, sizeof(lsa)); ++ lsa.sin_family = AF_INET; ++ lsa.sin_addr.s_addr = laddr; ++ lsa.sin_port = lport; ++ ++ return udpx_listen(slirp, (const struct sockaddr *) &hsa, sizeof(hsa), (struct sockaddr *) &lsa, sizeof(lsa), flags); ++} +diff --git a/slirp/src/udp.h b/slirp/src/udp.h +new file mode 100644 +index 0000000000..47f4ed34d8 +--- /dev/null ++++ b/slirp/src/udp.h +@@ -0,0 +1,96 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 
1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)udp.h 8.1 (Berkeley) 6/10/93 ++ * udp.h,v 1.3 1994/08/21 05:27:41 paul Exp ++ */ ++ ++#ifndef UDP_H ++#define UDP_H ++ ++#include "socket.h" ++ ++#define UDP_TTL 0x60 ++#define UDP_UDPDATALEN 16192 ++ ++/* ++ * Udp protocol header. ++ * Per RFC 768, September, 1981. 
++ */ ++struct udphdr { ++ uint16_t uh_sport; /* source port */ ++ uint16_t uh_dport; /* destination port */ ++ int16_t uh_ulen; /* udp length */ ++ uint16_t uh_sum; /* udp checksum */ ++}; ++ ++/* ++ * UDP kernel structures and variables. ++ */ ++struct udpiphdr { ++ struct ipovly ui_i; /* overlaid ip structure */ ++ struct udphdr ui_u; /* udp header */ ++}; ++#define ui_mbuf ui_i.ih_mbuf.mptr ++#define ui_x1 ui_i.ih_x1 ++#define ui_pr ui_i.ih_pr ++#define ui_len ui_i.ih_len ++#define ui_src ui_i.ih_src ++#define ui_dst ui_i.ih_dst ++#define ui_sport ui_u.uh_sport ++#define ui_dport ui_u.uh_dport ++#define ui_ulen ui_u.uh_ulen ++#define ui_sum ui_u.uh_sum ++ ++/* ++ * Names for UDP sysctl objects ++ */ ++#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */ ++#define UDPCTL_MAXID 2 ++ ++struct mbuf; ++ ++void udp_init(Slirp *); ++void udp_cleanup(Slirp *); ++void udp_input(register struct mbuf *, int); ++int udp_attach(struct socket *, unsigned short af); ++void udp_detach(struct socket *); ++struct socket *udp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); ++struct socket *udpx_listen(Slirp *, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags); ++int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, ++ struct sockaddr_in *daddr, int iptos); ++ ++void udp6_input(register struct mbuf *); ++int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, ++ struct sockaddr_in6 *daddr); ++ ++#endif +diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c +new file mode 100644 +index 0000000000..efeac5c19a +--- /dev/null ++++ b/slirp/src/udp6.c +@@ -0,0 +1,196 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron ++ */ ++ ++#include "slirp.h" ++#include "udp.h" ++#include "dhcpv6.h" ++ ++void udp6_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ struct ip6 *ip, 
save_ip; ++ struct udphdr *uh; ++ int iphlen = sizeof(struct ip6); ++ int len; ++ struct socket *so; ++ struct sockaddr_in6 lhost; ++ int hop_limit; ++ ++ DEBUG_CALL("udp6_input"); ++ DEBUG_ARG("m = %p", m); ++ ++ if (slirp->restricted) { ++ goto bad; ++ } ++ ++ ip = mtod(m, struct ip6 *); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ uh = mtod_check(m, sizeof(struct udphdr)); ++ if (uh == NULL) { ++ goto bad; ++ } ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ ++ if (ip6_cksum(m)) { ++ goto bad; ++ } ++ ++ len = ntohs((uint16_t)uh->uh_ulen); ++ ++ /* ++ * Make mbuf data length reflect UDP length. ++ * If not enough data to reflect UDP length, drop. ++ */ ++ if (ntohs(ip->ip_pl) != len) { ++ if (len > ntohs(ip->ip_pl)) { ++ goto bad; ++ } ++ m_adj(m, len - ntohs(ip->ip_pl)); ++ ip->ip_pl = htons(len); ++ } ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ ++ /* Locate pcb for datagram. */ ++ lhost.sin6_family = AF_INET6; ++ lhost.sin6_addr = ip->ip_src; ++ lhost.sin6_port = uh->uh_sport; ++ ++ /* handle DHCPv6 */ ++ if (ntohs(uh->uh_dport) == DHCPV6_SERVER_PORT && ++ (in6_equal(&ip->ip_dst, &slirp->vhost_addr6) || ++ in6_dhcp_multicast(&ip->ip_dst))) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ dhcpv6_input(&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ /* handle TFTP */ ++ if (ntohs(uh->uh_dport) == TFTP_SERVER && ++ !memcmp(ip->ip_dst.s6_addr, slirp->vhost_addr6.s6_addr, 16)) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ tftp_input((struct sockaddr_storage *)&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ so = solookup(&slirp->udp_last_so, &slirp->udb, ++ (struct sockaddr_storage *)&lhost, NULL); ++ ++ if (so == NULL) { ++ /* If there's no socket for this packet, create one. 
*/ ++ so = socreate(slirp); ++ if (udp_attach(so, AF_INET6) == -1) { ++ DEBUG_MISC(" udp6_attach errno = %d-%s", errno, strerror(errno)); ++ sofree(so); ++ goto bad; ++ } ++ ++ /* Setup fields */ ++ so->so_lfamily = AF_INET6; ++ so->so_laddr6 = ip->ip_src; ++ so->so_lport6 = uh->uh_sport; ++ } ++ ++ so->so_ffamily = AF_INET6; ++ so->so_faddr6 = ip->ip_dst; /* XXX */ ++ so->so_fport6 = uh->uh_dport; /* XXX */ ++ ++ iphlen += sizeof(struct udphdr); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ ++ /* ++ * Check for TTL ++ */ ++ hop_limit = save_ip.ip_hl-1; ++ if (hop_limit <= 0) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp ttl exceeded"); ++ icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); ++ goto bad; ++ } ++ setsockopt(so->s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &hop_limit, sizeof(hop_limit)); ++ ++ /* ++ * Now we sendto() the packet. ++ */ ++ if (sosendto(so, m) == -1) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); ++ icmp6_send_error(m, ICMP6_UNREACH, ICMP6_UNREACH_NO_ROUTE); ++ goto bad; ++ } ++ ++ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ ++ ++ /* restore the orig mbuf packet */ ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ so->so_m = m; ++ ++ return; ++bad: ++ m_free(m); ++} ++ ++int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, ++ struct sockaddr_in6 *daddr) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, sizeof(struct ip6) + sizeof(struct udphdr)); ++ ++ struct ip6 *ip; ++ struct udphdr *uh; ++ ++ DEBUG_CALL("udp6_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ /* adjust for header */ ++ m->m_data -= sizeof(struct udphdr); ++ m->m_len += sizeof(struct udphdr); ++ uh = mtod(m, struct udphdr *); ++ m->m_data -= sizeof(struct ip6); ++ m->m_len += sizeof(struct ip6); ++ ip = mtod(m, struct ip6 *); ++ ++ /* Build IP header */ 
++ ip->ip_pl = htons(m->m_len - sizeof(struct ip6)); ++ ip->ip_nh = IPPROTO_UDP; ++ ip->ip_src = saddr->sin6_addr; ++ ip->ip_dst = daddr->sin6_addr; ++ ++ /* Build UDP header */ ++ uh->uh_sport = saddr->sin6_port; ++ uh->uh_dport = daddr->sin6_port; ++ uh->uh_ulen = ip->ip_pl; ++ uh->uh_sum = 0; ++ uh->uh_sum = ip6_cksum(m); ++ if (uh->uh_sum == 0) { ++ uh->uh_sum = 0xffff; ++ } ++ ++ return ip6_output(so, m, 0); ++} +diff --git a/slirp/src/util.c b/slirp/src/util.c +new file mode 100644 +index 0000000000..e6bccbe0fa +--- /dev/null ++++ b/slirp/src/util.c +@@ -0,0 +1,441 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * util.c (mostly based on QEMU os-win32.c) ++ * ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * Copyright (c) 2010-2016 Red Hat, Inc. ++ * ++ * QEMU library functions for win32 which are shared between QEMU and ++ * the QEMU tools. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "util.h" ++ ++#include ++#include ++#include ++ ++#if defined(_WIN32) ++int slirp_inet_aton(const char *cp, struct in_addr *ia) ++{ ++ uint32_t addr = inet_addr(cp); ++ if (addr == 0xffffffff) { ++ return 0; ++ } ++ ia->s_addr = addr; ++ return 1; ++} ++#endif ++ ++void slirp_set_nonblock(int fd) ++{ ++#ifndef _WIN32 ++ int f; ++ f = fcntl(fd, F_GETFL); ++ assert(f != -1); ++ f = fcntl(fd, F_SETFL, f | O_NONBLOCK); ++ assert(f != -1); ++#else ++ unsigned long opt = 1; ++ ioctlsocket(fd, FIONBIO, &opt); ++#endif ++} ++ ++static void slirp_set_cloexec(int fd) ++{ ++#ifndef _WIN32 ++ int f; ++ f = fcntl(fd, F_GETFD); ++ assert(f != -1); ++ f = fcntl(fd, F_SETFD, f | FD_CLOEXEC); ++ assert(f != -1); ++#endif ++} ++ ++/* ++ * Opens a socket with FD_CLOEXEC set ++ * On failure errno contains the reason. ++ */ ++int slirp_socket(int domain, int type, int protocol) ++{ ++ int ret; ++ ++#ifdef SOCK_CLOEXEC ++ ret = socket(domain, type | SOCK_CLOEXEC, protocol); ++ if (ret != -1 || errno != EINVAL) { ++ return ret; ++ } ++#endif ++ ret = socket(domain, type, protocol); ++ if (ret >= 0) { ++ slirp_set_cloexec(ret); ++ } ++ ++ return ret; ++} ++ ++#ifdef _WIN32 ++static int socket_error(void) ++{ ++ switch (WSAGetLastError()) { ++ case 0: ++ return 0; ++ case WSAEINTR: ++ return EINTR; ++ case WSAEINVAL: ++ return EINVAL; ++ case WSA_INVALID_HANDLE: ++ return EBADF; ++ case WSA_NOT_ENOUGH_MEMORY: ++ return ENOMEM; ++ case WSA_INVALID_PARAMETER: ++ return EINVAL; ++ case WSAENAMETOOLONG: ++ return ENAMETOOLONG; ++ case WSAENOTEMPTY: ++ return ENOTEMPTY; ++ case WSAEWOULDBLOCK: ++ /* not using EWOULDBLOCK as we don't want code to have ++ * to check both EWOULDBLOCK and EAGAIN */ ++ return EAGAIN; ++ case WSAEINPROGRESS: ++ return EINPROGRESS; ++ case WSAEALREADY: ++ return EALREADY; ++ case WSAENOTSOCK: ++ return ENOTSOCK; ++ case WSAEDESTADDRREQ: ++ return EDESTADDRREQ; ++ case WSAEMSGSIZE: ++ return EMSGSIZE; ++ case WSAEPROTOTYPE: ++ return EPROTOTYPE; ++ 
case WSAENOPROTOOPT: ++ return ENOPROTOOPT; ++ case WSAEPROTONOSUPPORT: ++ return EPROTONOSUPPORT; ++ case WSAEOPNOTSUPP: ++ return EOPNOTSUPP; ++ case WSAEAFNOSUPPORT: ++ return EAFNOSUPPORT; ++ case WSAEADDRINUSE: ++ return EADDRINUSE; ++ case WSAEADDRNOTAVAIL: ++ return EADDRNOTAVAIL; ++ case WSAENETDOWN: ++ return ENETDOWN; ++ case WSAENETUNREACH: ++ return ENETUNREACH; ++ case WSAENETRESET: ++ return ENETRESET; ++ case WSAECONNABORTED: ++ return ECONNABORTED; ++ case WSAECONNRESET: ++ return ECONNRESET; ++ case WSAENOBUFS: ++ return ENOBUFS; ++ case WSAEISCONN: ++ return EISCONN; ++ case WSAENOTCONN: ++ return ENOTCONN; ++ case WSAETIMEDOUT: ++ return ETIMEDOUT; ++ case WSAECONNREFUSED: ++ return ECONNREFUSED; ++ case WSAELOOP: ++ return ELOOP; ++ case WSAEHOSTUNREACH: ++ return EHOSTUNREACH; ++ default: ++ return EIO; ++ } ++} ++ ++#undef ioctlsocket ++int slirp_ioctlsocket_wrap(int fd, int req, void *val) ++{ ++ int ret; ++ ret = ioctlsocket(fd, req, val); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef closesocket ++int slirp_closesocket_wrap(int fd) ++{ ++ int ret; ++ ret = closesocket(fd); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef connect ++int slirp_connect_wrap(int sockfd, const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = connect(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef listen ++int slirp_listen_wrap(int sockfd, int backlog) ++{ ++ int ret; ++ ret = listen(sockfd, backlog); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef bind ++int slirp_bind_wrap(int sockfd, const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = bind(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef socket ++int slirp_socket_wrap(int domain, int type, int protocol) ++{ ++ int ret; ++ ret = socket(domain, type, protocol); ++ if (ret < 0) 
{ ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef accept ++int slirp_accept_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = accept(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef shutdown ++int slirp_shutdown_wrap(int sockfd, int how) ++{ ++ int ret; ++ ret = shutdown(sockfd, how); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getsockopt ++int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, ++ int *optlen) ++{ ++ int ret; ++ ret = getsockopt(sockfd, level, optname, optval, optlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef setsockopt ++int slirp_setsockopt_wrap(int sockfd, int level, int optname, ++ const void *optval, int optlen) ++{ ++ int ret; ++ ret = setsockopt(sockfd, level, optname, optval, optlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getpeername ++int slirp_getpeername_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = getpeername(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getsockname ++int slirp_getsockname_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = getsockname(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef send ++ssize_t slirp_send_wrap(int sockfd, const void *buf, size_t len, int flags) ++{ ++ int ret; ++ ret = send(sockfd, buf, len, flags); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef sendto ++ssize_t slirp_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, ++ const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = sendto(sockfd, buf, len, flags, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef recv ++ssize_t 
slirp_recv_wrap(int sockfd, void *buf, size_t len, int flags) ++{ ++ int ret; ++ ret = recv(sockfd, buf, len, flags); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef recvfrom ++ssize_t slirp_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, ++ struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = recvfrom(sockfd, buf, len, flags, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++#endif /* WIN32 */ ++ ++void slirp_pstrcpy(char *buf, int buf_size, const char *str) ++{ ++ int c; ++ char *q = buf; ++ ++ if (buf_size <= 0) ++ return; ++ ++ for (;;) { ++ c = *str++; ++ if (c == 0 || q >= buf + buf_size - 1) ++ break; ++ *q++ = c; ++ } ++ *q = '\0'; ++} ++ ++G_GNUC_PRINTF(3, 0) ++static int slirp_vsnprintf(char *str, size_t size, ++ const char *format, va_list args) ++{ ++ int rv = g_vsnprintf(str, size, format, args); ++ ++ if (rv < 0) { ++ g_error("g_vsnprintf() failed: %s", g_strerror(errno)); ++ } ++ ++ return rv; ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - returns the number of bytes written (excluding optional \0-ending) ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt(char *str, size_t size, const char *format, ...) ++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv >= size) { ++ g_critical("slirp_fmt() truncation"); ++ } ++ ++ return MIN(rv, size); ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - always \0-end (unless size == 0) ++ * - returns the number of bytes actually written, including \0 ending ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt0(char *str, size_t size, const char *format, ...) 
++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv >= size) { ++ g_critical("slirp_fmt0() truncation"); ++ if (size > 0) ++ str[size - 1] = '\0'; ++ rv = size; ++ } else { ++ rv += 1; /* include \0 */ ++ } ++ ++ return rv; ++} ++ ++const char *slirp_ether_ntoa(const uint8_t *addr, char *out_str, ++ size_t out_str_size) ++{ ++ assert(out_str_size >= ETH_ADDRSTRLEN); ++ ++ slirp_fmt0(out_str, out_str_size, "%02x:%02x:%02x:%02x:%02x:%02x", ++ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); ++ ++ return out_str; ++} +diff --git a/slirp/src/util.h b/slirp/src/util.h +new file mode 100644 +index 0000000000..07654ecf37 +--- /dev/null ++++ b/slirp/src/util.h +@@ -0,0 +1,203 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * Copyright (c) 2010-2019 Red Hat, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#ifndef UTIL_H_ ++#define UTIL_H_ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef _WIN32 ++#include ++#include ++#include ++#else ++#include ++#include ++#include ++#endif ++ ++#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) ++#define SLIRP_PACKED __attribute__((gcc_struct, packed)) ++#else ++#define SLIRP_PACKED __attribute__((packed)) ++#endif ++ ++#ifndef DIV_ROUND_UP ++#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) ++#endif ++ ++#ifndef container_of ++#define container_of(ptr, type, member) \ ++ __extension__({ \ ++ void *__mptr = (void *)(ptr); \ ++ ((type *)(__mptr - offsetof(type, member))); \ ++ }) ++#endif ++ ++#ifndef G_SIZEOF_MEMBER ++#define G_SIZEOF_MEMBER(type, member) sizeof(((type *)0)->member) ++#endif ++ ++#if defined(_WIN32) /* CONFIG_IOVEC */ ++#if !defined(IOV_MAX) /* XXX: to avoid duplicate with QEMU osdep.h */ ++struct iovec { ++ void *iov_base; ++ size_t iov_len; ++}; ++#endif ++#else ++#include ++#endif ++ ++#define stringify(s) tostring(s) ++#define tostring(s) #s ++ ++#define SCALE_MS 1000000 ++ ++#define ETH_ALEN 6 ++#define ETH_ADDRSTRLEN 18 /* "xx:xx:xx:xx:xx:xx", with trailing NUL */ ++#define ETH_HLEN 14 ++#define ETH_P_IP (0x0800) /* Internet Protocol packet */ ++#define ETH_P_ARP (0x0806) /* Address Resolution packet */ ++#define ETH_P_IPV6 (0x86dd) ++#define ETH_P_VLAN (0x8100) ++#define ETH_P_DVLAN (0x88a8) ++#define ETH_P_NCSI (0x88f8) ++#define ETH_P_UNKNOWN (0xffff) ++ ++/* FIXME: remove me when made standalone */ ++#ifdef _WIN32 ++#undef accept ++#undef bind ++#undef closesocket ++#undef connect ++#undef getpeername ++#undef getsockname ++#undef 
getsockopt ++#undef ioctlsocket ++#undef listen ++#undef recv ++#undef recvfrom ++#undef send ++#undef sendto ++#undef setsockopt ++#undef shutdown ++#undef socket ++#endif ++ ++#ifdef _WIN32 ++#define connect slirp_connect_wrap ++int slirp_connect_wrap(int fd, const struct sockaddr *addr, int addrlen); ++#define listen slirp_listen_wrap ++int slirp_listen_wrap(int fd, int backlog); ++#define bind slirp_bind_wrap ++int slirp_bind_wrap(int fd, const struct sockaddr *addr, int addrlen); ++#define socket slirp_socket_wrap ++int slirp_socket_wrap(int domain, int type, int protocol); ++#define accept slirp_accept_wrap ++int slirp_accept_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define shutdown slirp_shutdown_wrap ++int slirp_shutdown_wrap(int fd, int how); ++#define getpeername slirp_getpeername_wrap ++int slirp_getpeername_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define getsockname slirp_getsockname_wrap ++int slirp_getsockname_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define send slirp_send_wrap ++ssize_t slirp_send_wrap(int fd, const void *buf, size_t len, int flags); ++#define sendto slirp_sendto_wrap ++ssize_t slirp_sendto_wrap(int fd, const void *buf, size_t len, int flags, ++ const struct sockaddr *dest_addr, int addrlen); ++#define recv slirp_recv_wrap ++ssize_t slirp_recv_wrap(int fd, void *buf, size_t len, int flags); ++#define recvfrom slirp_recvfrom_wrap ++ssize_t slirp_recvfrom_wrap(int fd, void *buf, size_t len, int flags, ++ struct sockaddr *src_addr, int *addrlen); ++#define closesocket slirp_closesocket_wrap ++int slirp_closesocket_wrap(int fd); ++#define ioctlsocket slirp_ioctlsocket_wrap ++int slirp_ioctlsocket_wrap(int fd, int req, void *val); ++#define getsockopt slirp_getsockopt_wrap ++int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, ++ int *optlen); ++#define setsockopt slirp_setsockopt_wrap ++int slirp_setsockopt_wrap(int sockfd, int level, int optname, ++ const void *optval, int 
optlen); ++#define inet_aton slirp_inet_aton ++int slirp_inet_aton(const char *cp, struct in_addr *ia); ++#else ++#define closesocket(s) close(s) ++#define ioctlsocket(s, r, v) ioctl(s, r, v) ++#endif ++ ++int slirp_socket(int domain, int type, int protocol); ++void slirp_set_nonblock(int fd); ++ ++static inline int slirp_socket_set_v6only(int fd, int v) ++{ ++ return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)); ++} ++ ++static inline int slirp_socket_set_nodelay(int fd) ++{ ++ int v = 1; ++ return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); ++} ++ ++static inline int slirp_socket_set_fast_reuse(int fd) ++{ ++#ifndef _WIN32 ++ int v = 1; ++ return setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &v, sizeof(v)); ++#else ++ /* Enabling the reuse of an endpoint that was used by a socket still in ++ * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows ++ * fast reuse is the default and SO_REUSEADDR does strange things. So we ++ * don't have to do anything here. More info can be found at: ++ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ ++ return 0; ++#endif ++} ++ ++void slirp_pstrcpy(char *buf, int buf_size, const char *str); ++ ++int slirp_fmt(char *str, size_t size, const char *format, ...) G_GNUC_PRINTF(3, 4); ++int slirp_fmt0(char *str, size_t size, const char *format, ...) G_GNUC_PRINTF(3, 4); ++ ++/* ++ * Pretty print a MAC address into out_str. ++ * As a convenience returns out_str. 
++ */ ++const char *slirp_ether_ntoa(const uint8_t *addr, char *out_str, ++ size_t out_str_len); ++ ++#endif +diff --git a/slirp/src/version.c b/slirp/src/version.c +new file mode 100644 +index 0000000000..93e0be9c24 +--- /dev/null ++++ b/slirp/src/version.c +@@ -0,0 +1,8 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#include "libslirp.h" ++ ++const char * ++slirp_version_string(void) ++{ ++ return SLIRP_VERSION_STRING; ++} +diff --git a/slirp/src/vmstate.c b/slirp/src/vmstate.c +new file mode 100644 +index 0000000000..68cc1729c5 +--- /dev/null ++++ b/slirp/src/vmstate.c +@@ -0,0 +1,444 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * VMState interpreter ++ * ++ * Copyright (c) 2009-2018 Red Hat Inc ++ * ++ * Authors: ++ * Juan Quintela ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#include ++#include ++#include ++#include ++ ++#include "stream.h" ++#include "vmstate.h" ++ ++static int get_nullptr(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ if (slirp_istream_read_u8(f) == VMS_NULLPTR_MARKER) { ++ return 0; ++ } ++ g_warning("vmstate: get_nullptr expected VMS_NULLPTR_MARKER"); ++ return -EINVAL; ++} ++ ++static int put_nullptr(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++ ++{ ++ if (pv == NULL) { ++ slirp_ostream_write_u8(f, VMS_NULLPTR_MARKER); ++ return 0; ++ } ++ g_warning("vmstate: put_nullptr must be called with pv == NULL"); ++ return -EINVAL; ++} ++ ++const VMStateInfo slirp_vmstate_info_nullptr = { ++ .name = "uint64", ++ .get = get_nullptr, ++ .put = put_nullptr, ++}; ++ ++/* 8 bit unsigned int */ ++ ++static int get_uint8(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint8_t *v = pv; ++ *v = slirp_istream_read_u8(f); ++ return 0; ++} ++ ++static int put_uint8(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint8_t *v = pv; ++ slirp_ostream_write_u8(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint8 = { ++ .name = "uint8", ++ .get = get_uint8, ++ .put = put_uint8, ++}; ++ ++/* 16 bit unsigned int */ ++ ++static int get_uint16(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint16_t *v = pv; ++ *v = 
slirp_istream_read_u16(f); ++ return 0; ++} ++ ++static int put_uint16(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint16_t *v = pv; ++ slirp_ostream_write_u16(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint16 = { ++ .name = "uint16", ++ .get = get_uint16, ++ .put = put_uint16, ++}; ++ ++/* 32 bit unsigned int */ ++ ++static int get_uint32(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint32_t *v = pv; ++ *v = slirp_istream_read_u32(f); ++ return 0; ++} ++ ++static int put_uint32(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint32_t *v = pv; ++ slirp_ostream_write_u32(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint32 = { ++ .name = "uint32", ++ .get = get_uint32, ++ .put = put_uint32, ++}; ++ ++/* 16 bit int */ ++ ++static int get_int16(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int16_t *v = pv; ++ *v = slirp_istream_read_i16(f); ++ return 0; ++} ++ ++static int put_int16(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int16_t *v = pv; ++ slirp_ostream_write_i16(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_int16 = { ++ .name = "int16", ++ .get = get_int16, ++ .put = put_int16, ++}; ++ ++/* 32 bit int */ ++ ++static int get_int32(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int32_t *v = pv; ++ *v = slirp_istream_read_i32(f); ++ return 0; ++} ++ ++static int put_int32(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int32_t *v = pv; ++ slirp_ostream_write_i32(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_int32 = { ++ .name = "int32", ++ .get = get_int32, ++ .put = put_int32, ++}; ++ ++/* vmstate_info_tmp, see VMSTATE_WITH_TMP, the idea is that we allocate ++ * a temporary buffer and the pre_load/pre_save methods in the child vmsd ++ * copy stuff from the parent 
into the child and do calculations to fill ++ * in fields that don't really exist in the parent but need to be in the ++ * stream. ++ */ ++static int get_tmp(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int ret; ++ const VMStateDescription *vmsd = field->vmsd; ++ int version_id = field->version_id; ++ void *tmp = g_malloc(size); ++ ++ /* Writes the parent field which is at the start of the tmp */ ++ *(void **)tmp = pv; ++ ret = slirp_vmstate_load_state(f, vmsd, tmp, version_id); ++ g_free(tmp); ++ return ret; ++} ++ ++static int put_tmp(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ const VMStateDescription *vmsd = field->vmsd; ++ void *tmp = g_malloc(size); ++ int ret; ++ ++ /* Writes the parent field which is at the start of the tmp */ ++ *(void **)tmp = pv; ++ ret = slirp_vmstate_save_state(f, vmsd, tmp); ++ g_free(tmp); ++ ++ return ret; ++} ++ ++const VMStateInfo slirp_vmstate_info_tmp = { ++ .name = "tmp", ++ .get = get_tmp, ++ .put = put_tmp, ++}; ++ ++/* uint8_t buffers */ ++ ++static int get_buffer(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ slirp_istream_read(f, pv, size); ++ return 0; ++} ++ ++static int put_buffer(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ slirp_ostream_write(f, pv, size); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_buffer = { ++ .name = "buffer", ++ .get = get_buffer, ++ .put = put_buffer, ++}; ++ ++static int vmstate_n_elems(void *opaque, const VMStateField *field) ++{ ++ int n_elems = 1; ++ ++ if (field->flags & VMS_ARRAY) { ++ n_elems = field->num; ++ } else if (field->flags & VMS_VARRAY_INT32) { ++ n_elems = *(int32_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT32) { ++ n_elems = *(uint32_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT16) { ++ n_elems = *(uint16_t *)(opaque + field->num_offset); ++ } else if (field->flags & 
VMS_VARRAY_UINT8) { ++ n_elems = *(uint8_t *)(opaque + field->num_offset); ++ } ++ ++ if (field->flags & VMS_MULTIPLY_ELEMENTS) { ++ n_elems *= field->num; ++ } ++ ++ return n_elems; ++} ++ ++static int vmstate_size(void *opaque, const VMStateField *field) ++{ ++ int size = field->size; ++ ++ if (field->flags & VMS_VBUFFER) { ++ size = *(int32_t *)(opaque + field->size_offset); ++ if (field->flags & VMS_MULTIPLY) { ++ size *= field->size; ++ } ++ } ++ ++ return size; ++} ++ ++static int vmstate_save_state_v(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id) ++{ ++ int ret = 0; ++ const VMStateField *field = vmsd->fields; ++ ++ if (vmsd->pre_save) { ++ ret = vmsd->pre_save(opaque); ++ if (ret) { ++ g_warning("pre-save failed: %s", vmsd->name); ++ return ret; ++ } ++ } ++ ++ while (field->name) { ++ if ((field->field_exists && field->field_exists(opaque, version_id)) || ++ (!field->field_exists && field->version_id <= version_id)) { ++ void *first_elem = opaque + field->offset; ++ int i, n_elems = vmstate_n_elems(opaque, field); ++ int size = vmstate_size(opaque, field); ++ ++ if (field->flags & VMS_POINTER) { ++ first_elem = *(void **)first_elem; ++ assert(first_elem || !n_elems || !size); ++ } ++ for (i = 0; i < n_elems; i++) { ++ void *curr_elem = first_elem + size * i; ++ ++ if (field->flags & VMS_ARRAY_OF_POINTER) { ++ assert(curr_elem); ++ curr_elem = *(void **)curr_elem; ++ } ++ if (!curr_elem && size) { ++ /* if null pointer write placeholder and do not follow */ ++ assert(field->flags & VMS_ARRAY_OF_POINTER); ++ ret = slirp_vmstate_info_nullptr.put(f, curr_elem, size, ++ NULL); ++ } else if (field->flags & VMS_STRUCT) { ++ ret = slirp_vmstate_save_state(f, field->vmsd, curr_elem); ++ } else if (field->flags & VMS_VSTRUCT) { ++ ret = vmstate_save_state_v(f, field->vmsd, curr_elem, ++ field->struct_version_id); ++ } else { ++ ret = field->info->put(f, curr_elem, size, field); ++ } ++ if (ret) { ++ g_warning("Save of field %s/%s 
failed", vmsd->name, ++ field->name); ++ return ret; ++ } ++ } ++ } else { ++ if (field->flags & VMS_MUST_EXIST) { ++ g_warning("Output state validation failed: %s/%s", vmsd->name, ++ field->name); ++ assert(!(field->flags & VMS_MUST_EXIST)); ++ } ++ } ++ field++; ++ } ++ ++ return 0; ++} ++ ++int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque) ++{ ++ return vmstate_save_state_v(f, vmsd, opaque, vmsd->version_id); ++} ++ ++static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque) ++{ ++ if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) { ++ size_t size = vmstate_size(opaque, field); ++ size *= vmstate_n_elems(opaque, field); ++ if (size) { ++ *(void **)ptr = g_malloc(size); ++ } ++ } ++} ++ ++int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id) ++{ ++ VMStateField *field = vmsd->fields; ++ int ret = 0; ++ ++ if (version_id > vmsd->version_id) { ++ g_warning("%s: incoming version_id %d is too new " ++ "for local version_id %d", ++ vmsd->name, version_id, vmsd->version_id); ++ return -EINVAL; ++ } ++ if (vmsd->pre_load) { ++ int ret = vmsd->pre_load(opaque); ++ if (ret) { ++ return ret; ++ } ++ } ++ while (field->name) { ++ if ((field->field_exists && field->field_exists(opaque, version_id)) || ++ (!field->field_exists && field->version_id <= version_id)) { ++ void *first_elem = opaque + field->offset; ++ int i, n_elems = vmstate_n_elems(opaque, field); ++ int size = vmstate_size(opaque, field); ++ ++ vmstate_handle_alloc(first_elem, field, opaque); ++ if (field->flags & VMS_POINTER) { ++ first_elem = *(void **)first_elem; ++ assert(first_elem || !n_elems || !size); ++ } ++ for (i = 0; i < n_elems; i++) { ++ void *curr_elem = first_elem + size * i; ++ ++ if (field->flags & VMS_ARRAY_OF_POINTER) { ++ curr_elem = *(void **)curr_elem; ++ } ++ if (!curr_elem && size) { ++ /* if null pointer check placeholder and do not follow */ ++ 
assert(field->flags & VMS_ARRAY_OF_POINTER); ++ ret = slirp_vmstate_info_nullptr.get(f, curr_elem, size, ++ NULL); ++ } else if (field->flags & VMS_STRUCT) { ++ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, ++ field->vmsd->version_id); ++ } else if (field->flags & VMS_VSTRUCT) { ++ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, ++ field->struct_version_id); ++ } else { ++ ret = field->info->get(f, curr_elem, size, field); ++ } ++ if (ret < 0) { ++ g_warning("Failed to load %s:%s", vmsd->name, field->name); ++ return ret; ++ } ++ } ++ } else if (field->flags & VMS_MUST_EXIST) { ++ g_warning("Input validation failed: %s/%s", vmsd->name, ++ field->name); ++ return -1; ++ } ++ field++; ++ } ++ if (vmsd->post_load) { ++ ret = vmsd->post_load(opaque, version_id); ++ } ++ return ret; ++} +diff --git a/slirp/src/vmstate.h b/slirp/src/vmstate.h +new file mode 100644 +index 0000000000..94c6a4bc7b +--- /dev/null ++++ b/slirp/src/vmstate.h +@@ -0,0 +1,391 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * QEMU migration/snapshot declarations ++ * ++ * Copyright (c) 2009-2011 Red Hat, Inc. ++ * ++ * Original author: Juan Quintela ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#ifndef VMSTATE_H_ ++#define VMSTATE_H_ ++ ++#include ++#include ++#include ++#include "slirp.h" ++#include "stream.h" ++ ++#define stringify(s) tostring(s) ++#define tostring(s) #s ++ ++typedef struct VMStateInfo VMStateInfo; ++typedef struct VMStateDescription VMStateDescription; ++typedef struct VMStateField VMStateField; ++ ++int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque); ++int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id); ++ ++/* VMStateInfo allows customized migration of objects that don't fit in ++ * any category in VMStateFlags. Additional information is always passed ++ * into get and put in terms of field and vmdesc parameters. However ++ * these two parameters should only be used in cases when customized ++ * handling is needed, such as QTAILQ. For primitive data types such as ++ * integer, field and vmdesc parameters should be ignored inside get/put. 
++ */ ++struct VMStateInfo { ++ const char *name; ++ int (*get)(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field); ++ int (*put)(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field); ++}; ++ ++enum VMStateFlags { ++ /* Ignored */ ++ VMS_SINGLE = 0x001, ++ ++ /* The struct member at opaque + VMStateField.offset is a pointer ++ * to the actual field (e.g. struct a { uint8_t *b; ++ * }). Dereference the pointer before using it as basis for ++ * further pointer arithmetic (see e.g. VMS_ARRAY). Does not ++ * affect the meaning of VMStateField.num_offset or ++ * VMStateField.size_offset; see VMS_VARRAY* and VMS_VBUFFER for ++ * those. */ ++ VMS_POINTER = 0x002, ++ ++ /* The field is an array of fixed size. VMStateField.num contains ++ * the number of entries in the array. The size of each entry is ++ * given by VMStateField.size and / or opaque + ++ * VMStateField.size_offset; see VMS_VBUFFER and ++ * VMS_MULTIPLY. Each array entry will be processed individually ++ * (VMStateField.info.get()/put() if VMS_STRUCT is not set, ++ * recursion into VMStateField.vmsd if VMS_STRUCT is set). May not ++ * be combined with VMS_VARRAY*. */ ++ VMS_ARRAY = 0x004, ++ ++ /* The field is itself a struct, containing one or more ++ * fields. Recurse into VMStateField.vmsd. Most useful in ++ * combination with VMS_ARRAY / VMS_VARRAY*, recursing into each ++ * array entry. */ ++ VMS_STRUCT = 0x008, ++ ++ /* The field is an array of variable size. The int32_t at opaque + ++ * VMStateField.num_offset contains the number of entries in the ++ * array. See the VMS_ARRAY description regarding array handling ++ * in general. May not be combined with VMS_ARRAY or any other ++ * VMS_VARRAY*. */ ++ VMS_VARRAY_INT32 = 0x010, ++ ++ /* Ignored */ ++ VMS_BUFFER = 0x020, ++ ++ /* The field is a (fixed-size or variable-size) array of pointers ++ * (e.g. struct a { uint8_t *b[]; }). Dereference each array entry ++ * before using it. 
Note: Does not imply any one of VMS_ARRAY / ++ * VMS_VARRAY*; these need to be set explicitly. */ ++ VMS_ARRAY_OF_POINTER = 0x040, ++ ++ /* The field is an array of variable size. The uint16_t at opaque ++ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT16 = 0x080, ++ ++ /* The size of the individual entries (a single array entry if ++ * VMS_ARRAY or any of VMS_VARRAY* are set, or the field itself if ++ * neither is set) is variable (i.e. not known at compile-time), ++ * but the same for all entries. Use the int32_t at opaque + ++ * VMStateField.size_offset (subject to VMS_MULTIPLY) to determine ++ * the size of each (and every) entry. */ ++ VMS_VBUFFER = 0x100, ++ ++ /* Multiply the entry size given by the int32_t at opaque + ++ * VMStateField.size_offset (see VMS_VBUFFER description) with ++ * VMStateField.size to determine the number of bytes to be ++ * allocated. Only valid in combination with VMS_VBUFFER. */ ++ VMS_MULTIPLY = 0x200, ++ ++ /* The field is an array of variable size. The uint8_t at opaque + ++ * VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT8 = 0x400, ++ ++ /* The field is an array of variable size. The uint32_t at opaque ++ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT32 = 0x800, ++ ++ /* Fail loading the serialised VM state if this field is missing ++ * from the input. 
*/ ++ VMS_MUST_EXIST = 0x1000, ++ ++ /* When loading serialised VM state, allocate memory for the ++ * (entire) field. Only valid in combination with ++ * VMS_POINTER. Note: Not all combinations with other flags are ++ * currently supported, e.g. VMS_ALLOC|VMS_ARRAY_OF_POINTER won't ++ * cause the individual entries to be allocated. */ ++ VMS_ALLOC = 0x2000, ++ ++ /* Multiply the number of entries given by the integer at opaque + ++ * VMStateField.num_offset (see VMS_VARRAY*) with VMStateField.num ++ * to determine the number of entries in the array. Only valid in ++ * combination with one of VMS_VARRAY*. */ ++ VMS_MULTIPLY_ELEMENTS = 0x4000, ++ ++ /* A structure field that is like VMS_STRUCT, but uses ++ * VMStateField.struct_version_id to tell which version of the ++ * structure we are referencing to use. */ ++ VMS_VSTRUCT = 0x8000, ++}; ++ ++struct VMStateField { ++ const char *name; ++ size_t offset; ++ size_t size; ++ size_t start; ++ int num; ++ size_t num_offset; ++ size_t size_offset; ++ const VMStateInfo *info; ++ enum VMStateFlags flags; ++ const VMStateDescription *vmsd; ++ int version_id; ++ int struct_version_id; ++ bool (*field_exists)(void *opaque, int version_id); ++}; ++ ++struct VMStateDescription { ++ const char *name; ++ int version_id; ++ int (*pre_load)(void *opaque); ++ int (*post_load)(void *opaque, int version_id); ++ int (*pre_save)(void *opaque); ++ VMStateField *fields; ++}; ++ ++ ++extern const VMStateInfo slirp_vmstate_info_int16; ++extern const VMStateInfo slirp_vmstate_info_int32; ++extern const VMStateInfo slirp_vmstate_info_uint8; ++extern const VMStateInfo slirp_vmstate_info_uint16; ++extern const VMStateInfo slirp_vmstate_info_uint32; ++ ++/** Put this in the stream when migrating a null pointer.*/ ++#define VMS_NULLPTR_MARKER (0x30U) /* '0' */ ++extern const VMStateInfo slirp_vmstate_info_nullptr; ++ ++extern const VMStateInfo slirp_vmstate_info_buffer; ++extern const VMStateInfo slirp_vmstate_info_tmp; ++ ++#define 
type_check_array(t1, t2, n) ((t1(*)[n])0 - (t2 *)0) ++#define type_check_pointer(t1, t2) ((t1 **)0 - (t2 *)0) ++#define typeof_field(type, field) typeof(((type *)0)->field) ++#define type_check(t1, t2) ((t1 *)0 - (t2 *)0) ++ ++#define vmstate_offset_value(_state, _field, _type) \ ++ (offsetof(_state, _field) + type_check(_type, typeof_field(_state, _field))) ++ ++#define vmstate_offset_pointer(_state, _field, _type) \ ++ (offsetof(_state, _field) + \ ++ type_check_pointer(_type, typeof_field(_state, _field))) ++ ++#define vmstate_offset_array(_state, _field, _type, _num) \ ++ (offsetof(_state, _field) + \ ++ type_check_array(_type, typeof_field(_state, _field), _num)) ++ ++#define vmstate_offset_buffer(_state, _field) \ ++ vmstate_offset_array(_state, _field, uint8_t, \ ++ sizeof(typeof_field(_state, _field))) ++ ++/* In the macros below, if there is a _version, that means the macro's ++ * field will be processed only if the version being received is >= ++ * the _version specified. In general, if you add a new field, you ++ * would increment the structure's version and put that version ++ * number into the new field so it would only be processed with the ++ * new version. ++ * ++ * In particular, for VMSTATE_STRUCT() and friends the _version does ++ * *NOT* pick the version of the sub-structure. It works just as ++ * specified above. The version of the top-level structure received ++ * is passed down to all sub-structures. This means that the ++ * sub-structures must have version that are compatible with all the ++ * structures that use them. ++ * ++ * If you want to specify the version of the sub-structure, use ++ * VMSTATE_VSTRUCT(), which allows the specific sub-structure version ++ * to be directly specified. 
++ */ ++ ++#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .size = sizeof(_type), .info = &(_info), \ ++ .flags = VMS_SINGLE, \ ++ .offset = vmstate_offset_value(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), .num = (_num), \ ++ .info = &(_info), .size = sizeof(_type), .flags = VMS_ARRAY, \ ++ .offset = vmstate_offset_array(_state, _field, _type, _num), \ ++ } ++ ++#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .vmsd = &(_vmsd), .size = sizeof(_type), \ ++ .flags = VMS_STRUCT, \ ++ .offset = vmstate_offset_value(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_STRUCT_POINTER_V(_field, _state, _version, _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .vmsd = &(_vmsd), .size = sizeof(_type *), \ ++ .flags = VMS_STRUCT | VMS_POINTER, \ ++ .offset = vmstate_offset_pointer(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, \ ++ _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .num = (_num), .field_exists = (_test), \ ++ .version_id = (_version), .vmsd = &(_vmsd), .size = sizeof(_type), \ ++ .flags = VMS_STRUCT | VMS_ARRAY, \ ++ .offset = vmstate_offset_array(_state, _field, _type, _num), \ ++ } ++ ++#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .size = (_size - _start), \ ++ .info = &slirp_vmstate_info_buffer, .flags = VMS_BUFFER, \ ++ .offset = vmstate_offset_buffer(_state, _field) + _start, \ ++ } ++ ++#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, 
_field_size) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), \ ++ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t), \ ++ .info = &slirp_vmstate_info_buffer, \ ++ .flags = VMS_VBUFFER | VMS_POINTER, \ ++ .offset = offsetof(_state, _field), \ ++ } ++ ++#define QEMU_BUILD_BUG_ON_STRUCT(x) \ ++ struct { \ ++ int : (x) ? -1 : 1; \ ++ } ++ ++#define QEMU_BUILD_BUG_ON_ZERO(x) \ ++ (sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)) - sizeof(QEMU_BUILD_BUG_ON_STRUCT(x))) ++ ++/* Allocate a temporary of type 'tmp_type', set tmp->parent to _state ++ * and execute the vmsd on the temporary. Note that we're working with ++ * the whole of _state here, not a field within it. ++ * We compile time check that: ++ * That _tmp_type contains a 'parent' member that's a pointer to the ++ * '_state' type ++ * That the pointer is right at the start of _tmp_type. ++ */ ++#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) \ ++ { \ ++ .name = "tmp", \ ++ .size = sizeof(_tmp_type) + \ ++ QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \ ++ type_check_pointer(_state, typeof_field(_tmp_type, parent)), \ ++ .vmsd = &(_vmsd), .info = &slirp_vmstate_info_tmp, \ ++ } ++ ++#define VMSTATE_SINGLE(_field, _state, _version, _info, _type) \ ++ VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type) ++ ++#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type) \ ++ VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type) ++ ++#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type) \ ++ VMSTATE_STRUCT_POINTER_V(_field, _state, 0, _vmsd, _type) ++ ++#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \ ++ VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version, _vmsd, \ ++ _type) ++ ++#define VMSTATE_INT16_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int16, int16_t) ++#define VMSTATE_INT32_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int32, 
int32_t) ++ ++#define VMSTATE_UINT8_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint8, uint8_t) ++#define VMSTATE_UINT16_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint16, uint16_t) ++#define VMSTATE_UINT32_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint32, uint32_t) ++ ++#define VMSTATE_INT16(_f, _s) VMSTATE_INT16_V(_f, _s, 0) ++#define VMSTATE_INT32(_f, _s) VMSTATE_INT32_V(_f, _s, 0) ++ ++#define VMSTATE_UINT8(_f, _s) VMSTATE_UINT8_V(_f, _s, 0) ++#define VMSTATE_UINT16(_f, _s) VMSTATE_UINT16_V(_f, _s, 0) ++#define VMSTATE_UINT32(_f, _s) VMSTATE_UINT32_V(_f, _s, 0) ++ ++#define VMSTATE_UINT16_TEST(_f, _s, _t) \ ++ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint16, uint16_t) ++ ++#define VMSTATE_UINT32_TEST(_f, _s, _t) \ ++ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint32, uint32_t) ++ ++#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \ ++ VMSTATE_ARRAY(_f, _s, _n, _v, slirp_vmstate_info_int16, int16_t) ++ ++#define VMSTATE_INT16_ARRAY(_f, _s, _n) VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0) ++ ++#define VMSTATE_BUFFER_V(_f, _s, _v) \ ++ VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f))) ++ ++#define VMSTATE_BUFFER(_f, _s) VMSTATE_BUFFER_V(_f, _s, 0) ++ ++#define VMSTATE_END_OF_LIST() \ ++ { \ ++ } ++ ++#endif +-- +2.27.0 + diff --git a/SOURCES/0005-Initial-redhat-build.patch b/SOURCES/0005-Initial-redhat-build.patch index cde66a1..ddae98d 100644 --- a/SOURCES/0005-Initial-redhat-build.patch +++ b/SOURCES/0005-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 4df157781801c50224373be57fa3c8c3741c0535 Mon Sep 17 00:00:00 2001 +From 19ce5ff93ddd6b8a998348f2a5f59f603c5e11b7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -10,10 +10,8 @@ several issues are fixed in QEMU tree: - As we use qemu-kvm as name in all places, this is updated to be consistent - Man page renamed from qemu to qemu-kvm - man page 
is installed using make install so we have to fix it in qemu tree - - Use "/share/qemu-kvm" as SHARE_SUFFIX - - We reconfigured our share to qemu-kvm to be consistent with used name -This rebase includes changes up to qemu-kvm-4.1.0-18.el8 +This rebase includes changes up to qemu-kvm-6.1.0-5.el9 Rebase notes (3.1.0): - added new configure options @@ -49,6 +47,86 @@ Rebase notes (4.2.0): - Removed spapr-rtas.bin (upstream) - Require newer SLOF (20191022) +Rebase notes (5.1.0): +- Use python3 for virtio_seg_max_adjust.py test +- Removed qemu-trace-stap shebang from spec file +- Added virtiofsd.1 (upstream) +- Use out-of-tree build +- New documentation structure (upstream) +- Update local build +- Removing installed qemu-storage-daemon (added upstream) +- Removing opensbi-riscv32-sifive_u-fw_jump.bin (added upstream) +- Disable iotests (moved from Enable make check commit) +- Added missing configure options +- Reorder configure options +- qemu-pr-helper moved to /usr/libexec/ (upstream) +- Added submodules for usb-redir, smartcard-reader and qxl display (upstream) +- Added setting rc version in Makefile for build +- removed --disable-vxhs configure option (removed upstream) +- bumped required libusbx-devel version to 1.0.23 +- bumped libfdt version to 1.6.0 + +Rebase notes (5.2.0 rc0): +- Move libfdt dependency to qemu-kvm-core +- Move manpage rename from Makefile to spec file +- rename with-confsuffix configure option to with-suffix (upstream) +- Bump libusbx Requires version to 1.0.234 +- Manual copy of keymaps in spec file (BZ 1875217) +- Removed /usr/share/qemu-kvm/npcm7xx_bootrom.bin, considering it + unpackaged for now. +- Removed /usr/share/qemu-kvm/qboot.rom, considering unpackaged. 
+- Added build dependency for meson and ninja-build +- hw/s390/s390-pci-vfio.c hack - set NULL for g_autofree variables +- Removed Chanelog (upstream) +- Fix in directory used for docs (upstream add %name so we do not pass it in configure) +- Package various .so as part of qemu-kvm-core package. + +Rebase notes (5.2.0 rc2): +- Added fix for dtrace build on RHEL 8.4.0 + +Rebase notes (5.2.0 rc3): +- Added man page for qemu-pr-helper +- Added new configure options +- Update qemu-kiwi patches to v4 + +Rebase notes (6.0.0): +- update tracetool usage in spec file +- remove qemu-storage-daemon-qmp-ref man page +- remove qemu-storage-daemon man page +- Added devel documentation +- do not package virtfs-proxy-helper files +- Use --with-git-submodules instead of --(enable|disable)-git-update +- Minor build fixes for sending upstream +- g_autofree initialization fixed upstream +- Updated rc information usage +- do not package package hw-s390x-virtio-gpu-ccw.so +- Disable new switch options + +Rebase notes (6.1.0): +- Fix warning issue in block.c +- Download tarball from dist-git cache +- Removed sheepdog driver +- Added new display modules: + - hw-display-virtio-gpu-gl.so + - hw-display-virtio-gpu-pci-gl.so + - hw-display-virtio-vga-gl.so +- sasl fix moved from ui/vnc.c to ui/vnc-auth-sasl.c +- Added accel-qtest-%{kvm_target} and accel-tcg-%{kvm_target} +- Added about docs +- Use -q option for setup +- Added hw-usb-host.so +- Disable new options (bpf, nvmm, slirp-smbd) + +Rebase notes (6.2.0): +- Using internal meson +- removed --disable-jemalloc and --disable-tcmalloc configure options +- added audio-oss.so +- added fdt requirement for x86_64 +- tests/acceptance renamed to tests/avocado +- added multiboot_dma.bin +- Removed conflict relics +- Updated configure options + Merged patches (3.1.0): - 01f0c9f RHEL8: Add disable configure options to qemu spec file - Spec file cleanups @@ -71,97 +149,203 @@ Merged patches (4.2.0): - 69e1fb2 enable virgla - d4f6115 enable virgl, for 
real this time ... -Signed-off-by: Danilo C. L. de Paula +Merged patches (5.1.0): +- 5edf6bd Add support for rh-brew-module +- f77d52d redhat: ship virtiofsd vhost-user device backend +- 63f12d4 redhat: Always use module build target for rh-brew (modified) +- 9b1e140 redhat: updating the modular target +- 44b8bd0 spec: Fix python shenigans for tests + +Merged patches (5.2.0 rc0): +- 9238ce7 Add support for simpletrace +- 5797cff Remove explicit glusterfs-api dependency +- fd62478 disable virgl +- 0205018 redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ +- 3645097 redhat: Make all generated so files executable (not only block-*) + +Merged patches (5.2.0 rc2): +- pjw 99657 redhat: introduces disable_everything macro into the configure call +- pjw 99659 redhat: scripts/extract_build_cmd.py - Avoid listing empty lines +- pjw 99658 redhat: Fixing rh-local build +- pjw 99660 redhat: Add qemu-kiwi subpackage +- d2e59ce redhat: add (un/pre)install systemd hooks for qemu-ga + +Merged patches (5.2.0 rc3): +- pjw 99887 - redhat: allow Makefile rh-prep builddep to fail +- pjw 99885 - redhat: adding rh-rpm target + +Merged patches (6.0.0): +- 5ab9954a3b spec: find system python via meson +- cd0f7db11f build-system: use b_staticpic=false +- 80d2dec42c udev-kvm-check: remove the "exceeded subscription limit" message +- 38959d51c0 redhat: Allow make to inherit params from parent make for rh-local +- 1e0cfe458f redhat: moving all documentation files to qemu-kvm-docs +- d7a594d02b redhat: makes qemu respect system's crypto profile +- e2bbf1572b spec: Package qemu-storage-daemon +- 92f10993ba spec: ui-spice sub-package +- 8931e46069 spec: ui-opengl sub-package + +Merged patches (6.1.0): +- 7bb57541b3 redhat: Install the s390-netboot.img that we've built +- b4a8531f41 redhat: Fix "unversioned Obsoletes" warning +- 141a1693c7 redhat: Move qemu-kvm-docs dependency to qemu-kvm +- d75f59c6f9 redhat: introducting qemu-kvm-hw-usbredir +- a934d8bf44 redhat: use the standard 
vhost-user JSON path + +Merged patches (6.2.0): +- 4f3f04bbb6 spec: Remove qemu-kiwi build --- - .gitignore | 1 + - Makefile | 3 +- - configure | 1 + - os-posix.c | 2 +- - redhat/Makefile | 82 + - redhat/Makefile.common | 51 + - redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 2434 +++++++++++++++++++++++++++++ - redhat/scripts/process-patches.sh | 7 +- - tests/Makefile.include | 2 +- - ui/vnc.c | 2 +- - 11 files changed, 2615 insertions(+), 9 deletions(-) + .gitignore | 1 + + .gitlab-ci.yml | 24 - + .gitlab/issue_templates/bug.md | 64 - + .gitlab/issue_templates/feature_request.md | 32 - + README.systemtap | 43 + + meson.build | 4 +- + redhat/Makefile | 90 + + redhat/Makefile.common | 48 + + redhat/README.tests | 39 + + redhat/qemu-kvm.spec.template | 3896 ++++++++++++++++++++ + redhat/scripts/extract_build_cmd.py | 5 +- + redhat/scripts/process-patches.sh | 20 +- + redhat/scripts/tarball_checksum.sh | 2 +- + redhat/udev-kvm-check.c | 19 +- + scripts/qemu-guest-agent/fsfreeze-hook | 2 +- + scripts/systemtap/conf.d/qemu_kvm.conf | 4 + + scripts/systemtap/script.d/qemu_kvm.stp | 1 + + tests/check-block.sh | 2 + + ui/vnc-auth-sasl.c | 2 +- + 19 files changed, 4142 insertions(+), 156 deletions(-) + delete mode 100644 .gitlab-ci.yml + delete mode 100644 .gitlab/issue_templates/bug.md + delete mode 100644 .gitlab/issue_templates/feature_request.md + create mode 100644 README.systemtap create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common create mode 100644 redhat/README.tests create mode 100644 redhat/qemu-kvm.spec.template + create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf + create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp -diff --git a/Makefile b/Makefile -index b437a346d7..086727dbb9 100644 ---- a/Makefile -+++ b/Makefile -@@ -512,6 +512,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM - CAP_CFLAGS += -DCAPSTONE_HAS_ARM64 - CAP_CFLAGS += -DCAPSTONE_HAS_POWERPC - CAP_CFLAGS += -DCAPSTONE_HAS_X86 -+CAP_CFLAGS += 
-Wp,-D_GLIBCXX_ASSERTIONS +diff --git a/README.systemtap b/README.systemtap +new file mode 100644 +index 0000000000..ad913fc990 +--- /dev/null ++++ b/README.systemtap +@@ -0,0 +1,43 @@ ++QEMU tracing using systemtap-initscript ++--------------------------------------- ++ ++You can capture QEMU trace data all the time using systemtap-initscript. This ++uses SystemTap's flight recorder mode to trace all running guests to a ++fixed-size buffer on the host. Old trace entries are overwritten by new ++entries when the buffer size wraps. ++ ++1. Install the systemtap-initscript package: ++ # yum install systemtap-initscript ++ ++2. Install the systemtap scripts and the conf file: ++ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/ ++ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/ ++ ++The set of trace events to enable is given in qemu_kvm.stp. This SystemTap ++script can be customized to add or remove trace events provided in ++/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp. ++ ++SystemTap customizations can be made to qemu_kvm.conf to control the flight ++recorder buffer size and whether to store traces in memory only or disk too. ++See stap(1) for option documentation. ++ ++3. Start the systemtap service. ++ # service systemtap start qemu_kvm ++ ++4. Make the service start at boot time. ++ # chkconfig systemtap on ++ ++5. Confirm that the service works. ++ # service systemtap status qemu_kvm ++ qemu_kvm is running... ++ ++When you want to inspect the trace buffer, perform the following steps: ++ ++1. Dump the trace buffer. ++ # staprun -A qemu_kvm >/tmp/trace.log ++ ++2. Start the systemtap service because the preceding step stops the service. ++ # service systemtap start qemu_kvm ++ ++3. Translate the trace record to readable format. 
++ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log +diff --git a/meson.build b/meson.build +index 96de1a6ef9..5f6ba86dbb 100644 +--- a/meson.build ++++ b/meson.build +@@ -2108,7 +2108,9 @@ if capstone_opt == 'internal' + # Include all configuration defines via a header file, which will wind up + # as a dependency on the object file, and thus changes here will result + # in a rebuild. +- '-include', 'capstone-defs.h' ++ '-include', 'capstone-defs.h', ++ ++ '-Wp,-D_GLIBCXX_ASSERTIONS', + ] + + libcapstone = static_library('capstone', +diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook +index 13aafd4845..e9b84ec028 100755 +--- a/scripts/qemu-guest-agent/fsfreeze-hook ++++ b/scripts/qemu-guest-agent/fsfreeze-hook +@@ -8,7 +8,7 @@ + # request, it is issued with "thaw" argument after filesystem is thawed. + + LOGFILE=/var/log/qga-fsfreeze-hook.log +-FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d ++FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d - .PHONY: capstone/all - capstone/all: .git-submodule-status -@@ -826,7 +827,7 @@ install-doc: $(DOCS) install-sphinxdocs - $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" - ifdef CONFIG_POSIX - $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" -- $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1" -+ $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1/qemu-kvm.1" - $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7" - $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" - $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" -diff --git a/configure b/configure -index 6099be1d84..16564f8ccc 100755 ---- a/configure -+++ b/configure -@@ -2424,6 +2424,7 @@ if test "$seccomp" != "no" ; then - seccomp="no" + # Check whether file $1 is a backup or rpm-generated file and should be ignored + is_ignored_file() { +diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf 
+new file mode 100644 +index 0000000000..372d8160a4 +--- /dev/null ++++ b/scripts/systemtap/conf.d/qemu_kvm.conf +@@ -0,0 +1,4 @@ ++# Force load uprobes (see BZ#1118352) ++stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true ++ ++qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes +diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp +new file mode 100644 +index 0000000000..c04abf9449 +--- /dev/null ++++ b/scripts/systemtap/script.d/qemu_kvm.stp +@@ -0,0 +1 @@ ++probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} +diff --git a/tests/check-block.sh b/tests/check-block.sh +index f86cb863de..6d38340d49 100755 +--- a/tests/check-block.sh ++++ b/tests/check-block.sh +@@ -69,6 +69,8 @@ else fi fi + ++exit 0 + - ########################################## - # xen probe + cd tests/qemu-iotests -diff --git a/os-posix.c b/os-posix.c -index 86cffd2c7d..1c9f86768d 100644 ---- a/os-posix.c -+++ b/os-posix.c -@@ -83,7 +83,7 @@ void os_setup_signal_handling(void) - /* Find a likely location for support files using the location of the binary. - For installed binaries this will be "$bindir/../share/qemu". When - running from the build tree this will be "$bindir/../pc-bios". 
*/ --#define SHARE_SUFFIX "/share/qemu" -+#define SHARE_SUFFIX "/share/qemu-kvm" - #define BUILD_SUFFIX "/pc-bios" - char *os_find_datadir(void) - { -diff --git a/tests/Makefile.include b/tests/Makefile.include -index 8566f5f119..b483790cf3 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -1194,7 +1194,7 @@ check-acceptance: check-venv $(TESTS_RESULTS_DIR) - check-qapi-schema: check-tests/qapi-schema/frontend check-tests/qapi-schema/doc-good.texi - check-qtest: $(patsubst %,check-qtest-%, $(QTEST_TARGETS)) - check-block: $(patsubst %,check-%, $(check-block-y)) --check: check-block check-qapi-schema check-unit check-softfloat check-qtest check-decodetree -+check: check-qapi-schema check-unit check-softfloat check-qtest check-decodetree - check-clean: - rm -rf $(check-unit-y) tests/*.o $(QEMU_IOTESTS_HELPERS-y) - rm -rf $(sort $(foreach target,$(SYSEMU_TARGET_LIST), $(check-qtest-$(target)-y)) $(check-qtest-generic-y)) -diff --git a/ui/vnc.c b/ui/vnc.c -index 87b8045afe..ecf6276f5b 100644 ---- a/ui/vnc.c -+++ b/ui/vnc.c -@@ -3987,7 +3987,7 @@ void vnc_display_open(const char *id, Error **errp) + # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests +diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c +index 47fdae5b21..2a950caa2a 100644 +--- a/ui/vnc-auth-sasl.c ++++ b/ui/vnc-auth-sasl.c +@@ -42,7 +42,7 @@ - #ifdef CONFIG_VNC_SASL - if (sasl) { -- int saslErr = sasl_server_init(NULL, "qemu"); -+ int saslErr = sasl_server_init(NULL, "qemu-kvm"); + bool vnc_sasl_server_init(Error **errp) + { +- int saslErr = sasl_server_init(NULL, "qemu"); ++ int saslErr = sasl_server_init(NULL, "qemu-kvm"); - if (saslErr != SASL_OK) { - error_setg(errp, "Failed to initialize SASL auth: %s", + if (saslErr != SASL_OK) { + error_setg(errp, "Failed to initialize SASL auth: %s", -- -2.21.0 +2.27.0 diff --git a/SOURCES/0006-Enable-disable-devices-for-RHEL.patch b/SOURCES/0006-Enable-disable-devices-for-RHEL.patch index b14bb1b..a3fa5d1 100644 --- 
a/SOURCES/0006-Enable-disable-devices-for-RHEL.patch +++ b/SOURCES/0006-Enable-disable-devices-for-RHEL.patch @@ -1,6 +1,6 @@ -From 67511676246cce57becbd2dcf5abccf08d9ef737 Mon Sep 17 00:00:00 2001 +From 3d5a82d172345d17e300672909835262ff9dc917 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Mon, 11 Jan 2016 11:53:33 +0100 +Date: Wed, 2 Sep 2020 09:11:07 +0200 Subject: Enable/disable devices for RHEL This commit adds all changes related to changes in supported devices. @@ -39,6 +39,50 @@ Rebase notes (4.2.0-rc3): - Disabled ccid-card-emulated (patch 92566) - Disabled vfio-pci-igd-lpc-bridge (patch 92565) +Rebase notes (5.1.0): +- added CONFIG_PCI_EXPRESS on ppc64 (due to upstream dependency) +- Added CONFIG_NVDIMM +- updated cortex-15 disabling to upstream code +- Add CONFIG_ACPI_APEI for aarch64 +- removed obsolete hw/bt/Makefile.objs chunk +- removed unnecessary changes in target/i386/cpu.c + +Rebase notes (5.2.0 rc0): +- Added CONFIG_USB_XHCI_PCI on aarch64 ppc64 and x86_64 +- remove vl.c hack for no hpet +- Enable CONFIG_PTIMER for aarch64 +- Do not package hw-display-virtio-gpu.so on s390x + +Rebase notes (5.2.0 rc1): +- Added CONFIG_ARM_GIC for aarch64 (required for build) + +Rebase notes (weekly-210113): +- Removed XICS_KVM, XICS_SPAPR, XIVE_KVM and XIVE_SPAPR config (removed upstream) + +Rebase notes (weekly-210120): +- Add CONFIG_ARM_COMPATIBLE_SEMIHOSTING option + +Rebase notes (weekly-210203): +- Rename CONFIG_PVPANIC to CONFIG_PVPANIC_ISA + +Rebase notes (weekly-210317): +- Add new USB_STORAGE_CORE and USB_STORAGE_CLASSIC config for ppc64 and x86_64 +- Update disabling TCG cpus for AArch64 + +Rebase notes (weekly-210519): +- Do not use CONFIG_SPICE and CONFIG_OPENGL in default configs + +Rebase notes (weekly-210623): +- Add CONFIG_TPM for archs with used TPM functionality + +Rebase notes (weekly-210714): +- default_configs moved to configs + +Rebase notes (6.1.0 rc2): +- Use --with-device-ARCH configure option to use redhat config files + 
+Rebase notes (6.2.0 rc3): +- Do not remove -no-hpet documentation Merged patches (qemu 3.1.0): - d51e082 Re-enable CONFIG_HYPERV_TESTDEV - 4b889f3 Declare cirrus-vga as deprecated @@ -57,85 +101,71 @@ Merged patches (4.1.0): Merged patches (4.2.0): - f7587dd RHEL: disable hostmem-memfd -Signed-off-by: Danilo C. L. de Paula +Merged patches (5.1.0): +- 4543a3c i386: Remove cpu64-rhel6 CPU model +- 96533 aarch64: Remove tcg cpu types (pjw commit) +- 559d589 Revert "RHEL: disable hostmem-memfd" +- 441128e enable ramfb + +Merged patches (5.2.0 rc0): +- f70eb50 RHEL-only: Enable vTPM for POWER in downstream configs +- 69d8ae7 redhat: fix 5.0 rebase missing ISA TPM TIS +- 8310f89 RHEL-only: Enable vTPM for ARM in downstream configs +- 4a8ccfd Disable TPM passthrough backend on ARM + +Merged patches (6.0.0): +- ff817df9e3 config: enable VFIO_CCW +- 70d3924521 redhat: Add some devices for exporting upstream machine types + - without machine type chunks +- efac91b2b4 default-configs: Enable vhost-user-blk + +Merged patches (weekly-210630): +- 59a178acff disable CONFIG_USB_STORAGE_BOT + +Merged patches (6.1.0 rc2): +- 86f0025f16 aarch64: Add USB storage devices --- - Makefile.objs | 4 +- - backends/Makefile.objs | 3 +- - default-configs/aarch64-rh-devices.mak | 20 +++++ - default-configs/aarch64-softmmu.mak | 10 ++- - default-configs/ppc64-rh-devices.mak | 32 ++++++++ - default-configs/ppc64-softmmu.mak | 8 +- - default-configs/rh-virtio.mak | 10 +++ - default-configs/s390x-rh-devices.mak | 15 ++++ - default-configs/s390x-softmmu.mak | 4 +- - default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++ - default-configs/x86_64-softmmu.mak | 4 +- - hw/acpi/ich9.c | 4 +- - hw/arm/Makefile.objs | 2 +- - hw/block/fdc.c | 10 +++ - hw/bt/Makefile.objs | 4 +- - hw/cpu/Makefile.objs | 5 +- - hw/display/Makefile.objs | 5 +- - hw/display/cirrus_vga.c | 3 + - hw/ide/piix.c | 5 +- - hw/input/pckbd.c | 2 + - hw/net/e1000.c | 2 + - hw/pci-host/i440fx.c | 4 + - 
hw/ppc/spapr_cpu_core.c | 2 + - hw/usb/Makefile.objs | 4 +- - hw/vfio/pci-quirks.c | 9 +++ - hw/vfio/pci.c | 5 ++ - qemu-options.hx | 7 +- - redhat/qemu-kvm.spec.template | 5 +- - target/arm/cpu.c | 4 +- - target/i386/cpu.c | 35 +++++++-- - target/ppc/cpu-models.c | 10 +++ - target/s390x/cpu_models.c | 3 + - target/s390x/kvm.c | 8 ++ - util/memfd.c | 2 +- - vl.c | 8 +- - 35 files changed, 317 insertions(+), 41 deletions(-) - create mode 100644 default-configs/aarch64-rh-devices.mak - create mode 100644 default-configs/ppc64-rh-devices.mak - create mode 100644 default-configs/rh-virtio.mak - create mode 100644 default-configs/s390x-rh-devices.mak - create mode 100644 default-configs/x86_64-rh-devices.mak + .../aarch64-softmmu/aarch64-rh-devices.mak | 31 ++++++ + .../ppc64-softmmu/ppc64-rh-devices.mak | 36 ++++++ + configs/devices/rh-virtio.mak | 10 ++ + .../s390x-softmmu/s390x-rh-devices.mak | 16 +++ + .../x86_64-softmmu/x86_64-rh-devices.mak | 104 ++++++++++++++++++ + .../x86_64-upstream-devices.mak | 4 + + hw/acpi/ich9.c | 4 +- + hw/arm/meson.build | 2 +- + hw/block/fdc.c | 10 ++ + hw/char/parallel.c | 9 ++ + hw/cpu/meson.build | 5 +- + hw/display/cirrus_vga.c | 3 + + hw/ide/piix.c | 5 +- + hw/input/pckbd.c | 2 + + hw/net/e1000.c | 2 + + hw/ppc/spapr_cpu_core.c | 2 + + hw/timer/hpet.c | 8 ++ + hw/usb/meson.build | 2 +- + redhat/qemu-kvm.spec.template | 9 +- + target/arm/cpu_tcg.c | 10 ++ + target/ppc/cpu-models.c | 10 ++ + target/s390x/cpu_models_sysemu.c | 3 + + target/s390x/kvm/kvm.c | 8 ++ + 23 files changed, 286 insertions(+), 9 deletions(-) + create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak + create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak + create mode 100644 configs/devices/rh-virtio.mak + create mode 100644 configs/devices/s390x-softmmu/s390x-rh-devices.mak + create mode 100644 configs/devices/x86_64-softmmu/x86_64-rh-devices.mak + create mode 100644 configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak 
-diff --git a/Makefile.objs b/Makefile.objs -index 11ba1a36bd..fcf63e1096 100644 ---- a/Makefile.objs -+++ b/Makefile.objs -@@ -65,8 +65,8 @@ common-obj-y += replay/ - - common-obj-y += ui/ - common-obj-m += ui/ --common-obj-y += bt-host.o bt-vhci.o --bt-host.o-cflags := $(BLUEZ_CFLAGS) -+#common-obj-y += bt-host.o bt-vhci.o -+#bt-host.o-cflags := $(BLUEZ_CFLAGS) - - common-obj-y += dma-helpers.o - common-obj-y += vl.o -diff --git a/backends/Makefile.objs b/backends/Makefile.objs -index f0691116e8..f328d404bf 100644 ---- a/backends/Makefile.objs -+++ b/backends/Makefile.objs -@@ -16,4 +16,5 @@ endif - - common-obj-$(call land,$(CONFIG_VHOST_USER),$(CONFIG_VIRTIO)) += vhost-user.o - --common-obj-$(CONFIG_LINUX) += hostmem-memfd.o -+# RHEL: disable memfd -+# common-obj-$(CONFIG_LINUX) += hostmem-memfd.o -diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..a1ed641174 +index 0000000000..0d4f9e6e4b --- /dev/null -+++ b/default-configs/aarch64-rh-devices.mak -@@ -0,0 +1,20 @@ -+include rh-virtio.mak ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -0,0 +1,31 @@ ++include ../rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y ++CONFIG_ARM_GIC=y +CONFIG_ARM_SMMUV3=y +CONFIG_ARM_V7M=y +CONFIG_ARM_VIRT=y @@ -148,43 +178,36 @@ index 0000000000..a1ed641174 +CONFIG_SEMIHOSTING=y +CONFIG_USB=y +CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y +CONFIG_VFIO=y +CONFIG_VFIO_PCI=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_PCI=y +CONFIG_XIO3130=y -diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak -index 958b1e08e4..8f6867d48a 100644 ---- a/default-configs/aarch64-softmmu.mak -+++ b/default-configs/aarch64-softmmu.mak -@@ -1,8 +1,10 @@ - # Default configuration for aarch64-softmmu - - # We support 
all the 32 bit boards so need all their config --include arm-softmmu.mak -+#include arm-softmmu.mak - --CONFIG_XLNX_ZYNQMP_ARM=y --CONFIG_XLNX_VERSAL=y --CONFIG_SBSA_REF=y -+#CONFIG_XLNX_ZYNQMP_ARM=y -+#CONFIG_XLNX_VERSAL=y -+#CONFIG_SBSA_REF=y -+ -+include aarch64-rh-devices.mak -diff --git a/default-configs/ppc64-rh-devices.mak b/default-configs/ppc64-rh-devices.mak ++CONFIG_NVDIMM=y ++CONFIG_ACPI_APEI=y ++CONFIG_TPM=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_TIS_SYSBUS=y ++CONFIG_PTIMER=y ++CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y +diff --git a/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak new file mode 100644 -index 0000000000..35f2106d06 +index 0000000000..73e3ee0293 --- /dev/null -+++ b/default-configs/ppc64-rh-devices.mak -@@ -0,0 +1,32 @@ -+include rh-virtio.mak ++++ b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak +@@ -0,0 +1,36 @@ ++include ../rh-virtio.mak + +CONFIG_DIMM=y +CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y +CONFIG_PCI=y +CONFIG_PCI_DEVICES=y +CONFIG_PCI_TESTDEV=y ++CONFIG_PCI_EXPRESS=y +CONFIG_PSERIES=y +CONFIG_SCSI=y +CONFIG_SPAPR_VSCSI=y @@ -193,9 +216,11 @@ index 0000000000..35f2106d06 +CONFIG_USB_OHCI=y +CONFIG_USB_OHCI_PCI=y +CONFIG_USB_SMARTCARD=y -+CONFIG_USB_STORAGE_BOT=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y +CONFIG_USB_XHCI=y +CONFIG_USB_XHCI_NEC=y ++CONFIG_USB_XHCI_PCI=y +CONFIG_VFIO=y +CONFIG_VFIO_PCI=y +CONFIG_VGA=y @@ -205,36 +230,16 @@ index 0000000000..35f2106d06 +CONFIG_VIRTIO_VGA=y +CONFIG_WDT_IB6300ESB=y +CONFIG_XICS=y -+CONFIG_XICS_KVM=y -+CONFIG_XICS_SPAPR=y +CONFIG_XIVE=y -+CONFIG_XIVE_SPAPR=y -+CONFIG_XIVE_KVM=y -diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak -index cca52665d9..fec354f327 100644 ---- a/default-configs/ppc64-softmmu.mak -+++ b/default-configs/ppc64-softmmu.mak -@@ -1,10 +1,12 @@ - # Default configuration for ppc64-softmmu - - # Include all 32-bit boards --include ppc-softmmu.mak -+#include 
ppc-softmmu.mak - - # For PowerNV --CONFIG_POWERNV=y -+#CONFIG_POWERNV=y - - # For pSeries --CONFIG_PSERIES=y -+#CONFIG_PSERIES=y -+ -+include ppc64-rh-devices.mak -diff --git a/default-configs/rh-virtio.mak b/default-configs/rh-virtio.mak ++CONFIG_TPM=y ++CONFIG_TPM_SPAPR=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_PASSTHROUGH=y +diff --git a/configs/devices/rh-virtio.mak b/configs/devices/rh-virtio.mak new file mode 100644 index 0000000000..94ede1b5f6 --- /dev/null -+++ b/default-configs/rh-virtio.mak ++++ b/configs/devices/rh-virtio.mak @@ -0,0 +1,10 @@ +CONFIG_VIRTIO=y +CONFIG_VIRTIO_BALLOON=y @@ -246,13 +251,13 @@ index 0000000000..94ede1b5f6 +CONFIG_VIRTIO_RNG=y +CONFIG_VIRTIO_SCSI=y +CONFIG_VIRTIO_SERIAL=y -diff --git a/default-configs/s390x-rh-devices.mak b/default-configs/s390x-rh-devices.mak +diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak new file mode 100644 -index 0000000000..c3c73fe752 +index 0000000000..165c082e87 --- /dev/null -+++ b/default-configs/s390x-rh-devices.mak -@@ -0,0 +1,15 @@ -+include rh-virtio.mak ++++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +@@ -0,0 +1,16 @@ ++include ../rh-virtio.mak + +CONFIG_PCI=y +CONFIG_S390_CCW_VIRTIO=y @@ -263,29 +268,19 @@ index 0000000000..c3c73fe752 +CONFIG_TERMINAL3270=y +CONFIG_VFIO=y +CONFIG_VFIO_AP=y ++CONFIG_VFIO_CCW=y +CONFIG_VFIO_PCI=y +CONFIG_VHOST_USER=y +CONFIG_VIRTIO_CCW=y +CONFIG_WDT_DIAG288=y -diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak -index f2287a133f..3e2e388e91 100644 ---- a/default-configs/s390x-softmmu.mak -+++ b/default-configs/s390x-softmmu.mak -@@ -10,4 +10,6 @@ - - # Boards: - # --CONFIG_S390_CCW_VIRTIO=y -+#CONFIG_S390_CCW_VIRTIO=y -+ -+include s390x-rh-devices.mak -diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak 
b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..d59b6d9bb5 +index 0000000000..ddf036f042 --- /dev/null -+++ b/default-configs/x86_64-rh-devices.mak -@@ -0,0 +1,100 @@ -+include rh-virtio.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -0,0 +1,104 @@ ++include ../rh-virtio.mak ++include x86_64-upstream-devices.mak + +CONFIG_AC97=y +CONFIG_ACPI=y @@ -306,6 +301,7 @@ index 0000000000..d59b6d9bb5 +CONFIG_E1000_PCI=y +CONFIG_EDU=y +CONFIG_FDC=y ++CONFIG_FDC_SYSBUS=y +CONFIG_FW_CFG_DMA=y +CONFIG_HDA=y +CONFIG_HYPERV=y @@ -329,7 +325,6 @@ index 0000000000..d59b6d9bb5 +CONFIG_MC146818RTC=y +CONFIG_MEM_DEVICE=y +CONFIG_NVDIMM=y -+CONFIG_OPENGL=y +CONFIG_PAM=y +CONFIG_PC=y +CONFIG_PCI=y @@ -344,7 +339,7 @@ index 0000000000..d59b6d9bb5 +CONFIG_PC_ACPI=y +CONFIG_PC_PCI=y +CONFIG_PFLASH_CFI01=y -+CONFIG_PVPANIC=y ++CONFIG_PVPANIC_ISA=y +CONFIG_PXB=y +CONFIG_Q35=y +CONFIG_QXL=y @@ -357,22 +352,24 @@ index 0000000000..d59b6d9bb5 +CONFIG_SGA=y +CONFIG_SMBIOS=y +CONFIG_SMBUS_EEPROM=y -+CONFIG_SPICE=y +CONFIG_TEST_DEVICES=y +CONFIG_USB=y +CONFIG_USB_EHCI=y +CONFIG_USB_EHCI_PCI=y +CONFIG_USB_SMARTCARD=y -+CONFIG_USB_STORAGE_BOT=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y +CONFIG_USB_UHCI=y +CONFIG_USB_XHCI=y +CONFIG_USB_XHCI_NEC=y ++CONFIG_USB_XHCI_PCI=y +CONFIG_VFIO=y +CONFIG_VFIO_PCI=y +CONFIG_VGA=y +CONFIG_VGA_CIRRUS=y +CONFIG_VGA_PCI=y +CONFIG_VHOST_USER=y ++CONFIG_VHOST_USER_BLK=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VGA=y +CONFIG_VMMOUSE=y @@ -381,26 +378,26 @@ index 0000000000..d59b6d9bb5 +CONFIG_WDT_IB6300ESB=y +CONFIG_WDT_IB700=y +CONFIG_XIO3130=y ++CONFIG_TPM=y +CONFIG_TPM_CRB=y -+CONFIG_TPM_TIS=y ++CONFIG_TPM_TIS_ISA=y +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_PASSTHROUGH=y -diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak -index 64b2ee2960..b5de7e5279 100644 ---- a/default-configs/x86_64-softmmu.mak -+++ b/default-configs/x86_64-softmmu.mak -@@ -1,3 +1,5 @@ - # 
Default configuration for x86_64-softmmu - --include i386-softmmu.mak -+#include i386-softmmu.mak -+ -+include x86_64-rh-devices.mak +diff --git a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak +new file mode 100644 +index 0000000000..2cd20f54d2 +--- /dev/null ++++ b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak +@@ -0,0 +1,4 @@ ++# We need "isa-parallel" ++CONFIG_PARALLEL=y ++# We need "hpet" ++CONFIG_HPET=y diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 2034dd749e..ab203ad448 100644 +index ebe08ed831..381ef2ddcf 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c -@@ -449,8 +449,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) +@@ -438,8 +438,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; pm->acpi_memory_hotplug.is_enabled = true; pm->cpu_hotplug_legacy = true; @@ -409,36 +406,36 @@ index 2034dd749e..ab203ad448 100644 + pm->disable_s3 = 1; + pm->disable_s4 = 1; pm->s4_val = 2; - - object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, -diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs -index fe749f65fd..2aa1a9efdd 100644 ---- a/hw/arm/Makefile.objs -+++ b/hw/arm/Makefile.objs -@@ -27,7 +27,7 @@ obj-$(CONFIG_VEXPRESS) += vexpress.o - obj-$(CONFIG_ZYNQ) += xilinx_zynq.o - obj-$(CONFIG_SABRELITE) += sabrelite.o - --obj-$(CONFIG_ARM_V7M) += armv7m.o -+#obj-$(CONFIG_ARM_V7M) += armv7m.o - obj-$(CONFIG_EXYNOS4) += exynos4210.o - obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o - obj-$(CONFIG_DIGIC) += digic.o + pm->use_acpi_hotplug_bridge = true; + pm->keep_pci_slot_hpc = true; +diff --git a/hw/arm/meson.build b/hw/arm/meson.build +index 721a8eb8be..87ed4dd914 100644 +--- a/hw/arm/meson.build ++++ b/hw/arm/meson.build +@@ -31,7 +31,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) + arm_ss.add(when: 'CONFIG_ZYNQ', if_true: 
files('xilinx_zynq.c')) + arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) + +-arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) ++#arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) + arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c')) + arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) + arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index ac5d31e8c1..e925bac002 100644 +index 21d18ac2e3..97fa6de423 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c -@@ -46,6 +46,8 @@ - #include "qemu/module.h" - #include "trace.h" +@@ -48,6 +48,8 @@ + #include "qom/object.h" + #include "fdc-internal.h" +#include "hw/boards.h" + /********************************************************/ /* debug Floppy devices */ -@@ -2638,6 +2640,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, - int i, j; +@@ -2337,6 +2339,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) + FDrive *drive; static int command_tables_inited = 0; + /* Restricted for Red Hat Enterprise Linux: */ @@ -451,51 +448,53 @@ index ac5d31e8c1..e925bac002 100644 + if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); - } -diff --git a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs -index 867a7d2e8a..e678e9ee3c 100644 ---- a/hw/bt/Makefile.objs -+++ b/hw/bt/Makefile.objs -@@ -1,3 +1,3 @@ --common-obj-y += core.o l2cap.o sdp.o hci.o hid.o --common-obj-y += hci-csr.o -+#common-obj-y += core.o l2cap.o sdp.o hci.o hid.o -+#common-obj-y += hci-csr.o - -diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs -index 8db9e8a7b3..1601ea93c7 100644 ---- a/hw/cpu/Makefile.objs -+++ b/hw/cpu/Makefile.objs -@@ -1,5 +1,6 @@ - obj-$(CONFIG_ARM11MPCORE) += arm11mpcore.o - obj-$(CONFIG_REALVIEW) += realview_mpcore.o - obj-$(CONFIG_A9MPCORE) += a9mpcore.o 
--obj-$(CONFIG_A15MPCORE) += a15mpcore.o --common-obj-y += core.o cluster.o -+#obj-$(CONFIG_A15MPCORE) += a15mpcore.o -+common-obj-y += core.o -+# cluster.o -diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs -index f2182e3bef..3d0cda1b52 100644 ---- a/hw/display/Makefile.objs -+++ b/hw/display/Makefile.objs -@@ -1,8 +1,9 @@ - common-obj-$(CONFIG_DDC) += i2c-ddc.o - common-obj-$(CONFIG_EDID) += edid-generate.o edid-region.o - --common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o --common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o -+# Disabled for Red Hat Enterprise Linux -+#common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o -+#common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o - - common-obj-$(CONFIG_ADS7846) += ads7846.o - common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o + return; +diff --git a/hw/char/parallel.c b/hw/char/parallel.c +index b45e67bfbb..e5f108211b 100644 +--- a/hw/char/parallel.c ++++ b/hw/char/parallel.c +@@ -29,6 +29,7 @@ + #include "chardev/char-parallel.h" + #include "chardev/char-fe.h" + #include "hw/acpi/aml-build.h" ++#include "hw/boards.h" + #include "hw/irq.h" + #include "hw/isa/isa.h" + #include "hw/qdev-properties.h" +@@ -534,6 +535,14 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) + int base; + uint8_t dummy; + ++ /* Restricted for Red Hat Enterprise Linux */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (strstr(mc->name, "rhel")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (!qemu_chr_fe_backend_connected(&s->chr)) { + error_setg(errp, "Can't create parallel device, empty char device"); + return; +diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build +index 9e52fee9e7..bb71c9f3e7 100644 +--- a/hw/cpu/meson.build ++++ b/hw/cpu/meson.build +@@ -1,6 +1,7 @@ +-softmmu_ss.add(files('core.c', 'cluster.c')) ++#softmmu_ss.add(files('core.c', 'cluster.c')) ++softmmu_ss.add(files('core.c')) + + 
specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) + specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) + specific_ss.add(when: 'CONFIG_A9MPCORE', if_true: files('a9mpcore.c')) +-specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) ++#specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index cd283e53b4..93afa26fda 100644 +index fdca6ca659..fa1a7eee51 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2975,6 +2975,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -2945,6 +2945,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; @@ -506,10 +505,10 @@ index cd283e53b4..93afa26fda 100644 Also accept 8 MB/16 MB for backward compatibility. */ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index db313dd3b1..e14858ca64 100644 +index ce89fd0aa3..fbcf802b13 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -251,7 +251,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -232,7 +232,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -519,7 +518,7 @@ index db313dd3b1..e14858ca64 100644 } static const TypeInfo piix3_ide_info = { -@@ -279,6 +280,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -261,6 +262,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -529,12 +528,12 @@ index db313dd3b1..e14858ca64 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c 
b/hw/input/pckbd.c -index f0acfd86f7..390eb6579c 100644 +index baba62f357..bc360347ea 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -571,6 +571,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) - dc->realize = i8042_realizefn; +@@ -796,6 +796,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) dc->vmsd = &vmstate_kbd_isa; + isa->build_aml = i8042_build_aml; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + /* Disabled for Red Hat Enterprise Linux: */ + dc->user_creatable = false; @@ -542,10 +541,10 @@ index f0acfd86f7..390eb6579c 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index a73f8d404e..fc73fdd6fa 100644 +index f5bc81296d..282d01e374 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1795,6 +1795,7 @@ static const E1000Info e1000_devices[] = { +@@ -1821,6 +1821,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -553,7 +552,7 @@ index a73f8d404e..fc73fdd6fa 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1807,6 +1808,7 @@ static const E1000Info e1000_devices[] = { +@@ -1833,6 +1834,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -561,41 +560,11 @@ index a73f8d404e..fc73fdd6fa 100644 }; static void e1000_register_types(void) -diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c -index f27131102d..17f10efae2 100644 ---- a/hw/pci-host/i440fx.c -+++ b/hw/pci-host/i440fx.c -@@ -386,6 +386,7 @@ static const TypeInfo i440fx_info = { - }, - }; - -+#if 0 /* Disabled in Red Hat Enterprise Linux */ - /* IGD Passthrough Host Bridge. 
*/ - typedef struct { - uint8_t offset; -@@ -469,6 +470,7 @@ static const TypeInfo igd_passthrough_i440fx_info = { - .instance_size = sizeof(PCII440FXState), - .class_init = igd_passthrough_i440fx_class_init, - }; -+#endif - - static const char *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge, - PCIBus *rootbus) -@@ -514,7 +516,9 @@ static const TypeInfo i440fx_pcihost_info = { - static void i440fx_register_types(void) - { - type_register_static(&i440fx_info); -+#if 0 /* Disabled in Red Hat Enterprise Linux */ - type_register_static(&igd_passthrough_i440fx_info); -+#endif - type_register_static(&i440fx_pcihost_info); - } - diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 8339c4c0f8..301cd7b4e4 100644 +index 58e7341cb7..8ba34f6a1d 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -403,10 +403,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -370,10 +370,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -608,216 +577,118 @@ index 8339c4c0f8..301cd7b4e4 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), -diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs -index 303ac084a0..700a91886e 100644 ---- a/hw/usb/Makefile.objs -+++ b/hw/usb/Makefile.objs -@@ -30,7 +30,9 @@ common-obj-$(CONFIG_USB_BLUETOOTH) += dev-bluetooth.o - ifeq ($(CONFIG_USB_SMARTCARD),y) - common-obj-y += dev-smartcard-reader.o - common-obj-$(CONFIG_SMARTCARD) += smartcard.mo --smartcard.mo-objs := ccid-card-passthru.o ccid-card-emulated.o -+# Disabled for Red Hat Enterprise Linux: -+# smartcard.mo-objs := ccid-card-passthru.o ccid-card-emulated.o -+smartcard.mo-objs := ccid-card-passthru.o - smartcard.mo-cflags := $(SMARTCARD_CFLAGS) - smartcard.mo-libs := $(SMARTCARD_LIBS) +diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c +index 9520471be2..202e032524 
100644 +--- a/hw/timer/hpet.c ++++ b/hw/timer/hpet.c +@@ -733,6 +733,14 @@ static void hpet_realize(DeviceState *dev, Error **errp) + int i; + HPETTimer *timer; + ++ /* Restricted for Red Hat Enterprise Linux */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (strstr(mc->name, "rhel")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (!s->intcap) { + warn_report("Hpet's intcap not initialized"); + } +diff --git a/hw/usb/meson.build b/hw/usb/meson.build +index de853d780d..0776ae6a20 100644 +--- a/hw/usb/meson.build ++++ b/hw/usb/meson.build +@@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade + if cacard.found() + usbsmartcard_ss = ss.source_set() + usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', +- if_true: [cacard, files('ccid-card-emulated.c', 'ccid-card-passthru.c')]) ++ if_true: [cacard, files('ccid-card-passthru.c')]) + hw_usb_modules += {'smartcard': usbsmartcard_ss} endif -diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index 136f3a9ad6..4505ffe48a 100644 ---- a/hw/vfio/pci-quirks.c -+++ b/hw/vfio/pci-quirks.c -@@ -1166,6 +1166,7 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) - trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - /* - * Intel IGD support - * -@@ -1239,6 +1240,7 @@ static int igd_gen(VFIOPCIDevice *vdev) - - return 8; /* Assume newer is compatible */ - } -+#endif - typedef struct VFIOIGDQuirk { - struct VFIOPCIDevice *vdev; -@@ -1311,6 +1313,7 @@ typedef struct { - uint8_t len; - } IGDHostInfo; +diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c +index 13d0e9b195..3826fa5122 100644 +--- a/target/arm/cpu_tcg.c ++++ b/target/arm/cpu_tcg.c +@@ -22,6 +22,7 @@ + /* CPU models. These are not needed for the AArch64 linux-user build. 
*/ + #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static const IGDHostInfo igd_host_bridge_infos[] = { - {PCI_REVISION_ID, 2}, - {PCI_SUBSYSTEM_VENDOR_ID, 2}, -@@ -1559,9 +1562,11 @@ static const MemoryRegionOps vfio_igd_index_quirk = { - .write = vfio_igd_quirk_index_write, - .endianness = DEVICE_LITTLE_ENDIAN, - }; -+#endif - - static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) + #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) + static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - struct vfio_region_info *rom = NULL, *opregion = NULL, - *host = NULL, *lpc = NULL; - VFIOQuirk *quirk; -@@ -1572,6 +1577,7 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) - uint32_t gmch; - uint16_t cmd_orig, cmd; - Error *err = NULL; -+#endif - - /* - * This must be an Intel VGA device at address 00:02.0 for us to even -@@ -1585,6 +1591,8 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) - return; - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ -+ - /* - * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we - * can stuff host values into, so if there's already one there and it's not -@@ -1809,6 +1817,7 @@ out: - g_free(opregion); - g_free(host); - g_free(lpc); -+#endif +@@ -375,6 +376,7 @@ static void cortex_a9_initfn(Object *obj) + cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. 
*/ + define_arm_cp_regs(cpu, cortexa9_cp_reginfo); } ++#endif /* disabled for RHEL */ - /* -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 2d40b396f2..c8534d3035 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3220,6 +3220,7 @@ static const TypeInfo vfio_pci_dev_info = { - }, + #ifndef CONFIG_USER_ONLY + static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) +@@ -400,6 +402,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { + REGINFO_SENTINEL }; +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static Property vfio_pci_dev_nohotplug_properties[] = { - DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), - DEFINE_PROP_END_OF_LIST(), -@@ -3239,11 +3240,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { - .instance_size = sizeof(VFIOPCIDevice), - .class_init = vfio_pci_nohotplug_dev_class_init, - }; -+#endif - - static void register_vfio_pci_dev_type(void) + static void cortex_a7_initfn(Object *obj) { - type_register_static(&vfio_pci_dev_info); -+ -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - type_register_static(&vfio_pci_nohotplug_dev_info); -+#endif + ARMCPU *cpu = ARM_CPU(obj); +@@ -445,6 +448,7 @@ static void cortex_a7_initfn(Object *obj) + cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ } ++#endif /* disabled for RHEL */ - type_init(register_vfio_pci_dev_type) -diff --git a/qemu-options.hx b/qemu-options.hx -index 65c9473b73..fc17aca631 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -2111,11 +2111,6 @@ ETEXI - - DEF("no-hpet", 0, QEMU_OPTION_no_hpet, - "-no-hpet disable HPET\n", QEMU_ARCH_I386) --STEXI --@item -no-hpet --@findex -no-hpet --Disable HPET support. 
--ETEXI - - DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, - "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" -@@ -3125,6 +3120,7 @@ STEXI - ETEXI - DEFHEADING() + static void cortex_a15_initfn(Object *obj) + { +@@ -488,6 +492,7 @@ static void cortex_a15_initfn(Object *obj) + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); + } -+#if 0 - DEFHEADING(Bluetooth(R) options:) - STEXI - @table @option -@@ -3203,6 +3199,7 @@ STEXI - @end table - ETEXI - DEFHEADING() -+#endif ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_m0_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -928,6 +933,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) - #ifdef CONFIG_TPM - DEFHEADING(TPM device options:) -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 7a4ac9339b..3788fc3c4a 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2744,7 +2744,9 @@ static void arm_cpu_register_types(void) - type_register_static(&idau_interface_type_info); + cc->gdb_core_xml_file = "arm-m-profile.xml"; + } ++#endif /* disabled for RHEL */ - while (info->name) { -- cpu_register(info); -+ /* RHEL specific: Filter out unsupported cpu models */ -+ if (!strcmp(info->name, "cortex-a15")) -+ cpu_register(info); - info++; - } + #ifndef TARGET_AARCH64 + /* +@@ -1007,6 +1013,7 @@ static void arm_max_initfn(Object *obj) + #endif /* !TARGET_AARCH64 */ -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 69f518a21a..1b7880ae3a 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1835,14 +1835,14 @@ static X86CPUDefinition builtin_x86_defs[] = { - .family = 6, - .model = 6, - .stepping = 3, -- .features[FEAT_1_EDX] = -- PPRO_FEATURES | -- CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA | -- CPUID_PSE36, -- .features[FEAT_1_ECX] = -- CPUID_EXT_SSE3 | CPUID_EXT_CX16, -- .features[FEAT_8000_0001_EDX] = -- CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, -+ 
.features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | -+ CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -+ CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | -+ CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | -+ CPUID_PSE | CPUID_DE | CPUID_FP87, -+ .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, -+ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | -+ CPUID_EXT2_SYSCALL, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, - .xlevel = 0x8000000A, -@@ -2128,6 +2128,25 @@ static X86CPUDefinition builtin_x86_defs[] = { - .xlevel = 0x80000008, - .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", - }, -+ { -+ .name = "cpu64-rhel6", -+ .level = 4, -+ .vendor = CPUID_VENDOR_AMD, -+ .family = 6, -+ .model = 13, -+ .stepping = 3, -+ .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | -+ CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -+ CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | -+ CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | -+ CPUID_PSE | CPUID_DE | CPUID_FP87, -+ .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, -+ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, -+ .features[FEAT_8000_0001_ECX] = CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | -+ CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, -+ .xlevel = 0x8000000A, -+ .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", -+ }, - { - .name = "Conroe", - .level = 10, + static const ARMCPUInfo arm_tcg_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "arm926", .initfn = arm926_initfn }, + { .name = "arm946", .initfn = arm946_initfn }, + { .name = "arm1026", .initfn = arm1026_initfn }, +@@ -1022,7 +1029,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "cortex-a7", .initfn = cortex_a7_initfn }, + { .name = "cortex-a8", .initfn = cortex_a8_initfn }, + { .name = "cortex-a9", .initfn = cortex_a9_initfn }, ++#endif 
/* disabled for RHEL */ + { .name = "cortex-a15", .initfn = cortex_a15_initfn }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-m0", .initfn = cortex_m0_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m3", .initfn = cortex_m3_initfn, +@@ -1053,6 +1062,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, ++#endif /* disabled for RHEL */ + #ifndef TARGET_AARCH64 + { .name = "max", .initfn = arm_max_initfn }, + #endif diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 086548e9b9..1bbf378c18 100644 +index 4baa111713..d779c4d1d5 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -847,7 +718,7 @@ index 086548e9b9..1bbf378c18 100644 POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, -@@ -780,6 +784,7 @@ +@@ -784,6 +788,7 @@ /* PowerPC CPU aliases */ PowerPCCPUAlias ppc_cpu_aliases[] = { @@ -855,7 +726,7 @@ index 086548e9b9..1bbf378c18 100644 { "403", "403gc" }, { "405", "405d4" }, { "405cr", "405crc" }, -@@ -938,12 +943,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -942,12 +947,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "7447a", "7447a_v1.2" }, { "7457a", "7457a_v1.2" }, { "apollo7pm", "7457a_v1.0" }, @@ -871,26 +742,26 @@ index 086548e9b9..1bbf378c18 100644 { "power7", "power7_v2.3" }, { "power7+", "power7+_v2.1" }, { "power8e", "power8e_v2.1" }, -@@ -952,6 +960,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { - { "power9", "power9_v2.0" }, +@@ -957,6 +965,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "power10", "power10_v2.0" }, #endif +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* Generic PowerPCs */ #if defined(TARGET_PPC64) { "ppc64", "970fx_v3.1" }, -@@ -959,5 +968,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -964,5 +973,6 
@@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "ppc32", "604" }, { "ppc", "604" }, { "default", "604" }, +#endif { NULL, NULL } }; -diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 7e92fb2e15..be718220d7 100644 ---- a/target/s390x/cpu_models.c -+++ b/target/s390x/cpu_models.c -@@ -404,6 +404,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, +diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c +index 05c3ccaaff..6a04ccab1b 100644 +--- a/target/s390x/cpu_models_sysemu.c ++++ b/target/s390x/cpu_models_sysemu.c +@@ -36,6 +36,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, (max_model->def->gen == model->def->gen && max_model->def->ec_ga < model->def->ec_ga)) { list_add_feat("type", unavailable); @@ -900,11 +771,11 @@ index 7e92fb2e15..be718220d7 100644 } /* detect missing features if any to properly report them */ -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 0c9d14b4b1..a02d569537 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -2387,6 +2387,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 5b1fdb55c4..c52434985b 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -2508,6 +2508,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -919,76 +790,6 @@ index 0c9d14b4b1..a02d569537 100644 prop.cpuid = s390_cpuid_from_cpu_model(model); prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ -diff --git a/util/memfd.c b/util/memfd.c -index 4a3c07e0be..3303ec9da4 100644 ---- a/util/memfd.c -+++ b/util/memfd.c -@@ -193,7 +193,7 @@ bool qemu_memfd_alloc_check(void) - */ - bool qemu_memfd_check(unsigned int flags) - { --#ifdef CONFIG_LINUX -+#if 0 /* RHEL: memfd support disabled */ - int mfd = memfd_create("test", 
flags | MFD_CLOEXEC); - - if (mfd >= 0) { -diff --git a/vl.c b/vl.c -index 6a65a64bfd..668a34577e 100644 ---- a/vl.c -+++ b/vl.c -@@ -166,7 +166,7 @@ Chardev *parallel_hds[MAX_PARALLEL_PORTS]; - int win2k_install_hack = 0; - int singlestep = 0; - int acpi_enabled = 1; --int no_hpet = 0; -+int no_hpet = 1; /* Always disabled for Red Hat Enterprise Linux */ - int fd_bootchk = 1; - static int no_reboot; - int no_shutdown = 0; -@@ -914,6 +914,7 @@ static void configure_rtc(QemuOpts *opts) - } - } - -+#if 0 // Disabled for Red Hat Enterprise Linux - /***********************************************************/ - /* Bluetooth support */ - static int nb_hcis; -@@ -1035,6 +1036,7 @@ static int bt_parse(const char *opt) - error_report("bad bluetooth parameter '%s'", opt); - return 1; - } -+#endif - - static int parse_name(void *opaque, QemuOpts *opts, Error **errp) - { -@@ -3128,6 +3130,7 @@ int main(int argc, char **argv, char **envp) - } - break; - #endif -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - case QEMU_OPTION_bt: - warn_report("The bluetooth subsystem is deprecated and will " - "be removed soon. 
If the bluetooth subsystem is " -@@ -3135,6 +3138,7 @@ int main(int argc, char **argv, char **envp) - "qemu-devel@nongnu.org with your usecase."); - add_device_config(DEV_BT, optarg); - break; -+#endif - case QEMU_OPTION_audio_help: - audio_legacy_help(); - exit (0); -@@ -4282,9 +4286,11 @@ int main(int argc, char **argv, char **envp) - - tpm_init(); - -+#if 0 // Disabled for Red Hat Enterprise Linux - /* init the bluetooth world */ - if (foreach_device_config(DEV_BT, bt_parse)) - exit(1); -+#endif - - if (!xen_enabled()) { - /* On 32-bit hosts, QEMU is limited by virtual address space */ -- -2.21.0 +2.27.0 diff --git a/SOURCES/0007-Machine-type-related-general-changes.patch b/SOURCES/0007-Machine-type-related-general-changes.patch index 4ae3966..f7bd665 100644 --- a/SOURCES/0007-Machine-type-related-general-changes.patch +++ b/SOURCES/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 113078b23a4747b07eb363719d7cbc0af403dd2a Mon Sep 17 00:00:00 2001 +From adca046d9db670637b9bf2b24f7a4349a9fe2628 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -20,6 +20,12 @@ Rebase changes (4.1.0): - Use one format for compat scructures - Added compat for virtio-balloon-pci.any_layout for rhel71 +Rebase changes (weekly-210303): +- Added rhel 8.4.0 compat based on 5.2 compat + +Rebase changes (weekly-211103): +- Do not duplicate minimal_version_id for piix4_pm + Merged patches (4.0.0): - d4c0957 compat: Generic HW_COMPAT_RHEL7_6 - cbac773 virtio: Make disable-legacy/disable-modern compat properties optional @@ -33,34 +39,61 @@ Merged patches (4.2.0): - ca4a5e8 virtio: Make disable-legacy/disable-modern compat properties optional - compat: Generic hw_compat_rhel_8_1 (patch 93040/92956) -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (5.1.0): +- e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) +- 8f9f4d8 compat: disable 'edid' for virtio-gpu-ccw + +Merged patches (5.2.0 rc0): +- 8348642 redhat: define hw_compat_8_2 +- 45b8402 redhat: define hw_compat_8_2 +- 4effa71 redhat: Update hw_compat_8_2 +- 0e84dff virtio: skip legacy support check on machine types less than 5.1 (partialy) + +Merged patches (6.0.0): +- fa0063ba67 redhat: Define hw_compat_8_3 +- d98e328c8d usb/hcd-xhci-pci: Fixup capabilities ordering (again) +- b8a2578117 virtio: move 'use-disabled-flag' property to hw_compat_4_2 +- f7940b04c8 virtio-pci: compat page aligned ATS + +Merged patches (weekly-210602): +- 26f25108c1 redhat: add missing entries in hw_compat_rhel_8_4 + +Merged patches (weekly-211006): +- 43c4b9bea6 redhat: Define hw_compat_rhel_8_5 --- - hw/acpi/ich9.c | 16 ++++ - hw/acpi/piix4.c | 5 +- - hw/char/serial.c | 16 ++++ - hw/core/machine.c | 170 ++++++++++++++++++++++++++++++++++++++++ - hw/display/vga-isa.c | 2 +- - hw/net/e1000e.c | 21 +++++ - hw/net/rtl8139.c | 4 +- - hw/rtc/mc146818rtc.c | 6 ++ - hw/smbios/smbios.c | 1 + - hw/timer/i8254_common.c | 2 +- - hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci.c | 20 +++++ - hw/usb/hcd-xhci.h | 2 + - include/hw/acpi/ich9.h | 3 + - include/hw/boards.h | 24 ++++++ - include/hw/usb.h | 4 + - migration/migration.c | 2 + - migration/migration.h | 5 ++ - 18 files changed, 301 insertions(+), 6 deletions(-) + hw/acpi/ich9.c | 15 ++ + hw/acpi/piix4.c | 6 +- + hw/arm/virt.c | 2 +- + hw/char/serial.c | 16 +++ + hw/core/machine.c | 272 +++++++++++++++++++++++++++++++++++ + hw/display/vga-isa.c | 2 +- + hw/i386/pc_piix.c | 2 + + hw/i386/pc_q35.c | 2 + + hw/net/e1000e.c | 22 +++ + hw/net/rtl8139.c | 4 +- + hw/rtc/mc146818rtc.c | 6 + + hw/smbios/smbios.c | 46 +++++- + hw/timer/i8254_common.c | 2 +- + hw/usb/hcd-uhci.c | 4 +- + hw/usb/hcd-xhci-pci.c | 59 ++++++-- + hw/usb/hcd-xhci-pci.h | 1 + + hw/usb/hcd-xhci.c | 20 +++ + 
hw/usb/hcd-xhci.h | 2 + + include/hw/acpi/ich9.h | 3 + + include/hw/boards.h | 36 +++++ + include/hw/firmware/smbios.h | 5 +- + include/hw/i386/pc.h | 3 + + include/hw/usb.h | 3 + + migration/migration.c | 2 + + migration/migration.h | 5 + + 25 files changed, 514 insertions(+), 26 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index ab203ad448..7ec26884e8 100644 +index 381ef2ddcf..82bd805b55 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c -@@ -444,6 +444,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) - s->pm.enable_tco = value; +@@ -433,6 +433,18 @@ static void ich9_pm_set_keep_pci_slot_hpc(Object *obj, bool value, Error **errp) + s->pm.keep_pci_slot_hpc = value; } +static bool ich9_pm_get_force_rev1_fadt(Object *obj, Error **errp) @@ -75,33 +108,33 @@ index ab203ad448..7ec26884e8 100644 + s->pm.force_rev1_fadt = value; +} + - void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; -@@ -468,6 +480,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) +@@ -457,6 +469,9 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + object_property_add_bool(obj, "cpu-hotplug-legacy", ich9_pm_get_cpu_hotplug_legacy, - ich9_pm_set_cpu_hotplug_legacy, - NULL); + ich9_pm_set_cpu_hotplug_legacy); + object_property_add_bool(obj, "__com.redhat_force-rev1-fadt", + ich9_pm_get_force_rev1_fadt, -+ ich9_pm_set_force_rev1_fadt, -+ NULL); - object_property_add(obj, ACPI_PM_PROP_S3_DISABLED, "uint8", - ich9_pm_get_disable_s3, - ich9_pm_set_disable_s3, ++ ich9_pm_set_force_rev1_fadt); + object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S3_DISABLED, + &pm->disable_s3, OBJ_PROP_FLAG_READWRITE); + object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 93aec2dd2c..3a26193cbe 100644 +index f0b5fac44a..8d6011c0a3 100644 --- 
a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -274,6 +274,7 @@ static const VMStateDescription vmstate_acpi = { +@@ -278,7 +278,7 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id) + static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, - .minimum_version_id = 3, +- .minimum_version_id = 3, + .minimum_version_id = 2, .post_load = vmstate_acpi_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), -@@ -627,8 +628,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) +@@ -644,8 +644,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) static Property piix4_pm_properties[] = { DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), @@ -110,21 +143,34 @@ index 93aec2dd2c..3a26193cbe 100644 + DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), - DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, - use_acpi_pci_hotplug, true), + DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, + use_acpi_hotplug_bridge, true), +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 30da05dfe0..5de4d9d73b 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1590,7 +1590,7 @@ static void virt_build_smbios(VirtMachineState *vms) + + smbios_set_defaults("QEMU", product, + vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, +- true, SMBIOS_ENTRY_POINT_30); ++ true, NULL, NULL, SMBIOS_ENTRY_POINT_30); + + smbios_get_tables(MACHINE(vms), NULL, 0, + &smbios_tables, &smbios_tables_len, diff --git a/hw/char/serial.c b/hw/char/serial.c -index b4aa250950..0012f0e44d 100644 +index 7061aacbce..fe8d0afbb0 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c -@@ -34,6 +34,7 @@ - #include "sysemu/runstate.h" - #include "qemu/error-report.h" +@@ -37,6 +37,7 @@ #include "trace.h" + #include "hw/qdev-properties.h" + #include "hw/qdev-properties-system.h" +#include "migration/migration.h" - //#define DEBUG_SERIAL + #define UART_LCR_DLAB 0x80 /* Divisor latch access bit */ -@@ -703,6 +704,9 @@ static int serial_post_load(void *opaque, int version_id) +@@ -689,6 +690,9 @@ static int serial_post_load(void *opaque, int version_id) static bool serial_thr_ipending_needed(void *opaque) { SerialState *s = opaque; @@ -134,7 +180,7 @@ index b4aa250950..0012f0e44d 100644 if (s->ier & UART_IER_THRI) { bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); -@@ -784,6 +788,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { +@@ -770,6 +774,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { static bool serial_fifo_timeout_timer_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -145,7 +191,7 @@ index b4aa250950..0012f0e44d 100644 return timer_pending(s->fifo_timeout_timer); } -@@ -801,6 +809,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { +@@ -787,6 +795,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { static bool serial_timeout_ipending_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -156,7 +202,7 @@ index b4aa250950..0012f0e44d 100644 return s->timeout_ipending != 0; } -@@ -818,6 +830,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { +@@ -804,6 +816,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { static bool 
serial_poll_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -168,14 +214,116 @@ index b4aa250950..0012f0e44d 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index 1689ad3bf8..e0e0eec8bf 100644 +index 53a99abc56..be4f9864cd 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -27,6 +27,176 @@ - #include "hw/pci/pci.h" - #include "hw/mem/nvdimm.h" +@@ -37,6 +37,278 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" +/* ++ * Mostly the same as hw_compat_6_0 ++ */ ++GlobalProperty hw_compat_rhel_8_5[] = { ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "gpex-pcihost", "allow-unmapped-accesses", "false" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "i8042", "extended-state", "false"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "nvme-ns", "eui64-default", "off"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000e", "init-vet", "off" }, ++}; ++const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); ++ ++/* ++ * Mostly the same as hw_compat_5_2 ++ */ ++GlobalProperty hw_compat_rhel_8_4[] = { ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "ICH9-LPC", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "PIIX4_PM", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "virtio-blk-device", "report-discard-granularity", "off" }, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "virtio-net-pci", "vectors", "3"}, ++}; ++const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); ++ ++/* ++ * Mostly the same as hw_compat_5_1 ++ */ ++GlobalProperty hw_compat_rhel_8_3[] = { ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-blk", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-scsi", "num_queues", "1"}, ++ /* 
hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-blk-device", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-scsi-device", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "nvme", "use-intel-id", "on"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++ /* hw_compat_rhel_8_3 bz 1912846 */ ++ { "pci-xhci", "x-rh-late-msi-cap", "off" }, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-pci", "x-ats-page-aligned", "off"}, ++}; ++const size_t hw_compat_rhel_8_3_len = G_N_ELEMENTS(hw_compat_rhel_8_3); ++ ++/* ++ * The same as hw_compat_4_2 + hw_compat_5_0 ++ */ ++GlobalProperty hw_compat_rhel_8_2[] = { ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "queue-size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "virtqueue_size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "x-enable-wce-if-config-wce", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "seg-max-adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "vhost-blk-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "usb-host", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "usb-redir", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl-vga", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "fw_cfg", "acpi-mr-restore", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-device", "use-disabled-flag", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "pci-host-bridge", "x-config-reg-migration-enabled", "off" }, ++ 
/* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "virtio-balloon-device", "page-poison", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-read-set-eax", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-signal-unsupported-cmd", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-report-vmx-type", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-cmds-v2", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "virtio-device", "x-disable-legacy-check", "true" }, ++}; ++const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); ++ ++/* + * The same as hw_compat_4_1 + */ +GlobalProperty hw_compat_rhel_8_1[] = { @@ -220,7 +368,7 @@ index 1689ad3bf8..e0e0eec8bf 100644 + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ + { "virtio-vga", "edid", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ -+ { "virtio-gpu-pci", "edid", "false" }, ++ { "virtio-gpu-device", "edid", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ + { "virtio-device", "use-started", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ @@ -345,14 +493,14 @@ index 1689ad3bf8..e0e0eec8bf 100644 +}; +const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); + - GlobalProperty hw_compat_4_1[] = { - { "virtio-pci", "x-pcie-flr-init", "off" }, - }; + GlobalProperty hw_compat_6_1[] = { + { "vhost-user-vsock-device", "seqpacket", "off" }, + { "nvme-ns", "shared", "off" }, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 873e5e9706..d1a2efe47e 100644 +index 90851e730b..a91c5d7467 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c -@@ -82,7 +82,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) +@@ -85,7 +85,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) } static Property vga_isa_properties[] = { @@ -361,23 +509,50 @@ index 873e5e9706..d1a2efe47e 100644 DEFINE_PROP_END_OF_LIST(), }; +diff 
--git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 223dd3e05d..dda3f64f19 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, + smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index e1e100316d..235054a643 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) + smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index b69fd7d8ad..d8be50a1ce 100644 +index ac96f7665a..d35bc1f0b0 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c -@@ -79,6 +79,11 @@ typedef struct E1000EState { +@@ -81,6 +81,12 @@ struct E1000EState { E1000ECore core; - + bool init_vet; ++ + /* 7.3 had the intr_state field that was in the original e1000e code + * but that was removed prior to 2.7's release + */ + bool redhat_7_3_intr_state_enable; + uint32_t redhat_7_3_intr_state; - } E1000EState; + }; #define E1000E_MMIO_IDX 0 -@@ -94,6 +99,10 @@ typedef struct E1000EState { +@@ -96,6 +102,10 @@ struct E1000EState { #define E1000E_MSIX_TABLE (0x0000) #define E1000E_MSIX_PBA (0x2000) @@ -388,7 +563,7 @@ index b69fd7d8ad..d8be50a1ce 100644 static uint64_t e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) { -@@ -305,6 +314,8 @@ e1000e_init_msix(E1000EState *s) +@@ -307,6 +317,8 @@ e1000e_init_msix(E1000EState *s) } else { if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { msix_uninit(d, &s->msix, &s->msix); @@ -397,7 +572,7 @@ index b69fd7d8ad..d8be50a1ce 100644 } } } -@@ -476,6 +487,8 @@ static void 
e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) +@@ -478,6 +490,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); if (ret) { trace_e1000e_msi_init_fail(ret); @@ -406,7 +581,7 @@ index b69fd7d8ad..d8be50a1ce 100644 } if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, -@@ -599,6 +612,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { +@@ -605,6 +619,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ e1000e_vmstate_intr_timer, E1000IntrDelayTimer) @@ -418,7 +593,7 @@ index b69fd7d8ad..d8be50a1ce 100644 static const VMStateDescription e1000e_vmstate = { .name = "e1000e", .version_id = 1, -@@ -610,6 +628,7 @@ static const VMStateDescription e1000e_vmstate = { +@@ -616,6 +635,7 @@ static const VMStateDescription e1000e_vmstate = { VMSTATE_MSIX(parent_obj, E1000EState), VMSTATE_UINT32(ioaddr, E1000EState), @@ -426,7 +601,7 @@ index b69fd7d8ad..d8be50a1ce 100644 VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), VMSTATE_UINT8(core.rx_desc_len, E1000EState), VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, -@@ -658,6 +677,8 @@ static PropertyInfo e1000e_prop_disable_vnet, +@@ -664,6 +684,8 @@ static PropertyInfo e1000e_prop_disable_vnet, static Property e1000e_properties[] = { DEFINE_NIC_PROPERTIES(E1000EState, conf), @@ -436,10 +611,10 @@ index b69fd7d8ad..d8be50a1ce 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 88a97d756d..21d80e96cf 100644 +index 90b4fc63ce..3ffb9dd22c 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c -@@ -3177,7 +3177,7 @@ static int rtl8139_pre_save(void *opaque) +@@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) static const VMStateDescription vmstate_rtl8139 = { .name = "rtl8139", @@ -448,7 +623,7 @@ index 88a97d756d..21d80e96cf 100644 .minimum_version_id = 3, 
.post_load = rtl8139_post_load, .pre_save = rtl8139_pre_save, -@@ -3258,7 +3258,9 @@ static const VMStateDescription vmstate_rtl8139 = { +@@ -3260,7 +3260,9 @@ static const VMStateDescription vmstate_rtl8139 = { VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), @@ -459,18 +634,18 @@ index 88a97d756d..21d80e96cf 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c -index 74ae74bc5c..73820517df 100644 +index 4fbafddb22..2f120c6e70 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c -@@ -42,6 +42,7 @@ +@@ -43,6 +43,7 @@ + #include "qapi/qapi-events-misc-target.h" #include "qapi/visitor.h" - #include "exec/address-spaces.h" #include "hw/rtc/mc146818rtc_regs.h" +#include "migration/migration.h" #ifdef TARGET_I386 #include "qapi/qapi-commands-misc-target.h" -@@ -820,6 +821,11 @@ static int rtc_post_load(void *opaque, int version_id) +@@ -821,6 +822,11 @@ static int rtc_post_load(void *opaque, int version_id) static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) { RTCState *s = (RTCState *)opaque; @@ -483,17 +658,87 @@ index 74ae74bc5c..73820517df 100644 } diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 11d476c4a2..e6e9355384 100644 +index 7397e56737..3a4bb894ba 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c -@@ -777,6 +777,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, +@@ -57,6 +57,9 @@ static bool smbios_legacy = true; + static bool smbios_uuid_encoded = true; + /* end: legacy structures & constants for <= 2.0 machines */ + ++/* Set to true for modern Windows 10 HardwareID-6 compat */ ++static bool smbios_type2_required; ++ + + uint8_t *smbios_tables; + size_t smbios_tables_len; +@@ -619,7 +622,7 @@ static void smbios_build_type_1_table(void) + + static void smbios_build_type_2_table(void) + { +- SMBIOS_BUILD_TABLE_PRE(2, 
0x200, false); /* optional */ ++ SMBIOS_BUILD_TABLE_PRE(2, 0x200, smbios_type2_required); + + SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); + SMBIOS_TABLE_SET_STR(2, product_str, type2.product); +@@ -888,7 +891,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type) ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type) + { + smbios_have_defaults = true; + smbios_legacy = legacy_mode; +@@ -909,11 +915,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + g_free(smbios_entries); + } + ++ /* ++ * If @stream_product & @stream_version are non-NULL, then ++ * we're following rules for new Windows driver support. ++ * The data we have to report is defined in this doc: ++ * ++ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer ++ * ++ * The Windows drivers are written to expect use of the ++ * scheme documented as "HardwareID-6" against Windows 10, ++ * which uses SMBIOS System (Type 1) and Base Board (Type 2) ++ * tables and will match on ++ * ++ * System Manufacturer = Red Hat (@manufacturer) ++ * System SKU Number = 8.2.0 (@stream_version) ++ * Baseboard Manufacturer = Red Hat (@manufacturer) ++ * Baseboard Product = RHEL-AV (@stream_product) ++ * ++ * NB, SKU must be changed with each RHEL-AV release ++ * ++ * Other fields can be freely used by applications using ++ * QEMU. For example apps can use the "System product" ++ * and "System version" to identify themselves. 
++ * ++ * We get 'System Manufacturer' and 'Baseboard Manufacturer' ++ */ SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); SMBIOS_SET_DEFAULT(type1.product, product); SMBIOS_SET_DEFAULT(type1.version, version); + SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); ++ if (stream_version != NULL) { ++ SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ } SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type2.product, product); +- SMBIOS_SET_DEFAULT(type2.product, product); ++ if (stream_product != NULL) { ++ SMBIOS_SET_DEFAULT(type2.product, stream_product); ++ smbios_type2_required = true; ++ } else { ++ SMBIOS_SET_DEFAULT(type2.product, product); ++ } SMBIOS_SET_DEFAULT(type2.version, version); + SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type3.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c index 050875b497..32935da46c 100644 --- a/hw/timer/i8254_common.c @@ -508,10 +753,10 @@ index 050875b497..32935da46c 100644 vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 23507ad3b5..9fd87a7ad9 100644 +index d1b5657d72..7930b868fa 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c -@@ -1219,12 +1219,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) +@@ -1166,11 +1166,13 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) UHCIState *s = UHCI(dev); uint8_t *pci_conf = s->dev.config; int i; @@ -519,19 +764,129 @@ index 23507ad3b5..9fd87a7ad9 100644 pci_conf[PCI_CLASS_PROG] = 0x00; /* TODO: reset value should be 0. 
*/ - pci_conf[USB_SBRN] = USB_RELEASE_1; // release number - + pci_conf[USB_SBRN] = USB_RELEASE_1; /* release number */ - pci_config_set_interrupt_pin(pci_conf, u->info.irq_pin + 1); + irq_pin = u->info.irq_pin; + pci_config_set_interrupt_pin(pci_conf, irq_pin + 1); + s->irq = pci_allocate_irq(dev); if (s->masterbus) { - USBPort *ports[NB_PORTS]; +diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c +index e934b1a5b1..e18b05e528 100644 +--- a/hw/usb/hcd-xhci-pci.c ++++ b/hw/usb/hcd-xhci-pci.c +@@ -104,6 +104,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) + return 0; + } + ++/* RH bz 1912846 */ ++static bool usb_xhci_pci_add_msi(struct PCIDevice *dev, Error **errp) ++{ ++ int ret; ++ Error *err = NULL; ++ XHCIPciState *s = XHCI_PCI(dev); ++ ++ ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); ++ /* ++ * Any error other than -ENOTSUP(board's MSI support is broken) ++ * is a programming error ++ */ ++ assert(!ret || ret == -ENOTSUP); ++ if (ret && s->msi == ON_OFF_AUTO_ON) { ++ /* Can't satisfy user's explicit msi=on request, fail */ ++ error_append_hint(&err, "You have to use msi=auto (default) or " ++ "msi=off with this machine type.\n"); ++ error_propagate(errp, err); ++ return true; ++ } ++ assert(!err || s->msi == ON_OFF_AUTO_AUTO); ++ /* With msi=auto, we fall back to MSI off silently */ ++ error_free(err); ++ ++ return false; ++} ++ + static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + { + int ret; +@@ -125,23 +152,12 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + s->xhci.nec_quirks = true; + } + +- if (s->msi != ON_OFF_AUTO_OFF) { +- ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); +- /* +- * Any error other than -ENOTSUP(board's MSI support is broken) +- * is a programming error +- */ +- assert(!ret || ret == -ENOTSUP); +- if (ret && s->msi == ON_OFF_AUTO_ON) { +- /* Can't satisfy user's explicit msi=on request, fail */ +- error_append_hint(&err, "You 
have to use msi=auto (default) or " +- "msi=off with this machine type.\n"); ++ if (s->msi != ON_OFF_AUTO_OFF && s->rh_late_msi_cap) { ++ /* This gives the behaviour from 5.2.0 onwards, lspci shows 90,a0,70 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { + error_propagate(errp, err); + return; + } +- assert(!err || s->msi == ON_OFF_AUTO_AUTO); +- /* With msi=auto, we fall back to MSI off silently */ +- error_free(err); + } + pci_register_bar(dev, 0, + PCI_BASE_ADDRESS_SPACE_MEMORY | +@@ -154,6 +170,14 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + assert(ret > 0); + } + ++ /* RH bz 1912846 */ ++ if (s->msi != ON_OFF_AUTO_OFF && !s->rh_late_msi_cap) { ++ /* This gives the older RH machine behaviour, lspci shows 90,70,a0 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { ++ error_propagate(errp, err); ++ return; ++ } ++ } + if (s->msix != ON_OFF_AUTO_OFF) { + /* TODO check for errors, and should fail when msix=on */ + msix_init(dev, s->xhci.numintrs, +@@ -198,11 +222,18 @@ static void xhci_instance_init(Object *obj) + qdev_alias_all_properties(DEVICE(&s->xhci), obj); + } + ++static Property xhci_pci_properties[] = { ++ /* RH bz 1912846 */ ++ DEFINE_PROP_BOOL("x-rh-late-msi-cap", XHCIPciState, rh_late_msi_cap, true), ++ DEFINE_PROP_END_OF_LIST() ++}; ++ + static void xhci_class_init(ObjectClass *klass, void *data) + { + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + ++ device_class_set_props(dc, xhci_pci_properties); + dc->reset = xhci_pci_reset; + dc->vmsd = &vmstate_xhci_pci; + set_bit(DEVICE_CATEGORY_USB, dc->categories); +diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h +index c193f79443..086a1feb1e 100644 +--- a/hw/usb/hcd-xhci-pci.h ++++ b/hw/usb/hcd-xhci-pci.h +@@ -39,6 +39,7 @@ typedef struct XHCIPciState { + XHCIState xhci; + OnOffAuto msi; + OnOffAuto msix; ++ bool rh_late_msi_cap; /* bz 1912846 */ + } XHCIPciState; + + #endif diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 
80988bb305..8fed2eedd6 100644 +index e01700039b..d5ea13356c 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c -@@ -3590,9 +3590,27 @@ static const VMStateDescription vmstate_xhci_slot = { +@@ -3494,9 +3494,27 @@ static const VMStateDescription vmstate_xhci_slot = { } }; @@ -559,7 +914,7 @@ index 80988bb305..8fed2eedd6 100644 .fields = (VMStateField[]) { VMSTATE_UINT32(type, XHCIEvent), VMSTATE_UINT32(ccode, XHCIEvent), -@@ -3601,6 +3619,8 @@ static const VMStateDescription vmstate_xhci_event = { +@@ -3505,6 +3523,8 @@ static const VMStateDescription vmstate_xhci_event = { VMSTATE_UINT32(flags, XHCIEvent), VMSTATE_UINT8(slotid, XHCIEvent), VMSTATE_UINT8(epid, XHCIEvent), @@ -569,10 +924,10 @@ index 80988bb305..8fed2eedd6 100644 } }; diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index 2fad4df2a7..f554b671e3 100644 +index 98f598382a..50a7b6f6c4 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h -@@ -157,6 +157,8 @@ typedef struct XHCIEvent { +@@ -149,6 +149,8 @@ typedef struct XHCIEvent { uint32_t flags; uint8_t slotid; uint8_t epid; @@ -582,11 +937,11 @@ index 2fad4df2a7..f554b671e3 100644 typedef struct XHCIInterrupter { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 41568d1837..1a23ccc412 100644 +index 7ca92843c6..21abfd8447 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h -@@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { - uint8_t smm_enabled; +@@ -68,6 +68,9 @@ typedef struct ICH9LPCPMRegs { + bool smm_compat; bool enable_tco; TCOIORegs tco_regs; + @@ -596,13 +951,25 @@ index 41568d1837..1a23ccc412 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index de45087f34..6f85a0e032 100644 +index 9c1c190104..8bba96ef2b 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -377,4 +377,28 @@ extern const size_t hw_compat_2_2_len; +@@ -441,4 +441,40 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t 
hw_compat_2_1_len; ++extern GlobalProperty hw_compat_rhel_8_5[]; ++extern const size_t hw_compat_rhel_8_5_len; ++ ++extern GlobalProperty hw_compat_rhel_8_4[]; ++extern const size_t hw_compat_rhel_8_4_len; ++ ++extern GlobalProperty hw_compat_rhel_8_3[]; ++extern const size_t hw_compat_rhel_8_3_len; ++ ++extern GlobalProperty hw_compat_rhel_8_2[]; ++extern const size_t hw_compat_rhel_8_2_len; ++ +extern GlobalProperty hw_compat_rhel_8_1[]; +extern const size_t hw_compat_rhel_8_1_len; + @@ -628,26 +995,55 @@ index de45087f34..6f85a0e032 100644 +extern const size_t hw_compat_rhel_7_1_len; + #endif +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 5a0dd0c8cf..2cb1ec2bab 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -278,7 +278,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type); ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type); + uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 9ab39e428f..7ccc9a1a07 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -107,6 +107,9 @@ struct PCMachineClass { + bool smbios_defaults; + bool smbios_legacy_mode; + bool smbios_uuid_encoded; ++ /* New fields needed for Windows HardwareID-6 matching */ ++ const char *smbios_stream_product; ++ const char *smbios_stream_version; + + /* RAM / address space compat: */ + bool gigabyte_align; diff --git a/include/hw/usb.h b/include/hw/usb.h -index c24d968a19..b353438ea0 100644 +index 33668dd0a9..e6b2fe72da 100644 --- a/include/hw/usb.h +++ 
b/include/hw/usb.h -@@ -605,4 +605,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, - uint8_t interface_class, uint8_t interface_subclass, - uint8_t interface_protocol); +@@ -582,4 +582,7 @@ void usb_pcap_init(FILE *fp); + void usb_pcap_ctrl(USBPacket *p, bool setup); + void usb_pcap_data(USBPacket *p, bool setup); -+ +/* hcd-xhci.c -- rhel7.0.0 machine type compatibility */ +extern bool migrate_cve_2014_5263_xhci_fields; + #endif diff --git a/migration/migration.c b/migration/migration.c -index 354ad072fa..30c53c623b 100644 +index abaf6f9e3d..a87ff01b81 100644 --- a/migration/migration.c +++ b/migration/migration.c -@@ -121,6 +121,8 @@ enum mig_rp_message_type { - MIG_RP_MSG_MAX - }; +@@ -164,6 +164,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, + MIGRATION_CAPABILITY_X_COLO, + MIGRATION_CAPABILITY_VALIDATE_UUID); +bool migrate_pre_2_2; + @@ -655,10 +1051,10 @@ index 354ad072fa..30c53c623b 100644 migrations at once. For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index 79b3dda146..0b1b0d4df5 100644 +index 8130b703eb..d016cedd9d 100644 --- a/migration/migration.h +++ b/migration/migration.h -@@ -335,6 +335,11 @@ void init_dirty_bitmap_incoming_migration(void); +@@ -381,6 +381,11 @@ bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, void migrate_add_address(SocketAddress *address); int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); @@ -671,5 +1067,5 @@ index 79b3dda146..0b1b0d4df5 100644 #define qemu_ram_foreach_block \ #warning "Use foreach_not_ignored_block in migration code" -- -2.21.0 +2.27.0 diff --git a/SOURCES/0008-Add-aarch64-machine-types.patch b/SOURCES/0008-Add-aarch64-machine-types.patch index 5397c8b..2e8c417 100644 --- a/SOURCES/0008-Add-aarch64-machine-types.patch +++ b/SOURCES/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 49164264d9928f73961acbbe4d56d8dfa23d8099 Mon Sep 17 00:00:00 2001 
+From 670e90f5cbd92189155e079b8c6e2aafdf82d162 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -7,14 +7,14 @@ Adding changes to add RHEL machine types for aarch64 architecture. Signed-off-by: Miroslav Rezanina -Rebase changes (4.0.0): +Rebase notes (4.0.0): - Use upstream compat handling -Rebase changes (4.1.0-rc0): +Rebase notes (4.1.0-rc0): - Removed a15memmap (upstream) - Use virt_flash_create in rhel800_virt_instance_init -Rebase changes (4.2.0-rc0): +Rebase notes (4.2.0-rc0): - Set numa_mem_supported Rebase notes (4.2.0-rc3): @@ -23,6 +23,30 @@ Rebase notes (4.2.0-rc3): - aarch64: virt: Allow PCDIMM instantiation (patch 92247) - aarch64: virt: Enhance the comment related to gic-version (patch 92248) +Rebase notes (5.0.0): +- Set default_ram_id in rhel_machine_class_init +- Added setting acpi properties + +Rebase notes (5.1.0): +- Added ras property +- Added to virt_machine_device_unplug_cb to machine type (upstream) +- added mte property (upstream) + +Rebase notes (weekly-210210): +- Added support for oem fields to machine type + +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + +Rebase notes (6.0.0-rc2): +- renamed oem-id and oem-table-id to x-oem-id and x-oem-table-id + +Rebase notes (210623): +- Protect TPM functions by CONFIG_TPM ifdef + +Rebase notes (6.1.0-rc0): +- Add support for default_bus_bypass_iommu + Merged patches (4.0.0): - 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM - 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 @@ -32,25 +56,45 @@ Merged patches (4.1.0): - c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM - 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (5.2.0 rc0): +- 12990ad hw/arm: Changes to rhel820 machine +- 46d5a79 hw/arm: Introduce rhel_virt_instance_init() helper +- 098954a hw/arm: Add rhel830 machine type +- ee8e99d arm: Set correct max_cpus value on virt-rhel* machine types +- e5edd38 RHEL-only: arm/virt: Allow the TPM_TIS_SYSBUS device dynamic allocation in machvirt +- 6d7ba66 machine types/numa: set numa_mem_supported on old machine types (partialy) +- 25c5644 machine_types/numa: compatibility for auto_enable_numa_with_memdev (partialy) + +Merged patches (6.0): +- 078fadb5da AArch64 machine types cleanup +- ea7b7425fa hw/arm/virt: Add 8.4 Machine type + +Merged patches (weekly-210609): +- 73b1578882 hw/arm/virt: Add 8.5 machine type +- 5333038d11 hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 +- 63adb8ae86 arm/virt: Register highmem and gic-version as class properties + +Merged patches (weekly-211027): +- 86e3057c0a hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type --- - hw/arm/virt.c | 161 +++++++++++++++++++++++++++++++++++++++++- - include/hw/arm/virt.h | 11 +++ - 2 files changed, 171 insertions(+), 1 deletion(-) + hw/arm/virt.c | 226 +++++++++++++++++++++++++++++++++++++++++- + hw/core/machine.c | 2 + + include/hw/arm/virt.h | 8 ++ + 3 files changed, 235 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d4bedc2607..e10839100e 100644 +index 5de4d9d73b..c77d26ab13 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -72,6 +72,7 @@ - #include "hw/mem/nvdimm.h" - #include "hw/acpi/generic_event_device.h" +@@ -79,6 +79,7 @@ + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" +#if 0 /* Disabled for Red Hat Enterprise Linux */ #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -98,7 +99,49 @@ +@@ -105,7 +106,48 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ 
DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -72,7 +116,6 @@ index d4bedc2607..e10839100e 100644 + static const TypeInfo rhel##m##n##s##_machvirt_info = { \ + .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." # s), \ + .parent = TYPE_RHEL_MACHINE, \ -+ .instance_init = rhel##m##n##s##_virt_instance_init, \ + .class_init = rhel##m##n##s##_virt_class_init, \ + }; \ + static void rhel##m##n##s##_machvirt_init(void) \ @@ -101,7 +144,7 @@ index d4bedc2607..e10839100e 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -1763,6 +1806,7 @@ static void machvirt_init(MachineState *machine) +@@ -2180,6 +2222,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -109,23 +152,39 @@ index d4bedc2607..e10839100e 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1791,6 +1835,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2207,6 +2250,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) + vms->virt = value; } - +#endif /* disabled for RHEL */ + static bool virt_get_highmem(Object *obj, Error **errp) { +@@ -2304,6 +2348,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, + visit_type_OnOffAuto(v, name, &vms->acpi, errp); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_ras(Object *obj, Error **errp) + { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2022,6 +2067,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) - return requested_pa_size > 40 ? requested_pa_size : 0; +@@ -2331,6 +2376,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) + + vms->mte = value; + } ++#endif /* disabled for RHEL */ + + static char *virt_get_gic_version(Object *obj, Error **errp) + { +@@ -2666,6 +2712,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + return fixed_ipa ? 
0 : requested_pa_size; } +#if 0 /* Disabled for Red Hat Enterprise Linux */ static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -2258,3 +2304,116 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3031,3 +3078,180 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -138,11 +197,11 @@ index d4bedc2607..e10839100e 100644 + + mc->family = "virt-rhel-Z"; + mc->init = machvirt_init; -+ /* Start with max_cpus set to 512, which is the maximum supported by KVM. -+ * The value may be reduced later when we have more information about the -+ * configuration of the particular instance. -+ */ -+ mc->max_cpus = 512; ++ /* Maximum supported VCPU count for all virt-rhel* machines */ ++ mc->max_cpus = 384; ++#ifdef CONFIG_TPM ++ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); ++#endif + mc->block_default_type = IF_VIRTIO; + mc->no_cdrom = 1; + mc->pci_allow_0_address = true; @@ -158,57 +217,65 @@ index d4bedc2607..e10839100e 100644 + hc->pre_plug = virt_machine_device_pre_plug_cb; + hc->plug = virt_machine_device_plug_cb; + hc->unplug_request = virt_machine_device_unplug_request_cb; -+ mc->numa_mem_supported = true; ++ hc->unplug = virt_machine_device_unplug_cb; ++ mc->nvdimm_supported = true; + mc->auto_enable_numa_with_memhp = true; -+} ++ mc->auto_enable_numa_with_memdev = true; ++ mc->default_ram_id = "mach-virt.ram"; + -+static const TypeInfo rhel_machine_info = { -+ .name = TYPE_RHEL_MACHINE, -+ .parent = TYPE_MACHINE, -+ .abstract = true, -+ .instance_size = sizeof(VirtMachineState), -+ .class_size = sizeof(VirtMachineClass), -+ .class_init = rhel_machine_class_init, -+ .interfaces = (InterfaceInfo[]) { -+ { TYPE_HOTPLUG_HANDLER }, -+ { } -+ }, -+}; ++ object_class_property_add(oc, "acpi", "OnOffAuto", ++ virt_get_acpi, virt_set_acpi, ++ NULL, NULL); ++ object_class_property_set_description(oc, "acpi", ++ "Enable ACPI"); ++ ++ 
object_class_property_add_bool(oc, "highmem", virt_get_highmem, ++ virt_set_highmem); ++ object_class_property_set_description(oc, "highmem", ++ "Set on/off to enable/disable using " ++ "physical address space above 32 bits"); ++ ++ object_class_property_add_str(oc, "gic-version", virt_get_gic_version, ++ virt_set_gic_version); ++ object_class_property_set_description(oc, "gic-version", ++ "Set GIC version. " ++ "Valid values are 2, 3, host and max"); ++ ++ object_class_property_add_str(oc, "x-oem-id", ++ virt_get_oem_id, ++ virt_set_oem_id); ++ object_class_property_set_description(oc, "x-oem-id", ++ "Override the default value of field OEMID " ++ "in ACPI table header." ++ "The string may be up to 6 bytes in size"); ++ ++ object_class_property_add_str(oc, "x-oem-table-id", ++ virt_get_oem_table_id, ++ virt_set_oem_table_id); ++ object_class_property_set_description(oc, "x-oem-table-id", ++ "Override the default value of field OEM Table ID " ++ "in ACPI table header." ++ "The string may be up to 8 bytes in size"); ++ object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ virt_get_default_bus_bypass_iommu, ++ virt_set_default_bus_bypass_iommu); + -+static void rhel_machine_init(void) -+{ -+ type_register_static(&rhel_machine_info); +} -+type_init(rhel_machine_init); + -+static void rhel820_virt_instance_init(Object *obj) ++static void rhel_virt_instance_init(Object *obj) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); + + /* EL3 is disabled by default and non-configurable for RHEL */ + vms->secure = false; ++ + /* EL2 is disabled by default and non-configurable for RHEL */ + vms->virt = false; -+ /* High memory is enabled by default for RHEL */ ++ ++ /* High memory is enabled by default */ + vms->highmem = true; -+ object_property_add_bool(obj, "highmem", virt_get_highmem, -+ virt_set_highmem, NULL); -+ object_property_set_description(obj, "highmem", -+ "Set on/off to enable/disable using " -+ 
"physical address space above 32 bits", -+ NULL); -+ /* -+ * Default GIC type is still v2, but became configurable for RHEL. We -+ * keep v2 instead of max as TCG CI test cases require an MSI controller -+ * and there is no userspace ITS MSI emulation available. -+ */ -+ vms->gic_version = 2; -+ object_property_add_str(obj, "gic-version", virt_get_gic_version, -+ virt_set_gic_version, NULL); -+ object_property_set_description(obj, "gic-version", -+ "Set GIC version. " -+ "Valid values are 2, 3 and host", NULL); ++ vms->gic_version = VIRT_GIC_VERSION_NOSEL; + + vms->highmem_ecam = !vmc->no_highmem_ecam; + @@ -218,59 +285,121 @@ index d4bedc2607..e10839100e 100644 + /* Default allows ITS instantiation */ + vms->its = true; + object_property_add_bool(obj, "its", virt_get_its, -+ virt_set_its, NULL); ++ virt_set_its); + object_property_set_description(obj, "its", + "Set on/off to enable/disable " -+ "ITS instantiation", -+ NULL); ++ "ITS instantiation"); + } + + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; -+ object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu, NULL); ++ object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu); + object_property_set_description(obj, "iommu", + "Set the IOMMU type. 
" -+ "Valid values are none and smmuv3", -+ NULL); ++ "Valid values are none and smmuv3"); ++ ++ /* Default disallows RAS instantiation and is non-configurable for RHEL */ ++ vms->ras = false; ++ ++ /* MTE is disabled by default and non-configurable for RHEL */ ++ vms->mte = false; ++ ++ vms->default_bus_bypass_iommu = false; ++ vms->irqmap = a15irqmap; + -+ vms->irqmap=a15irqmap; + virt_flash_create(vms); ++ vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); ++ vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); ++ +} + -+static void rhel820_virt_options(MachineClass *mc) ++static const TypeInfo rhel_machine_info = { ++ .name = TYPE_RHEL_MACHINE, ++ .parent = TYPE_MACHINE, ++ .abstract = true, ++ .instance_size = sizeof(VirtMachineState), ++ .class_size = sizeof(VirtMachineClass), ++ .class_init = rhel_machine_class_init, ++ .instance_init = rhel_virt_instance_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_HOTPLUG_HANDLER }, ++ { } ++ }, ++}; ++ ++static void rhel_machine_init(void) ++{ ++ type_register_static(&rhel_machine_info); ++} ++type_init(rhel_machine_init); ++ ++static void rhel850_virt_options(MachineClass *mc) +{ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) ++ ++static void rhel840_virt_options(MachineClass *mc) ++{ ++ rhel850_virt_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); ++} ++DEFINE_RHEL_MACHINE(8, 4, 0) ++ ++static void rhel830_virt_options(MachineClass *mc) ++{ ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ ++ rhel840_virt_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); ++ vmc->no_kvm_steal_time = true; ++} ++DEFINE_RHEL_MACHINE(8, 3, 0) ++ ++static void rhel820_virt_options(MachineClass *mc) ++{ ++ rhel830_virt_options(mc); ++ compat_props_add(mc->compat_props, 
hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); ++ mc->numa_mem_supported = true; ++ mc->auto_enable_numa_with_memdev = false; +} -+DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) ++DEFINE_RHEL_MACHINE(8, 2, 0) +diff --git a/hw/core/machine.c b/hw/core/machine.c +index be4f9864cd..62febde5aa 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -87,6 +87,8 @@ GlobalProperty hw_compat_rhel_8_3[] = { + { "nvme", "use-intel-id", "on"}, + /* hw_compat_rhel_8_3 from hw_compat_5_1 */ + { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pl011", "migrate-clk", "off" }, + /* hw_compat_rhel_8_3 bz 1912846 */ + { "pci-xhci", "x-rh-late-msi-cap", "off" }, + /* hw_compat_rhel_8_3 from hw_compat_5_1 */ diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 0b41083e9d..53fdf16563 100644 +index dc6b66ffc8..9364628847 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -142,6 +142,7 @@ typedef struct { +@@ -175,9 +175,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) +#if 0 /* disabled for Red Hat Enterprise Linux */ #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") - #define VIRT_MACHINE(obj) \ - OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) -@@ -150,6 +151,16 @@ typedef struct { - #define VIRT_MACHINE_CLASS(klass) \ - OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) + OBJECT_DECLARE_TYPE(VirtMachineState, VirtMachineClass, VIRT_MACHINE) +#else +#define TYPE_RHEL_MACHINE MACHINE_TYPE_NAME("virt-rhel") -+#define VIRT_MACHINE(obj) \ -+ OBJECT_CHECK(VirtMachineState, (obj), TYPE_RHEL_MACHINE) -+#define VIRT_MACHINE_GET_CLASS(obj) \ -+ OBJECT_GET_CLASS(VirtMachineClass, obj, TYPE_RHEL_MACHINE) -+#define VIRT_MACHINE_CLASS(klass) \ -+ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_RHEL_MACHINE) ++typedef struct VirtMachineClass VirtMachineClass; ++typedef struct VirtMachineState VirtMachineState; ++DECLARE_OBJ_CHECKERS(VirtMachineState, VirtMachineClass, VIRT_MACHINE, TYPE_RHEL_MACHINE) +#endif + void virt_acpi_setup(VirtMachineState *vms); + bool virt_is_acpi_enabled(VirtMachineState *vms); - /* Return the number of used redistributor regions */ -- -2.21.0 +2.27.0 diff --git a/SOURCES/0009-Add-ppc64-machine-types.patch b/SOURCES/0009-Add-ppc64-machine-types.patch index a3f1a54..f5ce09a 100644 --- a/SOURCES/0009-Add-ppc64-machine-types.patch +++ b/SOURCES/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 136eae41007e2e5b0d693cc656f3ec36cbabf16f Mon Sep 17 00:00:00 2001 +From 3c65320ce5b8ad3bb8c0d8fd13a88c464d5c5845 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -18,6 +18,9 @@ Rebase changes (4.0.0): Rebase changes (4.1.0): - Update format for compat structures +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + Merged patches (4.0.0): - 467d59a redhat: define pseries-rhel8.0.0 machine type @@ -31,36 +34,99 @@ Merged patches (4.2.0): - redhat: update pseries-rhel-7.6.0 machine 
type (patch 93039) - redhat: define pseries-rhel8.2.0 machine type (patch 93041) -Signed-off-by: Danilo C. L. de Paula +Merged patches (5.1.0): +- eb121ff spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine (partial) + +Merged patches (5.2.0 rc0): +- 311a20f redhat: define pseries-rhel8.3.0 machine type +- 1284167 ppc: Set correct max_cpus value on spapr-rhel* machine types +- 1ab8783 redhat: update pseries-rhel8.2.0 machine type +- b162af531a target/ppc: Add experimental option for enabling secure guests + +Merged patches (weekly-201216): +- 943c936df3 redhat: Add spapr_machine_rhel_default_class_options() +- 030b5e6fba redhat: Define pseries-rhel8.4.0 machine type + +Merged patches (weekly-210602): +- b7128d8ef7 redhat: Define pseries-rhel8.5.0 machine type + +Merged patches (weekly-211006): +- c8f68b47e9 redhat: Update pseries-rhel8.5.0 --- - hw/ppc/spapr.c | 278 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr.c | 382 ++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_cpu_core.c | 13 ++ - include/hw/ppc/spapr.h | 1 + + include/hw/ppc/spapr.h | 4 + target/ppc/compat.c | 13 +- target/ppc/cpu.h | 1 + - 5 files changed, 305 insertions(+), 1 deletion(-) + target/ppc/kvm.c | 27 +++ + target/ppc/kvm_ppc.h | 13 ++ + 7 files changed, 452 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index e076f6023c..8749c72066 100644 +index 3b5fd749be..cace86028d 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -4447,6 +4447,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) - smc->linux_pci_probe = true; - smc->smp_threads_vsmt = true; - smc->nr_xirqs = SPAPR_NR_XIRQS; +@@ -1593,6 +1593,9 @@ static void spapr_machine_reset(MachineState *machine) + + pef_kvm_reset(machine->cgs, &error_fatal); + spapr_caps_apply(spapr); ++ if (spapr->svm_allowed) { ++ kvmppc_svm_allow(&error_fatal); ++ } + + first_ppc_cpu = POWERPC_CPU(first_cpu); + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && +@@ -3288,6 +3291,20 
@@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) + spapr->host_serial = g_strdup(value); + } + ++static bool spapr_get_svm_allowed(Object *obj, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ return spapr->svm_allowed; ++} ++ ++static void spapr_set_svm_allowed(Object *obj, bool value, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ spapr->svm_allowed = value; ++} ++ + static void spapr_instance_init(Object *obj) + { + SpaprMachineState *spapr = SPAPR_MACHINE(obj); +@@ -3366,6 +3383,12 @@ static void spapr_instance_init(Object *obj) + spapr_get_host_serial, spapr_set_host_serial); + object_property_set_description(obj, "host-serial", + "Host serial number to advertise in guest device tree"); ++ object_property_add_bool(obj, "x-svm-allowed", ++ spapr_get_svm_allowed, ++ spapr_set_svm_allowed); ++ object_property_set_description(obj, "x-svm-allowed", ++ "Allow the guest to become a Secure Guest" ++ " (experimental only)"); + } + + static void spapr_machine_finalizefn(Object *obj) +@@ -4614,6 +4637,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + vmc->client_architecture_support = spapr_vof_client_architecture_support; + vmc->quiesce = spapr_vof_quiesce; + vmc->setprop = spapr_vof_setprop; + smc->has_power9_support = true; } static const TypeInfo spapr_machine_info = { -@@ -4491,6 +4492,7 @@ static const TypeInfo spapr_machine_info = { +@@ -4665,6 +4689,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-4.2 + * pseries-6.2 */ -@@ -4520,6 +4522,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4781,6 +4806,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -68,46 +134,126 @@ index e076f6023c..8749c72066 100644 /* * pseries-4.0 -@@ -4536,6 +4539,7 @@ 
static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4800,6 +4826,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; + return true; } - ++ +#if 0 /* Disabled for Red Hat Enterprise Linux */ static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4695,6 +4699,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); +@@ -4958,6 +4986,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); /* * pseries-2.7 */ +#endif - static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, -@@ -4749,6 +4754,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, - *nv2atsd = 0; +@@ -5013,6 +5042,7 @@ static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + return true; } +#if 0 /* Disabled for Red Hat Enterprise Linux */ static void spapr_machine_2_7_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4863,6 +4869,278 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5127,6 +5157,358 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); +#endif + ++static void spapr_machine_rhel_default_class_options(MachineClass *mc) ++{ ++ /* ++ * Defaults for the latest behaviour inherited from the base class ++ * can be overridden here for all pseries-rhel* machines.
++ */ ++ ++ /* Maximum supported VCPU count */ ++ mc->max_cpus = 384; ++} ++ ++/* ++ * pseries-rhel8.5.0 ++ * like pseries-6.0 ++ */ ++ ++static void spapr_machine_rhel850_class_options(MachineClass *mc) ++{ ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); ++ ++/* ++ * pseries-rhel8.4.0 ++ * like pseries-5.2 ++ */ ++ ++static void spapr_machine_rhel840_class_options(MachineClass *mc) ++{ ++ spapr_machine_rhel850_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", false); ++ ++/* ++ * pseries-rhel8.3.0 ++ * like pseries-5.1 ++ */ ++ ++static void spapr_machine_rhel830_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel840_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ ++ /* from pseries-5.1 */ ++ smc->pre_5_2_numa_associativity = true; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", false); ++ +/* + * pseries-rhel8.2.0 ++ * like pseries-4.2 + pseries-5.0 ++ * except SPAPR_CAP_CCF_ASSIST that has been backported to pseries-rhel8.1.0 + */ + +static void spapr_machine_rhel820_class_options(MachineClass *mc) +{ -+ /* Defaults for the latest behaviour inherited from the base class */ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ /* from pseries-5.0 */ ++ static GlobalProperty compat[] = { ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" }, ++ }; ++ ++ spapr_machine_rhel830_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); ++ ++ /* from pseries-4.2 */ ++ 
smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; ++ smc->rma_limit = 16 * GiB; ++ mc->nvdimm_supported = false; ++ ++ /* from pseries-5.0 */ ++ mc->numa_mem_supported = true; ++ smc->pre_5_1_assoc_refpoints = true; +} + -+DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", true); ++DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", false); + +/* + * pseries-rhel8.1.0 @@ -131,6 +277,8 @@ index e076f6023c..8749c72066 100644 + hw_compat_rhel_8_1_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; +} + +DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); @@ -270,7 +418,6 @@ index e076f6023c..8749c72066 100644 + spapr_machine_rhel750_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); + compat_props_add(mc->compat_props, spapr_compat_rhel7_4, spapr_compat_rhel7_4_len); -+ mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram; + smc->has_power9_support = false; + smc->pre_2_10_has_unused_icps = true; + smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; @@ -372,7 +519,7 @@ index e076f6023c..8749c72066 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 301cd7b4e4..ba5a8fb82b 100644 +index 8ba34f6a1d..78eca1c04a 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -24,6 +24,7 @@ @@ -383,15 +530,15 @@ index 301cd7b4e4..ba5a8fb82b 100644 static void spapr_reset_vcpu(PowerPCCPU *cpu) { -@@ -242,6 +243,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -250,6 +251,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + { CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); - Error *local_err = NULL; + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); - object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); - if (local_err) { -@@ -254,6 +256,17 @@ static 
void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + if (!qdev_realize(DEVICE(cpu), NULL, errp)) { + return false; +@@ -261,6 +263,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); kvmppc_set_papr(cpu); @@ -403,29 +550,39 @@ index 301cd7b4e4..ba5a8fb82b 100644 + ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, 0)))) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "POWER9 CPU is not supported by this machine class"); -+ return; ++ return false; + } + - if (spapr_irq_cpu_intc_create(spapr, cpu, &local_err) < 0) { - goto error_intc_create; - } + if (spapr_irq_cpu_intc_create(spapr, cpu, errp) < 0) { + qdev_unrealize(DEVICE(cpu)); + return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index d5ab5ea7b2..aa89cc4a95 100644 +index ee7504b976..37a014d59c 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -125,6 +125,7 @@ struct SpaprMachineClass { - bool linux_pci_probe; - bool smp_threads_vsmt; /* set VSMT to smp_threads by default */ +@@ -154,6 +154,7 @@ struct SpaprMachineClass { + bool pre_5_2_numa_associativity; + bool pre_6_2_numa_affinity; + bool has_power9_support; - void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, - uint64_t *buid, hwaddr *pio, + bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, +@@ -237,6 +238,9 @@ struct SpaprMachineState { + + /* Set by -boot */ + char *boot_device; ++ ++ /* Secure Guest support via x-svm-allowed */ ++ bool svm_allowed; + + /*< public >*/ + char *kvm_type; diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index 7de4bf3122..3e2e35342d 100644 +index 7949a24f5a..f207a9ba01 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c -@@ -105,8 +105,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) +@@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) return NULL; 
} @@ -447,10 +604,10 @@ index 7de4bf3122..3e2e35342d 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index e3e82327b7..5c53801cfd 100644 +index e946da5f3a..23e8b76c85 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1367,6 +1367,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1401,6 +1401,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -458,6 +615,100 @@ index e3e82327b7..5c53801cfd 100644 bool ppc_check_compat(PowerPCCPU *cpu, uint32_t compat_pvr, uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, +diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c +index dc93b99189..154888cce5 100644 +--- a/target/ppc/kvm.c ++++ b/target/ppc/kvm.c +@@ -90,6 +90,7 @@ static int cap_ppc_nested_kvm_hv; + static int cap_large_decr; + static int cap_fwnmi; + static int cap_rpt_invalidate; ++static int cap_ppc_secure_guest; + + static uint32_t debug_inst_opcode; + +@@ -137,6 +138,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); + kvmppc_get_cpu_characteristics(s); + cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); ++ cap_ppc_secure_guest = kvm_vm_check_extension(s, KVM_CAP_PPC_SECURE_GUEST); + cap_large_decr = kvmppc_get_dec_bits(); + cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); + /* +@@ -2563,6 +2565,16 @@ int kvmppc_has_cap_rpt_invalidate(void) + return cap_rpt_invalidate; + } + ++bool kvmppc_has_cap_secure_guest(void) ++{ ++ return !!cap_ppc_secure_guest; ++} ++ ++int kvmppc_enable_cap_secure_guest(void) ++{ ++ return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); ++} ++ + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) + { + uint32_t host_pvr = mfpvr(); +@@ 
-2959,3 +2971,18 @@ bool kvm_arch_cpu_check_are_resettable(void) + { + return true; + } ++ ++void kvmppc_svm_allow(Error **errp) ++{ ++ if (!kvm_enabled()) { ++ error_setg(errp, "No PEF support in tcg, try x-svm-allowed=off"); ++ return; ++ } ++ ++ if (!kvmppc_has_cap_secure_guest()) { ++ error_setg(errp, "KVM implementation does not support secure guests, " ++ "try x-svm-allowed=off"); ++ } else if (kvmppc_enable_cap_secure_guest() < 0) { ++ error_setg(errp, "Error enabling x-svm-allowed, try x-svm-allowed=off"); ++ } ++} +diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h +index ee9325bf9a..20dbb95989 100644 +--- a/target/ppc/kvm_ppc.h ++++ b/target/ppc/kvm_ppc.h +@@ -40,6 +40,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); + target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, + bool radix, bool gtse, + uint64_t proc_tbl); ++void kvmppc_svm_allow(Error **errp); + #ifndef CONFIG_USER_ONLY + bool kvmppc_spapr_use_multitce(void); + int kvmppc_spapr_enable_inkernel_multitce(void); +@@ -74,6 +75,8 @@ int kvmppc_get_cap_large_decr(void); + int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); + int kvmppc_has_cap_rpt_invalidate(void); + int kvmppc_enable_hwrng(void); ++bool kvmppc_has_cap_secure_guest(void); ++int kvmppc_enable_cap_secure_guest(void); + int kvmppc_put_books_sregs(PowerPCCPU *cpu); + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); + void kvmppc_check_papr_resize_hpt(Error **errp); +@@ -393,6 +396,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) + return false; + } + ++static inline bool kvmppc_has_cap_secure_guest(void) ++{ ++ return false; ++} ++ ++static inline int kvmppc_enable_cap_secure_guest(void) ++{ ++ return -1; ++} ++ + static inline int kvmppc_enable_hwrng(void) + { + return -1; -- -2.21.0 +2.27.0 diff --git a/SOURCES/0010-Add-s390x-machine-types.patch b/SOURCES/0010-Add-s390x-machine-types.patch index d0f6669..fbb8841 100644 --- a/SOURCES/0010-Add-s390x-machine-types.patch +++ 
b/SOURCES/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 0842700b3a01891c316e9169fa651f26714cafa5 Mon Sep 17 00:00:00 2001 +From 4ad9a0d0582eef78946b47563eb2c5b7ddf0cbb0 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -10,6 +10,9 @@ Signed-off-by: Miroslav Rezanina Rebase changes (weekly-4.1.0): - Use upstream compat handling +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + Merged patches (3.1.0): - 29df663 s390x/cpumodel: default enable bpb and ppa15 for z196 and later @@ -21,16 +24,23 @@ Merged patches (4.2.0): - a9b22e8 redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine - hw/s390x: Add the s390-ccw-virtio-rhel8.2.0 machine types (patch 92954) -Signed-off-by: Danilo C. L. de Paula +Merged patches (weekly-201216): +- a6ae745cce redhat: s390x: add rhel-8.4.0 compat machine + +Merged patches (weekly-210602): +- 50835d3429 redhat: s390x: add rhel-8.5.0 compat machine + +Merged patches (weekly-211006): +- a3bcde27fe redhat: Add s390x machine type compatibility update for 6.1 rebase --- - hw/s390x/s390-virtio-ccw.c | 70 +++++++++++++++++++++++++++++++++++++- - 1 file changed, 69 insertions(+), 1 deletion(-) + hw/s390x/s390-virtio-ccw.c | 99 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index d3edeef0ad..c2c83d2fce 100644 +index 653587ea62..181856e6cf 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -615,7 +615,7 @@ bool css_migration_enabled(void) +@@ -767,7 +767,7 @@ bool css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -38,29 +48,57 @@ index d3edeef0ad..c2c83d2fce 100644 + mc->desc = "VirtIO-ccw based S390 machine " verstr; \ if (latest) { \ mc->alias = "s390-ccw-virtio"; \ - mc->is_default = 1; \ -@@ -639,6 +639,7 @@ bool 
css_migration_enabled(void) + mc->is_default = true; \ +@@ -791,6 +791,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_4_2_instance_options(MachineState *machine) + static void ccw_machine_6_2_instance_options(MachineState *machine) { } -@@ -866,6 +867,73 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1100,6 +1101,102 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); +#endif + ++static void ccw_machine_rhel850_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel850_class_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++} ++DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); ++ ++static void ccw_machine_rhel840_instance_options(MachineState *machine) ++{ ++ ccw_machine_rhel850_instance_options(machine); ++} ++ ++static void ccw_machine_rhel840_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel850_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); ++} ++DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", false); ++ +static void ccw_machine_rhel820_instance_options(MachineState *machine) +{ ++ ccw_machine_rhel840_instance_options(machine); +} + +static void ccw_machine_rhel820_class_options(MachineClass *mc) +{ ++ ccw_machine_rhel840_class_options(mc); ++ mc->fixup_ram_size = s390_fixup_ram_size; ++ /* we did not publish a rhel8.3.0 machine */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); +} -+DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", true); ++DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", false); + +static void ccw_machine_rhel760_instance_options(MachineState *machine) +{ 
@@ -82,6 +120,7 @@ index d3edeef0ad..c2c83d2fce 100644 +{ + ccw_machine_rhel820_class_options(mc); + /* We never published the s390x version of RHEL-AV 8.0 and 8.1, so add this here */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); + compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); + compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); +} @@ -115,12 +154,12 @@ index d3edeef0ad..c2c83d2fce 100644 + ccw_machine_rhel760_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); + compat_props_add(mc->compat_props, ccw_compat_rhel_7_5, ccw_compat_rhel_7_5_len); -+ S390_MACHINE_CLASS(mc)->hpage_1m_allowed = false; ++ S390_CCW_MACHINE_CLASS(mc)->hpage_1m_allowed = false; +} +DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); static void ccw_machine_register_types(void) { -- -2.21.0 +2.27.0 diff --git a/SOURCES/0011-Add-x86_64-machine-types.patch b/SOURCES/0011-Add-x86_64-machine-types.patch index 72a5159..2702772 100644 --- a/SOURCES/0011-Add-x86_64-machine-types.patch +++ b/SOURCES/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 2ebaeca6e26950f401a8169d1324be2bafd11741 Mon Sep 17 00:00:00 2001 +From c2b3564ce466bc5069bf9f5b0694025c68b0858d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -19,6 +19,15 @@ Rebase notes (4.1.0): Rebase notes (4.2.0-rc2): - Use X86MachineClass for save_tsc_khz (upstream change) +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + +Rebase notes (weekly-210519): +- kvm_default_props moved to new file (upstream) + +Rebase notes (6.2.0-rc0): +- linuxboot_dma_enabled moved to X86MachineState + Merged patches (4.1.0): - f4dc802 pc: 7.5 compat entries - 456ed3e pc: PC_RHEL7_6_COMPAT @@ -37,23 +46,75 @@ Merged patches (4.2.0): - 0784125 x86 machine types: add pc-q35-rhel8.1.0 - machines/x86: Add rhel 8.2 machine type (patch 
92959) -Signed-off-by: Danilo C. L. de Paula +Merged patches (5.1.0): +- 481357e RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR support +- e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) + +Merged patches (5.2.0 rc0): +- b02c9f5 x86: Add 8.3.0 x86_64 machine type +- f2edc4f q35: Set max_cpus to 512 +- 6d7ba66 machine types/numa: set numa_mem_supported on old machine types (partialy) +- 25c5644 machine_types/numa: compatibility for auto_enable_numa_with_memdev (partialy) +- e2d3209 x86: lpc9: let firmware negotiate 'CPU hotplug with SMI' features (partialy) + +Merged patches (weekly-210120): +- d0afeaa0c4 RHEL: Switch pvpanic test to q35 +- e19cdad83c 8.4 x86 machine type + +Merged patches (weekly-210203): +- 96f8781bd6 q35: Increase max_cpus to 710 on pc-q35-rhel8* machine types + +Merged patches (weekly-210224): +- 70d3924521 redhat: Add some devices for exporting upstream machine types + - machine type chunks only + +Merged patches (6.0.0 rc0): +- 031c690804 i386/acpi: restore device paths for pre-5.1 vms + +Merged patches (weekly-210623): +- 64c350696f x86: Add x86 rhel8.5 machine types +- 1c8fe5e164 redhat: x86: Enable 'kvm-asyncpf-int' by default + +Merged patches (weekly-210714): +- 618e2424ed redhat: Expose upstream machines pc-4.2 and pc-2.11 +- c4d1aa8bf2 redhat: Enable FDC device for upstream machines too +- 66882f9a32 redhat: Add hw_compat_4_2_extra and apply to upstream machines + +Fix machine type --- - hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 263 ++++++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_piix.c | 210 +++++++++++++++++++++++++++++++++- - hw/i386/pc_q35.c | 156 ++++++++++++++++++++++++- - include/hw/boards.h | 2 + - include/hw/i386/pc.h | 33 ++++++ - target/i386/cpu.c | 9 +- - target/i386/kvm.c | 4 + - 8 files changed, 673 insertions(+), 7 deletions(-) + hw/block/fdc.c | 5 +- + hw/i386/acpi-build.c | 3 + + hw/i386/pc.c | 298 ++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 274 
+++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 234 ++++++++++++++++++++++++++++- + include/hw/boards.h | 2 + + include/hw/i386/pc.h | 45 ++++++ + target/i386/kvm/kvm-cpu.c | 1 + + target/i386/kvm/kvm.c | 4 + + tests/qtest/pvpanic-test.c | 5 +- + 10 files changed, 862 insertions(+), 9 deletions(-) +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 97fa6de423..63042ef030 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -2341,7 +2341,10 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) + + /* Restricted for Red Hat Enterprise Linux: */ + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- if (!strstr(mc->name, "-rhel7.")) { ++ if (!strstr(mc->name, "-rhel7.") && ++ /* The two exported upstream machine types allow FDC too */ ++ strcmp(mc->name, "pc-i440fx-4.2") && ++ strcmp(mc->name, "pc-i440fx-2.11")) { + error_setg(errp, "Device %s is not supported with machine type %s", + object_get_typename(OBJECT(dev)), mc->name); + return; diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 12ff55fcfb..64001893ab 100644 +index a99c6e4fe3..447ea35275 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c -@@ -204,6 +204,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) +@@ -230,6 +230,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) pm->fadt.reset_reg = r; pm->fadt.reset_val = 0xf; pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; @@ -61,13 +122,13 @@ index 12ff55fcfb..64001893ab 100644 + "__com.redhat_force-rev1-fadt", NULL)) + pm->fadt.rev = 1; pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; - } - + pm->smi_on_cpuhp = + !!(smi_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT)); diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index ac08e63604..61e70e4811 100644 +index a2ef40ecbc..e8109954ca 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -344,6 +344,261 @@ GlobalProperty pc_compat_1_4[] = { +@@ -371,6 +371,296 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t
pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -78,11 +139,34 @@ index ac08e63604..61e70e4811 100644 +GlobalProperty pc_rhel_compat[] = { + { TYPE_X86_CPU, "host-phys-bits", "on" }, + { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, ++ { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, ++ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, + /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, ++ /* bz 1941397 */ ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "on" }, +}; +const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_4_compat[] = { ++ /* pc_rhel_8_4_compat from pc_compat_5_2 */ ++ { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "off" }, ++}; ++const size_t pc_rhel_8_4_compat_len = G_N_ELEMENTS(pc_rhel_8_4_compat); ++ ++GlobalProperty pc_rhel_8_3_compat[] = { ++ /* pc_rhel_8_3_compat from pc_compat_5_1 */ ++ { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, ++}; ++const size_t pc_rhel_8_3_compat_len = G_N_ELEMENTS(pc_rhel_8_3_compat); ++ ++GlobalProperty pc_rhel_8_2_compat[] = { ++ /* pc_rhel_8_2_compat from pc_compat_4_2 */ ++ { "mch", "smbase-smram", "off" }, ++}; ++const size_t pc_rhel_8_2_compat_len = G_N_ELEMENTS(pc_rhel_8_2_compat); ++ +/* pc_rhel_8_1_compat is empty since pc_4_1_compat is */ +GlobalProperty pc_rhel_8_1_compat[] = { }; +const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); @@ -326,10 +410,22 @@ index ac08e63604..61e70e4811 100644 +}; +const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat); + - void gsi_handler(void *opaque, int n, int level) ++/* ++ * RHEL: These properties only apply to the RHEL exported machine types ++ * pc-4.2/2.11 for the purpose to have a limited upstream machines support ++ * which can be migrated to RHEL. Let's avoid touching hw_compat_4_2 directly ++ * so that we can have some isolation against the upstream code. 
++ */ ++GlobalProperty hw_compat_4_2_extra[] = { ++ /* By default enlarge the default virtio-net-pci ROM to 512KB. */ ++ { "virtio-net-pci", "romsize", "0x80000" }, ++}; ++const size_t hw_compat_4_2_extra_len = G_N_ELEMENTS(hw_compat_4_2_extra); ++ + GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { - GSIState *s = opaque; -@@ -1225,7 +1480,8 @@ void pc_memory_init(PCMachineState *pcms, + GSIState *s; +@@ -904,7 +1194,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, &error_fatal); @@ -339,18 +435,18 @@ index ac08e63604..61e70e4811 100644 memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -2198,6 +2454,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - pcmc->linuxboot_dma_enabled = true; +@@ -1694,6 +1985,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->pvh_enabled = true; + pcmc->kvmclock_create_always = true; assert(!mc->get_hotplug_handler); + pcmc->pc_rom_ro = true; + mc->async_pf_vmexit_disable = false; mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -2209,7 +2467,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - mc->hot_add_cpu = pc_hot_add_cpu; - mc->smp_parse = pc_smp_parse; +@@ -1704,7 +1997,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->has_hotpluggable_cpus = true; + mc->default_boot_order = "cad"; mc->block_default_type = IF_IDE; - mc->max_cpus = 255; + /* 240: max CPU count for RHEL */ @@ -359,18 +455,18 @@ index ac08e63604..61e70e4811 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 1bd70d1abb..bd7fdb99bb 100644 +index dda3f64f19..2885edffe9 100644 --- a/hw/i386/pc_piix.c +++ 
b/hw/i386/pc_piix.c -@@ -53,6 +53,7 @@ - #include "cpu.h" +@@ -50,6 +50,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" + #include "sysemu/xen.h" +#include "migration/migration.h" #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -173,8 +174,8 @@ static void pc_init1(MachineState *machine, +@@ -174,8 +175,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -379,19 +475,191 @@ index 1bd70d1abb..bd7fdb99bb 100644 + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, - SMBIOS_ENTRY_POINT_21); - } -@@ -307,6 +308,7 @@ else { + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -314,6 +315,15 @@ static void pc_init1(MachineState *machine, * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). */ ++/* ++ * NOTE! Not all the upstream machine types are disabled for RHEL. For ++ * providing a very limited support for upstream machine types, pc machines ++ * 2.11 and 4.2 are exposed explicitly. This will make the below "#if" macros ++ * a bit messed up, but please read this comment first so that we can have a ++ * rough understanding of what we're going to do. 
++ */ ++ +#if 0 /* Disabled for Red Hat Enterprise Linux */ static void pc_compat_2_3_fn(MachineState *machine) { - PCMachineState *pcms = PC_MACHINE(machine); -@@ -1026,3 +1028,207 @@ static void xenfv_machine_options(MachineClass *m) - DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, - xenfv_machine_options); + X86MachineState *x86ms = X86_MACHINE(machine); +@@ -389,6 +399,8 @@ static void pc_xen_hvm_init(MachineState *machine) + } + #endif + ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ + #define DEFINE_I440FX_MACHINE(suffix, name, compatfn, optionfn) \ + static void pc_init_##suffix(MachineState *machine) \ + { \ +@@ -424,8 +436,10 @@ static void pc_i440fx_6_2_machine_options(MachineClass *m) + pcmc->default_cpu_version = 1; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v6_2, "pc-i440fx-6.2", NULL, + pc_i440fx_6_2_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_6_1_machine_options(MachineClass *m) + { +@@ -437,8 +451,10 @@ static void pc_i440fx_6_1_machine_options(MachineClass *m) + m->smp_props.prefer_sockets = true; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v6_1, "pc-i440fx-6.1", NULL, + pc_i440fx_6_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_6_0_machine_options(MachineClass *m) + { +@@ -449,8 +465,10 @@ static void pc_i440fx_6_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v6_0, "pc-i440fx-6.0", NULL, + pc_i440fx_6_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_2_machine_options(MachineClass *m) + { +@@ -461,8 +479,10 @@ static void pc_i440fx_5_2_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_5_2, pc_compat_5_2_len); + } + ++#if 0 /* Disabled for 
Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_2, "pc-i440fx-5.2", NULL, + pc_i440fx_5_2_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_1_machine_options(MachineClass *m) + { +@@ -477,8 +497,10 @@ static void pc_i440fx_5_1_machine_options(MachineClass *m) + pcmc->pci_root_uid = 1; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_1, "pc-i440fx-5.1", NULL, + pc_i440fx_5_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_0_machine_options(MachineClass *m) + { +@@ -491,8 +513,10 @@ static void pc_i440fx_5_0_machine_options(MachineClass *m) + m->auto_enable_numa_with_memdev = false; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_0, "pc-i440fx-5.0", NULL, + pc_i440fx_5_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_4_2_machine_options(MachineClass *m) + { +@@ -501,8 +525,21 @@ static void pc_i440fx_4_2_machine_options(MachineClass *m) + m->is_default = false; + compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len); + compat_props_add(m->compat_props, pc_compat_4_2, pc_compat_4_2_len); ++ ++ /* ++ * RHEL: Mark all upstream machines as deprecated because they're not ++ * supported by RHEL, even if exported. ++ */ ++ m->deprecation_reason = "Not supported by RHEL"; ++ /* ++ * RHEL: Specific compat properties to have limited support for upstream ++ * machines exported. 
++ */ ++ compat_props_add(m->compat_props, hw_compat_4_2_extra, ++ hw_compat_4_2_extra_len); + } + ++/* RHEL: Export pc-4.2 */ + DEFINE_I440FX_MACHINE(v4_2, "pc-i440fx-4.2", NULL, + pc_i440fx_4_2_machine_options); + +@@ -515,8 +552,10 @@ static void pc_i440fx_4_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_4_1, pc_compat_4_1_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1", NULL, + pc_i440fx_4_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_4_0_machine_options(MachineClass *m) + { +@@ -529,8 +568,10 @@ static void pc_i440fx_4_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0", NULL, + pc_i440fx_4_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_3_1_machine_options(MachineClass *m) + { +@@ -546,8 +587,10 @@ static void pc_i440fx_3_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_3_1, pc_compat_3_1_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v3_1, "pc-i440fx-3.1", NULL, + pc_i440fx_3_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_3_0_machine_options(MachineClass *m) + { +@@ -556,8 +599,10 @@ static void pc_i440fx_3_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_3_0, pc_compat_3_0_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v3_0, "pc-i440fx-3.0", NULL, + pc_i440fx_3_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_2_12_machine_options(MachineClass *m) + { +@@ -566,8 +611,10 @@ static void pc_i440fx_2_12_machine_options(MachineClass *m) + 
compat_props_add(m->compat_props, pc_compat_2_12, pc_compat_2_12_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v2_12, "pc-i440fx-2.12", NULL, + pc_i440fx_2_12_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_2_11_machine_options(MachineClass *m) + { +@@ -576,9 +623,11 @@ static void pc_i440fx_2_11_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_2_11, pc_compat_2_11_len); + } + ++/* RHEL: Export pc-2.11 */ + DEFINE_I440FX_MACHINE(v2_11, "pc-i440fx-2.11", NULL, + pc_i440fx_2_11_machine_options); + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_i440fx_2_10_machine_options(MachineClass *m) + { + pc_i440fx_2_11_machine_options(m); +@@ -951,3 +1000,224 @@ static void xenfv_3_1_machine_options(MachineClass *m) + DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, + xenfv_3_1_machine_options); #endif +#endif /* Disabled for Red Hat Enterprise Linux */ + @@ -402,10 +670,13 @@ index 1bd70d1abb..bd7fdb99bb 100644 +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + m->family = "pc_piix_Y"; -+ m->default_machine_opts = "firmware=bios-256k.bin"; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; + pcmc->default_nic_model = "e1000"; ++ pcmc->pci_root_uid = 0; + m->default_display = "std"; + m->no_parallel = 1; ++ m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; + machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + m->alias = "pc"; @@ -427,6 +698,21 @@ index 1bd70d1abb..bd7fdb99bb 100644 + m->smbus_no_migration_support = true; + pcmc->pvh_enabled = false; + pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ pcmc->kvmclock_create_always = false; ++ /* From pc_i440fx_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++ 
compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); + compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); @@ -469,7 +755,6 @@ index 1bd70d1abb..bd7fdb99bb 100644 + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_machine_rhel750_options(m); + m->desc = "RHEL 7.4.0 PC (i440FX + PIIX, 1996)"; -+ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; + pcmc->pc_rom_ro = false; + compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); + compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); @@ -486,10 +771,10 @@ index 1bd70d1abb..bd7fdb99bb 100644 + +static void pc_machine_rhel730_options(MachineClass *m) +{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ X86MachineClass *x86mc = X86_MACHINE_CLASS(m); + pc_machine_rhel740_options(m); + m->desc = "RHEL 7.3.0 PC (i440FX + PIIX, 1996)"; -+ pcmc->linuxboot_dma_enabled = false; ++ x86mc->fwcfg_dma_enabled = false; + compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); + compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); +} @@ -598,7 +883,7 @@ index 1bd70d1abb..bd7fdb99bb 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 385e5cffb1..7531d8ed76 100644 +index 235054a643..c67418b6a9 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -197,8 +197,8 @@ static void 
pc_q35_init(MachineState *machine) @@ -610,9 +895,9 @@ index 385e5cffb1..7531d8ed76 100644 + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, - SMBIOS_ENTRY_POINT_21); - } -@@ -330,6 +330,7 @@ static void pc_q35_init(MachineState *machine) + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -342,6 +342,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -620,7 +905,7 @@ index 385e5cffb1..7531d8ed76 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -533,3 +534,154 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -620,3 +621,232 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -633,9 +918,10 @@ index 385e5cffb1..7531d8ed76 100644 +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pcmc->default_nic_model = "e1000e"; ++ pcmc->pci_root_uid = 0; + m->family = "pc_q35_Z"; + m->units_per_default_bus = 1; -+ m->default_machine_opts = "firmware=bios-256k.bin"; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; + m->default_display = "std"; + m->no_floppy = 1; + m->no_parallel = 1; @@ -644,10 +930,76 @@ index 385e5cffb1..7531d8ed76 100644 + machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); + machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); + m->alias = "q35"; -+ m->max_cpus = 384; ++ m->max_cpus = 710; + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); +} + ++static void pc_q35_init_rhel850(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel850_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.5.0"; 
++} ++ ++DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, ++ pc_q35_machine_rhel850_options); ++ ++ ++static void pc_q35_init_rhel840(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel840_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel850_options(m); ++ m->desc = "RHEL-8.4.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.4.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel840, "pc-q35-rhel8.4.0", pc_q35_init_rhel840, ++ pc_q35_machine_rhel840_options); ++ ++ ++static void pc_q35_init_rhel830(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel830_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel840_options(m); ++ m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.3.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->kvmclock_create_always = false; ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830, ++ pc_q35_machine_rhel830_options); ++ +static void pc_q35_init_rhel820(MachineState *machine) +{ + pc_q35_init(machine); @@ -655,8 +1007,17 @@ index 385e5cffb1..7531d8ed76 100644 + +static void pc_q35_machine_rhel820_options(MachineClass *m) +{ -+ pc_q35_machine_rhel_options(m); ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel830_options(m); + m->desc = "RHEL-8.2.0 PC (Q35 + 
ICH9, 2009)"; ++ m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.2.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); +} + +DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, @@ -669,9 +1030,12 @@ index 385e5cffb1..7531d8ed76 100644 + +static void pc_q35_machine_rhel810_options(MachineClass *m) +{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel820_options(m); + m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; + m->alias = NULL; ++ pcmc->smbios_stream_product = NULL; ++ pcmc->smbios_stream_version = NULL; + compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); + compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); +} @@ -748,7 +1112,6 @@ index 385e5cffb1..7531d8ed76 100644 + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel750_options(m); + m->desc = "RHEL-7.4.0 PC (Q35 + ICH9, 2009)"; -+ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; + pcmc->pc_rom_ro = false; + compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); + compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); @@ -764,11 +1127,11 @@ index 385e5cffb1..7531d8ed76 100644 + +static void pc_q35_machine_rhel730_options(MachineClass *m) +{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ X86MachineClass *x86mc = X86_MACHINE_CLASS(m); + pc_q35_machine_rhel740_options(m); + m->desc = "RHEL-7.3.0 PC (Q35 + ICH9, 2009)"; + m->max_cpus = 255; -+ pcmc->linuxboot_dma_enabled = false; ++ x86mc->fwcfg_dma_enabled = false; + compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); + compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); +} @@ -776,39 +1139,48 @@ index 385e5cffb1..7531d8ed76 
100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index 6f85a0e032..2920bdef5b 100644 +index 8bba96ef2b..04e8759815 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -222,6 +222,8 @@ struct MachineClass { - const char **valid_cpu_types; +@@ -263,6 +263,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; + bool auto_enable_numa_with_memdev; + /* RHEL only */ + bool async_pf_vmexit_disable; - void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes, - int nb_nodes, ram_addr_t size); bool ignore_boot_device_suffixes; + bool smbus_no_migration_support; + bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 1f86eba3f9..2e362c8faa 100644 +index 7ccc9a1a07..d0544ee119 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -124,6 +124,9 @@ typedef struct PCMachineClass { +@@ -125,6 +125,9 @@ struct PCMachineClass { - /* use PVH to load kernels that support this feature */ - bool pvh_enabled; -+ + /* create kvmclock device even when KVM PV features are not exposed */ + bool kvmclock_create_always; ++ + /* RH only, see bz 1489800 */ + bool pc_rom_ro; - } PCMachineClass; + }; #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -300,6 +303,36 @@ extern const size_t pc_compat_1_5_len; +@@ -280,6 +283,48 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_4_compat[]; ++extern const size_t pc_rhel_8_4_compat_len; ++ ++extern GlobalProperty pc_rhel_8_3_compat[]; ++extern const size_t pc_rhel_8_3_compat_len; ++ ++extern GlobalProperty pc_rhel_8_2_compat[]; ++extern const size_t pc_rhel_8_2_compat_len; ++ +extern GlobalProperty pc_rhel_8_1_compat[]; +extern const size_t 
pc_rhel_8_1_compat_len; + @@ -836,33 +1208,17 @@ index 1f86eba3f9..2e362c8faa 100644 +extern GlobalProperty pc_rhel_7_0_compat[]; +extern const size_t pc_rhel_7_0_compat_len; + ++extern GlobalProperty hw_compat_4_2_extra[]; ++extern const size_t hw_compat_4_2_extra_len; ++ /* Helper for setting model-id for CPU models that changed model-id * depending on QEMU versions up to QEMU 2.4. */ -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 1b7880ae3a..790db778ab 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1829,11 +1829,17 @@ static CPUCaches epyc_cache_info = { - - static X86CPUDefinition builtin_x86_defs[] = { - { -+ /* qemu64 is the default CPU model for all *-rhel7.* machine-types. -+ * The default on RHEL-6 was cpu64-rhel6. -+ * libvirt assumes that qemu64 is the default for _all_ machine-types, -+ * so we should try to keep qemu64 and cpu64-rhel6 as similar as -+ * possible. -+ */ - .name = "qemu64", - .level = 0xd, - .vendor = CPUID_VENDOR_AMD, - .family = 6, -- .model = 6, -+ .model = 13, - .stepping = 3, - .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | - CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -@@ -3932,6 +3938,7 @@ static PropValue kvm_default_props[] = { +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index d95028018e..7b004065ae 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -131,6 +131,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -870,11 +1226,11 @@ index 1b7880ae3a..790db778ab 100644 { NULL, NULL }, }; -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 1d10046a6c..86d9a1f364 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -3079,6 +3079,7 @@ static int kvm_get_msrs(X86CPU *cpu) +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 5a698bde19..a668f521ac 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -3336,6 +3336,7 @@ 
static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -882,7 +1238,7 @@ index 1d10046a6c..86d9a1f364 100644 kvm_msr_buf_reset(cpu); -@@ -3388,6 +3389,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3665,6 +3666,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -890,8 +1246,31 @@ index 1d10046a6c..86d9a1f364 100644 + env->async_pf_en_msr &= ~(1ULL << 2); + } break; - case MSR_KVM_PV_EOI_EN: - env->pv_eoi_en_msr = msrs[i].data; + case MSR_KVM_ASYNC_PF_INT: + env->async_pf_int_msr = msrs[i].data; +diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c +index 6dcad2db49..580c2c43d2 100644 +--- a/tests/qtest/pvpanic-test.c ++++ b/tests/qtest/pvpanic-test.c +@@ -17,7 +17,7 @@ static void test_panic_nopause(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic -action panic=none"); ++ qts = qtest_init("-M q35 -device pvpanic -action panic=none"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); +@@ -40,7 +40,8 @@ static void test_panic(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic -action panic=pause"); ++ /* RHEL: Use q35 */ ++ qts = qtest_init("-M q35 -device pvpanic -action panic=pause"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); -- -2.21.0 +2.27.0 diff --git a/SOURCES/0012-Enable-make-check.patch b/SOURCES/0012-Enable-make-check.patch index 09f7b4e..b2ff35a 100644 --- a/SOURCES/0012-Enable-make-check.patch +++ b/SOURCES/0012-Enable-make-check.patch @@ -1,6 +1,6 @@ -From 154215041df085271a780a2989f4f481226e3e34 Mon Sep 17 00:00:00 2001 +From 740a2dd943a2e0fcd41a9cd8eb94a136f8f49fa2 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Fri, 19 Oct 2018 13:48:41 +0200 +Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check Fixing tests after device disabling and machine types changes and 
enabling @@ -22,74 +22,112 @@ Rebase changes (4.1.0-rc1): Rebase changes (4.2.0-rc0): - partially disable hd-geo-test (requires lsi53c895a) +Rebase changes (5.1.0-rc1): +- Disable qtest/q35-test (uses upstream machine types) +- Do not run iotests on make checka +- Enabled iotests 071 and 099 + +Rebase changes (5.2.0 rc0): +- Disable cdrom tests (unsupported devices) on x86_64 +- disable fuzz test + +Rebase changes (6.0.0): +- Disabled xlnx-can-test +- Disable pxb-pcie subtest for bios-table-test +- Replace qtest usage of upstream q35 machine type with pc-q35-rhel8.4.0 +- Not run cdrom-test on aarch64 + +Rebase changes (6.1.0): +- Remove unnecessary test disabling changes + +Rebase changes (weekly-211006): +- New handling for bios-table-test (disabled downstream) + Merged patches (4.0.0): - f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce Merged patches (4.1.0-rc0): - 41288ff redhat: Remove raw iotest 205 - -Signed-off-by: Danilo C. L. 
de Paula --- - redhat/qemu-kvm.spec.template | 2 +- - tests/Makefile.include | 10 +++++----- - tests/boot-serial-test.c | 6 +++++- - tests/cpu-plug-test.c | 4 ++-- - tests/e1000-test.c | 2 ++ - tests/hd-geo-test.c | 4 ++++ - tests/prom-env-test.c | 4 ++++ - tests/qemu-iotests/051 | 12 ++++++------ - tests/qemu-iotests/group | 4 ++-- - tests/test-x86-cpuid-compat.c | 2 ++ - tests/usb-hcd-xhci-test.c | 4 ++++ - 11 files changed, 37 insertions(+), 17 deletions(-) + redhat/qemu-kvm.spec.template | 2 +- + tests/qemu-iotests/051 | 8 ++++---- + tests/qtest/bios-tables-test.c | 5 ++++- + tests/qtest/boot-serial-test.c | 6 +++++- + tests/qtest/cdrom-test.c | 4 ++++ + tests/qtest/cpu-plug-test.c | 4 ++-- + tests/qtest/fuzz-e1000e-test.c | 2 +- + tests/qtest/fuzz-virtio-scsi-test.c | 2 +- + tests/qtest/hd-geo-test.c | 4 ++++ + tests/qtest/lpc-ich9-test.c | 2 +- + tests/qtest/meson.build | 13 ++++--------- + tests/qtest/prom-env-test.c | 4 ++++ + tests/qtest/test-x86-cpuid-compat.c | 2 ++ + tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + 14 files changed, 41 insertions(+), 21 deletions(-) -diff --git a/tests/Makefile.include b/tests/Makefile.include -index b483790cf3..53bdbdfee0 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -172,7 +172,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) - check-qtest-i386-y += tests/ahci-test$(EXESUF) - check-qtest-i386-y += tests/hd-geo-test$(EXESUF) - check-qtest-i386-y += tests/boot-order-test$(EXESUF) --check-qtest-i386-y += tests/bios-tables-test$(EXESUF) -+#check-qtest-i386-y += tests/bios-tables-test$(EXESUF) - check-qtest-i386-$(CONFIG_SGA) += tests/boot-serial-test$(EXESUF) - check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) - check-qtest-i386-y += tests/rtc-test$(EXESUF) -@@ -230,7 +230,7 @@ check-qtest-mips64el-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) - check-qtest-moxie-y += tests/boot-serial-test$(EXESUF) +diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 +index 
1d2fa93a11..c8a2815f54 100755 +--- a/tests/qemu-iotests/051 ++++ b/tests/qemu-iotests/051 +@@ -174,9 +174,9 @@ run_qemu -drive if=virtio + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive if=none,id=disk -device ide-cd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive if=none,id=disk -device ide-hd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +@@ -225,9 +225,9 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-hd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index 258874167e..16d8304cde 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -1372,6 +1372,7 @@ static void test_acpi_virt_tcg_numamem(void) + + } - check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) --check-qtest-ppc-y += tests/boot-order-test$(EXESUF) -+#check-qtest-ppc-y += tests/boot-order-test$(EXESUF) - check-qtest-ppc-y += tests/prom-env-test$(EXESUF) - check-qtest-ppc-y += tests/drive_del-test$(EXESUF) - check-qtest-ppc-y += tests/boot-serial-test$(EXESUF) -@@ 
-244,8 +244,8 @@ check-qtest-ppc64-$(CONFIG_PSERIES) += tests/rtas-test$(EXESUF) - check-qtest-ppc64-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) - check-qtest-ppc64-$(CONFIG_USB_UHCI) += tests/usb-hcd-uhci-test$(EXESUF) - check-qtest-ppc64-$(CONFIG_USB_XHCI_NEC) += tests/usb-hcd-xhci-test$(EXESUF) --check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) --check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) -+#check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) -+#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) - check-qtest-ppc64-$(CONFIG_RTL8139_PCI) += tests/test-filter-redirector$(EXESUF) - check-qtest-ppc64-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) - check-qtest-ppc64-y += tests/numa-test$(EXESUF) -@@ -291,7 +291,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) - check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) - check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) - check-qtest-s390x-y += tests/drive_del-test$(EXESUF) --check-qtest-s390x-y += tests/device-plug-test$(EXESUF) -+#check-qtest-s390x-y += tests/device-plug-test$(EXESUF) - check-qtest-s390x-y += tests/virtio-ccw-test$(EXESUF) - check-qtest-s390x-y += tests/cpu-plug-test$(EXESUF) - check-qtest-s390x-y += tests/migration-test$(EXESUF) -diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c -index d3a54a0ba5..33ce72b89c 100644 ---- a/tests/boot-serial-test.c -+++ b/tests/boot-serial-test.c -@@ -108,19 +108,23 @@ static testdef_t tests[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_acpi_virt_tcg_pxb(void) + { + test_data data = { +@@ -1403,6 +1404,7 @@ static void test_acpi_virt_tcg_pxb(void) + + free_test_data(&data); + } ++#endif + + static void test_acpi_tcg_acpi_hmat(const char *machine) + { +@@ -1644,7 +1646,8 @@ int main(int argc, char *argv[]) + qtest_add_func("acpi/virt", test_acpi_virt_tcg); + 
qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem); + qtest_add_func("acpi/virt/memhp", test_acpi_virt_tcg_memhp); +- qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); ++ /* Disabled for Red Hat Enterprise Linux ++ qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); */ + qtest_add_func("acpi/virt/oem-fields", test_acpi_oem_fields_virt); + } + } +diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c +index 83828ba270..294476b959 100644 +--- a/tests/qtest/boot-serial-test.c ++++ b/tests/qtest/boot-serial-test.c +@@ -148,19 +148,23 @@ static testdef_t tests[] = { { "ppc", "g3beige", "", "PowerPC,750" }, { "ppc", "mac99", "", "PowerPC,G4" }, { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, @@ -99,7 +137,7 @@ index d3a54a0ba5..33ce72b89c 100644 { "ppc64", "mac99", "", "PowerPC,970FX" }, +#endif { "ppc64", "pseries", - "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken", + "-machine " PSERIES_DEFAULT_CAPABILITIES, "Open Firmware" }, +#if 0 /* Disabled for Red Hat Enterprise Linux */ { "ppc64", "powernv8", "", "OPAL" }, @@ -114,11 +152,47 @@ index d3a54a0ba5..33ce72b89c 100644 { "x86_64", "q35", "-device sga", "SGABIOS" }, { "sparc", "LX", "", "TMS390S10" }, { "sparc", "SS-4", "", "MB86904" }, -diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c -index 30e514bbfb..a04beae1c6 100644 ---- a/tests/cpu-plug-test.c -+++ b/tests/cpu-plug-test.c -@@ -185,8 +185,8 @@ static void add_pseries_test_case(const char *mname) +diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c +index 5af944a5fb..69d9bac38a 100644 +--- a/tests/qtest/cdrom-test.c ++++ b/tests/qtest/cdrom-test.c +@@ -140,6 +140,7 @@ static void add_x86_tests(void) + qtest_add_data_func("cdrom/boot/isapc", "-M isapc " + "-drive if=ide,media=cdrom,file=", test_cdboot); + } ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_data_func("cdrom/boot/am53c974", + "-device am53c974 -device scsi-cd,drive=cd1 " + "-drive 
if=none,id=cd1,format=raw,file=", test_cdboot); +@@ -155,6 +156,7 @@ static void add_x86_tests(void) + qtest_add_data_func("cdrom/boot/megasas-gen2", "-M q35 " + "-device megasas-gen2 -device scsi-cd,drive=cd1 " + "-blockdev file,node-name=cd1,filename=", test_cdboot); ++#endif + } + + static void add_s390x_tests(void) +@@ -220,6 +222,7 @@ int main(int argc, char **argv) + "magnum", "malta", "pica61", NULL + }; + add_cdrom_param_tests(mips64machines); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + } else if (g_str_equal(arch, "arm") || g_str_equal(arch, "aarch64")) { + const char *armmachines[] = { + "realview-eb", "realview-eb-mpcore", "realview-pb-a8", +@@ -227,6 +230,7 @@ int main(int argc, char **argv) + "vexpress-a9", "virt", NULL + }; + add_cdrom_param_tests(armmachines); ++#endif + } else { + const char *nonemachine[] = { "none", NULL }; + add_cdrom_param_tests(nonemachine); +diff --git a/tests/qtest/cpu-plug-test.c b/tests/qtest/cpu-plug-test.c +index a1c689414b..a8f076711c 100644 +--- a/tests/qtest/cpu-plug-test.c ++++ b/tests/qtest/cpu-plug-test.c +@@ -110,8 +110,8 @@ static void add_pseries_test_case(const char *mname) char *path; PlugTestData *data; @@ -129,27 +203,37 @@ index 30e514bbfb..a04beae1c6 100644 return; } data = g_new(PlugTestData, 1); -diff --git a/tests/e1000-test.c b/tests/e1000-test.c -index c387984ef6..c89112d6f8 100644 ---- a/tests/e1000-test.c -+++ b/tests/e1000-test.c -@@ -22,9 +22,11 @@ struct QE1000 { +diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c +index 66229e6096..947fba73b7 100644 +--- a/tests/qtest/fuzz-e1000e-test.c ++++ b/tests/qtest/fuzz-e1000e-test.c +@@ -17,7 +17,7 @@ static void test_lp1879531_eth_get_rss_ex_dst_addr(void) + { + QTestState *s; - static const char *models[] = { - "e1000", -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - "e1000-82540em", - "e1000-82544gc", - "e1000-82545em", -+#endif - }; +- s = qtest_init("-nographic -monitor none -serial none -M pc-q35-5.0"); ++ s 
= qtest_init("-nographic -monitor none -serial none -M pc-q35-rhel8.4.0"); + + qtest_outl(s, 0xcf8, 0x80001010); + qtest_outl(s, 0xcfc, 0xe1020000); +diff --git a/tests/qtest/fuzz-virtio-scsi-test.c b/tests/qtest/fuzz-virtio-scsi-test.c +index aaf6d10e18..43727d62ac 100644 +--- a/tests/qtest/fuzz-virtio-scsi-test.c ++++ b/tests/qtest/fuzz-virtio-scsi-test.c +@@ -19,7 +19,7 @@ static void test_mmio_oob_from_memory_region_cache(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.2 -display none -m 512M " ++ s = qtest_init("-M pc-q35-rhel8.4.0 -display none -m 512M " + "-device virtio-scsi,num_queues=8,addr=03.0 "); - static void *e1000_get_driver(void *obj, const char *interface) -diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c -index 7e86c5416c..cc068bad87 100644 ---- a/tests/hd-geo-test.c -+++ b/tests/hd-geo-test.c -@@ -732,6 +732,7 @@ static void test_override_ide(void) + qtest_outl(s, 0xcf8, 0x80001811); +diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c +index 113126ae06..999ef2aace 100644 +--- a/tests/qtest/hd-geo-test.c ++++ b/tests/qtest/hd-geo-test.c +@@ -737,6 +737,7 @@ static void test_override_ide(void) test_override(args, expected); } @@ -157,7 +241,7 @@ index 7e86c5416c..cc068bad87 100644 static void test_override_scsi(void) { TestArgs *args = create_args(); -@@ -776,6 +777,7 @@ static void test_override_scsi_2_controllers(void) +@@ -781,6 +782,7 @@ static void test_override_scsi_2_controllers(void) add_scsi_disk(args, 3, 1, 0, 1, 2, 0, 1, 0); test_override(args, expected); } @@ -165,7 +249,7 @@ index 7e86c5416c..cc068bad87 100644 static void test_override_virtio_blk(void) { -@@ -951,9 +953,11 @@ int main(int argc, char **argv) +@@ -960,9 +962,11 @@ int main(int argc, char **argv) qtest_add_func("hd-geo/ide/device/user/chst", test_ide_device_user_chst); if (have_qemu_img()) { qtest_add_func("hd-geo/override/ide", test_override_ide); @@ -177,11 +261,83 @@ index 7e86c5416c..cc068bad87 100644 
qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); qtest_add_func("hd-geo/override/scsi_hot_unplug", -diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c -index 61bc1d1e7b..028d45c7d7 100644 ---- a/tests/prom-env-test.c -+++ b/tests/prom-env-test.c -@@ -88,10 +88,14 @@ int main(int argc, char *argv[]) +diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c +index fe0bef9980..7a9d51579b 100644 +--- a/tests/qtest/lpc-ich9-test.c ++++ b/tests/qtest/lpc-ich9-test.c +@@ -15,7 +15,7 @@ static void test_lp1878642_pci_bus_get_irq_level_assert(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.0 " ++ s = qtest_init("-M pc-q35-rhel8.4.0 " + "-nographic -monitor none -serial none"); + + qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ +diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build +index c9d8458062..049e06c057 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -68,7 +68,6 @@ qtests_i386 = \ + (config_all_devices.has_key('CONFIG_RTL8139_PCI') ? ['rtl8139-test'] : []) + \ + (config_all_devices.has_key('CONFIG_E1000E_PCI_EXPRESS') ? ['fuzz-e1000e-test'] : []) + \ + (config_all_devices.has_key('CONFIG_ESP_PCI') ? ['am53c974-test'] : []) + \ +- (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ + qtests_pci + \ + ['fdc-test', + 'ide-test', +@@ -81,7 +80,6 @@ qtests_i386 = \ + 'drive_del-test', + 'tco-test', + 'cpu-plug-test', +- 'q35-test', + 'vmgenid-test', + 'migration-test', + 'test-x86-cpuid-compat', +@@ -130,17 +128,15 @@ qtests_mips64el = \ + + qtests_ppc = \ + (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) + \ +- (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) + \ +- ['boot-order-test', 'prom-env-test', 'boot-serial-test'] \ ++ (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) + + qtests_ppc64 = \ + (config_all_devices.has_key('CONFIG_PSERIES') ? 
['device-plug-test'] : []) + \ + (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-xscom-test'] : []) + \ + (config_all_devices.has_key('CONFIG_PSERIES') ? ['rtas-test'] : []) + \ +- (slirp.found() ? ['pxe-test', 'test-netfilter'] : []) + \ ++ (slirp.found() ? ['pxe-test'] : []) + \ + (config_all_devices.has_key('CONFIG_USB_UHCI') ? ['usb-hcd-uhci-test'] : []) + \ + (config_all_devices.has_key('CONFIG_USB_XHCI_NEC') ? ['usb-hcd-xhci-test'] : []) + \ +- (config_host.has_key('CONFIG_POSIX') ? ['test-filter-mirror'] : []) + \ + qtests_pci + ['migration-test', 'numa-test', 'cpu-plug-test', 'drive_del-test'] + + qtests_sh4 = (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) +@@ -186,8 +182,8 @@ qtests_aarch64 = \ + ['arm-cpu-features', + 'numa-test', + 'boot-serial-test', +- 'xlnx-can-test', +- 'fuzz-xlnx-dp-test', ++# 'xlnx-can-test', ++# 'fuzz-xlnx-dp-test', + 'migration-test'] + + qtests_s390x = \ +@@ -196,7 +192,6 @@ qtests_s390x = \ + (config_host.has_key('CONFIG_POSIX') ? 
['test-filter-redirector'] : []) + \ + ['boot-serial-test', + 'drive_del-test', +- 'device-plug-test', + 'virtio-ccw-test', + 'cpu-plug-test', + 'migration-test'] +diff --git a/tests/qtest/prom-env-test.c b/tests/qtest/prom-env-test.c +index f41d80154a..f8dc478ce8 100644 +--- a/tests/qtest/prom-env-test.c ++++ b/tests/qtest/prom-env-test.c +@@ -89,10 +89,14 @@ int main(int argc, char *argv[]) if (!strcmp(arch, "ppc")) { add_tests(ppc_machines); } else if (!strcmp(arch, "ppc64")) { @@ -196,68 +352,12 @@ index 61bc1d1e7b..028d45c7d7 100644 } else if (!strcmp(arch, "sparc")) { add_tests(sparc_machines); } else if (!strcmp(arch, "sparc64")) { -diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index 53bcdbc911..b387e0c233 100755 ---- a/tests/qemu-iotests/051 -+++ b/tests/qemu-iotests/051 -@@ -181,11 +181,11 @@ run_qemu -drive if=virtio - case "$QEMU_DEFAULT_MACHINE" in - pc) - run_qemu -drive if=none,id=disk -device ide-cd,drive=disk -- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk -+# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk - run_qemu -drive if=none,id=disk -device ide-drive,drive=disk - run_qemu -drive if=none,id=disk -device ide-hd,drive=disk -- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk -- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk -+# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk -+# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk - ;; - *) - ;; -@@ -234,11 +234,11 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on - case "$QEMU_DEFAULT_MACHINE" in - pc) - run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk -- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk -+# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device 
scsi-cd,drive=disk - run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-drive,drive=disk - run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-hd,drive=disk -- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk -- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk -+# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk -+# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk - ;; - *) - ;; -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 6b10a6a762..06cc734b26 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -92,7 +92,7 @@ - 068 rw quick - 069 rw auto quick - 070 rw quick --071 rw auto quick -+# 071 rw auto quick -- requires whitelisted blkverify - 072 rw auto quick - 073 rw auto quick - 074 rw auto quick -@@ -120,7 +120,7 @@ - 096 rw quick - 097 rw auto backing - 098 rw auto backing quick --099 rw auto quick -+# 099 rw auto quick -- requires whitelisted blkverify - # 100 was removed, do not reuse - 101 rw quick - 102 rw quick -diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c -index 772287bdb4..e7c075ed98 100644 ---- a/tests/test-x86-cpuid-compat.c -+++ b/tests/test-x86-cpuid-compat.c +diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c +index f28848e06e..6b2fd398a2 100644 +--- a/tests/qtest/test-x86-cpuid-compat.c ++++ b/tests/qtest/test-x86-cpuid-compat.c @@ -300,6 +300,7 @@ int main(int argc, char **argv) - "-cpu 486,xlevel2=0xC0000002,+xstore", + "-cpu 486,xlevel2=0xC0000002,xstore=on", "xlevel2", 0xC0000002); +#if 0 /* Disabled in Red Hat Enterprise Linux */ @@ -266,16 +366,16 @@ index 772287bdb4..e7c075ed98 100644 @@ -350,6 +351,7 @@ int main(int argc, char **argv) 
add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", - "-machine pc-i440fx-2.4 -cpu SandyBridge,+npt", + "-machine pc-i440fx-2.4 -cpu SandyBridge,svm=on,npt=on", "xlevel", 0x80000008); +#endif /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", -diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c +diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c index 10ef9d2a91..3855873050 100644 ---- a/tests/usb-hcd-xhci-test.c -+++ b/tests/usb-hcd-xhci-test.c +--- a/tests/qtest/usb-hcd-xhci-test.c ++++ b/tests/qtest/usb-hcd-xhci-test.c @@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) usb_test_hotplug(global_qtest, "xhci", "1", NULL); } @@ -303,5 +403,5 @@ index 10ef9d2a91..3855873050 100644 qtest_start("-device nec-usb-xhci,id=xhci" -- -2.21.0 +2.27.0 diff --git a/SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index db776c4..d9c8d42 100644 --- a/SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From de433da59448eaad4ac1b902d07d57b57f922aff Mon Sep 17 00:00:00 2001 +From e9ebc159a9acf108e1ec6f622be3f256cf14aba7 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -32,20 +32,16 @@ Merged patches (2.9.0): Merged patches (4.1.0-rc3): - 2b89558 vfio: increase the cap on number of assigned devices to 64 - -(cherry picked from commit 9fa3c9fc6dfcde76d80db1aa601b2d577f72ceec) -(cherry picked from commit 3cb35556dc7d994f203d732fe952f95fcdb03c0a) -Signed-off-by: Danilo C. L. 
de Paula --- hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- hw/vfio/pci.h | 1 + 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index c8534d3035..309535f306 100644 +index 7b45353ce2..eb725a3aee 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -47,6 +47,9 @@ +@@ -45,6 +45,9 @@ #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" @@ -55,7 +51,7 @@ index c8534d3035..309535f306 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); -@@ -2722,9 +2725,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) +@@ -2807,9 +2810,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) ssize_t len; struct stat st; int groupid; @@ -87,7 +83,7 @@ index c8534d3035..309535f306 100644 if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3167,6 +3191,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3246,6 +3270,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -98,10 +94,10 @@ index c8534d3035..309535f306 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 35626cd63e..0cd4803aee 100644 +index 64777516d1..e0fe6ca97e 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h -@@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice { +@@ -139,6 +139,7 @@ struct VFIOPCIDevice { EventNotifier err_notifier; EventNotifier req_notifier; int (*resetfn)(struct VFIOPCIDevice *); @@ -110,5 +106,5 @@ index 35626cd63e..0cd4803aee 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -2.21.0 +2.27.0 diff --git a/SOURCES/0014-Add-support-statement-to-help-output.patch b/SOURCES/0014-Add-support-statement-to-help-output.patch index cb77bfe..2259e13 100644 --- 
a/SOURCES/0014-Add-support-statement-to-help-output.patch +++ b/SOURCES/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 2754dd8da8975757753fd491985d5e7b36966106 Mon Sep 17 00:00:00 2001 +From b736b0c41dd62ed6f874a7b33ca1d4f9ceab4573 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -16,18 +16,15 @@ Add support statement to -help output, reporting direct qemu-kvm usage as unsupported by Red Hat, and advising users to use libvirt instead. Signed-off-by: Eduardo Habkost -(cherry picked from commit 2a07700936e39856cc9f149c6a6517f0715536a6) -(cherry picked from commit 5dd2f4706e2fef945771949e59a8fcc1b5452de9) -Signed-off-by: Danilo C. L. de Paula --- - vl.c | 9 +++++++++ + softmmu/vl.c | 9 +++++++++ 1 file changed, 9 insertions(+) -diff --git a/vl.c b/vl.c -index 668a34577e..9f3e7e7733 100644 ---- a/vl.c -+++ b/vl.c -@@ -1822,9 +1822,17 @@ static void version(void) +diff --git a/softmmu/vl.c b/softmmu/vl.c +index 620a1f1367..d46b8fb4ab 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -827,9 +827,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -45,7 +42,7 @@ index 668a34577e..9f3e7e7733 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", error_get_progname()); -@@ -1841,6 +1849,7 @@ static void help(int exitcode) +@@ -855,6 +863,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); @@ -54,5 +51,5 @@ index 668a34577e..9f3e7e7733 100644 } -- -2.21.0 +2.27.0 diff --git a/SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch b/SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch index cec862d..31d9643 100644 --- a/SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From c9c3cf721b0e9e359418f64c2a5121c3f8b5d27a Mon Sep 17 00:00:00 2001 +From 
9a7621819821ee88d2f99d6b629fd87aa9a07758 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -25,67 +25,23 @@ Merged patches (2.11.0): - 0584216921 Match POWER max cpus to x86 Signed-off-by: Andrew Jones -(cherry picked from commit a4ceb63bdc5cbac19f5f633ec761b9de0dedb55e) -(cherry picked from commit a1f26d85171b4d554225150053700e93ba6eba10) -redhat: globally limit the maximum number of CPUs +Merged patches (5.1.0): +- redhat: globally limit the maximum number of CPUs +- redhat: remove manual max_cpus limitations for ppc +- use recommended max vcpu count -RH-Author: David Hildenbrand -Message-id: <20180109103253.24517-2-david@redhat.com> -Patchwork-id: 78531 -O-Subject: [RHEL-7.5 qemu-kvm-ma PATCH v2 1/2] redhat: globally limit the maximum number of CPUs -Bugzilla: 1527449 -RH-Acked-by: David Gibson -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck - -Upstream-status: n/a - -For RHEL, we support 240, for RHV up to 384 VCPUs. Let's limit this -globally instead of fixing up all machines. This way, we can easily -change (increase) the product specific levels later. 
- -Signed-off-by: David Hildenbrand -Signed-off-by: Miroslav Rezanina - -redhat: remove manual max_cpus limitations for ppc - -RH-Author: David Hildenbrand -Message-id: <20180109103253.24517-3-david@redhat.com> -Patchwork-id: 78532 -O-Subject: [RHEL-7.5 qemu-kvm-ma PATCH v2 2/2] redhat: remove manual max_cpus limitations for ppc -Bugzilla: 1527449 -RH-Acked-by: David Gibson -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck - -Upstream-status: n/a - -RH-Author: Andrew Jones -Message-id: <1390301212-15344-1-git-send-email-drjones@redhat.com> -Patchwork-id: 56862 -O-Subject: [RHEL7.0 qemu-kvm PATCH v6] use recommended max vcpu count -Bugzilla: 998708 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Marcelo Tosatti - -The recommended vcpu max limit (KVM_CAP_NR_VCPUS) should be used instead -of the actual max vcpu limit (KVM_CAP_MAX_VCPUS) to give an error. - -This commit matches the limit to current KVM_CAP_NR_VCPUS value. - -Signed-off-by: Danilo C. L. de Paula +Merged patches (5.2.0 rc0): +- f8a4123 vl: Remove downstream-only MAX_RHEL_CPUS code --- accel/kvm/kvm-all.c | 12 ++++++++++++ - vl.c | 18 ++++++++++++++++++ - 2 files changed, 30 insertions(+) + 1 file changed, 12 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index ca00daa2f5..dc3ed7f04e 100644 +index eecd8031cf..8f2a53438f 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c -@@ -1943,6 +1943,18 @@ static int kvm_init(MachineState *ms) +@@ -2423,6 +2423,18 @@ static int kvm_init(MachineState *ms) soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -104,49 +60,6 @@ index ca00daa2f5..dc3ed7f04e 100644 while (nc->name) { if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " -diff --git a/vl.c b/vl.c -index 9f3e7e7733..1550aa2aaa 100644 ---- a/vl.c -+++ b/vl.c -@@ -134,6 +134,8 @@ int main(int argc, char **argv) - - #define MAX_VIRTIO_CONSOLES 1 - -+#define RHEL_MAX_CPUS 384 -+ - static 
const char *data_dir[16]; - static int data_dir_idx; - const char *bios_name = NULL; -@@ -1339,6 +1341,20 @@ static MachineClass *find_default_machine(GSList *machines) - return NULL; - } - -+/* Maximum number of CPUs limited for Red Hat Enterprise Linux */ -+static void limit_max_cpus_in_machines(void) -+{ -+ GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); -+ -+ for (el = machines; el; el = el->next) { -+ MachineClass *mc = el->data; -+ -+ if (mc->max_cpus > RHEL_MAX_CPUS) { -+ mc->max_cpus = RHEL_MAX_CPUS; -+ } -+ } -+} -+ - static int machine_help_func(QemuOpts *opts, MachineState *machine) - { - ObjectProperty *prop; -@@ -3857,6 +3873,8 @@ int main(int argc, char **argv, char **envp) - "mutually exclusive"); - exit(EXIT_FAILURE); - } -+ /* Maximum number of CPUs limited for Red Hat Enterprise Linux */ -+ limit_max_cpus_in_machines(); - - configure_rtc(qemu_find_opts_singleton("rtc")); - -- -2.21.0 +2.27.0 diff --git a/SOURCES/0016-Add-support-for-simpletrace.patch b/SOURCES/0016-Add-support-for-simpletrace.patch deleted file mode 100644 index 9624855..0000000 --- a/SOURCES/0016-Add-support-for-simpletrace.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 26128b3ede339e292a3c50a84e3248af46ecd0ec Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Thu, 8 Oct 2015 09:50:17 +0200 -Subject: Add support for simpletrace - -As simpletrace is upstream, we just need to properly handle it during rpmbuild. - -Signed-off-by: Miroslav Rezanina - -Rebase notes (3.1.0): -- Fixed python 2 to python3 switch - -Rebase notes (2.9.0): -- Added group argument for tracetool.py (upstream) - -Rebase notes (2.8.0): -- Changed tracetool.py parameters - -Merged patches (2.3.0): -- db959d6 redhat/qemu-kvm.spec.template: Install qemu-kvm-simpletrace.stp -- 5292fc3 trace: add SystemTap init scripts for simpletrace bridge -- eda9e5e simpletrace: install simpletrace.py -- 85c4c8f trace: add systemtap-initscript README file to RPM - -Signed-off-by: Danilo C. L. 
de Paula ---- - .gitignore | 2 ++ - Makefile | 4 +++ - README.systemtap | 43 +++++++++++++++++++++++++ - redhat/qemu-kvm.spec.template | 26 ++++++++++++++- - scripts/systemtap/conf.d/qemu_kvm.conf | 4 +++ - scripts/systemtap/script.d/qemu_kvm.stp | 1 + - 6 files changed, 79 insertions(+), 1 deletion(-) - create mode 100644 README.systemtap - create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf - create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp - -diff --git a/Makefile b/Makefile -index 086727dbb9..4254950f7f 100644 ---- a/Makefile -+++ b/Makefile -@@ -939,6 +939,10 @@ endif - $(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \ - done - $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all" -+ $(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/systemtap/script.d" -+ $(INSTALL_DATA) $(SRC_PATH)/scripts/systemtap/script.d/qemu_kvm.stp "$(DESTDIR)$(qemu_datadir)/systemtap/script.d/" -+ $(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/systemtap/conf.d" -+ $(INSTALL_DATA) $(SRC_PATH)/scripts/systemtap/conf.d/qemu_kvm.conf "$(DESTDIR)$(qemu_datadir)/systemtap/conf.d/" - - .PHONY: ctags - ctags: -diff --git a/README.systemtap b/README.systemtap -new file mode 100644 -index 0000000000..ad913fc990 ---- /dev/null -+++ b/README.systemtap -@@ -0,0 +1,43 @@ -+QEMU tracing using systemtap-initscript -+--------------------------------------- -+ -+You can capture QEMU trace data all the time using systemtap-initscript. This -+uses SystemTap's flight recorder mode to trace all running guests to a -+fixed-size buffer on the host. Old trace entries are overwritten by new -+entries when the buffer size wraps. -+ -+1. Install the systemtap-initscript package: -+ # yum install systemtap-initscript -+ -+2. 
Install the systemtap scripts and the conf file: -+ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/ -+ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/ -+ -+The set of trace events to enable is given in qemu_kvm.stp. This SystemTap -+script can be customized to add or remove trace events provided in -+/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp. -+ -+SystemTap customizations can be made to qemu_kvm.conf to control the flight -+recorder buffer size and whether to store traces in memory only or disk too. -+See stap(1) for option documentation. -+ -+3. Start the systemtap service. -+ # service systemtap start qemu_kvm -+ -+4. Make the service start at boot time. -+ # chkconfig systemtap on -+ -+5. Confirm that the service works. -+ # service systemtap status qemu_kvm -+ qemu_kvm is running... -+ -+When you want to inspect the trace buffer, perform the following steps: -+ -+1. Dump the trace buffer. -+ # staprun -A qemu_kvm >/tmp/trace.log -+ -+2. Start the systemtap service because the preceding step stops the service. -+ # service systemtap start qemu_kvm -+ -+3. Translate the trace record to readable format. 
-+ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log -diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf -new file mode 100644 -index 0000000000..372d8160a4 ---- /dev/null -+++ b/scripts/systemtap/conf.d/qemu_kvm.conf -@@ -0,0 +1,4 @@ -+# Force load uprobes (see BZ#1118352) -+stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true -+ -+qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes -diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp -new file mode 100644 -index 0000000000..c04abf9449 ---- /dev/null -+++ b/scripts/systemtap/script.d/qemu_kvm.stp -@@ -0,0 +1 @@ -+probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} --- -2.21.0 - diff --git a/SOURCES/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/SOURCES/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch new file mode 100644 index 0000000..9eda7c3 --- /dev/null +++ b/SOURCES/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -0,0 +1,126 @@ +From 0d3fc0b4c5773c6cabb0a58c064475f76eb6ac1e Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 8 Jul 2020 08:35:50 +0200 +Subject: Use qemu-kvm in documentation instead of qemu-system- + +Patchwork-id: 62380 +O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 +Bugzilla: 1140620 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi + +From: Miroslav Rezanina + +We change the name and location of qemu-kvm binaries. Update documentation +to reflect this change. Only architectures available in RHEL are updated. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase notes (5.1.0 rc0): + - qemu-block-drivers.texi converted to qemu-block-drivers.rst (upstream) + +Rebase notes (5.2.0 rc0): + - rewrite patch to new docs structure +--- + docs/defs.rst.inc | 4 ++-- + docs/tools/qemu-trace-stap.rst | 14 +++++++------- + qemu-options.hx | 10 +++++----- + 3 files changed, 14 insertions(+), 14 deletions(-) + +diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc +index 52d6454b93..d74dbdeca9 100644 +--- a/docs/defs.rst.inc ++++ b/docs/defs.rst.inc +@@ -9,7 +9,7 @@ + but the manpages will end up misrendered with following normal text + incorrectly in boldface. + +-.. |qemu_system| replace:: qemu-system-x86_64 +-.. |qemu_system_x86| replace:: qemu-system-x86_64 ++.. |qemu_system| replace:: qemu-kvm ++.. |qemu_system_x86| replace:: qemu-kvm + .. |I2C| replace:: I\ :sup:`2`\ C + .. |I2S| replace:: I\ :sup:`2`\ S +diff --git a/docs/tools/qemu-trace-stap.rst b/docs/tools/qemu-trace-stap.rst +index d53073b52b..9e93df084f 100644 +--- a/docs/tools/qemu-trace-stap.rst ++++ b/docs/tools/qemu-trace-stap.rst +@@ -46,19 +46,19 @@ The following commands are valid: + any of the listed names. If no *PATTERN* is given, the all possible + probes will be listed. + +- For example, to list all probes available in the ``qemu-system-x86_64`` ++ For example, to list all probes available in the ``qemu-kvm`` + binary: + + :: + +- $ qemu-trace-stap list qemu-system-x86_64 ++ $ qemu-trace-stap list qemu-kvm + + To filter the list to only cover probes related to QEMU's cryptographic + subsystem, in a binary outside ``$PATH`` + + :: + +- $ qemu-trace-stap list /opt/qemu/4.0.0/bin/qemu-system-x86_64 'qcrypto*' ++ $ qemu-trace-stap list /opt/qemu/4.0.0/bin/qemu-kvm 'qcrypto*' + + .. option:: run OPTIONS BINARY PATTERN... + +@@ -90,18 +90,18 @@ The following commands are valid: + Restrict the tracing session so that it only triggers for the process + identified by *PID*. 
+ +- For example, to monitor all processes executing ``qemu-system-x86_64`` ++ For example, to monitor all processes executing ``qemu-kvm`` + as found on ``$PATH``, displaying all I/O related probes: + + :: + +- $ qemu-trace-stap run qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap run qemu-kvm 'qio*' + + To monitor only the QEMU process with PID 1732 + + :: + +- $ qemu-trace-stap run --pid=1732 qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap run --pid=1732 qemu-kvm 'qio*' + + To monitor QEMU processes running an alternative binary outside of + ``$PATH``, displaying verbose information about setup of the +@@ -109,7 +109,7 @@ The following commands are valid: + + :: + +- $ qemu-trace-stap -v run /opt/qemu/4.0.0/qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap -v run /opt/qemu/4.0.0/qemu-kvm 'qio*' + + See also + -------- +diff --git a/qemu-options.hx b/qemu-options.hx +index ae2c6dbbfc..94c4a8dbaf 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -3150,11 +3150,11 @@ SRST + + :: + +- qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ +- -numa node,memdev=mem \ +- -chardev socket,id=chr0,path=/path/to/socket \ +- -netdev type=vhost-user,id=net0,chardev=chr0 \ +- -device virtio-net-pci,netdev=net0 ++ qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 + + ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` + Establish a vhost-vdpa netdev. 
+-- +2.27.0 + diff --git a/SOURCES/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/SOURCES/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch deleted file mode 100644 index ef83445..0000000 --- a/SOURCES/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 97ed62562b883c384346bfef3e1c7e379f03ccab Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Fri, 30 Nov 2018 09:11:03 +0100 -Subject: Use qemu-kvm in documentation instead of qemu-system- - -Patchwork-id: 62380 -O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 -Bugzilla: 1140620 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Markus Armbruster -RH-Acked-by: Stefan Hajnoczi - -From: Miroslav Rezanina - -We change the name and location of qemu-kvm binaries. Update documentation -to reflect this change. Only architectures available in RHEL are updated. - -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - docs/qemu-block-drivers.texi | 2 +- - docs/qemu-cpu-models.texi | 2 +- - qemu-doc.texi | 6 +++--- - qemu-options.hx | 16 ++++++++-------- - 4 files changed, 13 insertions(+), 13 deletions(-) - -diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi -index 2c7ea49c32..5d0afb3dee 100644 ---- a/docs/qemu-block-drivers.texi -+++ b/docs/qemu-block-drivers.texi -@@ -2,7 +2,7 @@ - QEMU block driver reference manual - @c man end - --@set qemu_system qemu-system-x86_64 -+@set qemu_system qemu-kvm - - @c man begin DESCRIPTION - -diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi -index f88a1def0d..c82cf8fab7 100644 ---- a/docs/qemu-cpu-models.texi -+++ b/docs/qemu-cpu-models.texi -@@ -2,7 +2,7 @@ - QEMU / KVM CPU model configuration - @c man end - --@set qemu_system_x86 qemu-system-x86_64 -+@set qemu_system_x86 qemu-kvm - - @c man begin DESCRIPTION - -diff --git a/qemu-doc.texi b/qemu-doc.texi -index 3ddf5c0a68..d460f8d2c0 100644 ---- a/qemu-doc.texi -+++ b/qemu-doc.texi -@@ -11,8 +11,8 @@ - @paragraphindent 0 - @c %**end of header - --@set qemu_system qemu-system-x86_64 --@set qemu_system_x86 qemu-system-x86_64 -+@set qemu_system qemu-kvm -+@set qemu_system_x86 qemu-kvm - - @ifinfo - @direntry -@@ -1827,7 +1827,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. - Set OpenBIOS variables in NVRAM, for example: - - @example --qemu-system-ppc -prom-env 'auto-boot?=false' \ -+qemu-kvm -prom-env 'auto-boot?=false' \ - -prom-env 'boot-device=hd:2,\yaboot' \ - -prom-env 'boot-args=conf=hd:2,\yaboot.conf' - @end example -diff --git a/qemu-options.hx b/qemu-options.hx -index fc17aca631..df1d27b6f2 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -2737,11 +2737,11 @@ be created for multiqueue vhost-user. 
- - Example: - @example --qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -- -numa node,memdev=mem \ -- -chardev socket,id=chr0,path=/path/to/socket \ -- -netdev type=vhost-user,id=net0,chardev=chr0 \ -- -device virtio-net-pci,netdev=net0 -+qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -+ -numa node,memdev=mem \ -+ -chardev socket,id=chr0,path=/path/to/socket \ -+ -netdev type=vhost-user,id=net0,chardev=chr0 \ -+ -device virtio-net-pci,netdev=net0 - @end example - - @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}] -@@ -3631,14 +3631,14 @@ ETEXI - - DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, - "-realtime [mlock=on|off]\n" -- " run qemu with realtime features\n" -+ " run qemu-kvm with realtime features\n" - " mlock=on|off controls mlock support (default: on)\n", - QEMU_ARCH_ALL) - STEXI - @item -realtime mlock=on|off - @findex -realtime --Run qemu with realtime features. --mlocking qemu and guest memory can be enabled via @option{mlock=on} -+Run qemu-kvm with realtime features. -+mlocking qemu-kvm and guest memory can be enabled via @option{mlock=on} - (enabled by default). 
- ETEXI - --- -2.21.0 - diff --git a/SOURCES/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/SOURCES/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch new file mode 100644 index 0000000..6b60efc --- /dev/null +++ b/SOURCES/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -0,0 +1,66 @@ +From d95768c039a2bf6b68422f83a8d55dad41bd3181 Mon Sep 17 00:00:00 2001 +From: Fam Zheng +Date: Wed, 14 Jun 2017 15:37:01 +0200 +Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] + +RH-Author: Fam Zheng +Message-id: <20170614153701.14757-1-famz@redhat.com> +Patchwork-id: 75613 +O-Subject: [RHV-7.4 qemu-kvm-rhev PATCH v3] virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] +Bugzilla: 1378816 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +We need a fix for RHEL 7.4 and 7.3.z, but unfortunately upstream isn't +ready. If it were, the changes will be too invasive. To have an idea: + +https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05400.html + +is an incomplete attempt to fix part of the issue, and the remaining +work unfortunately involve even more complex changes. + +As a band-aid, this partially reverts the effect of ef8875b +(virtio-scsi: Remove op blocker for dataplane, since v2.7). We cannot +simply revert that commit as a whole because we already shipped it in +qemu-kvm-rhev 7.3, since when, block jobs has been possible. We should +only block what has been broken. Also, faithfully reverting the above +commit means adding back the removed op blocker, but that is not enough, +because it still crashes when inserting media into an initially empty +scsi-cd. + +All in all, scsi-cd on virtio-scsi-dataplane has basically been unusable +unless the scsi-cd never enters an empty state, so, disable it +altogether. Otherwise it would be much more difficult to avoid +crashing. + +Signed-off-by: Fam Zheng +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/scsi/virtio-scsi.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 51fd09522a..a35257c35a 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -896,6 +896,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + AioContext *old_context; + int ret; + ++ /* XXX: Remove this check once block backend is capable of handling ++ * AioContext change upon eject/insert. ++ * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if ++ * data plane is not used, both cases are safe for scsi-cd. */ ++ if (s->ctx && s->ctx != qemu_get_aio_context() && ++ object_dynamic_cast(OBJECT(dev), "scsi-cd")) { ++ error_setg(errp, "scsi-cd is not supported by data plane"); ++ return; ++ } + if (s->ctx && !s->dataplane_fenced) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; +-- +2.27.0 + diff --git a/SOURCES/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/SOURCES/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch new file mode 100644 index 0000000..e07746d --- /dev/null +++ b/SOURCES/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -0,0 +1,60 @@ +From 92bb62c47eab021f8dabecd09b5fbc1706e6a29c Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Wed, 6 Feb 2019 03:58:56 +0000 +Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts + +RH-Author: David Gibson +Message-id: <20190206035856.19058-1-dgibson@redhat.com> +Patchwork-id: 84246 +O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts +Bugzilla: 1653590 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Serhii Popovych +RH-Acked-by: Thomas Huth + +Most current POWER guests require 64kiB page support, so that's the default +for the cap-hpt-max-pagesize option in qemu which limits available guest +page sizes. 
We warn if the value is set smaller than that, but don't +outright fail upstream, because we need to allow for the possibility of +guest (and/or host) kernels configured for 4kiB page sizes. + +Downstream, however, we simply don't support 4kiB pagesize configured +kernels in guest or host, so we can have qemu simply error out in this +situation. + +Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified + it failed immediately with a qemu error + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr_caps.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c +index ed7c077a0d..48a8efe678 100644 +--- a/hw/ppc/spapr_caps.c ++++ b/hw/ppc/spapr_caps.c +@@ -332,12 +332,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) + { ++#if 0 /* disabled for RHEL */ + if (val < 12) { + error_setg(errp, "Require at least 4kiB hpt-max-page-size"); + return; + } else if (val < 16) { + warn_report("Many guests require at least 64kiB hpt-max-page-size"); + } ++#else /* Only page sizes >=64kiB supported for RHEL */ ++ if (val < 16) { ++ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); ++ return; ++ } ++#endif + + spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); + } +-- +2.27.0 + diff --git a/SOURCES/0018-usb-xhci-Fix-PCI-capability-order.patch b/SOURCES/0018-usb-xhci-Fix-PCI-capability-order.patch deleted file mode 100644 index bc6146d..0000000 --- a/SOURCES/0018-usb-xhci-Fix-PCI-capability-order.patch +++ /dev/null @@ -1,96 +0,0 @@ -From b13a7d3527c5c91e7a50236de30a2244b8453911 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 5 May 2017 19:06:14 +0200 -Subject: usb-xhci: Fix PCI capability order - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20170505190614.15987-2-dgilbert@redhat.com> -Patchwork-id: 75038 -O-Subject: [RHEL-7.4 qemu-kvm-rhev PATCH 1/1] usb-xhci: Fix PCI capability order -Bugzilla: 1447874 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Juan Quintela - -From: "Dr. David Alan Gilbert" - -Upstream commit 1108b2f8a9 in 2.7.0 changed the order -of the PCI capability chain in the XHCI pci device in the case -where the device has the PCIe endpoint capability (i.e. only -older machine types, pc-i440fx-2.0 upstream, pc-i440fx-rhel7.0.0 -apparently for us). - -Changing the order breaks migration compatibility; fixing this -upstream would mean breaking the same case going from 2.7.0->current -that currently works 2.7.0->2.9.0 - so upstream it's a choice -of two breakages. - -Since we never released 2.7.0/2.8.0 we can fix this downstream. - -This reverts the order so that we create the capabilities in the -order: - PCIe - MSI - MSI-X - -The symptom is: -qemu-kvm: get_pci_config_device: Bad config data: i=0x71 read: a0 device: 0 cmask: ff wmask: 0 w1cmask:0 -qemu-kvm: Failed to load PCIDevice:config -qemu-kvm: Failed to load xhci:parent_obj -qemu-kvm: error while loading state for instance 0x0 of device '0000:00:0d.0/xhci' -qemu-kvm: load of migration failed: Invalid argument - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Miroslav Rezanina - --- -Rebase notes (2.9.0): -- Change in assert condition (upstream) - -(cherry picked from commit aad727a5ecde1ad4935eb8427604d4df5a1f1f35) -(cherry picked from commit 2dd7402227e77d748a7375233ac9e7feab244bda) - -Conflicts: - hw/usb/hcd-xhci.c - -(cherry picked from commit a42f86dc906cc7d2c16d02bf125ed76847b469cb) -(cherry picked from commit 992ab2e4f6e15d3e51bc716763aa8d6f45c6d29d) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/usb/hcd-xhci.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 8fed2eedd6..d2b9744030 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -3403,6 +3403,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) - xhci->max_pstreams_mask = 0; - } - -+ if (pci_bus_is_express(pci_get_bus(dev)) || -+ xhci_get_flag(xhci, XHCI_FLAG_FORCE_PCIE_ENDCAP)) { -+ ret = pcie_endpoint_cap_init(dev, 0xa0); -+ assert(ret > 0); -+ } -+ - if (xhci->msi != ON_OFF_AUTO_OFF) { - ret = msi_init(dev, 0x70, xhci->numintrs, true, false, &err); - /* Any error other than -ENOTSUP(board's MSI support is broken) -@@ -3451,12 +3457,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) - PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64, - &xhci->mem); - -- if (pci_bus_is_express(pci_get_bus(dev)) || -- xhci_get_flag(xhci, XHCI_FLAG_FORCE_PCIE_ENDCAP)) { -- ret = pcie_endpoint_cap_init(dev, 0xa0); -- assert(ret > 0); -- } -- - if (xhci->msix != ON_OFF_AUTO_OFF) { - /* TODO check for errors, and should fail when msix=on */ - msix_init(dev, xhci->numintrs, --- -2.21.0 - diff --git a/SOURCES/0019-compat-Update-hw_compat_rhel_8_5.patch b/SOURCES/0019-compat-Update-hw_compat_rhel_8_5.patch new file mode 100644 index 0000000..6d2b7c3 --- /dev/null +++ b/SOURCES/0019-compat-Update-hw_compat_rhel_8_5.patch @@ -0,0 +1,53 @@ +From a9b5da617c29f48199cbea08d6a1c083877dce10 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 15 Nov 2021 14:22:29 +0100 +Subject: compat: Update hw_compat_rhel_8_5 + +RH-Author: Laurent Vivier +RH-MergeRequest: 66: redhat: Update pseries-rhel8.5.0 machine type +RH-Commit: [1/2] 232f2ad2b29d250fbdb8fcea9d814704c575ba2b +RH-Bugzilla: 2022608 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz + +Add properties from hw_compat_6_1 as it already includes the ones from +hw_compat_6_0. 
Add a lately added property from 6.0 too. + +Signed-off-by: Laurent Vivier +-- +Rebase notes (6.2.0 rc3): +- Included compat changes introduced in RC2 +--- + hw/core/machine.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 62febde5aa..736c765c30 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -38,7 +38,7 @@ + #include "hw/virtio/virtio-pci.h" + + /* +- * Mostly the same as hw_compat_6_0 ++ * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ + GlobalProperty hw_compat_rhel_8_5[] = { + /* hw_compat_rhel_8_5 from hw_compat_6_0 */ +@@ -51,6 +51,12 @@ GlobalProperty hw_compat_rhel_8_5[] = { + { "e1000", "init-vet", "off" }, + /* hw_compat_rhel_8_5 from hw_compat_6_0 */ + { "e1000e", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "vhost-user-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "nvme-ns", "shared", "off" }, + }; + const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); + +-- +2.27.0 + diff --git a/SOURCES/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/SOURCES/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch deleted file mode 100644 index e167b2e..0000000 --- a/SOURCES/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 3fab8f5e8a9e190c1ed6916ac13c7c4d65e874b7 Mon Sep 17 00:00:00 2001 -From: Fam Zheng -Date: Wed, 14 Jun 2017 15:37:01 +0200 -Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] - -RH-Author: Fam Zheng -Message-id: <20170614153701.14757-1-famz@redhat.com> -Patchwork-id: 75613 -O-Subject: [RHV-7.4 qemu-kvm-rhev PATCH v3] virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] -Bugzilla: 1378816 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max
Reitz - -We need a fix for RHEL 7.4 and 7.3.z, but unfortunately upstream isn't -ready. If it were, the changes will be too invasive. To have an idea: - -https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05400.html - -is an incomplete attempt to fix part of the issue, and the remaining -work unfortunately involve even more complex changes. - -As a band-aid, this partially reverts the effect of ef8875b -(virtio-scsi: Remove op blocker for dataplane, since v2.7). We cannot -simply revert that commit as a whole because we already shipped it in -qemu-kvm-rhev 7.3, since when, block jobs has been possible. We should -only block what has been broken. Also, faithfully reverting the above -commit means adding back the removed op blocker, but that is not enough, -because it still crashes when inserting media into an initially empty -scsi-cd. - -All in all, scsi-cd on virtio-scsi-dataplane has basically been unusable -unless the scsi-cd never enters an empty state, so, disable it -altogether. Otherwise it would be much more difficult to avoid -crashing. - -Signed-off-by: Fam Zheng -Signed-off-by: Miroslav Rezanina -(cherry picked from commit b0caf00bbc35c7d89e02999bdce86e1f867728e8) -(cherry picked from commit c9c4f117d8b507c2f86035c282d537c0a327364f) -(cherry picked from commit 5d586bb2543337f0ff172c6ce942dba3acbcedff) -Signed-off-by: Danilo C. L. de Paula ---- - hw/scsi/virtio-scsi.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index e8b2b64d09..54108c0056 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -808,6 +808,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, - SCSIDevice *sd = SCSI_DEVICE(dev); - int ret; - -+ /* XXX: Remove this check once block backend is capable of handling -+ * AioContext change upon eject/insert. 
-+ * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if -+ * data plane is not used, both cases are safe for scsi-cd. */ -+ if (s->ctx && s->ctx != qemu_get_aio_context() && -+ object_dynamic_cast(OBJECT(dev), "scsi-cd")) { -+ error_setg(errp, "scsi-cd is not supported by data plane"); -+ return; -+ } - if (s->ctx && !s->dataplane_fenced) { - if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - return; --- -2.21.0 - diff --git a/SOURCES/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/SOURCES/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch deleted file mode 100644 index b3350da..0000000 --- a/SOURCES/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 148e9e80a3a430615b552075082fad22d007d851 Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Wed, 6 Feb 2019 03:58:56 +0000 -Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts - -RH-Author: David Gibson -Message-id: <20190206035856.19058-1-dgibson@redhat.com> -Patchwork-id: 84246 -O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts -Bugzilla: 1653590 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Serhii Popovych -RH-Acked-by: Thomas Huth - -Most current POWER guests require 64kiB page support, so that's the default -for the cap-hpt-max-pagesize option in qemu which limits available guest -page sizes. We warn if the value is set smaller than that, but don't -outright fail upstream, because we need to allow for the possibility of -guest (and/or host) kernels configured for 4kiB page sizes. - -Downstream, however, we simply don't support 4kiB pagesize configured -kernels in guest or host, so we can have qemu simply error out in this -situation. 
- -Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified - it failed immediately with a qemu error - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr_caps.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 481dfd2a27..805f38533e 100644 ---- a/hw/ppc/spapr_caps.c -+++ b/hw/ppc/spapr_caps.c -@@ -351,12 +351,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, - static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, - uint8_t val, Error **errp) - { -+#if 0 /* disabled for RHEL */ - if (val < 12) { - error_setg(errp, "Require at least 4kiB hpt-max-page-size"); - return; - } else if (val < 16) { - warn_report("Many guests require at least 64kiB hpt-max-page-size"); - } -+#else /* Only page sizes >=64kiB supported for RHEL */ -+ if (val < 16) { -+ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); -+ return; -+ } -+#endif - - spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); - } --- -2.21.0 - diff --git a/SOURCES/0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch b/SOURCES/0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch new file mode 100644 index 0000000..af8e9dd --- /dev/null +++ b/SOURCES/0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch @@ -0,0 +1,43 @@ +From 82358c35f04f026820b3907069a6c19cd95b654d Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 15 Nov 2021 14:25:33 +0100 +Subject: redhat: Update pseries-rhel8.5.0 machine type + +RH-Author: Laurent Vivier +RH-MergeRequest: 66: redhat: Update pseries-rhel8.5.0 machine type +RH-Commit: [2/2] 36f7ad1ea56baaaecb139875ad0a90a6470196be +RH-Bugzilla: 2022608 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz + +We don't introduce a new machine type for rhel8.6.0 but we need +to keep compatibility with rhel8.5.0 machine type.
+ +Signed-off-by: Laurent Vivier +--- + hw/ppc/spapr.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index cace86028d..2f27888d8a 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5177,10 +5177,14 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) + + static void spapr_machine_rhel850_class_options(MachineClass *mc) + { ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ + /* The default machine type must apply the RHEL specific defaults */ + spapr_machine_rhel_default_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, + hw_compat_rhel_8_5_len); ++ smc->pre_6_2_numa_affinity = true; ++ mc->smp_props.prefer_sockets = true; + } + + DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); +-- +2.27.0 + diff --git a/SOURCES/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch b/SOURCES/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch deleted file mode 100644 index a2a800b..0000000 --- a/SOURCES/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch +++ /dev/null @@ -1,61 +0,0 @@ -From ab9ebc29bb9bb142e73a160750a451d40bfe9746 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Mon, 16 Sep 2019 17:07:00 +0100 -Subject: Using ip_deq after m_free might read pointers from an allocation - reuse. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Philippe Mathieu-Daudé -Message-id: <20190916170700.647-2-philmd@redhat.com> -Patchwork-id: 90470 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] Using ip_deq after m_free might read pointers from an allocation reuse. -Bugzilla: 1749737 -RH-Acked-by: Danilo de Paula -RH-Acked-by: John Snow - -From: Samuel Thibault - -This would be difficult to exploit, but that is still related with -CVE-2019-14378 which generates fragmented IP packets that would trigger this -issue and at least produce a DoS. 
- -Signed-off-by: Samuel Thibault -(cherry picked from libslirp commit c59279437eda91841b9d26079c70b8a540d41204) -Signed-off-by: Philippe Mathieu-Daudé - -Signed-off-by: Danilo C. L. de Paula ---- - slirp/src/ip_input.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c -index 8c75d91495..df1c846ade 100644 ---- a/slirp/src/ip_input.c -+++ b/slirp/src/ip_input.c -@@ -292,6 +292,7 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) - */ - while (q != (struct ipasfrag *)&fp->frag_link && - ip->ip_off + ip->ip_len > q->ipf_off) { -+ struct ipasfrag *prev; - i = (ip->ip_off + ip->ip_len) - q->ipf_off; - if (i < q->ipf_len) { - q->ipf_len -= i; -@@ -299,9 +300,11 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) - m_adj(dtom(slirp, q), i); - break; - } -+ prev = q; - q = q->ipf_next; -- m_free(dtom(slirp, q->ipf_prev)); -- ip_deq(q->ipf_prev); -+ ip_deq(prev); -+ m_free(dtom(slirp, prev)); -+ - } - - insert: --- -2.21.0 - diff --git a/SOURCES/0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch b/SOURCES/0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch new file mode 100644 index 0000000..3bcf4e0 --- /dev/null +++ b/SOURCES/0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch @@ -0,0 +1,51 @@ +From ce73e939b993cc6be170cdb5d3f2068270593f2b Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 16 Nov 2021 17:03:07 +0100 +Subject: redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU + 6.2.0 update + +RH-Author: Eric Auger +RH-MergeRequest: 75: redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU 6.2.0 update +RH-Commit: [21/21] f027d13654944e3d34e3356affe7af952eec2bed +RH-Bugzilla: 2022607 +RH-Acked-by: Gavin Shan +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck +RH-Acked-by: Laurent Vivier + +To keep compatibility with 8.5-AV machine type we need to +turn 
a few new options on by default: +smp_props.prefer_sockets, no_cpu_topology, no_tcg_its + +TESTED: migrate from rhel-av-8.5.0 to rhel-8.6.0 and vice-versa +with upstream fix: 33a0c404fb hw/intc/arm_gicv3_its: Revert version +increments in vmstate_its + +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c77d26ab13..e8941afd01 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3225,8 +3225,13 @@ type_init(rhel_machine_init); + + static void rhel850_virt_options(MachineClass *mc) + { ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ mc->smp_props.prefer_sockets = true; ++ vmc->no_cpu_topology = true; ++ vmc->no_tcg_its = true; + } + DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) + +-- +2.27.0 + diff --git a/SOURCES/0022-Fix-virtio-net-pci-vectors-compat.patch b/SOURCES/0022-Fix-virtio-net-pci-vectors-compat.patch new file mode 100644 index 0000000..b484ea1 --- /dev/null +++ b/SOURCES/0022-Fix-virtio-net-pci-vectors-compat.patch @@ -0,0 +1,45 @@ +From f9643b6934657292aae0b830627b1e5f9b8cbaa1 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Tue, 19 Oct 2021 13:17:06 -0400 +Subject: Fix virtio-net-pci* "vectors" compat + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 77: 8.6/6.2 mt fixes +RH-Commit: [21/23] 8ad581932275d2698a99f31bec40b14f1dbd3d2e +RH-Bugzilla: 2026443 +RH-Acked-by: Miroslav Rezanina + +hw_compat_rhel_8_4 has an issue: it affects only "virtio-net-pci" +but not "virtio-net-pci-transitional" and +"virtio-net-pci-non-transitional". The solution is to use the +"virtio-net-pci-base" type in compat_props. + +An equivalent fix will be submitted for hw_compat_5_2 upstream.
+ +Signed-off-by: Eduardo Habkost +(cherry picked from commit d45823ab0d0138b2fbaf2ed1e1896d2052f3ccb3) +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 736c765c30..024b025fc2 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -71,7 +71,11 @@ GlobalProperty hw_compat_rhel_8_4[] = { + /* hw_compat_rhel_8_4 from hw_compat_5_2 */ + { "virtio-blk-device", "report-discard-granularity", "off" }, + /* hw_compat_rhel_8_4 from hw_compat_5_2 */ +- { "virtio-net-pci", "vectors", "3"}, ++ /* ++ * Upstream incorrectly had "virtio-net-pci" instead of "virtio-net-pci-base", ++ * (https://bugzilla.redhat.com/show_bug.cgi?id=1999141) ++ */ ++ { "virtio-net-pci-base", "vectors", "3"}, + }; + const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); + +-- +2.27.0 + diff --git a/SOURCES/0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch b/SOURCES/0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch new file mode 100644 index 0000000..8572d61 --- /dev/null +++ b/SOURCES/0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch @@ -0,0 +1,73 @@ +From 7ad8814e583dcc7dc23e3e8398570243b8f176a1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 23 Nov 2021 17:57:42 +0000 +Subject: x86/rhel machine types: Add pc_rhel_8_5_compat + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 77: 8.6/6.2 mt fixes +RH-Commit: [22/23] 8bf555c5d78f344b97ffd5c888c7a7bed592d9d0 +RH-Bugzilla: 2026443 +RH-Acked-by: Miroslav Rezanina + +Add pc_rhel_8_5_compat as the merge of pc_compat_6_1 and pc_compat_6_0 +(since 8.5 was based on 6.0). + +Note, x-keep-pci-slot-hpc flipped back and forward, leaving it out +looks like it leaves us with the original. + +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc.c | 21 +++++++++++++++++++++ + include/hw/i386/pc.h | 3 +++ + 2 files changed, 24 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index e8109954ca..4c08a1971c 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -387,6 +387,27 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_5_compat[] = { ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "model", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, ++ ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-major", "0x0006" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" }, ++}; ++const size_t pc_rhel_8_5_compat_len = G_N_ELEMENTS(pc_rhel_8_5_compat); ++ + GlobalProperty pc_rhel_8_4_compat[] = { + /* pc_rhel_8_4_compat from pc_compat_5_2 */ + { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index d0544ee119..9e8bfb69f8 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -286,6 +286,9 @@ extern const size_t pc_compat_1_4_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_5_compat[]; ++extern const size_t pc_rhel_8_5_compat_len; ++ + extern GlobalProperty pc_rhel_8_4_compat[]; + extern const size_t pc_rhel_8_4_compat_len; + +-- +2.27.0 + diff --git 
a/SOURCES/0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch b/SOURCES/0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch new file mode 100644 index 0000000..4acfa88 --- /dev/null +++ b/SOURCES/0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch @@ -0,0 +1,54 @@ +From 7bd99eebadfdbea6a76585b526e7cab1ee8b1fde Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 23 Nov 2021 18:07:49 +0000 +Subject: x86/rhel machine types: Wire compat into q35 and i440fx + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 77: 8.6/6.2 mt fixes +RH-Commit: [23/23] fc3861aeccc943b434231193ef45ffbc0b3cf6c6 +RH-Bugzilla: 2026443 +RH-Acked-by: Miroslav Rezanina + +Wire the pc_rhel_8_5 compat data into both piix and q35 +to keep the existing machine types compatible. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc_piix.c | 4 ++++ + hw/i386/pc_q35.c | 4 ++++ + 2 files changed, 8 insertions(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 2885edffe9..37fab00733 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1040,6 +1040,10 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_4, + hw_compat_rhel_8_4_len); + compat_props_add(m->compat_props, pc_rhel_8_4_compat, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index c67418b6a9..78876e1101 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -658,6 +658,10 @@ static void pc_q35_machine_rhel850_options(MachineClass *m) + m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.5.0"; ++ 
compat_props_add(m->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); + } + + DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, +-- +2.27.0 + diff --git a/SOURCES/0025-redhat-Add-s390x-machine-type-compatibility-handling.patch b/SOURCES/0025-redhat-Add-s390x-machine-type-compatibility-handling.patch new file mode 100644 index 0000000..1ae8a99 --- /dev/null +++ b/SOURCES/0025-redhat-Add-s390x-machine-type-compatibility-handling.patch @@ -0,0 +1,58 @@ +From 265a57f2955b7f0b65e3f57f89aa1ff2541d3f73 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 26 Nov 2021 09:37:11 +0100 +Subject: redhat: Add s390x machine type compatibility handling for the rebase + to v6.2 + +RH-Author: Thomas Huth +RH-MergeRequest: 80: Add s390x machine type compatibility handling for the rebase to v6.2 +RH-Commit: [26/26] c45cf594604f6dd23954696b9c84d2025e328d11 +RH-Bugzilla: 2022602 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cornelia Huck + +Add compatibility handling for the rhel8.5.0 machine type (and +recursively older, of course). 
+ +Based on the following upstream commits: + + 463e50da8b - s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z14 GA2 + 30e398f796 - s390x/cpumodel: Add more feature to gen16 default model + 4a0af2930a - machine: Prefer cores over sockets in smp parsing since 6.2 + 2b52619994 - machine: Move smp_prefer_sockets to struct SMPCompatProps + +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 181856e6cf..cf13c457d6 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1105,11 +1105,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); + + static void ccw_machine_rhel850_instance_options(MachineState *machine) + { ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; ++ ++ s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); ++ ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_BEAR_ENH); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_RDP); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAI); + } + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ mc->smp_props.prefer_sockets = true; + } + DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); + +-- +2.27.0 + diff --git a/SOURCES/kvm-ACPI-add-expected-files-for-HMAT-tests-acpihmat.patch b/SOURCES/kvm-ACPI-add-expected-files-for-HMAT-tests-acpihmat.patch deleted file mode 100644 index 7310f17..0000000 --- a/SOURCES/kvm-ACPI-add-expected-files-for-HMAT-tests-acpihmat.patch +++ /dev/null @@ -1,41 +0,0 @@ -From ff8529dcbf86b3a086d64dd630cf6a687603c571 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 21 May 2020 23:56:55 +0100 -Subject: [PATCH 12/12] ACPI: add expected files for HMAT tests (acpihmat) - 
-RH-Author: plai@redhat.com -Message-id: <20200521235655.27141-12-plai@redhat.com> -Patchwork-id: 96742 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 11/11] ACPI: add expected files for HMAT tests (acpihmat) -Bugzilla: 1600217 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -From: "Michael S. Tsirkin" - -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 48892c6c8def6624a0ed57e2bd6c2a0a9878b973) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. de Paula ---- - tests/bios-tables-test-allowed-diff.h | 8 -------- - 1 file changed, 8 deletions(-) - -diff --git a/tests/bios-tables-test-allowed-diff.h b/tests/bios-tables-test-allowed-diff.h -index 3c9e0c9..dfb8523 100644 ---- a/tests/bios-tables-test-allowed-diff.h -+++ b/tests/bios-tables-test-allowed-diff.h -@@ -1,9 +1 @@ - /* List of comma-separated changed AML files to ignore */ --"tests/data/acpi/pc/APIC.acpihmat", --"tests/data/acpi/pc/SRAT.acpihmat", --"tests/data/acpi/pc/HMAT.acpihmat", --"tests/data/acpi/pc/DSDT.acpihmat", --"tests/data/acpi/q35/APIC.acpihmat", --"tests/data/acpi/q35/SRAT.acpihmat", --"tests/data/acpi/q35/HMAT.acpihmat", --"tests/data/acpi/q35/DSDT.acpihmat", --- -1.8.3.1 - diff --git a/SOURCES/kvm-Add-mtod_check.patch b/SOURCES/kvm-Add-mtod_check.patch deleted file mode 100644 index 0b2e710..0000000 --- a/SOURCES/kvm-Add-mtod_check.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 52bf635da30c75d0fdb0a3e7e7b9a2483ca033fc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 29 Jul 2021 04:55:59 -0400 -Subject: [PATCH 05/14] Add mtod_check() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20210708082537.1550263-2-marcandre.lureau@redhat.com> -Patchwork-id: 101819 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/8] Add mtod_check() -Bugzilla: 1970819 1970835 1970843 1970853 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Eric Blake 
-RH-Acked-by: Stefan Hajnoczi - -From: Marc-André Lureau - -Recent security issues demonstrate the lack of safety care when casting -a mbuf to a particular structure type. At least, it should check that -the buffer is large enough. The following patches will make use of this -function. - -Signed-off-by: Marc-André Lureau - -(cherry picked from commit 93e645e72a056ec0b2c16e0299fc5c6b94e4ca17) -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - slirp/src/mbuf.c | 11 +++++++++++ - slirp/src/mbuf.h | 1 + - 2 files changed, 12 insertions(+) - -diff --git a/slirp/src/mbuf.c b/slirp/src/mbuf.c -index 4fd62282a9..6d0653ed3d 100644 ---- a/slirp/src/mbuf.c -+++ b/slirp/src/mbuf.c -@@ -222,3 +222,14 @@ struct mbuf *dtom(Slirp *slirp, void *dat) - - return (struct mbuf *)0; - } -+ -+void *mtod_check(struct mbuf *m, size_t len) -+{ -+ if (m->m_len >= len) { -+ return m->m_data; -+ } -+ -+ DEBUG_ERROR("mtod failed"); -+ -+ return NULL; -+} -diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h -index 546e7852c5..2015e3232f 100644 ---- a/slirp/src/mbuf.h -+++ b/slirp/src/mbuf.h -@@ -118,6 +118,7 @@ void m_inc(struct mbuf *, int); - void m_adj(struct mbuf *, int); - int m_copy(struct mbuf *, struct mbuf *, int, int); - struct mbuf *dtom(Slirp *, void *); -+void *mtod_check(struct mbuf *, size_t len); - - static inline void ifs_init(struct mbuf *ifm) - { --- -2.27.0 - diff --git a/SOURCES/kvm-Compress-lines-for-immediate-return.patch b/SOURCES/kvm-Compress-lines-for-immediate-return.patch deleted file mode 100644 index aed5149..0000000 --- a/SOURCES/kvm-Compress-lines-for-immediate-return.patch +++ /dev/null @@ -1,242 +0,0 @@ -From 5cf6dd33456c4e7e2a8849f458ce234fb5bb290c Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 25 Jun 2021 17:41:03 -0400 -Subject: [PATCH 3/4] Compress lines for immediate return -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -Message-id: 
<20210625174104.44313-2-kwolf@redhat.com> -Patchwork-id: 101777 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/2] Compress lines for immediate return -Bugzilla: 1970912 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Simran Singhal - -Compress two lines into a single line if immediate return statement is found. - -It also remove variables progress, val, data, ret and sock -as they are no longer needed. - -Remove space between function "mixer_load" and '(' to fix the -checkpatch.pl error:- -ERROR: space prohibited between function name and open parenthesis '(' - -Done using following coccinelle script: -@@ -local idexpression ret; -expression e; -@@ - --ret = -+return - e; --return ret; - -Signed-off-by: Simran Singhal -Reviewed-by: Stefan Hajnoczi -Message-Id: <20200401165314.GA3213@simran-Inspiron-5558> -[lv: in handle_aiocb_write_zeroes_unmap() move "int ret" inside the #ifdef] -Signed-off-by: Laurent Vivier -(cherry picked from commit b3ac2b94cdc939a90d5a22338ae507689e2cfab0) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/file-posix.c | 8 +++----- - block/nfs.c | 3 +-- - block/nvme.c | 4 +--- - block/vhdx.c | 3 +-- - hw/audio/ac97.c | 4 +--- - hw/audio/adlib.c | 5 +---- - hw/display/cirrus_vga.c | 4 +--- - migration/ram.c | 4 +--- - ui/gtk.c | 3 +-- - util/qemu-sockets.c | 5 +---- - 10 files changed, 12 insertions(+), 31 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 371572f1b0..837edcf027 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -1626,13 +1626,12 @@ static int handle_aiocb_write_zeroes_unmap(void *opaque) - { - RawPosixAIOData *aiocb = opaque; - BDRVRawState *s G_GNUC_UNUSED = aiocb->bs->opaque; -- int ret; - - /* First try to write zeros and unmap at the same time */ - - #ifdef CONFIG_FALLOCATE_PUNCH_HOLE -- ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, -- aiocb->aio_offset, aiocb->aio_nbytes); -+ int ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, -+ aiocb->aio_offset, aiocb->aio_nbytes); - if (ret != -ENOTSUP) { - return ret; - } -@@ -1640,8 +1639,7 @@ static int handle_aiocb_write_zeroes_unmap(void *opaque) - - /* If we couldn't manage to unmap while guaranteed that the area reads as - * all-zero afterwards, just write zeroes without unmapping */ -- ret = handle_aiocb_write_zeroes(aiocb); -- return ret; -+ return handle_aiocb_write_zeroes(aiocb); - } - - #ifndef HAVE_COPY_FILE_RANGE -diff --git a/block/nfs.c b/block/nfs.c -index 2393fbfe6b..18c0a73694 100644 ---- a/block/nfs.c -+++ b/block/nfs.c -@@ -623,8 +623,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags, - } - - bs->total_sectors = ret; -- ret = 0; -- return ret; -+ return 0; - } - - static QemuOptsList nfs_create_opts = { -diff --git a/block/nvme.c b/block/nvme.c -index 7b7c0cc5d6..eb2f54dd9d 100644 ---- a/block/nvme.c -+++ b/block/nvme.c -@@ -575,11 +575,9 @@ static bool nvme_poll_cb(void *opaque) - { - EventNotifier *e = opaque; - BDRVNVMeState *s = container_of(e, 
BDRVNVMeState, irq_notifier); -- bool progress = false; - - trace_nvme_poll_cb(s); -- progress = nvme_poll_queues(s); -- return progress; -+ return nvme_poll_queues(s); - } - - static int nvme_init(BlockDriverState *bs, const char *device, int namespace, -diff --git a/block/vhdx.c b/block/vhdx.c -index 21497f7318..a427e47f10 100644 ---- a/block/vhdx.c -+++ b/block/vhdx.c -@@ -411,8 +411,7 @@ int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, - if (ret < 0) { - return ret; - } -- ret = vhdx_update_header(bs, s, generate_data_write_guid, log_guid); -- return ret; -+ return vhdx_update_header(bs, s, generate_data_write_guid, log_guid); - } - - /* opens the specified header block from the VHDX file header section */ -diff --git a/hw/audio/ac97.c b/hw/audio/ac97.c -index a136b97f68..a2cfae52b3 100644 ---- a/hw/audio/ac97.c -+++ b/hw/audio/ac97.c -@@ -574,11 +574,9 @@ static uint32_t nam_readb (void *opaque, uint32_t addr) - static uint32_t nam_readw (void *opaque, uint32_t addr) - { - AC97LinkState *s = opaque; -- uint32_t val = ~0U; - uint32_t index = addr; - s->cas = 0; -- val = mixer_load (s, index); -- return val; -+ return mixer_load(s, index); - } - - static uint32_t nam_readl (void *opaque, uint32_t addr) -diff --git a/hw/audio/adlib.c b/hw/audio/adlib.c -index cb4178d861..5779d09815 100644 ---- a/hw/audio/adlib.c -+++ b/hw/audio/adlib.c -@@ -120,13 +120,10 @@ static void adlib_write(void *opaque, uint32_t nport, uint32_t val) - static uint32_t adlib_read(void *opaque, uint32_t nport) - { - AdlibState *s = opaque; -- uint8_t data; - int a = nport & 3; - - adlib_kill_timers (s); -- data = OPLRead (s->opl, a); -- -- return data; -+ return OPLRead (s->opl, a); - } - - static void timer_handler (void *opaque, int c, double interval_Sec) -diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 93afa26fda..a52d3094b9 100644 ---- a/hw/display/cirrus_vga.c -+++ b/hw/display/cirrus_vga.c -@@ -2411,12 +2411,10 @@ static uint64_t 
cirrus_linear_bitblt_read(void *opaque, - unsigned size) - { - CirrusVGAState *s = opaque; -- uint32_t ret; - - /* XXX handle bitblt */ - (void)s; -- ret = 0xff; -- return ret; -+ return 0xff; - } - - static void cirrus_linear_bitblt_write(void *opaque, -diff --git a/migration/ram.c b/migration/ram.c -index 5344c7d59e..92c506d13c 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3101,9 +3101,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) - } - trace_ram_postcopy_send_discard_bitmap(); - -- ret = postcopy_each_ram_send_discard(ms); -- -- return ret; -+ return postcopy_each_ram_send_discard(ms); - } - - /** -diff --git a/ui/gtk.c b/ui/gtk.c -index 692ccc7bbb..e032e3c36f 100644 ---- a/ui/gtk.c -+++ b/ui/gtk.c -@@ -1649,8 +1649,7 @@ static GSList *gd_vc_menu_init(GtkDisplayState *s, VirtualConsole *vc, - G_CALLBACK(gd_menu_switch_vc), s); - gtk_menu_shell_append(GTK_MENU_SHELL(view_menu), vc->menu_item); - -- group = gtk_radio_menu_item_get_group(GTK_RADIO_MENU_ITEM(vc->menu_item)); -- return group; -+ return gtk_radio_menu_item_get_group(GTK_RADIO_MENU_ITEM(vc->menu_item)); - } - - #if defined(CONFIG_VTE) -diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c -index bcc06d0e01..86c48b9fa5 100644 ---- a/util/qemu-sockets.c -+++ b/util/qemu-sockets.c -@@ -765,15 +765,12 @@ static int vsock_connect_addr(const struct sockaddr_vm *svm, Error **errp) - static int vsock_connect_saddr(VsockSocketAddress *vaddr, Error **errp) - { - struct sockaddr_vm svm; -- int sock = -1; - - if (!vsock_parse_vaddr_to_sockaddr(vaddr, &svm, errp)) { - return -1; - } - -- sock = vsock_connect_addr(&svm, errp); -- -- return sock; -+ return vsock_connect_addr(&svm, errp); - } - - static int vsock_listen_saddr(VsockSocketAddress *vaddr, --- -2.27.0 - diff --git a/SOURCES/kvm-Don-t-leak-memory-when-reallocation-fails.patch b/SOURCES/kvm-Don-t-leak-memory-when-reallocation-fails.patch deleted file mode 100644 index 5747672..0000000 --- 
a/SOURCES/kvm-Don-t-leak-memory-when-reallocation-fails.patch +++ /dev/null @@ -1,58 +0,0 @@ -From bcb6107f98d7b1edf687d7afd552a4528b7e673b Mon Sep 17 00:00:00 2001 -From: jmaloy -Date: Tue, 12 May 2020 21:15:13 +0100 -Subject: [PATCH 2/7] Don't leak memory when reallocation fails. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: jmaloy -Message-id: <20200512211514.1398384-2-jmaloy@redhat.com> -Patchwork-id: 96412 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/2] Don't leak memory when reallocation fails. -Bugzilla: 1749737 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé - -From: Jindrich Novy - -Signed-off-by: Jindrich Novy -[ Marc-André - modified to use a temporary variable ] -Signed-off-by: Marc-André Lureau -(cherry picked from libslirp commit d171af3732a0610a25334b06b77fa547bd677918) -Signed-off-by: Jon Maloy - -Signed-off-by: Danilo C. L. de Paula ---- - slirp/src/sbuf.c | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) - -diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c -index abced48..0569c34 100644 ---- a/slirp/src/sbuf.c -+++ b/slirp/src/sbuf.c -@@ -39,13 +39,16 @@ void sbreserve(struct sbuf *sb, int size) - if (sb->sb_data) { - /* Already alloced, realloc if necessary */ - if (sb->sb_datalen != size) { -- sb->sb_wptr = sb->sb_rptr = sb->sb_data = -- (char *)realloc(sb->sb_data, size); -+ char *new = realloc(sb->sb_data, size); - sb->sb_cc = 0; -- if (sb->sb_wptr) -+ if (new) { -+ sb->sb_data = sb->sb_wptr = sb->sb_rptr = new; - sb->sb_datalen = size; -- else -+ } else { -+ free(sb->sb_data); -+ sb->sb_data = sb->sb_wptr = sb->sb_rptr = NULL; - sb->sb_datalen = 0; -+ } - } - } else { - sb->sb_wptr = sb->sb_rptr = sb->sb_data = (char *)malloc(size); --- -1.8.3.1 - diff --git a/SOURCES/kvm-Drop-bogus-IPv6-messages.patch b/SOURCES/kvm-Drop-bogus-IPv6-messages.patch deleted file mode 100644 index 4c30a3b..0000000 --- 
a/SOURCES/kvm-Drop-bogus-IPv6-messages.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 89c4300c97739aa3291f0322037bb65068e08d41 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 19 Jan 2021 23:34:33 -0500 -Subject: [PATCH] Drop bogus IPv6 messages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210119233433.1352902-2-jmaloy@redhat.com> -Patchwork-id: 100695 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] Drop bogus IPv6 messages -Bugzilla: 1918054 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé - -From: Ralf Haferkamp - -Drop IPv6 message shorter than what's mentioned in the payload -length header (+ the size of the IPv6 header). They're invalid an could -lead to data leakage in icmp6_send_echoreply(). - -(cherry picked from libslirp commit c7ede54cbd2e2b25385325600958ba0124e31cc0) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - slirp/src/ip6_input.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c -index d9d2b7e9cd4..0f2b17853ad 100644 ---- a/slirp/src/ip6_input.c -+++ b/slirp/src/ip6_input.c -@@ -49,6 +49,13 @@ void ip6_input(struct mbuf *m) - goto bad; - } - -+ // Check if the message size is big enough to hold what's -+ // set in the payload length header. 
If not this is an invalid -+ // packet -+ if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { -+ goto bad; -+ } -+ - /* check ip_ttl for a correct ICMP reply */ - if (ip6->ip_hl == 0) { - icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); --- -2.27.0 - diff --git a/SOURCES/kvm-Enable-SGX-RH-Only.patch b/SOURCES/kvm-Enable-SGX-RH-Only.patch new file mode 100644 index 0000000..efc8cac --- /dev/null +++ b/SOURCES/kvm-Enable-SGX-RH-Only.patch @@ -0,0 +1,28 @@ +From db6e042fe4fdc1a1bbf562a46b15d4d8e33e2fa6 Mon Sep 17 00:00:00 2001 +From: Paul Lai +Date: Tue, 25 Jan 2022 15:16:22 -0500 +Subject: [PATCH 4/7] Enable SGX -- RH Only + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [4/5] cea874f29984897ef1232fb7749c13203c888034 +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index ddf036f042..fdbbdf9742 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -102,3 +102,4 @@ CONFIG_TPM_CRB=y + CONFIG_TPM_TIS_ISA=y + CONFIG_TPM_EMULATOR=y + CONFIG_TPM_PASSTHROUGH=y ++CONFIG_SGX=y +-- +2.27.0 + diff --git a/SOURCES/kvm-Fix-DHCP-broken-in-libslirp-v4.6.0.patch b/SOURCES/kvm-Fix-DHCP-broken-in-libslirp-v4.6.0.patch deleted file mode 100644 index 2dd4457..0000000 --- a/SOURCES/kvm-Fix-DHCP-broken-in-libslirp-v4.6.0.patch +++ /dev/null @@ -1,59 +0,0 @@ -From d0c668aa0ad255c3598267816154874541ac2943 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 29 Jul 2021 04:56:42 -0400 -Subject: [PATCH 12/14] Fix "DHCP broken in libslirp v4.6.0" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: 
Marc-André Lureau -Message-id: <20210708082537.1550263-9-marcandre.lureau@redhat.com> -Patchwork-id: 101824 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 8/8] Fix "DHCP broken in libslirp v4.6.0" -Bugzilla: 1970819 1970835 1970843 1970853 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -From: Akihiro Suda - -Fix issue 48 - -Signed-off-by: Akihiro Suda - -(cherry picked from commit c9f314f6e315a5518432761fea864196a290f799) -[ minor conflict fix due to indentation change ] -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - slirp/src/bootp.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c -index 5789187166..3e4af075f1 100644 ---- a/slirp/src/bootp.c -+++ b/slirp/src/bootp.c -@@ -354,14 +354,14 @@ static void bootp_reply(Slirp *slirp, - q += sizeof(nak_msg) - 1; - } - assert(q < end); -- *q = --RFC1533_END --; -+ *q = RFC1533_END; - --daddr.sin_addr.s_addr = 0xffffffffu; -+ daddr.sin_addr.s_addr = 0xffffffffu; - --m->m_len = sizeof(struct bootp_t) - sizeof(struct ip) - sizeof(struct udphdr); --udp_output(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); -+ assert ((q - rbp->bp_vend + 1) <= DHCP_OPT_LEN); -+ -+ m->m_len = sizeof(struct bootp_t) + (q - rbp->bp_vend + 1) - sizeof(struct ip) - sizeof(struct udphdr); -+ udp_output(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); - } - - void bootp_input(struct mbuf *m) --- -2.27.0 - diff --git a/SOURCES/kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch b/SOURCES/kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch deleted file mode 100644 index 535c3af..0000000 --- a/SOURCES/kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch +++ /dev/null @@ -1,60 +0,0 @@ -From a33ea192428d9c9307f1140f3e25631a6ef7657c Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Sat, 20 Jun 2020 15:02:59 -0400 -Subject: [PATCH 12/12] Fix use-afte-free in ip_reass() (CVE-2020-1983) -MIME-Version: 1.0 -Content-Type: text/plain; 
charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20200620150259.3352467-2-jmaloy@redhat.com> -Patchwork-id: 97678 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/1] Fix use-afte-free in ip_reass() (CVE-2020-1983) -Bugzilla: 1838070 -RH-Acked-by: Stefan Hajnoczi - -From: Marc-André Lureau - -The q pointer is updated when the mbuf data is moved from m_dat to -m_ext. - -m_ext buffer may also be realloc()'ed and moved during m_cat(): -q should also be updated in this case. - -Reported-by: Aviv Sasson -Signed-off-by: Marc-André Lureau -Reviewed-by: Samuel Thibault - -(cherry picked from libslirp commit 9bd6c5913271eabcb7768a58197ed3301fe19f2d) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - slirp/src/ip_input.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c -index df1c846ade..0f5d522ec1 100644 ---- a/slirp/src/ip_input.c -+++ b/slirp/src/ip_input.c -@@ -329,7 +329,7 @@ insert: - q = fp->frag_link.next; - m = dtom(slirp, q); - -- int was_ext = m->m_flags & M_EXT; -+ int delta = (char *)q - (m->m_flags & M_EXT ? m->m_ext : m->m_dat); - - q = (struct ipasfrag *)q->ipf_next; - while (q != (struct ipasfrag *)&fp->frag_link) { -@@ -353,8 +353,7 @@ insert: - * the old buffer (in the mbuf), so we must point ip - * into the new buffer. 
- */ -- if (!was_ext && m->m_flags & M_EXT) { -- int delta = (char *)q - m->m_dat; -+ if (m->m_flags & M_EXT) { - q = (struct ipasfrag *)(m->m_ext + delta); - } - --- -2.27.0 - diff --git a/SOURCES/kvm-MAINTAINERS-fix-qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch b/SOURCES/kvm-MAINTAINERS-fix-qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch deleted file mode 100644 index dce89d9..0000000 --- a/SOURCES/kvm-MAINTAINERS-fix-qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch +++ /dev/null @@ -1,55 +0,0 @@ -From e3bec8c83459a68ae0c08e2ae0f1dbef24872d59 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 Jun 2020 02:34:09 +0100 -Subject: [PATCH 04/26] MAINTAINERS: fix qcow2-bitmap.c under Dirty Bitmaps - header - -RH-Author: Eric Blake -Message-id: <20200602023420.2133649-2-eblake@redhat.com> -Patchwork-id: 97068 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 01/12] MAINTAINERS: fix qcow2-bitmap.c under Dirty Bitmaps header -Bugzilla: 1779893 1779904 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -From: Vladimir Sementsov-Ogievskiy - -Somehow I wrote not full path to the file. Fix that. - -Also, while being here, rearrange entries, so that includes go first, -then block, than migration, than util. - -Fixes: 052db8e71444d -Signed-off-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 00637c6b0b67694127cc01dd75f3626da23acdaa) -Signed-off-by: Eric Blake -Signed-off-by: Danilo C. L. 
de Paula ---- - MAINTAINERS | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/MAINTAINERS b/MAINTAINERS -index d1b3e26..3a81ac9 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -1873,12 +1873,12 @@ M: John Snow - R: Vladimir Sementsov-Ogievskiy - L: qemu-block@nongnu.org - S: Supported --F: util/hbitmap.c --F: block/dirty-bitmap.c - F: include/qemu/hbitmap.h - F: include/block/dirty-bitmap.h --F: qcow2-bitmap.c -+F: block/dirty-bitmap.c -+F: block/qcow2-bitmap.c - F: migration/block-dirty-bitmap.c -+F: util/hbitmap.c - F: tests/test-hbitmap.c - F: docs/interop/bitmaps.rst - T: git https://github.com/jnsnow/qemu.git bitmaps --- -1.8.3.1 - diff --git a/SOURCES/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch b/SOURCES/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch deleted file mode 100644 index 1435017..0000000 --- a/SOURCES/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 481357ea8ae32b6894860c296cf6a2898260195f Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 17 Jan 2020 13:18:27 +0100 -Subject: [PATCH 4/4] RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR - support - -RH-Author: Paolo Bonzini -Message-id: <20200117131827.20361-1-pbonzini@redhat.com> -Patchwork-id: 93405 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v3] RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR support -Bugzilla: 1559846 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Miroslav Rezanina - -BZ: 1559846 -BRANCH: rhel-av-8.2.0 -BREW: 25775160 -UPSTREAM: RHEL only - -Nested PERF_GLOBAL_CTRL support is not present in the 8.2 kernel. Drop the -features via compat properties, they will be moved to 8.2 machine type compat -properties in the 8.3 timeframe. - -Signed-off-by: Paolo Bonzini ---- - No change, for v2 I mistakenly wrote "origin/rhel-av-8.2.0" as the - branch. 
:( - - hw/i386/pc.c | 2 ++ - 1 file changed, 2 insertions(+) - -Signed-off-by: Miroslav Rezanina ---- - hw/i386/pc.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 61e70e4..73a0f11 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -351,6 +351,8 @@ const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); - GlobalProperty pc_rhel_compat[] = { - { TYPE_X86_CPU, "host-phys-bits", "on" }, - { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, -+ { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, -+ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, - /* bz 1508330 */ - { "vfio-pci", "x-no-geforce-quirks", "on" }, - }; --- -1.8.3.1 - diff --git a/SOURCES/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch b/SOURCES/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch deleted file mode 100644 index d717ae2..0000000 --- a/SOURCES/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch +++ /dev/null @@ -1,115 +0,0 @@ -From c477581ccc6962651d4d6c702a6c3e2fcc5e4205 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 2 Jan 2020 11:56:51 +0000 -Subject: [PATCH 2/2] kvm: Reallocate dirty_bmap when we change a slot - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200102115651.140177-1-dgilbert@redhat.com> -Patchwork-id: 93256 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] kvm: Reallocate dirty_bmap when we change a slot -Bugzilla: 1772774 -RH-Acked-by: Peter Xu -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Laszlo Ersek - -From: "Dr. David Alan Gilbert" - -bz: https://bugzilla.redhat.com/show_bug.cgi?id=1772774 -brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25575691 -branch: rhel-av-8.2.0 - -kvm_set_phys_mem can be called to reallocate a slot by something the -guest does (e.g. writing to PAM and other chipset registers). 
-This can happen in the middle of a migration, and if we're unlucky -it can now happen between the split 'sync' and 'clear'; the clear -asserts if there's no bmap to clear. Recreate the bmap whenever -we change the slot, keeping the clear path happy. - -Typically this is triggered by the guest rebooting during a migrate. - -Corresponds to: -https://bugzilla.redhat.com/show_bug.cgi?id=1772774 -https://bugzilla.redhat.com/show_bug.cgi?id=1771032 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Peter Xu -(cherry picked from commit 9b3a31c745b61758aaa5466a3a9fc0526d409188) -Signed-off-by: Danilo C. L. de Paula ---- - accel/kvm/kvm-all.c | 44 +++++++++++++++++++++++++++++--------------- - 1 file changed, 29 insertions(+), 15 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index dc3ed7f..5007bda 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -518,6 +518,27 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, - - #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) - -+/* Allocate the dirty bitmap for a slot */ -+static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) -+{ -+ /* -+ * XXX bad kernel interface alert -+ * For dirty bitmap, kernel allocates array of size aligned to -+ * bits-per-long. But for case when the kernel is 64bits and -+ * the userspace is 32bits, userspace can't align to the same -+ * bits-per-long, since sizeof(long) is different between kernel -+ * and user space. This way, userspace will provide buffer which -+ * may be 4 bytes less than the kernel will use, resulting in -+ * userspace memory corruption (which is not detectable by valgrind -+ * too, in most cases). -+ * So for now, let's align to 64 instead of HOST_LONG_BITS here, in -+ * a hope that sizeof(long) won't become >8 any time soon. 
-+ */ -+ hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), -+ /*HOST_LONG_BITS*/ 64) / 8; -+ mem->dirty_bmap = g_malloc0(bitmap_size); -+} -+ - /** - * kvm_physical_sync_dirty_bitmap - Sync dirty bitmap from kernel space - * -@@ -550,23 +571,9 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, - goto out; - } - -- /* XXX bad kernel interface alert -- * For dirty bitmap, kernel allocates array of size aligned to -- * bits-per-long. But for case when the kernel is 64bits and -- * the userspace is 32bits, userspace can't align to the same -- * bits-per-long, since sizeof(long) is different between kernel -- * and user space. This way, userspace will provide buffer which -- * may be 4 bytes less than the kernel will use, resulting in -- * userspace memory corruption (which is not detectable by valgrind -- * too, in most cases). -- * So for now, let's align to 64 instead of HOST_LONG_BITS here, in -- * a hope that sizeof(long) won't become >8 any time soon. -- */ - if (!mem->dirty_bmap) { -- hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), -- /*HOST_LONG_BITS*/ 64) / 8; - /* Allocate on the first log_sync, once and for all */ -- mem->dirty_bmap = g_malloc0(bitmap_size); -+ kvm_memslot_init_dirty_bitmap(mem); - } - - d.dirty_bitmap = mem->dirty_bmap; -@@ -1067,6 +1074,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - mem->ram = ram; - mem->flags = kvm_mem_flags(mr); - -+ if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { -+ /* -+ * Reallocate the bmap; it means it doesn't disappear in -+ * middle of a migrate. 
-+ */ -+ kvm_memslot_init_dirty_bitmap(mem); -+ } - err = kvm_set_user_memory_region(kml, mem, true); - if (err) { - fprintf(stderr, "%s: error registering slot: %s\n", __func__, --- -1.8.3.1 - diff --git a/SOURCES/kvm-Replace-remaining-malloc-free-user-with-glib.patch b/SOURCES/kvm-Replace-remaining-malloc-free-user-with-glib.patch deleted file mode 100644 index 71e6e47..0000000 --- a/SOURCES/kvm-Replace-remaining-malloc-free-user-with-glib.patch +++ /dev/null @@ -1,118 +0,0 @@ -From c012dc9b501d96a2ff54a8a7a182726043b69aeb Mon Sep 17 00:00:00 2001 -From: jmaloy -Date: Tue, 12 May 2020 21:15:14 +0100 -Subject: [PATCH 3/7] Replace remaining malloc/free user with glib -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: jmaloy -Message-id: <20200512211514.1398384-3-jmaloy@redhat.com> -Patchwork-id: 96413 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] Replace remaining malloc/free user with glib -Bugzilla: 1749737 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé - -From: Marc-André Lureau - -glib mem functions are already used in various places. Let's not mix -the two, and instead abort on OOM conditions. - -Signed-off-by: Marc-André Lureau -(cherry picked from libslirp commit 3a494648526be4eb96cba739a816a60e933ffd14) -Signed-off-by: Jon Maloy - -Signed-off-by: Danilo C. L. 
de Paula ---- - slirp/src/sbuf.c | 21 ++++++--------------- - slirp/src/socket.c | 2 +- - slirp/src/tcp_subr.c | 8 ++------ - 3 files changed, 9 insertions(+), 22 deletions(-) - -diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c -index 0569c34..eab87f3 100644 ---- a/slirp/src/sbuf.c -+++ b/slirp/src/sbuf.c -@@ -9,7 +9,7 @@ static void sbappendsb(struct sbuf *sb, struct mbuf *m); - - void sbfree(struct sbuf *sb) - { -- free(sb->sb_data); -+ g_free(sb->sb_data); - } - - bool sbdrop(struct sbuf *sb, int num) -@@ -39,24 +39,15 @@ void sbreserve(struct sbuf *sb, int size) - if (sb->sb_data) { - /* Already alloced, realloc if necessary */ - if (sb->sb_datalen != size) { -- char *new = realloc(sb->sb_data, size); -+ char *new = g_realloc(sb->sb_data, size); - sb->sb_cc = 0; -- if (new) { -- sb->sb_data = sb->sb_wptr = sb->sb_rptr = new; -- sb->sb_datalen = size; -- } else { -- free(sb->sb_data); -- sb->sb_data = sb->sb_wptr = sb->sb_rptr = NULL; -- sb->sb_datalen = 0; -- } -+ sb->sb_data = sb->sb_wptr = sb->sb_rptr = new; -+ sb->sb_datalen = size; - } - } else { -- sb->sb_wptr = sb->sb_rptr = sb->sb_data = (char *)malloc(size); -+ sb->sb_wptr = sb->sb_rptr = sb->sb_data = g_malloc(size); - sb->sb_cc = 0; -- if (sb->sb_wptr) -- sb->sb_datalen = size; -- else -- sb->sb_datalen = 0; -+ sb->sb_datalen = size; - } - } - -diff --git a/slirp/src/socket.c b/slirp/src/socket.c -index 34daffc..ace18bf 100644 ---- a/slirp/src/socket.c -+++ b/slirp/src/socket.c -@@ -95,7 +95,7 @@ void sofree(struct socket *so) - remque(so); /* crashes if so is not in a queue */ - - if (so->so_tcpcb) { -- free(so->so_tcpcb); -+ g_free(so->so_tcpcb); - } - g_free(so); - } -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index 26d4ead..4e5a801 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -255,11 +255,7 @@ struct tcpcb *tcp_newtcpcb(struct socket *so) - { - register struct tcpcb *tp; - -- tp = (struct tcpcb *)malloc(sizeof(*tp)); -- if (tp == NULL) -- return ((struct tcpcb 
*)0); -- -- memset((char *)tp, 0, sizeof(struct tcpcb)); -+ tp = g_new0(struct tcpcb, 1); - tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; - tp->t_maxseg = (so->so_ffamily == AF_INET) ? TCP_MSS : TCP6_MSS; - -@@ -330,7 +326,7 @@ struct tcpcb *tcp_close(struct tcpcb *tp) - remque(tcpiphdr2qlink(tcpiphdr_prev(t))); - m_free(m); - } -- free(tp); -+ g_free(tp); - so->so_tcpcb = NULL; - /* clobber input socket cache if we're closing the cached connection */ - if (so == slirp->tcp_last_so) --- -1.8.3.1 - diff --git a/SOURCES/kvm-Revert-RHEL-disable-hostmem-memfd.patch b/SOURCES/kvm-Revert-RHEL-disable-hostmem-memfd.patch deleted file mode 100644 index f959752..0000000 --- a/SOURCES/kvm-Revert-RHEL-disable-hostmem-memfd.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 559d5899473dea180ced39a32bfbfbf2310c6e04 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Mon, 25 May 2020 15:33:06 +0100 -Subject: [PATCH 4/7] Revert "RHEL: disable hostmem-memfd" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200525153306.15373-1-marcandre.lureau@redhat.com> -Patchwork-id: 96747 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH] Revert "RHEL: disable hostmem-memfd" -Bugzilla: 1839030 -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefano Garzarella - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1839030 -BRANCH: rhel-av-8.2.1 -UPSTREAM: RHEL-only -BREW: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=28817132 - -This reverts commit f7587ddb9a2731bf678a24156b6285dda79a4b2b. - -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. 
de Paula ---- - backends/Makefile.objs | 3 +-- - util/memfd.c | 2 +- - 2 files changed, 2 insertions(+), 3 deletions(-) - -diff --git a/backends/Makefile.objs b/backends/Makefile.objs -index f328d40..f069111 100644 ---- a/backends/Makefile.objs -+++ b/backends/Makefile.objs -@@ -16,5 +16,4 @@ endif - - common-obj-$(call land,$(CONFIG_VHOST_USER),$(CONFIG_VIRTIO)) += vhost-user.o - --# RHEL: disable memfd --# common-obj-$(CONFIG_LINUX) += hostmem-memfd.o -+common-obj-$(CONFIG_LINUX) += hostmem-memfd.o -diff --git a/util/memfd.c b/util/memfd.c -index 3303ec9..4a3c07e 100644 ---- a/util/memfd.c -+++ b/util/memfd.c -@@ -193,7 +193,7 @@ bool qemu_memfd_alloc_check(void) - */ - bool qemu_memfd_check(unsigned int flags) - { --#if 0 /* RHEL: memfd support disabled */ -+#ifdef CONFIG_LINUX - int mfd = memfd_create("test", flags | MFD_CLOEXEC); - - if (mfd >= 0) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch b/SOURCES/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch deleted file mode 100644 index 0c1c37f..0000000 --- a/SOURCES/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 71b5267ed33f9e60bc98acbabcbed62f01a96ff4 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 30 Mar 2020 11:19:23 +0100 -Subject: [PATCH 3/4] Revert "mirror: Don't let an operation wait for itself" - -RH-Author: Kevin Wolf -Message-id: <20200330111924.22938-2-kwolf@redhat.com> -Patchwork-id: 94464 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] Revert "mirror: Don't let an operation wait for itself" -Bugzilla: 1794692 -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -This reverts commit 7e6c4ff792734e196c8ca82564c56b5e7c6288ca. - -The fix was incomplete as it only protected against requests waiting for -themselves, but not against requests waiting for each other. We need a -different solution. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20200326153628.4869-2-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit 9178f4fe5f083064f5c91f04d98c815ce5a5af1c) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/mirror.c | 21 +++++++++------------ - 1 file changed, 9 insertions(+), 12 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index cacbc70..8959e42 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -283,14 +283,11 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, - } - - static inline void coroutine_fn --mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) -+mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) - { - MirrorOp *op; - - QTAILQ_FOREACH(op, &s->ops_in_flight, next) { -- if (self == op) { -- continue; -- } - /* Do not wait on pseudo ops, because it may in turn wait on - * some other operation to start, which may in fact be the - * caller of this function. Since there is only one pseudo op -@@ -305,10 +302,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) - } - - static inline void coroutine_fn --mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self) -+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) - { - /* Only non-active operations use up in-flight slots */ -- mirror_wait_for_any_operation(s, self, false); -+ mirror_wait_for_any_operation(s, false); - } - - /* Perform a mirror copy operation. 
-@@ -351,7 +348,7 @@ static void coroutine_fn mirror_co_read(void *opaque) - - while (s->buf_free_count < nb_chunks) { - trace_mirror_yield_in_flight(s, op->offset, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s, op); -+ mirror_wait_for_free_in_flight_slot(s); - } - - /* Now make a QEMUIOVector taking enough granularity-sized chunks -@@ -558,7 +555,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) - - while (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield_in_flight(s, offset, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s, pseudo_op); -+ mirror_wait_for_free_in_flight_slot(s); - } - - if (s->ret < 0) { -@@ -612,7 +609,7 @@ static void mirror_free_init(MirrorBlockJob *s) - static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) - { - while (s->in_flight > 0) { -- mirror_wait_for_free_in_flight_slot(s, NULL); -+ mirror_wait_for_free_in_flight_slot(s); - } - } - -@@ -797,7 +794,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) - if (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, - s->in_flight); -- mirror_wait_for_free_in_flight_slot(s, NULL); -+ mirror_wait_for_free_in_flight_slot(s); - continue; - } - -@@ -950,7 +947,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - /* Do not start passive operations while there are active - * writes in progress */ - while (s->in_active_write_counter) { -- mirror_wait_for_any_operation(s, NULL, true); -+ mirror_wait_for_any_operation(s, true); - } - - if (s->ret < 0) { -@@ -976,7 +973,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || - (cnt == 0 && s->in_flight > 0)) { - trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s, NULL); -+ mirror_wait_for_free_in_flight_slot(s); - continue; - } else if (cnt != 0) { - delay_ns = mirror_iteration(s); --- -1.8.3.1 - diff --git 
a/SOURCES/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch b/SOURCES/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch deleted file mode 100644 index dc65c26..0000000 --- a/SOURCES/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch +++ /dev/null @@ -1,63 +0,0 @@ -From ceb6d97674b8bc9a072db1be4167411bc0ee48d7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:02 +0100 -Subject: [PATCH 091/116] Virtiofsd: fix memory leak on fuse queueinfo -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-88-dgilbert@redhat.com> -Patchwork-id: 93542 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 087/112] Virtiofsd: fix memory leak on fuse queueinfo -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -For fuse's queueinfo, both queueinfo array and queueinfos are allocated in -fv_queue_set_started() but not cleaned up when the daemon process quits. - -This fixes the leak in proper places. - -Signed-off-by: Liu Bo -Signed-off-by: Eric Ren -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 740b0b700a6338a1cf60c26229651ac5f6724944) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index b7948de..fb8d6d1 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -625,6 +625,8 @@ static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) - } - close(ourqi->kill_fd); - ourqi->kick_fd = -1; -+ free(vud->qi[qidx]); -+ vud->qi[qidx] = NULL; - } - - /* Callback from libvhost-user on start or stop of a queue */ -@@ -884,6 +886,12 @@ int virtio_session_mount(struct fuse_session *se) - void virtio_session_close(struct fuse_session *se) - { - close(se->vu_socketfd); -+ -+ if (!se->virtio_dev) { -+ return; -+ } -+ -+ free(se->virtio_dev->qi); - free(se->virtio_dev); - se->virtio_dev = NULL; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-acpi-accept-byte-and-word-access-to-core-ACPI-regist.patch b/SOURCES/kvm-acpi-accept-byte-and-word-access-to-core-ACPI-regist.patch deleted file mode 100644 index 1538d11..0000000 --- a/SOURCES/kvm-acpi-accept-byte-and-word-access-to-core-ACPI-regist.patch +++ /dev/null @@ -1,82 +0,0 @@ -From dcac680adb6b8624f14eda3e812521bddbe8ecea Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 21 Apr 2021 22:30:04 -0400 -Subject: [PATCH 5/7] acpi: accept byte and word access to core ACPI registers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210421223006.19650-5-jmaloy@redhat.com> -Patchwork-id: 101482 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH v2 4/6] acpi: accept byte and word access to core ACPI registers -Bugzilla: 1842478 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laszlo Ersek - -From: Michael Tokarev - -All ISA registers should be accessible as bytes, words or dwords -(if wide enough). 
Fix the access constraints for acpi-pm-evt, -acpi-pm-tmr & acpi-cnt registers. - -Fixes: 5d971f9e67 (memory: Revert "memory: accept mismatching sizes in memory_region_access_valid") -Fixes: afafe4bbe0 (apci: switch cnt to memory api) -Fixes: 77d58b1e47 (apci: switch timer to memory api) -Fixes: b5a7c024d2 (apci: switch evt to memory api) -Buglink: https://lore.kernel.org/xen-devel/20200630170913.123646-1-anthony.perard@citrix.com/T/ -Buglink: https://bugs.debian.org/964793 -BugLink: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=964247 -BugLink: https://bugs.launchpad.net/bugs/1886318 -Reported-By: Simon John -Signed-off-by: Michael Tokarev -Message-Id: <20200720160627.15491-1-mjt@msgid.tls.msk.ru> -Cc: qemu-stable@nongnu.org -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin - -(cherry picked from commit dba04c3488c4699f5afe96f66e448b1d447cf3fb) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - hw/acpi/core.c | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/hw/acpi/core.c b/hw/acpi/core.c -index 45cbed49ab..d85052c34a 100644 ---- a/hw/acpi/core.c -+++ b/hw/acpi/core.c -@@ -461,7 +461,8 @@ static void acpi_pm_evt_write(void *opaque, hwaddr addr, uint64_t val, - static const MemoryRegionOps acpi_pm_evt_ops = { - .read = acpi_pm_evt_read, - .write = acpi_pm_evt_write, -- .valid.min_access_size = 2, -+ .impl.min_access_size = 2, -+ .valid.min_access_size = 1, - .valid.max_access_size = 2, - .endianness = DEVICE_LITTLE_ENDIAN, - }; -@@ -530,7 +531,8 @@ static void acpi_pm_tmr_write(void *opaque, hwaddr addr, uint64_t val, - static const MemoryRegionOps acpi_pm_tmr_ops = { - .read = acpi_pm_tmr_read, - .write = acpi_pm_tmr_write, -- .valid.min_access_size = 4, -+ .impl.min_access_size = 4, -+ .valid.min_access_size = 1, - .valid.max_access_size = 4, - .endianness = DEVICE_LITTLE_ENDIAN, - }; -@@ -602,7 +604,8 @@ static void acpi_pm_cnt_write(void *opaque, hwaddr addr, uint64_t val, - static const 
MemoryRegionOps acpi_pm_cnt_ops = { - .read = acpi_pm_cnt_read, - .write = acpi_pm_cnt_write, -- .valid.min_access_size = 2, -+ .impl.min_access_size = 2, -+ .valid.min_access_size = 1, - .valid.max_access_size = 2, - .endianness = DEVICE_LITTLE_ENDIAN, - }; --- -2.27.0 - diff --git a/SOURCES/kvm-acpi-validate-hotplug-selector-on-access.patch b/SOURCES/kvm-acpi-validate-hotplug-selector-on-access.patch new file mode 100644 index 0000000..d18989a --- /dev/null +++ b/SOURCES/kvm-acpi-validate-hotplug-selector-on-access.patch @@ -0,0 +1,51 @@ +From 529a5d908f5d16714b8ae0a51eaaaa84994dfae8 Mon Sep 17 00:00:00 2001 +From: "Michael S. Tsirkin" +Date: Tue, 21 Dec 2021 09:45:44 -0500 +Subject: [PATCH 1/2] acpi: validate hotplug selector on access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 97: acpi: validate hotplug selector on access +RH-Commit: [1/1] 79bcfb0df0091e2b716d2e1c545f047b3409c26c (jmaloy/qemu-kvm) +RH-Bugzilla: 2036580 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Igor Mammedov + +When bus is looked up on a pci write, we didn't +validate that the lookup succeeded. +Fuzzers thus can trigger QEMU crash by dereferencing the NULL +bus pointer. + +Fixes: b32bd763a1 ("pci: introduce acpi-index property for PCI device") +Fixes: CVE-2021-4158 +Cc: "Igor Mammedov" +Fixes: https://gitlab.com/qemu-project/qemu/-/issues/770 +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Ani Sinha +(cherry picked from commit 9bd6565ccee68f72d5012e24646e12a1c662827e) +Signed-off-by: Jon Maloy +--- + hw/acpi/pcihp.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c +index 30405b5113..a5e182dd3a 100644 +--- a/hw/acpi/pcihp.c ++++ b/hw/acpi/pcihp.c +@@ -491,6 +491,9 @@ static void pci_write(void *opaque, hwaddr addr, uint64_t data, + } + + bus = acpi_pcihp_find_hotplug_bus(s, s->hotplug_select); ++ if (!bus) { ++ break; ++ } + QTAILQ_FOREACH_SAFE(kid, &bus->qbus.children, sibling, next) { + Object *o = OBJECT(kid->child); + PCIDevice *dev = PCI_DEVICE(o); +-- +2.27.0 + diff --git a/SOURCES/kvm-aio-posix-completely-stop-polling-when-disabled.patch b/SOURCES/kvm-aio-posix-completely-stop-polling-when-disabled.patch deleted file mode 100644 index 3993181..0000000 --- a/SOURCES/kvm-aio-posix-completely-stop-polling-when-disabled.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 4b4fb1cccb8e0307658cee3bc90c77e5f1dde60a Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 9 Oct 2020 10:08:49 -0400 -Subject: [PATCH 13/14] aio-posix: completely stop polling when disabled - -RH-Author: Thomas Huth -Message-id: <20201009100849.264994-10-thuth@redhat.com> -Patchwork-id: 98603 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 9/9] aio-posix: completely stop polling when disabled -Bugzilla: 1846975 -RH-Acked-by: Jens Freimann -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -From: Stefan Hajnoczi - -One iteration of polling is always performed even when polling is -disabled. This is done because: -1. Userspace polling is cheaper than making a syscall. We might get - lucky. -2. We must poll once more after polling has stopped in case an event - occurred while stopping polling. - -However, there are downsides: -1. Polling becomes a bottleneck when the number of event sources is very - high. It's more efficient to monitor fds in that case. -2. 
A high-frequency polling event source can starve non-polling event - sources because ppoll(2)/epoll(7) is never invoked. - -This patch removes the forced polling iteration so that poll_ns=0 really -means no polling. - -IOPS increases from 10k to 60k when the guest has 100 -virtio-blk-pci,num-queues=32 devices and 1 virtio-blk-pci,num-queues=1 -device because the large number of event sources being polled slows down -the event loop. - -Signed-off-by: Stefan Hajnoczi -Link: https://lore.kernel.org/r/20200305170806.1313245-2-stefanha@redhat.com -Message-Id: <20200305170806.1313245-2-stefanha@redhat.com> -(cherry picked from commit e4346192f1c2e1683a807b46efac47ef0cf9b545) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. de Paula ---- - util/aio-posix.c | 22 +++++++++++++++------- - 1 file changed, 15 insertions(+), 7 deletions(-) - -diff --git a/util/aio-posix.c b/util/aio-posix.c -index a4977f538e..abc396d030 100644 ---- a/util/aio-posix.c -+++ b/util/aio-posix.c -@@ -340,12 +340,13 @@ void aio_set_event_notifier_poll(AioContext *ctx, - (IOHandler *)io_poll_end); - } - --static void poll_set_started(AioContext *ctx, bool started) -+static bool poll_set_started(AioContext *ctx, bool started) - { - AioHandler *node; -+ bool progress = false; - - if (started == ctx->poll_started) { -- return; -+ return false; - } - - ctx->poll_started = started; -@@ -367,8 +368,15 @@ static void poll_set_started(AioContext *ctx, bool started) - if (fn) { - fn(node->opaque); - } -+ -+ /* Poll one last time in case ->io_poll_end() raced with the event */ -+ if (!started) { -+ progress = node->io_poll(node->opaque) || progress; -+ } - } - qemu_lockcnt_dec(&ctx->list_lock); -+ -+ return progress; - } - - -@@ -599,12 +607,12 @@ static bool try_poll_mode(AioContext *ctx, int64_t *timeout) - } - } - -- poll_set_started(ctx, false); -+ if (poll_set_started(ctx, false)) { -+ *timeout = 0; -+ return true; -+ } - -- /* Even if we don't run busy polling, try polling once in case it can make 
-- * progress and the caller will be able to avoid ppoll(2)/epoll_wait(2). -- */ -- return run_poll_handlers_once(ctx, timeout); -+ return false; - } - - bool aio_poll(AioContext *ctx, bool blocking) --- -2.27.0 - diff --git a/SOURCES/kvm-aio-wait-delegate-polling-of-main-AioContext-if-BQL-.patch b/SOURCES/kvm-aio-wait-delegate-polling-of-main-AioContext-if-BQL-.patch deleted file mode 100644 index a234140..0000000 --- a/SOURCES/kvm-aio-wait-delegate-polling-of-main-AioContext-if-BQL-.patch +++ /dev/null @@ -1,132 +0,0 @@ -From b474155fdc38f86f516c14ba9a6f934616d589ef Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Wed, 4 Aug 2021 03:27:22 -0400 -Subject: [PATCH 1/2] aio-wait: delegate polling of main AioContext if BQL not - held - -RH-Author: Andrew Jones -Message-id: <20210729134448.4995-2-drjones@redhat.com> -Patchwork-id: 101935 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH v2 1/2] aio-wait: delegate polling of main AioContext if BQL not held -Bugzilla: 1969848 -RH-Acked-by: Gavin Shan -RH-Acked-by: Auger Eric -RH-Acked-by: Stefan Hajnoczi - -From: Paolo Bonzini - -Any thread that is not a iothread returns NULL for qemu_get_current_aio_context(). -As a result, it would also return true for -in_aio_context_home_thread(qemu_get_aio_context()), causing -AIO_WAIT_WHILE to invoke aio_poll() directly. This is incorrect -if the BQL is not held, because aio_poll() does not expect to -run concurrently from multiple threads, and it can actually -happen when savevm writes to the vmstate file from the -migration thread. - -Therefore, restrict in_aio_context_home_thread to return true -for the main AioContext only if the BQL is held. - -The function is moved to aio-wait.h because it is mostly used -there and to avoid a circular reference between main-loop.h -and block/aio.h. 
- -Signed-off-by: Paolo Bonzini -Message-Id: <20200407140746.8041-5-pbonzini@redhat.com> -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 3c18a92dc4b55ca8cc37a755ed119f11c0f34099) -Signed-off-by: Andrew Jones -Signed-off-by: Miroslav Rezanina ---- - include/block/aio-wait.h | 22 ++++++++++++++++++++++ - include/block/aio.h | 29 ++++++++++------------------- - 2 files changed, 32 insertions(+), 19 deletions(-) - -diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h -index afeeb18f95..716d2639df 100644 ---- a/include/block/aio-wait.h -+++ b/include/block/aio-wait.h -@@ -26,6 +26,7 @@ - #define QEMU_AIO_WAIT_H - - #include "block/aio.h" -+#include "qemu/main-loop.h" - - /** - * AioWait: -@@ -124,4 +125,25 @@ void aio_wait_kick(void); - */ - void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); - -+/** -+ * in_aio_context_home_thread: -+ * @ctx: the aio context -+ * -+ * Return whether we are running in the thread that normally runs @ctx. Note -+ * that acquiring/releasing ctx does not affect the outcome, each AioContext -+ * still only has one home thread that is responsible for running it. -+ */ -+static inline bool in_aio_context_home_thread(AioContext *ctx) -+{ -+ if (ctx == qemu_get_current_aio_context()) { -+ return true; -+ } -+ -+ if (ctx == qemu_get_aio_context()) { -+ return qemu_mutex_iothread_locked(); -+ } else { -+ return false; -+ } -+} -+ - #endif /* QEMU_AIO_WAIT_H */ -diff --git a/include/block/aio.h b/include/block/aio.h -index 6b0d52f732..9d28e247df 100644 ---- a/include/block/aio.h -+++ b/include/block/aio.h -@@ -60,12 +60,16 @@ struct AioContext { - QLIST_HEAD(, AioHandler) aio_handlers; - - /* Used to avoid unnecessary event_notifier_set calls in aio_notify; -- * accessed with atomic primitives. If this field is 0, everything -- * (file descriptors, bottom halves, timers) will be re-evaluated -- * before the next blocking poll(), thus the event_notifier_set call -- * can be skipped. 
If it is non-zero, you may need to wake up a -- * concurrent aio_poll or the glib main event loop, making -- * event_notifier_set necessary. -+ * only written from the AioContext home thread, or under the BQL in -+ * the case of the main AioContext. However, it is read from any -+ * thread so it is still accessed with atomic primitives. -+ * -+ * If this field is 0, everything (file descriptors, bottom halves, -+ * timers) will be re-evaluated before the next blocking poll() or -+ * io_uring wait; therefore, the event_notifier_set call can be -+ * skipped. If it is non-zero, you may need to wake up a concurrent -+ * aio_poll or the glib main event loop, making event_notifier_set -+ * necessary. - * - * Bit 0 is reserved for GSource usage of the AioContext, and is 1 - * between a call to aio_ctx_prepare and the next call to aio_ctx_check. -@@ -580,19 +584,6 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co); - */ - AioContext *qemu_get_current_aio_context(void); - --/** -- * in_aio_context_home_thread: -- * @ctx: the aio context -- * -- * Return whether we are running in the thread that normally runs @ctx. Note -- * that acquiring/releasing ctx does not affect the outcome, each AioContext -- * still only has one home thread that is responsible for running it. 
-- */ --static inline bool in_aio_context_home_thread(AioContext *ctx) --{ -- return ctx == qemu_get_current_aio_context(); --} -- - /** - * aio_context_setup: - * @ctx: the aio context --- -2.27.0 - diff --git a/SOURCES/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch b/SOURCES/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch deleted file mode 100644 index becba21..0000000 --- a/SOURCES/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 0d5a09173eb75b7e56122c2aefb2646a2be58400 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Fri, 31 Jan 2020 17:12:57 +0000 -Subject: [PATCH 15/15] apic: Use 32bit APIC ID for migration instance ID - -RH-Author: Peter Xu -Message-id: <20200131171257.1066593-4-peterx@redhat.com> -Patchwork-id: 93628 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/3] apic: Use 32bit APIC ID for migration instance ID -Bugzilla: 1529231 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert - -Migration is silently broken now with x2apic config like this: - - -smp 200,maxcpus=288,sockets=2,cores=72,threads=2 \ - -device intel-iommu,intremap=on,eim=on - -After migration, the guest kernel could hang at anything, due to -x2apic bit not migrated correctly in IA32_APIC_BASE on some vcpus, so -any operations related to x2apic could be broken then (e.g., RDMSR on -x2apic MSRs could fail because KVM would think that the vcpu hasn't -enabled x2apic at all). - -The issue is that the x2apic bit was never applied correctly for vcpus -whose ID > 255 when migrate completes, and that's because when we -migrate APIC we use the APICCommonState.id as instance ID of the -migration stream, while that's too short for x2apic. - -Let's use the newly introduced initial_apic_id for that. 
- -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Reviewed-by: Eduardo Habkost -Signed-off-by: Juan Quintela -(cherry picked from commit 0ab994867c365db21e15f9503922c79234d8e40e) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. de Paula ---- - hw/intc/apic_common.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c -index 54b8731..b5dbeb6 100644 ---- a/hw/intc/apic_common.c -+++ b/hw/intc/apic_common.c -@@ -268,7 +268,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp) - APICCommonState *s = APIC_COMMON(dev); - APICCommonClass *info; - static DeviceState *vapic; -- uint32_t instance_id = s->id; -+ uint32_t instance_id = s->initial_apic_id; -+ -+ /* Normally initial APIC ID should be no more than hundreds */ -+ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); - - info = APIC_COMMON_GET_CLASS(s); - info->realize(dev, errp); --- -1.8.3.1 - diff --git a/SOURCES/kvm-async-use-explicit-memory-barriers.patch b/SOURCES/kvm-async-use-explicit-memory-barriers.patch deleted file mode 100644 index 2bf7245..0000000 --- a/SOURCES/kvm-async-use-explicit-memory-barriers.patch +++ /dev/null @@ -1,183 +0,0 @@ -From 82a02aec3a8b3c2ac925d0b71ea4c35aa5d6463b Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Wed, 4 Aug 2021 03:27:24 -0400 -Subject: [PATCH 2/2] async: use explicit memory barriers - -RH-Author: Andrew Jones -Message-id: <20210729134448.4995-3-drjones@redhat.com> -Patchwork-id: 101937 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH v2 2/2] async: use explicit memory barriers -Bugzilla: 1969848 -RH-Acked-by: Gavin Shan -RH-Acked-by: Auger Eric -RH-Acked-by: Stefan Hajnoczi - -From: Paolo Bonzini - -When using C11 atomics, non-seqcst reads and writes do not participate -in the total order of seqcst operations. 
In util/async.c and util/aio-posix.c, -in particular, the pattern that we use - - write ctx->notify_me write bh->scheduled - read bh->scheduled read ctx->notify_me - if !bh->scheduled, sleep if ctx->notify_me, notify - -needs to use seqcst operations for both the write and the read. In -general this is something that we do not want, because there can be -many sources that are polled in addition to bottom halves. The -alternative is to place a seqcst memory barrier between the write -and the read. This also comes with a disadvantage, in that the -memory barrier is implicit on strongly-ordered architectures and -it wastes a few dozen clock cycles. - -Fortunately, ctx->notify_me is never written concurrently by two -threads, so we can assert that and relax the writes to ctx->notify_me. -The resulting solution works and performs well on both aarch64 and x86. - -Note that the atomic_set/atomic_read combination is not an atomic -read-modify-write, and therefore it is even weaker than C11 ATOMIC_RELAXED; -on x86, ATOMIC_RELAXED compiles to a locked operation. - -Analyzed-by: Ying Fang -Signed-off-by: Paolo Bonzini -Tested-by: Ying Fang -Message-Id: <20200407140746.8041-6-pbonzini@redhat.com> -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 5710a3e09f9b85801e5ce70797a4a511e5fc9e2c) -Signed-off-by: Andrew Jones -Signed-off-by: Miroslav Rezanina ---- - util/aio-posix.c | 16 ++++++++++++++-- - util/aio-win32.c | 17 ++++++++++++++--- - util/async.c | 16 ++++++++++++---- - 3 files changed, 40 insertions(+), 9 deletions(-) - -diff --git a/util/aio-posix.c b/util/aio-posix.c -index abc396d030..8cfb25650d 100644 ---- a/util/aio-posix.c -+++ b/util/aio-posix.c -@@ -624,6 +624,11 @@ bool aio_poll(AioContext *ctx, bool blocking) - int64_t timeout; - int64_t start = 0; - -+ /* -+ * There cannot be two concurrent aio_poll calls for the same AioContext (or -+ * an aio_poll concurrent with a GSource prepare/check/dispatch callback). 
-+ * We rely on this below to avoid slow locked accesses to ctx->notify_me. -+ */ - assert(in_aio_context_home_thread(ctx)); - - /* aio_notify can avoid the expensive event_notifier_set if -@@ -634,7 +639,13 @@ bool aio_poll(AioContext *ctx, bool blocking) - * so disable the optimization now. - */ - if (blocking) { -- atomic_add(&ctx->notify_me, 2); -+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2); -+ /* -+ * Write ctx->notify_me before computing the timeout -+ * (reading bottom half flags, etc.). Pairs with -+ * smp_mb in aio_notify(). -+ */ -+ smp_mb(); - } - - qemu_lockcnt_inc(&ctx->list_lock); -@@ -679,7 +690,8 @@ bool aio_poll(AioContext *ctx, bool blocking) - } - - if (blocking) { -- atomic_sub(&ctx->notify_me, 2); -+ /* Finish the poll before clearing the flag. */ -+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2); - aio_notify_accept(ctx); - } - -diff --git a/util/aio-win32.c b/util/aio-win32.c -index a23b9c364d..729d533faf 100644 ---- a/util/aio-win32.c -+++ b/util/aio-win32.c -@@ -321,6 +321,12 @@ bool aio_poll(AioContext *ctx, bool blocking) - int count; - int timeout; - -+ /* -+ * There cannot be two concurrent aio_poll calls for the same AioContext (or -+ * an aio_poll concurrent with a GSource prepare/check/dispatch callback). -+ * We rely on this below to avoid slow locked accesses to ctx->notify_me. -+ */ -+ assert(in_aio_context_home_thread(ctx)); - progress = false; - - /* aio_notify can avoid the expensive event_notifier_set if -@@ -331,7 +337,13 @@ bool aio_poll(AioContext *ctx, bool blocking) - * so disable the optimization now. - */ - if (blocking) { -- atomic_add(&ctx->notify_me, 2); -+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2); -+ /* -+ * Write ctx->notify_me before computing the timeout -+ * (reading bottom half flags, etc.). Pairs with -+ * smp_mb in aio_notify(). 
-+ */ -+ smp_mb(); - } - - qemu_lockcnt_inc(&ctx->list_lock); -@@ -364,8 +376,7 @@ bool aio_poll(AioContext *ctx, bool blocking) - ret = WaitForMultipleObjects(count, events, FALSE, timeout); - if (blocking) { - assert(first); -- assert(in_aio_context_home_thread(ctx)); -- atomic_sub(&ctx->notify_me, 2); -+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2); - aio_notify_accept(ctx); - } - -diff --git a/util/async.c b/util/async.c -index b1fa5319e5..c65c58bbc9 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -220,7 +220,14 @@ aio_ctx_prepare(GSource *source, gint *timeout) - { - AioContext *ctx = (AioContext *) source; - -- atomic_or(&ctx->notify_me, 1); -+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1); -+ -+ /* -+ * Write ctx->notify_me before computing the timeout -+ * (reading bottom half flags, etc.). Pairs with -+ * smp_mb in aio_notify(). -+ */ -+ smp_mb(); - - /* We assume there is no timeout already supplied */ - *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)); -@@ -238,7 +245,8 @@ aio_ctx_check(GSource *source) - AioContext *ctx = (AioContext *) source; - QEMUBH *bh; - -- atomic_and(&ctx->notify_me, ~1); -+ /* Finish computing the timeout before clearing the flag. */ -+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1); - aio_notify_accept(ctx); - - for (bh = ctx->first_bh; bh; bh = bh->next) { -@@ -343,10 +351,10 @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx) - void aio_notify(AioContext *ctx) - { - /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs -- * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll. -+ * with smp_mb in aio_ctx_prepare or aio_poll. 
- */ - smp_mb(); -- if (ctx->notify_me) { -+ if (atomic_read(&ctx->notify_me)) { - event_notifier_set(&ctx->notifier); - atomic_mb_set(&ctx->notified, true); - } --- -2.27.0 - diff --git a/SOURCES/kvm-audio-audio_generic_get_buffer_in-should-honor-size.patch b/SOURCES/kvm-audio-audio_generic_get_buffer_in-should-honor-size.patch deleted file mode 100644 index 1a20688..0000000 --- a/SOURCES/kvm-audio-audio_generic_get_buffer_in-should-honor-size.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 96c8fcafa7325cd0e8a23a743a55f0ad0aa9f79b Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Thu, 18 Mar 2021 09:13:42 -0400 -Subject: [PATCH 5/5] audio: audio_generic_get_buffer_in should honor *size -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gerd Hoffmann -Message-id: <20210318091342.3232471-2-kraxel@redhat.com> -Patchwork-id: 101352 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] audio: audio_generic_get_buffer_in should honor *size -Bugzilla: 1932823 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Danilo de Paula -RH-Acked-by: Philippe Mathieu-Daudé - -From: Volker Rümelin - -The function generic_get_buffer_in currently ignores the *size -parameter and may return a buffer larger than *size. - -As a result the variable samples in function -audio_pcm_hw_run_in may underflow. The while loop then most -likely will never termiate. - -Buglink: http://bugs.debian.org/948658 -Signed-off-by: Volker Rümelin -Message-Id: <20200123074943.6699-9-vr_qemu@t-online.de> -Signed-off-by: Gerd Hoffmann -(cherry picked from commit 599eac4e5a41e828645594097daee39373acc3c0) -Signed-off-by: Danilo C. L. 
de Paula ---- - audio/audio.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/audio/audio.c b/audio/audio.c -index 56fae55047..39a62fc62a 100644 ---- a/audio/audio.c -+++ b/audio/audio.c -@@ -1402,7 +1402,8 @@ void *audio_generic_get_buffer_in(HWVoiceIn *hw, size_t *size) - } - assert(start >= 0 && start < hw->size_emul); - -- *size = MIN(hw->pending_emul, hw->size_emul - start); -+ *size = MIN(*size, hw->pending_emul); -+ *size = MIN(*size, hw->size_emul - start); - return hw->buf_emul + start; - } - --- -2.27.0 - diff --git a/SOURCES/kvm-backup-Improve-error-for-bdrv_getlength-failure.patch b/SOURCES/kvm-backup-Improve-error-for-bdrv_getlength-failure.patch deleted file mode 100644 index 8fa2629..0000000 --- a/SOURCES/kvm-backup-Improve-error-for-bdrv_getlength-failure.patch +++ /dev/null @@ -1,51 +0,0 @@ -From fba183faf8ce819262a1a47f8531ea68051cdce7 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 3 Jun 2020 16:03:19 +0100 -Subject: [PATCH 20/26] backup: Improve error for bdrv_getlength() failure - -RH-Author: Kevin Wolf -Message-id: <20200603160325.67506-6-kwolf@redhat.com> -Patchwork-id: 97103 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 05/11] backup: Improve error for bdrv_getlength() failure -Bugzilla: 1778593 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -bdrv_get_device_name() will be an empty string with modern management -tools that don't use -drive. Use bdrv_get_device_or_node_name() instead -so that the node name is used if the BlockBackend is anonymous. - -While at it, start with upper case to make the message consistent with -the rest of the function. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Alberto Garcia -Message-Id: <20200430142755.315494-3-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 58226634c4b02af7b10862f7fbd3610a344bfb7f) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/backup.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/block/backup.c b/block/backup.c -index ec50946..7c6ddd2 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -408,8 +408,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - - len = bdrv_getlength(bs); - if (len < 0) { -- error_setg_errno(errp, -len, "unable to get length for '%s'", -- bdrv_get_device_name(bs)); -+ error_setg_errno(errp, -len, "Unable to get length for '%s'", -+ bdrv_get_device_or_node_name(bs)); - goto error; - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-backup-Make-sure-that-source-and-target-size-match.patch b/SOURCES/kvm-backup-Make-sure-that-source-and-target-size-match.patch deleted file mode 100644 index 05b5d10..0000000 --- a/SOURCES/kvm-backup-Make-sure-that-source-and-target-size-match.patch +++ /dev/null @@ -1,124 +0,0 @@ -From e56abd782be8bb41bb07c0317d008f95ec9a8ee5 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 3 Jun 2020 16:03:20 +0100 -Subject: [PATCH 21/26] backup: Make sure that source and target size match - -RH-Author: Kevin Wolf -Message-id: <20200603160325.67506-7-kwolf@redhat.com> -Patchwork-id: 97107 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 06/11] backup: Make sure that source and target size match -Bugzilla: 1778593 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -Since the introduction of a backup filter node in commit 00e30f05d, the -backup block job crashes when the target image is smaller than the -source image because it will try to write after the end of the target -node without having BLK_PERM_RESIZE. (Previously, the BlockBackend layer -would have caught this and errored out gracefully.) - -We can fix this and even do better than the old behaviour: Check that -source and target have the same image size at the start of the block job -and unshare BLK_PERM_RESIZE. 
(This permission was already unshared -before the same commit 00e30f05d, but the BlockBackend that was used to -make the restriction was removed without a replacement.) This will -immediately error out when starting the job instead of only when writing -to a block that doesn't exist in the target. - -Longer target than source would technically work because we would never -write to blocks that don't exist, but semantically these are invalid, -too, because a backup is supposed to create a copy, not just an image -that starts with a copy. - -Fixes: 00e30f05de1d19586345ec373970ef4c192c6270 -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1778593 -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Message-Id: <20200430142755.315494-4-kwolf@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 958a04bd32af18d9a207bcc78046e56a202aebc2) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/backup-top.c | 14 +++++++++----- - block/backup.c | 14 +++++++++++++- - 2 files changed, 22 insertions(+), 6 deletions(-) - -diff --git a/block/backup-top.c b/block/backup-top.c -index b8d863f..6756091 100644 ---- a/block/backup-top.c -+++ b/block/backup-top.c -@@ -143,8 +143,10 @@ static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c, - * - * Share write to target (child_file), to not interfere - * with guest writes to its disk which may be in target backing chain. -+ * Can't resize during a backup block job because we check the size -+ * only upfront. 
- */ -- *nshared = BLK_PERM_ALL; -+ *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; - *nperm = BLK_PERM_WRITE; - } else { - /* Source child */ -@@ -154,7 +156,7 @@ static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c, - if (perm & BLK_PERM_WRITE) { - *nperm = *nperm | BLK_PERM_CONSISTENT_READ; - } -- *nshared &= ~BLK_PERM_WRITE; -+ *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); - } - } - -@@ -187,10 +189,12 @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState *source, - { - Error *local_err = NULL; - BDRVBackupTopState *state; -- BlockDriverState *top = bdrv_new_open_driver(&bdrv_backup_top_filter, -- filter_node_name, -- BDRV_O_RDWR, errp); -+ BlockDriverState *top; -+ -+ assert(source->total_sectors == target->total_sectors); - -+ top = bdrv_new_open_driver(&bdrv_backup_top_filter, filter_node_name, -+ BDRV_O_RDWR, errp); - if (!top) { - return NULL; - } -diff --git a/block/backup.c b/block/backup.c -index 7c6ddd2..821c9fb 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -348,7 +348,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - BlockCompletionFunc *cb, void *opaque, - JobTxn *txn, Error **errp) - { -- int64_t len; -+ int64_t len, target_len; - BackupBlockJob *job = NULL; - int64_t cluster_size; - BdrvRequestFlags write_flags; -@@ -413,6 +413,18 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, - goto error; - } - -+ target_len = bdrv_getlength(target); -+ if (target_len < 0) { -+ error_setg_errno(errp, -target_len, "Unable to get length for '%s'", -+ bdrv_get_device_or_node_name(bs)); -+ goto error; -+ } -+ -+ if (target_len != len) { -+ error_setg(errp, "Source and target image have different sizes"); -+ goto error; -+ } -+ - cluster_size = backup_calculate_cluster_size(target, errp); - if (cluster_size < 0) { - goto error; --- -1.8.3.1 - diff --git a/SOURCES/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch 
b/SOURCES/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch deleted file mode 100644 index 7fb76c1..0000000 --- a/SOURCES/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 619b3aac9790a7ca7c01846144395a318a9ab250 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:14 +0100 -Subject: [PATCH 3/6] backup: don't acquire aio_context in backup_clean - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-4-kwolf@redhat.com> -Patchwork-id: 94596 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/6] backup: don't acquire aio_context in backup_clean -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -From: Stefan Reiter - -All code-paths leading to backup_clean (via job_clean) have the job's -context already acquired. The job's context is guaranteed to be the same -as the one used by backup_top via backup_job_create. - -Since the previous logic effectively acquired the lock twice, this -broke cleanup of backups for disks using IO threads, since the BDRV_POLL_WHILE -in bdrv_backup_top_drop -> bdrv_do_drained_begin would only release the lock -once, thus deadlocking with the IO thread. - -This is a partial revert of 0abf2581717a19. - -Signed-off-by: Stefan Reiter -Reviewed-by: Max Reitz -Message-Id: <20200407115651.69472-4-s.reiter@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit eca0f3524a4eb57d03a56b0cbcef5527a0981ce4) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/backup.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/block/backup.c b/block/backup.c -index 1383e21..ec50946 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -135,11 +135,7 @@ static void backup_abort(Job *job) - static void backup_clean(Job *job) - { - BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); -- AioContext *aio_context = bdrv_get_aio_context(s->backup_top); -- -- aio_context_acquire(aio_context); - bdrv_backup_top_drop(s->backup_top); -- aio_context_release(aio_context); - } - - void backup_do_checkpoint(BlockJob *job, Error **errp) --- -1.8.3.1 - diff --git a/SOURCES/kvm-backup-top-Begin-drain-earlier.patch b/SOURCES/kvm-backup-top-Begin-drain-earlier.patch deleted file mode 100644 index ef289b7..0000000 --- a/SOURCES/kvm-backup-top-Begin-drain-earlier.patch +++ /dev/null @@ -1,56 +0,0 @@ -From bc78ee07bf400cbff0021367e05d308870471710 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:45 +0000 -Subject: [PATCH 12/18] backup-top: Begin drain earlier - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-6-slp@redhat.com> -Patchwork-id: 93757 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/9] backup-top: Begin drain earlier -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -From: Max Reitz - -When dropping backup-top, we need to drain the node before freeing the -BlockCopyState. Otherwise, requests may still be in flight and then the -assertion in shres_destroy() will fail. - -(This becomes visible in intermittent failure of 056.) - -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Message-id: 20191219182638.104621-1-mreitz@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit 503ca1262bab2c11c533a4816d1ff4297d4f58a6) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - block/backup-top.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/block/backup-top.c b/block/backup-top.c -index 7cdb1f8..818d3f2 100644 ---- a/block/backup-top.c -+++ b/block/backup-top.c -@@ -257,12 +257,12 @@ void bdrv_backup_top_drop(BlockDriverState *bs) - BDRVBackupTopState *s = bs->opaque; - AioContext *aio_context = bdrv_get_aio_context(bs); - -- block_copy_state_free(s->bcs); -- - aio_context_acquire(aio_context); - - bdrv_drained_begin(bs); - -+ block_copy_state_free(s->bcs); -+ - s->active = false; - bdrv_child_refresh_perms(bs, bs->backing, &error_abort); - bdrv_replace_node(bs, backing_bs(bs), &error_abort); --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-Activate-recursively-even-for-already-active-n.patch b/SOURCES/kvm-block-Activate-recursively-even-for-already-active-n.patch deleted file mode 100644 index d6cad06..0000000 --- a/SOURCES/kvm-block-Activate-recursively-even-for-already-active-n.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 0ef6691ce8964bb2bbd677756c4e594793ca3ad8 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:01 +0000 -Subject: [PATCH 04/18] block: Activate recursively even for already active - nodes - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-4-kwolf@redhat.com> -Patchwork-id: 93749 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/6] block: Activate recursively even for already active nodes -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -bdrv_invalidate_cache_all() assumes that all nodes in a given subtree -are either active or inactive when it starts. Therefore, as soon as it -arrives at an already active node, it stops. - -However, this assumption is wrong. For example, it's possible to take a -snapshot of an inactive node, which results in an active overlay over an -inactive backing file. 
The active overlay is probably also the root node -of an inactive BlockBackend (blk->disable_perm == true). - -In this case, bdrv_invalidate_cache_all() does not need to do anything -to activate the overlay node, but it still needs to recurse into the -children and the parents to make sure that after returning success, -really everything is activated. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Reviewed-by: Max Reitz -(cherry picked from commit 7bb4941ace471fc7dd6ded4749b95b9622baa6ed) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block.c | 50 ++++++++++++++++++++++++-------------------------- - 1 file changed, 24 insertions(+), 26 deletions(-) - -diff --git a/block.c b/block.c -index 473eb6e..2e5e8b6 100644 ---- a/block.c -+++ b/block.c -@@ -5335,10 +5335,6 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, - return; - } - -- if (!(bs->open_flags & BDRV_O_INACTIVE)) { -- return; -- } -- - QLIST_FOREACH(child, &bs->children, next) { - bdrv_co_invalidate_cache(child->bs, &local_err); - if (local_err) { -@@ -5360,34 +5356,36 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, - * just keep the extended permissions for the next time that an activation - * of the image is tried. 
- */ -- bs->open_flags &= ~BDRV_O_INACTIVE; -- bdrv_get_cumulative_perm(bs, &perm, &shared_perm); -- ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, &local_err); -- if (ret < 0) { -- bs->open_flags |= BDRV_O_INACTIVE; -- error_propagate(errp, local_err); -- return; -- } -- bdrv_set_perm(bs, perm, shared_perm); -- -- if (bs->drv->bdrv_co_invalidate_cache) { -- bs->drv->bdrv_co_invalidate_cache(bs, &local_err); -- if (local_err) { -+ if (bs->open_flags & BDRV_O_INACTIVE) { -+ bs->open_flags &= ~BDRV_O_INACTIVE; -+ bdrv_get_cumulative_perm(bs, &perm, &shared_perm); -+ ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, &local_err); -+ if (ret < 0) { - bs->open_flags |= BDRV_O_INACTIVE; - error_propagate(errp, local_err); - return; - } -- } -+ bdrv_set_perm(bs, perm, shared_perm); - -- FOR_EACH_DIRTY_BITMAP(bs, bm) { -- bdrv_dirty_bitmap_skip_store(bm, false); -- } -+ if (bs->drv->bdrv_co_invalidate_cache) { -+ bs->drv->bdrv_co_invalidate_cache(bs, &local_err); -+ if (local_err) { -+ bs->open_flags |= BDRV_O_INACTIVE; -+ error_propagate(errp, local_err); -+ return; -+ } -+ } - -- ret = refresh_total_sectors(bs, bs->total_sectors); -- if (ret < 0) { -- bs->open_flags |= BDRV_O_INACTIVE; -- error_setg_errno(errp, -ret, "Could not refresh total sector count"); -- return; -+ FOR_EACH_DIRTY_BITMAP(bs, bm) { -+ bdrv_dirty_bitmap_skip_store(bm, false); -+ } -+ -+ ret = refresh_total_sectors(bs, bs->total_sectors); -+ if (ret < 0) { -+ bs->open_flags |= BDRV_O_INACTIVE; -+ error_setg_errno(errp, -ret, "Could not refresh total sector count"); -+ return; -+ } - } - - QLIST_FOREACH(parent, &bs->parents, next_parent) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-Add-flags-to-BlockDriver.bdrv_co_truncate.patch b/SOURCES/kvm-block-Add-flags-to-BlockDriver.bdrv_co_truncate.patch deleted file mode 100644 index bc67279..0000000 --- a/SOURCES/kvm-block-Add-flags-to-BlockDriver.bdrv_co_truncate.patch +++ /dev/null @@ -1,283 +0,0 @@ -From 
13e2076f5c4adbc9a3f96c8978150aa5e423e14a Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 8 Jun 2020 15:01:30 +0100 -Subject: [PATCH 02/17] block: Add flags to BlockDriver.bdrv_co_truncate() - -RH-Author: Kevin Wolf -Message-id: <20200608150140.38218-2-kwolf@redhat.com> -Patchwork-id: 97448 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 01/11] block: Add flags to BlockDriver.bdrv_co_truncate() -Bugzilla: 1780574 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz - -This adds a new BdrvRequestFlags parameter to the .bdrv_co_truncate() -driver callbacks, and a supported_truncate_flags field in -BlockDriverState that allows drivers to advertise support for request -flags in the context of truncate. - -For now, we always pass 0 and no drivers declare support for any flag. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Alberto Garcia -Reviewed-by: Max Reitz -Message-Id: <20200424125448.63318-2-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 92b92799dc8662b6f71809100a4aabc1ae408ebb) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/crypto.c | 3 ++- - block/file-posix.c | 2 +- - block/file-win32.c | 2 +- - block/gluster.c | 1 + - block/io.c | 8 +++++++- - block/iscsi.c | 2 +- - block/nfs.c | 3 ++- - block/qcow2.c | 2 +- - block/qed.c | 1 + - block/raw-format.c | 2 +- - block/rbd.c | 1 + - block/sheepdog.c | 4 ++-- - block/ssh.c | 2 +- - include/block/block_int.h | 10 +++++++++- - tests/test-block-iothread.c | 3 ++- - 15 files changed, 33 insertions(+), 13 deletions(-) - -diff --git a/block/crypto.c b/block/crypto.c -index 5e3b15c..6e4b726 100644 ---- a/block/crypto.c -+++ b/block/crypto.c -@@ -299,7 +299,8 @@ static int block_crypto_co_create_generic(BlockDriverState *bs, - - static int coroutine_fn - block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, -- PreallocMode prealloc, Error **errp) -+ PreallocMode prealloc, BdrvRequestFlags flags, -+ Error **errp) - { - BlockCrypto *crypto = bs->opaque; - uint64_t payload_offset = -diff --git a/block/file-posix.c b/block/file-posix.c -index 1609598..7551e8d 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -2021,7 +2021,7 @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, - - static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, - bool exact, PreallocMode prealloc, -- Error **errp) -+ BdrvRequestFlags flags, Error **errp) - { - BDRVRawState *s = bs->opaque; - struct stat st; -diff --git a/block/file-win32.c b/block/file-win32.c -index 1585983..a6b0dda 100644 ---- a/block/file-win32.c -+++ b/block/file-win32.c -@@ -469,7 +469,7 @@ static void raw_close(BlockDriverState *bs) - - static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, - bool exact, PreallocMode prealloc, -- Error **errp) -+ BdrvRequestFlags flags, Error **errp) - { - BDRVRawState *s = bs->opaque; - LONG low, high; -diff --git a/block/gluster.c b/block/gluster.c -index 0aa1f2c..d06df90 100644 ---- a/block/gluster.c -+++ b/block/gluster.c -@@ -1228,6 +1228,7 @@ static 
coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs, - int64_t offset, - bool exact, - PreallocMode prealloc, -+ BdrvRequestFlags flags, - Error **errp) - { - BDRVGlusterState *s = bs->opaque; -diff --git a/block/io.c b/block/io.c -index f75777f..549e5a4 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -3320,6 +3320,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, - BlockDriverState *bs = child->bs; - BlockDriver *drv = bs->drv; - BdrvTrackedRequest req; -+ BdrvRequestFlags flags = 0; - int64_t old_size, new_bytes; - int ret; - -@@ -3370,7 +3371,12 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, - } - - if (drv->bdrv_co_truncate) { -- ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, errp); -+ if (flags & ~bs->supported_truncate_flags) { -+ error_setg(errp, "Block driver does not support requested flags"); -+ ret = -ENOTSUP; -+ goto out; -+ } -+ ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp); - } else if (bs->file && drv->is_filter) { - ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); - } else { -diff --git a/block/iscsi.c b/block/iscsi.c -index 16b0716..0bea2d3 100644 ---- a/block/iscsi.c -+++ b/block/iscsi.c -@@ -2125,7 +2125,7 @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state) - - static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, - bool exact, PreallocMode prealloc, -- Error **errp) -+ BdrvRequestFlags flags, Error **errp) - { - IscsiLun *iscsilun = bs->opaque; - int64_t cur_length; -diff --git a/block/nfs.c b/block/nfs.c -index cc2413d..2393fbf 100644 ---- a/block/nfs.c -+++ b/block/nfs.c -@@ -755,7 +755,8 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) - - static int coroutine_fn - nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, -- PreallocMode prealloc, Error **errp) -+ PreallocMode prealloc, BdrvRequestFlags flags, -+ Error **errp) - { - NFSClient 
*client = bs->opaque; - int ret; -diff --git a/block/qcow2.c b/block/qcow2.c -index dbd870a..977445e 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -3948,7 +3948,7 @@ fail: - - static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, - bool exact, PreallocMode prealloc, -- Error **errp) -+ BdrvRequestFlags flags, Error **errp) - { - BDRVQcow2State *s = bs->opaque; - uint64_t old_length; -diff --git a/block/qed.c b/block/qed.c -index 1af9b3c..fb6100b 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -1467,6 +1467,7 @@ static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs, - int64_t offset, - bool exact, - PreallocMode prealloc, -+ BdrvRequestFlags flags, - Error **errp) - { - BDRVQEDState *s = bs->opaque; -diff --git a/block/raw-format.c b/block/raw-format.c -index 4bb54f4..f994c4a 100644 ---- a/block/raw-format.c -+++ b/block/raw-format.c -@@ -371,7 +371,7 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) - - static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, - bool exact, PreallocMode prealloc, -- Error **errp) -+ BdrvRequestFlags flags, Error **errp) - { - BDRVRawState *s = bs->opaque; - -diff --git a/block/rbd.c b/block/rbd.c -index 8847259..fcdb60a 100644 ---- a/block/rbd.c -+++ b/block/rbd.c -@@ -1090,6 +1090,7 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, - int64_t offset, - bool exact, - PreallocMode prealloc, -+ BdrvRequestFlags flags, - Error **errp) - { - int r; -diff --git a/block/sheepdog.c b/block/sheepdog.c -index a8a7e32..077aed8 100644 ---- a/block/sheepdog.c -+++ b/block/sheepdog.c -@@ -2288,7 +2288,7 @@ static int64_t sd_getlength(BlockDriverState *bs) - - static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset, - bool exact, PreallocMode prealloc, -- Error **errp) -+ BdrvRequestFlags flags, Error **errp) - { - BDRVSheepdogState *s = bs->opaque; - int ret, fd; -@@ -2604,7 +2604,7 @@ static coroutine_fn int 
sd_co_writev(BlockDriverState *bs, int64_t sector_num, - - assert(!flags); - if (offset > s->inode.vdi_size) { -- ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, NULL); -+ ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, 0, NULL); - if (ret < 0) { - return ret; - } -diff --git a/block/ssh.c b/block/ssh.c -index 84e9282..9eb33df 100644 ---- a/block/ssh.c -+++ b/block/ssh.c -@@ -1298,7 +1298,7 @@ static int64_t ssh_getlength(BlockDriverState *bs) - - static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset, - bool exact, PreallocMode prealloc, -- Error **errp) -+ BdrvRequestFlags flags, Error **errp) - { - BDRVSSHState *s = bs->opaque; - -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 876a83d..41f13ec 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -356,7 +356,7 @@ struct BlockDriver { - */ - int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset, - bool exact, PreallocMode prealloc, -- Error **errp); -+ BdrvRequestFlags flags, Error **errp); - - int64_t (*bdrv_getlength)(BlockDriverState *bs); - bool has_variable_length; -@@ -849,6 +849,14 @@ struct BlockDriverState { - /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA, - * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */ - unsigned int supported_zero_flags; -+ /* -+ * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE). -+ * -+ * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure -+ * that any added space reads as all zeros. If this can't be guaranteed, -+ * the operation must fail. -+ */ -+ unsigned int supported_truncate_flags; - - /* the following member gives a name to every node on the bs graph. 
*/ - char node_name[32]; -diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c -index 0c86180..2f3b763 100644 ---- a/tests/test-block-iothread.c -+++ b/tests/test-block-iothread.c -@@ -46,7 +46,8 @@ static int coroutine_fn bdrv_test_co_pdiscard(BlockDriverState *bs, - - static int coroutine_fn - bdrv_test_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, -- PreallocMode prealloc, Error **errp) -+ PreallocMode prealloc, BdrvRequestFlags flags, -+ Error **errp) - { - return 0; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-Add-flags-to-bdrv-_co-_truncate.patch b/SOURCES/kvm-block-Add-flags-to-bdrv-_co-_truncate.patch deleted file mode 100644 index 3da05ff..0000000 --- a/SOURCES/kvm-block-Add-flags-to-bdrv-_co-_truncate.patch +++ /dev/null @@ -1,353 +0,0 @@ -From 50127f0ff9e13a15fd5bfeb2662e2404ff20f364 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 8 Jun 2020 15:01:31 +0100 -Subject: [PATCH 03/17] block: Add flags to bdrv(_co)_truncate() - -RH-Author: Kevin Wolf -Message-id: <20200608150140.38218-3-kwolf@redhat.com> -Patchwork-id: 97445 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 02/11] block: Add flags to bdrv(_co)_truncate() -Bugzilla: 1780574 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz - -Now that block drivers can support flags for .bdrv_co_truncate, expose -the parameter in the node level interfaces bdrv_co_truncate() and -bdrv_truncate(). - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Alberto Garcia -Reviewed-by: Max Reitz -Message-Id: <20200424125448.63318-3-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 7b8e4857426f2e2de2441749996c6161b550bada) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/block-backend.c | 2 +- - block/crypto.c | 2 +- - block/io.c | 12 +++++++----- - block/parallels.c | 6 +++--- - block/qcow.c | 4 ++-- - block/qcow2-refcount.c | 2 +- - block/qcow2.c | 15 +++++++++------ - block/raw-format.c | 2 +- - block/vhdx-log.c | 2 +- - block/vhdx.c | 2 +- - block/vmdk.c | 2 +- - include/block/block.h | 5 +++-- - tests/test-block-iothread.c | 6 +++--- - 13 files changed, 34 insertions(+), 28 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 38ae413..8be2006 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -2144,7 +2144,7 @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, - return -ENOMEDIUM; - } - -- return bdrv_truncate(blk->root, offset, exact, prealloc, errp); -+ return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp); - } - - int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, -diff --git a/block/crypto.c b/block/crypto.c -index 6e4b726..fcb4a97 100644 ---- a/block/crypto.c -+++ b/block/crypto.c -@@ -313,7 +313,7 @@ block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, - - offset += payload_offset; - -- return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); -+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); - } - - static void block_crypto_close(BlockDriverState *bs) -diff --git a/block/io.c b/block/io.c -index 549e5a4..3235ce5 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -3315,12 +3315,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs) - * 'offset' bytes in length. 
- */ - int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, -- PreallocMode prealloc, Error **errp) -+ PreallocMode prealloc, BdrvRequestFlags flags, -+ Error **errp) - { - BlockDriverState *bs = child->bs; - BlockDriver *drv = bs->drv; - BdrvTrackedRequest req; -- BdrvRequestFlags flags = 0; - int64_t old_size, new_bytes; - int ret; - -@@ -3378,7 +3378,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, - } - ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp); - } else if (bs->file && drv->is_filter) { -- ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); -+ ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp); - } else { - error_setg(errp, "Image format driver does not support resize"); - ret = -ENOTSUP; -@@ -3411,6 +3411,7 @@ typedef struct TruncateCo { - int64_t offset; - bool exact; - PreallocMode prealloc; -+ BdrvRequestFlags flags; - Error **errp; - int ret; - } TruncateCo; -@@ -3419,12 +3420,12 @@ static void coroutine_fn bdrv_truncate_co_entry(void *opaque) - { - TruncateCo *tco = opaque; - tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact, -- tco->prealloc, tco->errp); -+ tco->prealloc, tco->flags, tco->errp); - aio_wait_kick(); - } - - int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, -- PreallocMode prealloc, Error **errp) -+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) - { - Coroutine *co; - TruncateCo tco = { -@@ -3432,6 +3433,7 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, - .offset = offset, - .exact = exact, - .prealloc = prealloc, -+ .flags = flags, - .errp = errp, - .ret = NOT_DONE, - }; -diff --git a/block/parallels.c b/block/parallels.c -index 6d4ed77..2be92cf 100644 ---- a/block/parallels.c -+++ b/block/parallels.c -@@ -203,7 +203,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, - } else { - ret = bdrv_truncate(bs->file, - (s->data_end + 
space) << BDRV_SECTOR_BITS, -- false, PREALLOC_MODE_OFF, NULL); -+ false, PREALLOC_MODE_OFF, 0, NULL); - } - if (ret < 0) { - return ret; -@@ -493,7 +493,7 @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs, - * That means we have to pass exact=true. - */ - ret = bdrv_truncate(bs->file, res->image_end_offset, true, -- PREALLOC_MODE_OFF, &local_err); -+ PREALLOC_MODE_OFF, 0, &local_err); - if (ret < 0) { - error_report_err(local_err); - res->check_errors++; -@@ -889,7 +889,7 @@ static void parallels_close(BlockDriverState *bs) - - /* errors are ignored, so we might as well pass exact=true */ - bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true, -- PREALLOC_MODE_OFF, NULL); -+ PREALLOC_MODE_OFF, 0, NULL); - } - - g_free(s->bat_dirty_bmap); -diff --git a/block/qcow.c b/block/qcow.c -index 8973e4e..6b5f226 100644 ---- a/block/qcow.c -+++ b/block/qcow.c -@@ -480,7 +480,7 @@ static int get_cluster_offset(BlockDriverState *bs, - return -E2BIG; - } - ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size, -- false, PREALLOC_MODE_OFF, NULL); -+ false, PREALLOC_MODE_OFF, 0, NULL); - if (ret < 0) { - return ret; - } -@@ -1035,7 +1035,7 @@ static int qcow_make_empty(BlockDriverState *bs) - l1_length) < 0) - return -1; - ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, false, -- PREALLOC_MODE_OFF, NULL); -+ PREALLOC_MODE_OFF, 0, NULL); - if (ret < 0) - return ret; - -diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c -index f67ac6b..3a90d75 100644 ---- a/block/qcow2-refcount.c -+++ b/block/qcow2-refcount.c -@@ -2017,7 +2017,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, - } - - ret = bdrv_truncate(bs->file, offset + s->cluster_size, false, -- PREALLOC_MODE_OFF, &local_err); -+ PREALLOC_MODE_OFF, 0, &local_err); - if (ret < 0) { - error_report_err(local_err); - goto resize_fail; -diff --git a/block/qcow2.c b/block/qcow2.c -index 977445e..c0fdcb9 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c 
-@@ -3082,7 +3082,7 @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, - mode = PREALLOC_MODE_OFF; - } - ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false, -- mode, errp); -+ mode, 0, errp); - if (ret < 0) { - return ret; - } -@@ -4044,7 +4044,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, - * always fulfilled, so there is no need to pass it on.) - */ - bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size, -- false, PREALLOC_MODE_OFF, &local_err); -+ false, PREALLOC_MODE_OFF, 0, &local_err); - if (local_err) { - warn_reportf_err(local_err, - "Failed to truncate the tail of the image: "); -@@ -4066,7 +4066,8 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, - * file should be resized to the exact target size, too, - * so we pass @exact here. - */ -- ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp); -+ ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, 0, -+ errp); - if (ret < 0) { - goto fail; - } -@@ -4152,7 +4153,8 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, - new_file_size = allocation_start + - nb_new_data_clusters * s->cluster_size; - /* Image file grows, so @exact does not matter */ -- ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp); -+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, -+ errp); - if (ret < 0) { - error_prepend(errp, "Failed to resize underlying file: "); - qcow2_free_clusters(bs, allocation_start, -@@ -4255,7 +4257,8 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs, - if (len < 0) { - return len; - } -- return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL); -+ return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, 0, -+ NULL); - } - - if (offset_into_cluster(s, offset)) { -@@ -4493,7 +4496,7 @@ static int make_completely_empty(BlockDriverState *bs) - } - - ret = 
bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false, -- PREALLOC_MODE_OFF, &local_err); -+ PREALLOC_MODE_OFF, 0, &local_err); - if (ret < 0) { - error_report_err(local_err); - goto fail; -diff --git a/block/raw-format.c b/block/raw-format.c -index f994c4a..c3acf9a 100644 ---- a/block/raw-format.c -+++ b/block/raw-format.c -@@ -387,7 +387,7 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, - - s->size = offset; - offset += s->offset; -- return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp); -+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); - } - - static void raw_eject(BlockDriverState *bs, bool eject_flag) -diff --git a/block/vhdx-log.c b/block/vhdx-log.c -index 13a49c2..404fb5f 100644 ---- a/block/vhdx-log.c -+++ b/block/vhdx-log.c -@@ -558,7 +558,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, - goto exit; - } - ret = bdrv_truncate(bs->file, new_file_size, false, -- PREALLOC_MODE_OFF, NULL); -+ PREALLOC_MODE_OFF, 0, NULL); - if (ret < 0) { - goto exit; - } -diff --git a/block/vhdx.c b/block/vhdx.c -index 33e57cd..5dfbb20 100644 ---- a/block/vhdx.c -+++ b/block/vhdx.c -@@ -1264,7 +1264,7 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, - } - - return bdrv_truncate(bs->file, *new_offset + s->block_size, false, -- PREALLOC_MODE_OFF, NULL); -+ PREALLOC_MODE_OFF, 0, NULL); - } - - /* -diff --git a/block/vmdk.c b/block/vmdk.c -index eb726f2..1bbf937 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -2077,7 +2077,7 @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, - } - length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE); - ret = bdrv_truncate(s->extents[i].file, length, false, -- PREALLOC_MODE_OFF, NULL); -+ PREALLOC_MODE_OFF, 0, NULL); - if (ret < 0) { - return ret; - } -diff --git a/include/block/block.h b/include/block/block.h -index b2a3074..4913596 100644 ---- a/include/block/block.h -+++ b/include/block/block.h -@@ -348,9 
+348,10 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, - void bdrv_refresh_filename(BlockDriverState *bs); - - int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, -- PreallocMode prealloc, Error **errp); -+ PreallocMode prealloc, BdrvRequestFlags flags, -+ Error **errp); - int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, -- PreallocMode prealloc, Error **errp); -+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); - - int64_t bdrv_nb_sectors(BlockDriverState *bs); - int64_t bdrv_getlength(BlockDriverState *bs); -diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c -index 2f3b763..71e9bce 100644 ---- a/tests/test-block-iothread.c -+++ b/tests/test-block-iothread.c -@@ -186,18 +186,18 @@ static void test_sync_op_truncate(BdrvChild *c) - int ret; - - /* Normal success path */ -- ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL); -+ ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL); - g_assert_cmpint(ret, ==, 0); - - /* Early error: Negative offset */ -- ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, NULL); -+ ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, 0, NULL); - g_assert_cmpint(ret, ==, -EINVAL); - - /* Error: Read-only image */ - c->bs->read_only = true; - c->bs->open_flags &= ~BDRV_O_RDWR; - -- ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL); -+ ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL); - g_assert_cmpint(ret, ==, -EACCES); - - c->bs->read_only = false; --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-Call-attention-to-truncation-of-long-NBD-expor.patch b/SOURCES/kvm-block-Call-attention-to-truncation-of-long-NBD-expor.patch deleted file mode 100644 index 190826f..0000000 --- a/SOURCES/kvm-block-Call-attention-to-truncation-of-long-NBD-expor.patch +++ /dev/null @@ -1,105 +0,0 @@ -From c8ecaea34f03b8ddda7d2b41b0d6f397469c8959 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Wed, 10 Jun 2020 18:32:02 
-0400 -Subject: [PATCH 2/2] block: Call attention to truncation of long NBD exports - -RH-Author: Eric Blake -Message-id: <20200610183202.3780750-3-eblake@redhat.com> -Patchwork-id: 97495 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] block: Call attention to truncation of long NBD exports -Bugzilla: 1845384 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Commit 93676c88 relaxed our NBD client code to request export names up -to the NBD protocol maximum of 4096 bytes without NUL terminator, even -though the block layer can't store anything longer than 4096 bytes -including NUL terminator for display to the user. Since this means -there are some export names where we have to truncate things, we can -at least try to make the truncation a bit more obvious for the user. -Note that in spite of the truncated display name, we can still -communicate with an NBD server using such a long export name; this was -deemed nicer than refusing to even connect to such a server (since the -server may not be under our control, and since determining our actual -length limits gets tricky when nbd://host:port/export and -nbd+unix:///export?socket=/path are themselves variable-length -expansions beyond the export name but count towards the block layer -name length). 
- -Reported-by: Xueqiang Wei -Fixes: https://bugzilla.redhat.com/1843684 -Signed-off-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200610163741.3745251-3-eblake@redhat.com> -(cherry picked from commit 5c86bdf1208916ece0b87e1151c9b48ee54faa3e) -Signed-off-by: Eric Blake -Signed-off-by: Eduardo Lima (Etrunko) ---- - block.c | 7 +++++-- - block/nbd.c | 21 +++++++++++++-------- - 2 files changed, 18 insertions(+), 10 deletions(-) - -diff --git a/block.c b/block.c -index 12c8941879..57740d312e 100644 ---- a/block.c -+++ b/block.c -@@ -6683,8 +6683,11 @@ void bdrv_refresh_filename(BlockDriverState *bs) - pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); - } else { - QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); -- snprintf(bs->filename, sizeof(bs->filename), "json:%s", -- qstring_get_str(json)); -+ if (snprintf(bs->filename, sizeof(bs->filename), "json:%s", -+ qstring_get_str(json)) >= sizeof(bs->filename)) { -+ /* Give user a hint if we truncated things. 
*/ -+ strcpy(bs->filename + sizeof(bs->filename) - 4, "..."); -+ } - qobject_unref(json); - } - } -diff --git a/block/nbd.c b/block/nbd.c -index 927915d93d..5bb154017d 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -1978,6 +1978,7 @@ static void nbd_refresh_filename(BlockDriverState *bs) - { - BDRVNBDState *s = bs->opaque; - const char *host = NULL, *port = NULL, *path = NULL; -+ size_t len = 0; - - if (s->saddr->type == SOCKET_ADDRESS_TYPE_INET) { - const InetSocketAddress *inet = &s->saddr->u.inet; -@@ -1990,17 +1991,21 @@ static void nbd_refresh_filename(BlockDriverState *bs) - } /* else can't represent as pseudo-filename */ - - if (path && s->export) { -- snprintf(bs->exact_filename, sizeof(bs->exact_filename), -- "nbd+unix:///%s?socket=%s", s->export, path); -+ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), -+ "nbd+unix:///%s?socket=%s", s->export, path); - } else if (path && !s->export) { -- snprintf(bs->exact_filename, sizeof(bs->exact_filename), -- "nbd+unix://?socket=%s", path); -+ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), -+ "nbd+unix://?socket=%s", path); - } else if (host && s->export) { -- snprintf(bs->exact_filename, sizeof(bs->exact_filename), -- "nbd://%s:%s/%s", host, port, s->export); -+ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), -+ "nbd://%s:%s/%s", host, port, s->export); - } else if (host && !s->export) { -- snprintf(bs->exact_filename, sizeof(bs->exact_filename), -- "nbd://%s:%s", host, port); -+ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), -+ "nbd://%s:%s", host, port); -+ } -+ if (len > sizeof(bs->exact_filename)) { -+ /* Name is too long to represent exactly, so leave it empty. 
*/ -+ bs->exact_filename[0] = '\0'; - } - } - --- -2.27.0 - diff --git a/SOURCES/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch b/SOURCES/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch deleted file mode 100644 index b16c0b7..0000000 --- a/SOURCES/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch +++ /dev/null @@ -1,84 +0,0 @@ -From f17b37b58a57d849d2ff5fa04f149d9415803a39 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:17 +0100 -Subject: [PATCH 6/6] block: Fix blk->in_flight during blk_wait_while_drained() - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-7-kwolf@redhat.com> -Patchwork-id: 94599 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/6] block: Fix blk->in_flight during blk_wait_while_drained() -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -Waiting in blk_wait_while_drained() while blk->in_flight is increased -for the current request is wrong because it will cause the drain -operation to deadlock. - -This patch makes sure that blk_wait_while_drained() is called with -blk->in_flight increased exactly once for the current request, and that -it temporarily decreases the counter while it waits. - -Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Max Reitz -Message-Id: <20200407121259.21350-4-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 7f16476fab14fc32388e0ebae793f64673848efa) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/block-backend.c | 17 +++++------------ - 1 file changed, 5 insertions(+), 12 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 610dbfa..38ae413 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -1140,10 +1140,15 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, - return 0; - } - -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ - static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) - { -+ assert(blk->in_flight > 0); -+ - if (blk->quiesce_counter && !blk->disable_request_queuing) { -+ blk_dec_in_flight(blk); - qemu_co_queue_wait(&blk->queued_requests, NULL); -+ blk_inc_in_flight(blk); - } - } - -@@ -1418,12 +1423,6 @@ static void blk_aio_read_entry(void *opaque) - BlkRwCo *rwco = &acb->rwco; - QEMUIOVector *qiov = rwco->iobuf; - -- if (rwco->blk->quiesce_counter) { -- blk_dec_in_flight(rwco->blk); -- blk_wait_while_drained(rwco->blk); -- blk_inc_in_flight(rwco->blk); -- } -- - assert(qiov->size == acb->bytes); - rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, - qiov, rwco->flags); -@@ -1436,12 +1435,6 @@ static void blk_aio_write_entry(void *opaque) - BlkRwCo *rwco = &acb->rwco; - QEMUIOVector *qiov = rwco->iobuf; - -- if (rwco->blk->quiesce_counter) { -- blk_dec_in_flight(rwco->blk); -- blk_wait_while_drained(rwco->blk); -- blk_inc_in_flight(rwco->blk); -- } -- - assert(!qiov || qiov->size == acb->bytes); - rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, - qiov, 0, rwco->flags); --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch b/SOURCES/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch deleted file mode 100644 index 0bad890..0000000 --- a/SOURCES/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch +++ /dev/null @@ -1,91 +0,0 @@ -From 5774af5a3c713d0c93010c30453812eae6a749cd Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:37 
+0000 -Subject: [PATCH 17/20] block: Fix cross-AioContext blockdev-snapshot - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-12-kwolf@redhat.com> -Patchwork-id: 94286 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 11/13] block: Fix cross-AioContext blockdev-snapshot -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -external_snapshot_prepare() tries to move the overlay to the AioContext -of the backing file (the snapshotted node). However, it's possible that -this doesn't work, but the backing file can instead be moved to the -overlay's AioContext (e.g. opening the backing chain for a mirror -target). - -bdrv_append() already indirectly uses bdrv_attach_node(), which takes -care to move nodes to make sure they use the same AioContext and which -tries both directions. - -So the problem has a simple fix: Just delete the unnecessary extra -bdrv_try_set_aio_context() call in external_snapshot_prepare() and -instead assert in bdrv_append() that both nodes were indeed moved to the -same AioContext. - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-6-kwolf@redhat.com> -Tested-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit 30dd65f307b647eef8156c4a33bd007823ef85cb) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 1 + - blockdev.c | 16 ---------------- - 2 files changed, 1 insertion(+), 16 deletions(-) - -diff --git a/block.c b/block.c -index 354d388..ec29b1e 100644 ---- a/block.c -+++ b/block.c -@@ -4327,6 +4327,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, - bdrv_ref(from); - - assert(qemu_get_current_aio_context() == qemu_get_aio_context()); -+ assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); - bdrv_drained_begin(from); - - /* Put all parents into @list and calculate their cumulative permissions */ -diff --git a/blockdev.c b/blockdev.c -index 7918533..c8d4b51 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1535,9 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common, - DO_UPCAST(ExternalSnapshotState, common, common); - TransactionAction *action = common->action; - AioContext *aio_context; -- AioContext *old_context; - uint64_t perm, shared; -- int ret; - - /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar - * purpose but a different set of parameters */ -@@ -1678,20 +1676,6 @@ static void external_snapshot_prepare(BlkActionState *common, - goto out; - } - -- /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -- old_context = bdrv_get_aio_context(state->new_bs); -- aio_context_release(aio_context); -- aio_context_acquire(old_context); -- -- ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); -- -- aio_context_release(old_context); -- aio_context_acquire(aio_context); -- -- if (ret < 0) { -- goto out; -- } -- - /* This removes our old bs and adds the new bs. This is an operation that - * can fail, so we need to do it in .prepare; undoing it for abort is - * always possible. 
*/ --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch b/SOURCES/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch deleted file mode 100644 index 1735dc0..0000000 --- a/SOURCES/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 05452efd7e0fb0522099ae09a396f8f97e66014a Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:47 +0000 -Subject: [PATCH 06/20] block: Fix leak in bdrv_create_file_fallback() - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-7-mlevitsk@redhat.com> -Patchwork-id: 94229 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/6] block: Fix leak in bdrv_create_file_fallback() -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -@options is leaked by the first two return statements in this function. - -Note that blk_new_open() takes the reference to @options even on -failure, so all we need to do to fix the leak is to move the QDict -allocation down to where we actually need it. - -Reported-by: Coverity (CID 1419884) -Fixes: fd17146cd93d1704cd96d7c2757b325fc7aac6fd - ("block: Generic file creation fallback") -Signed-off-by: Max Reitz -Message-Id: <20200225155618.133412-1-mreitz@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit eeea1faa099f82328f5831cf252f8ce0a59a9287) -Signed-off-by: Maxim Levitsky - -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/block.c b/block.c -index 3beec7f..e1a4e38 100644 ---- a/block.c -+++ b/block.c -@@ -600,7 +600,7 @@ static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, - QemuOpts *opts, Error **errp) - { - BlockBackend *blk; -- QDict *options = qdict_new(); -+ QDict *options; - int64_t size = 0; - char *buf = NULL; - PreallocMode prealloc; -@@ -623,6 +623,7 @@ static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, - return -ENOTSUP; - } - -+ options = qdict_new(); - qdict_put_str(options, "driver", drv->format_name); - - blk = blk_new_open(filename, NULL, options, --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-Generic-file-creation-fallback.patch b/SOURCES/kvm-block-Generic-file-creation-fallback.patch deleted file mode 100644 index a5dd1d7..0000000 --- a/SOURCES/kvm-block-Generic-file-creation-fallback.patch +++ /dev/null @@ -1,227 +0,0 @@ -From 882d09226b7f45b72c5b7763c4c4aba182e0f8a1 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:43 +0000 -Subject: [PATCH 02/20] block: Generic file creation fallback - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-3-mlevitsk@redhat.com> -Patchwork-id: 94227 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/6] block: Generic file creation fallback -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -If a protocol driver does not support image creation, we can see whether -maybe the file exists already. If so, just truncating it will be -sufficient. - -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-3-mreitz@redhat.com> -Signed-off-by: Max Reitz -(cherry picked from commit fd17146cd93d1704cd96d7c2757b325fc7aac6fd) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 147 insertions(+), 12 deletions(-) - -diff --git a/block.c b/block.c -index 2e5e8b6..3beec7f 100644 ---- a/block.c -+++ b/block.c -@@ -532,20 +532,139 @@ out: - return ret; - } - --int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) -+/** -+ * Helper function for bdrv_create_file_fallback(): Resize @blk to at -+ * least the given @minimum_size. -+ * -+ * On success, return @blk's actual length. -+ * Otherwise, return -errno. -+ */ -+static int64_t create_file_fallback_truncate(BlockBackend *blk, -+ int64_t minimum_size, Error **errp) - { -- BlockDriver *drv; -+ Error *local_err = NULL; -+ int64_t size; -+ int ret; -+ -+ ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err); -+ if (ret < 0 && ret != -ENOTSUP) { -+ error_propagate(errp, local_err); -+ return ret; -+ } -+ -+ size = blk_getlength(blk); -+ if (size < 0) { -+ error_free(local_err); -+ error_setg_errno(errp, -size, -+ "Failed to inquire the new image file's length"); -+ return size; -+ } -+ -+ if (size < minimum_size) { -+ /* Need to grow the image, but we failed to do that */ -+ error_propagate(errp, local_err); -+ return -ENOTSUP; -+ } -+ -+ error_free(local_err); -+ local_err = NULL; -+ -+ return size; -+} -+ -+/** -+ * Helper function for bdrv_create_file_fallback(): Zero the first -+ * sector to remove any potentially pre-existing image header. 
-+ */ -+static int create_file_fallback_zero_first_sector(BlockBackend *blk, -+ int64_t current_size, -+ Error **errp) -+{ -+ int64_t bytes_to_clear; -+ int ret; -+ -+ bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); -+ if (bytes_to_clear) { -+ ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, -+ "Failed to clear the new image's first sector"); -+ return ret; -+ } -+ } -+ -+ return 0; -+} -+ -+static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, -+ QemuOpts *opts, Error **errp) -+{ -+ BlockBackend *blk; -+ QDict *options = qdict_new(); -+ int64_t size = 0; -+ char *buf = NULL; -+ PreallocMode prealloc; - Error *local_err = NULL; - int ret; - -+ size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); -+ buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); -+ prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, -+ PREALLOC_MODE_OFF, &local_err); -+ g_free(buf); -+ if (local_err) { -+ error_propagate(errp, local_err); -+ return -EINVAL; -+ } -+ -+ if (prealloc != PREALLOC_MODE_OFF) { -+ error_setg(errp, "Unsupported preallocation mode '%s'", -+ PreallocMode_str(prealloc)); -+ return -ENOTSUP; -+ } -+ -+ qdict_put_str(options, "driver", drv->format_name); -+ -+ blk = blk_new_open(filename, NULL, options, -+ BDRV_O_RDWR | BDRV_O_RESIZE, errp); -+ if (!blk) { -+ error_prepend(errp, "Protocol driver '%s' does not support image " -+ "creation, and opening the image failed: ", -+ drv->format_name); -+ return -EINVAL; -+ } -+ -+ size = create_file_fallback_truncate(blk, size, errp); -+ if (size < 0) { -+ ret = size; -+ goto out; -+ } -+ -+ ret = create_file_fallback_zero_first_sector(blk, size, errp); -+ if (ret < 0) { -+ goto out; -+ } -+ -+ ret = 0; -+out: -+ blk_unref(blk); -+ return ret; -+} -+ -+int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) -+{ -+ BlockDriver *drv; -+ - drv = bdrv_find_protocol(filename, true, errp); - if (drv == NULL) { - return 
-ENOENT; - } - -- ret = bdrv_create(drv, filename, opts, &local_err); -- error_propagate(errp, local_err); -- return ret; -+ if (drv->bdrv_co_create_opts) { -+ return bdrv_create(drv, filename, opts, errp); -+ } else { -+ return bdrv_create_file_fallback(filename, drv, opts, errp); -+ } - } - - /** -@@ -1422,6 +1541,24 @@ QemuOptsList bdrv_runtime_opts = { - }, - }; - -+static QemuOptsList fallback_create_opts = { -+ .name = "fallback-create-opts", -+ .head = QTAILQ_HEAD_INITIALIZER(fallback_create_opts.head), -+ .desc = { -+ { -+ .name = BLOCK_OPT_SIZE, -+ .type = QEMU_OPT_SIZE, -+ .help = "Virtual disk size" -+ }, -+ { -+ .name = BLOCK_OPT_PREALLOC, -+ .type = QEMU_OPT_STRING, -+ .help = "Preallocation mode (allowed values: off)" -+ }, -+ { /* end of list */ } -+ } -+}; -+ - /* - * Common part for opening disk images and files - * -@@ -5743,14 +5880,12 @@ void bdrv_img_create(const char *filename, const char *fmt, - return; - } - -- if (!proto_drv->create_opts) { -- error_setg(errp, "Protocol driver '%s' does not support image creation", -- proto_drv->format_name); -- return; -- } -- - create_opts = qemu_opts_append(create_opts, drv->create_opts); -- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); -+ if (proto_drv->create_opts) { -+ create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); -+ } else { -+ create_opts = qemu_opts_append(create_opts, &fallback_create_opts); -+ } - - /* Create parameter list with default values */ - opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch b/SOURCES/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch deleted file mode 100644 index 463501a..0000000 --- a/SOURCES/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch +++ /dev/null @@ -1,295 +0,0 @@ -From 52cc1d1cd2f695c5761d65baec961d14552a79ed Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:16 
+0100 -Subject: [PATCH 5/6] block: Increase BB.in_flight for coroutine and sync - interfaces - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-6-kwolf@redhat.com> -Patchwork-id: 94600 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/6] block: Increase BB.in_flight for coroutine and sync interfaces -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -External callers of blk_co_*() and of the synchronous blk_*() functions -don't currently increase the BlockBackend.in_flight counter, but calls -from blk_aio_*() do, so there is an inconsistency whether the counter -has been increased or not. - -This patch moves the actual operations to static functions that can -later know they will always be called with in_flight increased exactly -once, even for external callers using the blk_co_*() coroutine -interfaces. - -If the public blk_co_*() interface is unused, remove it. - -Signed-off-by: Kevin Wolf -Message-Id: <20200407121259.21350-3-kwolf@redhat.com> -Reviewed-by: Max Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit fbb92b6798894d3bf62fe3578d99fa62c720b242) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/block-backend.c | 103 ++++++++++++++++++++++++++++++++--------- - include/sysemu/block-backend.h | 1 - - 2 files changed, 80 insertions(+), 24 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 17b2e87..610dbfa 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -1147,9 +1147,10 @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) - } - } - --int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, -- unsigned int bytes, QEMUIOVector *qiov, -- BdrvRequestFlags flags) -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn -+blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, -+ QEMUIOVector *qiov, BdrvRequestFlags flags) - { - int ret; - BlockDriverState *bs; -@@ -1178,10 +1179,24 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, - return ret; - } - --int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, -- unsigned int bytes, -- QEMUIOVector *qiov, size_t qiov_offset, -- BdrvRequestFlags flags) -+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, -+ unsigned int bytes, QEMUIOVector *qiov, -+ BdrvRequestFlags flags) -+{ -+ int ret; -+ -+ blk_inc_in_flight(blk); -+ ret = blk_do_preadv(blk, offset, bytes, qiov, flags); -+ blk_dec_in_flight(blk); -+ -+ return ret; -+} -+ -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn -+blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, -+ QEMUIOVector *qiov, size_t qiov_offset, -+ BdrvRequestFlags flags) - { - int ret; - BlockDriverState *bs; -@@ -1214,6 +1229,20 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, - return ret; - } - -+int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, -+ unsigned int bytes, -+ QEMUIOVector *qiov, size_t qiov_offset, -+ BdrvRequestFlags flags) -+{ -+ int ret; -+ -+ 
blk_inc_in_flight(blk); -+ ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); -+ blk_dec_in_flight(blk); -+ -+ return ret; -+} -+ - int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) -@@ -1234,7 +1263,7 @@ static void blk_read_entry(void *opaque) - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - -- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, -+ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size, - qiov, rwco->flags); - aio_wait_kick(); - } -@@ -1244,8 +1273,8 @@ static void blk_write_entry(void *opaque) - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - -- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, -- qiov, rwco->flags); -+ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size, -+ qiov, 0, rwco->flags); - aio_wait_kick(); - } - -@@ -1262,6 +1291,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, - .ret = NOT_DONE, - }; - -+ blk_inc_in_flight(blk); - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - co_entry(&rwco); -@@ -1270,6 +1300,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, - bdrv_coroutine_enter(blk_bs(blk), co); - BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); - } -+ blk_dec_in_flight(blk); - - return rwco.ret; - } -@@ -1394,7 +1425,7 @@ static void blk_aio_read_entry(void *opaque) - } - - assert(qiov->size == acb->bytes); -- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, -+ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, - qiov, rwco->flags); - blk_aio_complete(acb); - } -@@ -1412,8 +1443,8 @@ static void blk_aio_write_entry(void *opaque) - } - - assert(!qiov || qiov->size == acb->bytes); -- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, -- qiov, rwco->flags); -+ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, -+ 
qiov, 0, rwco->flags); - blk_aio_complete(acb); - } - -@@ -1498,7 +1529,9 @@ void blk_aio_cancel_async(BlockAIOCB *acb) - bdrv_aio_cancel_async(acb); - } - --int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn -+blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) - { - blk_wait_while_drained(blk); - -@@ -1514,8 +1547,7 @@ static void blk_ioctl_entry(void *opaque) - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - -- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, -- qiov->iov[0].iov_base); -+ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base); - aio_wait_kick(); - } - -@@ -1529,7 +1561,7 @@ static void blk_aio_ioctl_entry(void *opaque) - BlkAioEmAIOCB *acb = opaque; - BlkRwCo *rwco = &acb->rwco; - -- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); -+ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); - - blk_aio_complete(acb); - } -@@ -1540,7 +1572,9 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, - return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); - } - --int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn -+blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes) - { - int ret; - -@@ -1559,7 +1593,7 @@ static void blk_aio_pdiscard_entry(void *opaque) - BlkAioEmAIOCB *acb = opaque; - BlkRwCo *rwco = &acb->rwco; - -- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); -+ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); - blk_aio_complete(acb); - } - -@@ -1571,12 +1605,23 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, - cb, opaque); - } - -+int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) -+{ -+ int ret; -+ -+ blk_inc_in_flight(blk); 
-+ ret = blk_do_pdiscard(blk, offset, bytes); -+ blk_dec_in_flight(blk); -+ -+ return ret; -+} -+ - static void blk_pdiscard_entry(void *opaque) - { - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - -- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); -+ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size); - aio_wait_kick(); - } - -@@ -1585,7 +1630,8 @@ int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) - return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); - } - --int blk_co_flush(BlockBackend *blk) -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn blk_do_flush(BlockBackend *blk) - { - blk_wait_while_drained(blk); - -@@ -1601,7 +1647,7 @@ static void blk_aio_flush_entry(void *opaque) - BlkAioEmAIOCB *acb = opaque; - BlkRwCo *rwco = &acb->rwco; - -- rwco->ret = blk_co_flush(rwco->blk); -+ rwco->ret = blk_do_flush(rwco->blk); - blk_aio_complete(acb); - } - -@@ -1611,10 +1657,21 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk, - return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); - } - -+int coroutine_fn blk_co_flush(BlockBackend *blk) -+{ -+ int ret; -+ -+ blk_inc_in_flight(blk); -+ ret = blk_do_flush(blk); -+ blk_dec_in_flight(blk); -+ -+ return ret; -+} -+ - static void blk_flush_entry(void *opaque) - { - BlkRwCo *rwco = opaque; -- rwco->ret = blk_co_flush(rwco->blk); -+ rwco->ret = blk_do_flush(rwco->blk); - aio_wait_kick(); - } - -diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h -index b198dec..9bbdbd6 100644 ---- a/include/sysemu/block-backend.h -+++ b/include/sysemu/block-backend.h -@@ -171,7 +171,6 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes, - BlockCompletionFunc *cb, void *opaque); - void blk_aio_cancel(BlockAIOCB *acb); - void blk_aio_cancel_async(BlockAIOCB *acb); --int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf); - int 
blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); - BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, - BlockCompletionFunc *cb, void *opaque); --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch b/SOURCES/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch deleted file mode 100644 index 72c8986..0000000 --- a/SOURCES/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch +++ /dev/null @@ -1,65 +0,0 @@ -From f7dd953c2d0380cef3c351afb03d68c6fcda1dca Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:28 +0000 -Subject: [PATCH 08/20] block: Introduce 'bdrv_reopen_commit_post' step - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-3-kwolf@redhat.com> -Patchwork-id: 94278 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 02/13] block: Introduce 'bdrv_reopen_commit_post' step -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -From: Peter Krempa - -Add another step in the reopen process where driver can execute code -after permission changes are comitted. - -Signed-off-by: Peter Krempa -Message-Id: -Signed-off-by: Kevin Wolf -(cherry picked from commit 17e1e2be5f9e84e0298e28e70675655b43e225ea) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 9 +++++++++ - include/block/block_int.h | 1 + - 2 files changed, 10 insertions(+) - -diff --git a/block.c b/block.c -index e1a4e38..a744bb5 100644 ---- a/block.c -+++ b/block.c -@@ -3657,6 +3657,15 @@ cleanup_perm: - } - } - } -+ -+ if (ret == 0) { -+ QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { -+ BlockDriverState *bs = bs_entry->state.bs; -+ -+ if (bs->drv->bdrv_reopen_commit_post) -+ bs->drv->bdrv_reopen_commit_post(&bs_entry->state); -+ } -+ } - cleanup: - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { - if (ret) { -diff --git a/include/block/block_int.h b/include/block/block_int.h -index dd033d0..c168690 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -123,6 +123,7 @@ struct BlockDriver { - int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, - BlockReopenQueue *queue, Error **errp); - void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); -+ void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state); - void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); - void (*bdrv_join_options)(QDict *options, QDict *old_options); - --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-Make-bdrv_get_cumulative_perm-public.patch b/SOURCES/kvm-block-Make-bdrv_get_cumulative_perm-public.patch deleted file mode 100644 index 2f0f999..0000000 --- a/SOURCES/kvm-block-Make-bdrv_get_cumulative_perm-public.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 294ab4c4963295556d12ac15150b48c8536175a7 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:33 +0000 -Subject: [PATCH 13/20] block: Make bdrv_get_cumulative_perm() public - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-8-kwolf@redhat.com> -Patchwork-id: 94287 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 07/13] block: Make bdrv_get_cumulative_perm() public -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. 
Berrange -RH-Acked-by: Peter Krempa - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-2-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit c7a0f2be8f95b220cdadbba9a9236eaf115951dc) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block.c | 6 ++---- - include/block/block_int.h | 3 +++ - 2 files changed, 5 insertions(+), 4 deletions(-) - -diff --git a/block.c b/block.c -index 39e4647..354d388 100644 ---- a/block.c -+++ b/block.c -@@ -1850,8 +1850,6 @@ static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, - bool *tighten_restrictions, Error **errp); - static void bdrv_child_abort_perm_update(BdrvChild *c); - static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); --static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, -- uint64_t *shared_perm); - - typedef struct BlockReopenQueueEntry { - bool prepared; -@@ -2075,8 +2073,8 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, - } - } - --static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, -- uint64_t *shared_perm) -+void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, -+ uint64_t *shared_perm) - { - BdrvChild *c; - uint64_t cumulative_perms = 0; -diff --git a/include/block/block_int.h b/include/block/block_int.h -index c168690..96e327b 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -1228,6 +1228,9 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, - void *opaque, Error **errp); - void bdrv_root_unref_child(BdrvChild *child); - -+void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, -+ uint64_t *shared_perm); -+ - /** - * Sets a BdrvChild's permissions. 
Avoid if the parent is a BDS; use - * bdrv_child_refresh_perms() instead and make the parent's --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-Make-it-easier-to-learn-which-BDS-support-bitm.patch b/SOURCES/kvm-block-Make-it-easier-to-learn-which-BDS-support-bitm.patch deleted file mode 100644 index 0d4a000..0000000 --- a/SOURCES/kvm-block-Make-it-easier-to-learn-which-BDS-support-bitm.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 41d6c207c482093df8669f7cdcdb49bb25dba741 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 Jun 2020 02:34:12 +0100 -Subject: [PATCH 07/26] block: Make it easier to learn which BDS support - bitmaps -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Blake -Message-id: <20200602023420.2133649-5-eblake@redhat.com> -Patchwork-id: 97071 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 04/12] block: Make it easier to learn which BDS support bitmaps -Bugzilla: 1779893 1779904 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Upcoming patches will enhance bitmap support in qemu-img, but in doing -so, it turns out to be nice to suppress output when persistent bitmaps -make no sense (such as on a qcow2 v2 image). Add a hook to make this -easier to query. - -This patch adds a new callback .bdrv_supports_persistent_dirty_bitmap, -rather than trying to shoehorn the answer in via existing callbacks. -In particular, while it might have been possible to overload -.bdrv_co_can_store_new_dirty_bitmap to special-case a NULL input to -answer whether any persistent bitmaps are supported, that is at odds -with whether a particular bitmap can be stored (for example, even on -an image that supports persistent bitmaps but has currently filled up -the maximum number of bitmaps, attempts to store another one should -fail); and the new functionality doesn't require coroutine safety. 
-Similarly, we could have added one more piece of information to -.bdrv_get_info, but then again, most callers to that function tend to -already discard extraneous information, and making it a catch-all -rather than a series of dedicated scalar queries hasn't really -simplified life. - -In the future, when we improve the ability to look up bitmaps through -a filter, we will probably also want to teach the block layer to -automatically let filters pass this request on through. - -Signed-off-by: Eric Blake -Message-Id: <20200513011648.166876-4-eblake@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit ef893b5c84f3199d777e33966dc28839f71b1a5c) -Signed-off-by: Eric Blake -Signed-off-by: Danilo C. L. de Paula ---- - block/dirty-bitmap.c | 9 +++++++++ - block/qcow2-bitmap.c | 7 +++++++ - block/qcow2.c | 2 ++ - block/qcow2.h | 1 + - include/block/block_int.h | 1 + - include/block/dirty-bitmap.h | 1 + - 6 files changed, 21 insertions(+) - -diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c -index 7039e82..2f96acc 100644 ---- a/block/dirty-bitmap.c -+++ b/block/dirty-bitmap.c -@@ -478,6 +478,15 @@ int bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name, - } - } - -+bool -+bdrv_supports_persistent_dirty_bitmap(BlockDriverState *bs) -+{ -+ if (bs->drv && bs->drv->bdrv_supports_persistent_dirty_bitmap) { -+ return bs->drv->bdrv_supports_persistent_dirty_bitmap(bs); -+ } -+ return false; -+} -+ - static bool coroutine_fn - bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, - uint32_t granularity, Error **errp) -diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c -index c6c8ebb..cbac905 100644 ---- a/block/qcow2-bitmap.c -+++ b/block/qcow2-bitmap.c -@@ -1759,3 +1759,10 @@ fail: - name, bdrv_get_device_or_node_name(bs)); - return false; - } -+ -+bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs) -+{ -+ BDRVQcow2State *s = bs->opaque; -+ -+ return s->qcow_version >= 3; -+} -diff 
--git a/block/qcow2.c b/block/qcow2.c -index af0ad4a..36b0f7d 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -5551,6 +5551,8 @@ BlockDriver bdrv_qcow2 = { - .bdrv_detach_aio_context = qcow2_detach_aio_context, - .bdrv_attach_aio_context = qcow2_attach_aio_context, - -+ .bdrv_supports_persistent_dirty_bitmap = -+ qcow2_supports_persistent_dirty_bitmap, - .bdrv_co_can_store_new_dirty_bitmap = qcow2_co_can_store_new_dirty_bitmap, - .bdrv_co_remove_persistent_dirty_bitmap = - qcow2_co_remove_persistent_dirty_bitmap, -diff --git a/block/qcow2.h b/block/qcow2.h -index 0942126..ceb1ceb 100644 ---- a/block/qcow2.h -+++ b/block/qcow2.h -@@ -767,6 +767,7 @@ bool qcow2_co_can_store_new_dirty_bitmap(BlockDriverState *bs, - int qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, - const char *name, - Error **errp); -+bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs); - - ssize_t coroutine_fn - qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size, -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 562dca1..cc18e8d 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -568,6 +568,7 @@ struct BlockDriver { - uint64_t parent_perm, uint64_t parent_shared, - uint64_t *nperm, uint64_t *nshared); - -+ bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); - bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs, - const char *name, - uint32_t granularity, -diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h -index e2b20ec..f6e9a38 100644 ---- a/include/block/dirty-bitmap.h -+++ b/include/block/dirty-bitmap.h -@@ -16,6 +16,7 @@ typedef enum BitmapCheckFlags { - - #define BDRV_BITMAP_MAX_NAME_SIZE 1023 - -+bool bdrv_supports_persistent_dirty_bitmap(BlockDriverState *bs); - BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, - uint32_t granularity, - const char *name, --- -1.8.3.1 - diff --git 
a/SOURCES/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch b/SOURCES/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch deleted file mode 100644 index de85205..0000000 --- a/SOURCES/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 9ba321e18a357c1a3a238ceee301bbb174f96eee Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:34 +0000 -Subject: [PATCH 14/20] block: Relax restrictions for blockdev-snapshot - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-9-kwolf@redhat.com> -Patchwork-id: 94285 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 08/13] block: Relax restrictions for blockdev-snapshot -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -blockdev-snapshot returned an error if the overlay was already in use, -which it defined as having any BlockBackend parent. This is in fact both -too strict (some parents can tolerate the change of visible data caused -by attaching a backing file) and too loose (some non-BlockBackend -parents may not be happy with it). - -One important use case that is prevented by the too strict check is live -storage migration with blockdev-mirror. Here, the target node is -usually opened without a backing file so that the active layer is -mirrored while its backing chain can be copied in the background. - -The backing chain should be attached to the mirror target node when -finalising the job, just before switching the users of the source node -to the new copy (at which point the mirror job still has a reference to -the node). drive-mirror did this automatically, but with blockdev-mirror -this is the job of the QMP client, so it needs a way to do this. - -blockdev-snapshot is the obvious way, so this patch makes it work in -this scenario. The new condition is that no parent uses CONSISTENT_READ -permissions. 
This will ensure that the operation will still be blocked -when the node is attached to the guest device, so blockdev-snapshot -remains safe. - -(For the sake of completeness, x-blockdev-reopen can be used to achieve -the same, however it is a big hammer, performs the graph change -completely unchecked and is still experimental. So even with the option -of using x-blockdev-reopen, there are reasons why blockdev-snapshot -should be able to perform this operation.) - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-3-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Tested-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit d29d3d1f80b3947fb26e7139645c83de66d146a9) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 14 ++++++++------ - tests/qemu-iotests/085.out | 4 ++-- - 2 files changed, 10 insertions(+), 8 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 4cd9a58..7918533 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1536,6 +1536,7 @@ static void external_snapshot_prepare(BlkActionState *common, - TransactionAction *action = common->action; - AioContext *aio_context; - AioContext *old_context; -+ uint64_t perm, shared; - int ret; - - /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar -@@ -1656,16 +1657,17 @@ static void external_snapshot_prepare(BlkActionState *common, - goto out; - } - -- if (bdrv_has_blk(state->new_bs)) { -+ /* -+ * Allow attaching a backing file to an overlay that's already in use only -+ * if the parents don't assume that they are already seeing a valid image. -+ * (Specifically, allow it as a mirror target, which is write-only access.) 
-+ */ -+ bdrv_get_cumulative_perm(state->new_bs, &perm, &shared); -+ if (perm & BLK_PERM_CONSISTENT_READ) { - error_setg(errp, "The overlay is already in use"); - goto out; - } - -- if (bdrv_op_is_blocked(state->new_bs, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, -- errp)) { -- goto out; -- } -- - if (state->new_bs->backing != NULL) { - error_setg(errp, "The overlay already has a backing image"); - goto out; -diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out -index bb50227..487d920 100644 ---- a/tests/qemu-iotests/085.out -+++ b/tests/qemu-iotests/085.out -@@ -82,7 +82,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ - === Invalid command - cannot create a snapshot using a file BDS === - - { 'execute': 'blockdev-snapshot', 'arguments': { 'node':'virtio0', 'overlay':'file_12' } } --{"error": {"class": "GenericError", "desc": "The overlay does not support backing images"}} -+{"error": {"class": "GenericError", "desc": "The overlay is already in use"}} - - === Invalid command - snapshot node used as active layer === - -@@ -96,7 +96,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ - === Invalid command - snapshot node used as backing hd === - - { 'execute': 'blockdev-snapshot', 'arguments': { 'node': 'virtio0', 'overlay':'snap_11' } } --{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'snap_12'"}} -+{"error": {"class": "GenericError", "desc": "The overlay is already in use"}} - - === Invalid command - snapshot node has a backing image === - --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-Require-aligned-image-size-to-avoid-assertion-.patch b/SOURCES/kvm-block-Require-aligned-image-size-to-avoid-assertion-.patch deleted file mode 100644 index 1188911..0000000 --- a/SOURCES/kvm-block-Require-aligned-image-size-to-avoid-assertion-.patch +++ /dev/null @@ -1,77 +0,0 @@ -From e191ab6358b656764374ff1b3c7224a744dc902a Mon Sep 17 00:00:00 2001 -From: 
Kevin Wolf -Date: Tue, 26 Jan 2021 17:21:02 -0500 -Subject: [PATCH 7/9] block: Require aligned image size to avoid assertion - failure - -RH-Author: Kevin Wolf -Message-id: <20210126172103.136060-2-kwolf@redhat.com> -Patchwork-id: 100786 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/2] block: Require aligned image size to avoid assertion failure -Bugzilla: 1834281 -RH-Acked-by: Markus Armbruster -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -Unaligned requests will automatically be aligned to bl.request_alignment -and we can't extend write requests to access space beyond the end of the -image without resizing the image, so if we have the WRITE permission, -but not the RESIZE one, it's required that the image size is aligned. - -Failing to meet this requirement could cause assertion failures like -this if RESIZE permissions weren't requested: - -qemu-img: block/io.c:1910: bdrv_co_write_req_prepare: Assertion `end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE' failed. - -This was e.g. triggered by qemu-img converting to a target image with 4k -request alignment when the image was only aligned to 512 bytes, but not -to 4k. - -Turn this into a graceful error in bdrv_check_perm() so that WRITE -without RESIZE can only be taken if the image size is aligned. If a user -holds both permissions and drops only RESIZE, the function will return -an error, but bdrv_child_try_set_perm() will ignore the failure silently -if permissions are only requested to be relaxed and just keep both -permissions while returning success. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20200716142601.111237-2-kwolf@redhat.com> -Reviewed-by: Max Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 9c60a5d1978e6dcf85c0e01b50e6f7f54ca09104) -Signed-off-by: Kevin Wolf -Signed-off-by: Jon Maloy ---- - block.c | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/block.c b/block.c -index 57740d312e..e9579ddf84 100644 ---- a/block.c -+++ b/block.c -@@ -2009,6 +2009,22 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, - return -EPERM; - } - -+ /* -+ * Unaligned requests will automatically be aligned to bl.request_alignment -+ * and without RESIZE we can't extend requests to write to space beyond the -+ * end of the image, so it's required that the image size is aligned. -+ */ -+ if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && -+ !(cumulative_perms & BLK_PERM_RESIZE)) -+ { -+ if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) { -+ error_setg(errp, "Cannot get 'write' permission without 'resize': " -+ "Image size is not a multiple of request " -+ "alignment"); -+ return -EPERM; -+ } -+ } -+ - /* Check this node */ - if (!drv) { - return 0; --- -2.18.2 - diff --git a/SOURCES/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/SOURCES/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch deleted file mode 100644 index ea796d5..0000000 --- a/SOURCES/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 371d312300251c0dc24522607b06b7e47e760b53 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:32 +0000 -Subject: [PATCH 12/20] block: Versioned x-blockdev-reopen API with feature - flag - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-7-kwolf@redhat.com> -Patchwork-id: 94283 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 06/13] block: Versioned x-blockdev-reopen API with feature flag -Bugzilla: 1790482 1805143 -RH-Acked-by: Eric 
Blake -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -x-blockdev-reopen is still considered unstable upstream. libvirt needs -(a small subset of) it for incremental backups, though. - -Add a downstream-only feature flag that effectively makes this a -versioned interface. As long as the feature is present, we promise that -we won't change the interface incompatibly. Incompatible changes to the -command will require us to drop the feature flag (and possibly introduce -a new one if the new version is still not stable upstream). - -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - qapi/block-core.json | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 0cf68fe..a1e85b0 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -4202,10 +4202,17 @@ - # image does not have a default backing file name as part of its - # metadata. - # -+# Features: -+# @__com.redhat_rhel-av-8_2_0-api: Versioning the downstream interface while -+# it's still unstable upstream. As long as -+# this flag is present, this command will not -+# change incompatibly. -+# - # Since: 4.0 - ## - { 'command': 'x-blockdev-reopen', -- 'data': 'BlockdevOptions', 'boxed': true } -+ 'data': 'BlockdevOptions', 'boxed': true, -+ 'features': [ '__com.redhat_rhel-av-8_2_0-api' ] } - - ## - # @blockdev-del: --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-always-fill-entire-LUKS-header-space-with-zero.patch b/SOURCES/kvm-block-always-fill-entire-LUKS-header-space-with-zero.patch deleted file mode 100644 index d1511d2..0000000 --- a/SOURCES/kvm-block-always-fill-entire-LUKS-header-space-with-zero.patch +++ /dev/null @@ -1,308 +0,0 @@ -From 67f36d057aa71ca56ebc17ef28a7cb70bac6c6b6 Mon Sep 17 00:00:00 2001 -From: "Daniel P. 
Berrange" -Date: Tue, 5 May 2020 16:46:01 +0100 -Subject: [PATCH 01/12] block: always fill entire LUKS header space with zeros -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrange -Message-id: <20200505164601.1059974-2-berrange@redhat.com> -Patchwork-id: 96277 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/1] block: always fill entire LUKS header space with zeros -Bugzilla: 1775462 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: John Snow -RH-Acked-by: Stefan Hajnoczi - -When initializing the LUKS header the size with default encryption -parameters will currently be 2068480 bytes. This is rounded up to -a multiple of the cluster size, 2081792, with 64k sectors. If the -end of the header is not the same as the end of the cluster we fill -the extra space with zeros. This was forgetting that not even the -space allocated for the header will be fully initialized, as we -only write key material for the first key slot. The space left -for the other 7 slots is never written to. - -An optimization to the ref count checking code: - - commit a5fff8d4b4d928311a5005efa12d0991fe3b66f9 (refs/bisect/bad) - Author: Vladimir Sementsov-Ogievskiy - Date: Wed Feb 27 16:14:30 2019 +0300 - - qcow2-refcount: avoid eating RAM - -made the assumption that every cluster which was allocated would -have at least some data written to it. This was violated by way -the LUKS header is only partially written, with much space simply -reserved for future use. - -Depending on the cluster size this problem was masked by the -logic which wrote zeros between the end of the LUKS header and -the end of the cluster. 
- -$ qemu-img create --object secret,id=cluster_encrypt0,data=123456 \ - -f qcow2 -o cluster_size=2k,encrypt.iter-time=1,\ - encrypt.format=luks,encrypt.key-secret=cluster_encrypt0 \ - cluster_size_check.qcow2 100M - Formatting 'cluster_size_check.qcow2', fmt=qcow2 size=104857600 - encrypt.format=luks encrypt.key-secret=cluster_encrypt0 - encrypt.iter-time=1 cluster_size=2048 lazy_refcounts=off refcount_bits=16 - -$ qemu-img check --object secret,id=cluster_encrypt0,data=redhat \ - 'json:{"driver": "qcow2", "encrypt.format": "luks", \ - "encrypt.key-secret": "cluster_encrypt0", \ - "file.driver": "file", "file.filename": "cluster_size_check.qcow2"}' -ERROR: counting reference for region exceeding the end of the file by one cluster or more: offset 0x2000 size 0x1f9000 -Leaked cluster 4 refcount=1 reference=0 -...snip... -Leaked cluster 130 refcount=1 reference=0 - -1 errors were found on the image. -Data may be corrupted, or further writes to the image may corrupt it. - -127 leaked clusters were found on the image. -This means waste of disk space, but no harm to data. -Image end offset: 268288 - -The problem only exists when the disk image is entirely empty. Writing -data to the disk image payload will solve the problem by causing the -end of the file to be extended further. - -The change fixes it by ensuring that the entire allocated LUKS header -region is fully initialized with zeros. The qemu-img check will still -fail for any pre-existing disk images created prior to this change, -unless at least 1 byte of the payload is written to. - -Fully writing zeros to the entire LUKS header is a good idea regardless -as it ensures that space has been allocated on the host filesystem (or -whatever block storage backend is used). - -Signed-off-by: Daniel P. Berrangé -Message-Id: <20200207135520.2669430-1-berrange@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Max Reitz -(cherry picked from commit 087ab8e775f48766068e65de1bc99d03b40d1670) -Signed-off-by: Danilo C. L. 
de Paula - -Conflicts: - tests/qemu-iotests/group: no test 283 in downstream - -Signed-off-by: Danilo C. L. de Paula ---- - block/qcow2.c | 11 ++++-- - tests/qemu-iotests/284 | 97 ++++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/284.out | 62 +++++++++++++++++++++++++++++ - tests/qemu-iotests/group | 1 + - 4 files changed, 167 insertions(+), 4 deletions(-) - create mode 100755 tests/qemu-iotests/284 - create mode 100644 tests/qemu-iotests/284.out - -diff --git a/block/qcow2.c b/block/qcow2.c -index 71067c6..af0ad4a 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -135,13 +135,16 @@ static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, - s->crypto_header.length = headerlen; - s->crypto_header.offset = ret; - -- /* Zero fill remaining space in cluster so it has predictable -- * content in case of future spec changes */ -+ /* -+ * Zero fill all space in cluster so it has predictable -+ * content, as we may not initialize some regions of the -+ * header (eg only 1 out of 8 key slots will be initialized) -+ */ - clusterlen = size_to_clusters(s, headerlen) * s->cluster_size; - assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0); - ret = bdrv_pwrite_zeroes(bs->file, -- ret + headerlen, -- clusterlen - headerlen, 0); -+ ret, -+ clusterlen, 0); - if (ret < 0) { - error_setg_errno(errp, -ret, "Could not zero fill encryption header"); - return -1; -diff --git a/tests/qemu-iotests/284 b/tests/qemu-iotests/284 -new file mode 100755 -index 0000000..071e89b ---- /dev/null -+++ b/tests/qemu-iotests/284 -@@ -0,0 +1,97 @@ -+#!/usr/bin/env bash -+# -+# Test ref count checks on encrypted images -+# -+# Copyright (C) 2019 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. 
-+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+# creator -+owner=berrange@redhat.com -+ -+seq=`basename $0` -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+. ./common.rc -+. ./common.filter -+ -+_supported_fmt qcow2 -+_supported_proto generic -+_supported_os Linux -+ -+ -+size=1M -+ -+SECRET="secret,id=sec0,data=astrochicken" -+ -+IMGSPEC="driver=$IMGFMT,file.filename=$TEST_IMG,encrypt.key-secret=sec0" -+QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT -+ -+_run_test() -+{ -+ IMGOPTSSYNTAX=true -+ OLD_TEST_IMG="$TEST_IMG" -+ TEST_IMG="driver=$IMGFMT,file.filename=$TEST_IMG,encrypt.key-secret=sec0" -+ QEMU_IMG_EXTRA_ARGS="--image-opts --object $SECRET" -+ -+ echo -+ echo "== cluster size $csize" -+ echo "== checking image refcounts ==" -+ _check_test_img -+ -+ echo -+ echo "== writing some data ==" -+ $QEMU_IO -c "write -P 0x9 0 1" $QEMU_IMG_EXTRA_ARGS $TEST_IMG | _filter_qemu_io | _filter_testdir -+ echo -+ echo "== rechecking image refcounts ==" -+ _check_test_img -+ -+ echo -+ echo "== writing some more data ==" -+ $QEMU_IO -c "write -P 0x9 $csize 1" $QEMU_IMG_EXTRA_ARGS $TEST_IMG | _filter_qemu_io | _filter_testdir -+ echo -+ echo "== rechecking image refcounts ==" -+ _check_test_img -+ -+ TEST_IMG="$OLD_TEST_IMG" -+ QEMU_IMG_EXTRA_ARGS= -+ IMGOPTSSYNTAX= -+} -+ -+ -+echo -+echo "testing LUKS qcow2 encryption" -+echo -+ -+for csize in 512 2048 32768 -+do -+ _make_test_img --object $SECRET -o "encrypt.format=luks,encrypt.key-secret=sec0,encrypt.iter-time=10,cluster_size=$csize" 
$size -+ _run_test -+ _cleanup_test_img -+done -+ -+# success, all done -+echo "*** done" -+rm -f $seq.full -+status=0 -diff --git a/tests/qemu-iotests/284.out b/tests/qemu-iotests/284.out -new file mode 100644 -index 0000000..48216f5 ---- /dev/null -+++ b/tests/qemu-iotests/284.out -@@ -0,0 +1,62 @@ -+QA output created by 284 -+ -+testing LUKS qcow2 encryption -+ -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 encrypt.format=luks encrypt.key-secret=sec0 encrypt.iter-time=10 -+ -+== cluster size 512 -+== checking image refcounts == -+No errors were found on the image. -+ -+== writing some data == -+wrote 1/1 bytes at offset 0 -+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== rechecking image refcounts == -+No errors were found on the image. -+ -+== writing some more data == -+wrote 1/1 bytes at offset 512 -+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== rechecking image refcounts == -+No errors were found on the image. -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 encrypt.format=luks encrypt.key-secret=sec0 encrypt.iter-time=10 -+ -+== cluster size 2048 -+== checking image refcounts == -+No errors were found on the image. -+ -+== writing some data == -+wrote 1/1 bytes at offset 0 -+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== rechecking image refcounts == -+No errors were found on the image. -+ -+== writing some more data == -+wrote 1/1 bytes at offset 2048 -+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== rechecking image refcounts == -+No errors were found on the image. -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 encrypt.format=luks encrypt.key-secret=sec0 encrypt.iter-time=10 -+ -+== cluster size 32768 -+== checking image refcounts == -+No errors were found on the image. 
-+ -+== writing some data == -+wrote 1/1 bytes at offset 0 -+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== rechecking image refcounts == -+No errors were found on the image. -+ -+== writing some more data == -+wrote 1/1 bytes at offset 32768 -+1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== rechecking image refcounts == -+No errors were found on the image. -+*** done -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index e47cbfc..9c565cf 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -289,3 +289,4 @@ - 277 rw quick - 280 rw migration quick - 281 rw quick -+284 rw --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-backend-Add-flags-to-blk_truncate.patch b/SOURCES/kvm-block-backend-Add-flags-to-blk_truncate.patch deleted file mode 100644 index 5b212fc..0000000 --- a/SOURCES/kvm-block-backend-Add-flags-to-blk_truncate.patch +++ /dev/null @@ -1,294 +0,0 @@ -From 07a93e74efa4861f54dd3d4bec01885f7af2fee3 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 8 Jun 2020 17:01:32 +0200 -Subject: [PATCH 04/17] block-backend: Add flags to blk_truncate() - -RH-Author: Kevin Wolf -Message-id: <20200608150140.38218-4-kwolf@redhat.com> -Patchwork-id: 97450 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 03/11] block-backend: Add flags to blk_truncate() -Bugzilla: 1780574 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz - -Now that node level interface bdrv_truncate() supports passing request -flags to the block driver, expose this on the BlockBackend level, too. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Alberto Garcia -Reviewed-by: Max Reitz -Message-Id: <20200424125448.63318-4-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 8c6242b6f383e43fd11d2c50f8bcdd2bba1100fc) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 3 ++- - block/block-backend.c | 4 ++-- - block/commit.c | 4 ++-- - block/crypto.c | 2 +- - block/mirror.c | 2 +- - block/qcow2.c | 4 ++-- - block/qed.c | 2 +- - block/vdi.c | 2 +- - block/vhdx.c | 4 ++-- - block/vmdk.c | 6 +++--- - block/vpc.c | 2 +- - blockdev.c | 2 +- - include/sysemu/block-backend.h | 2 +- - qemu-img.c | 2 +- - qemu-io-cmds.c | 2 +- - 15 files changed, 22 insertions(+), 21 deletions(-) - -diff --git a/block.c b/block.c -index d6a05da..12c8941 100644 ---- a/block.c -+++ b/block.c -@@ -547,7 +547,8 @@ static int64_t create_file_fallback_truncate(BlockBackend *blk, - int64_t size; - int ret; - -- ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err); -+ ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0, -+ &local_err); - if (ret < 0 && ret != -ENOTSUP) { - error_propagate(errp, local_err); - return ret; -diff --git a/block/block-backend.c b/block/block-backend.c -index 8be2006..17ed6d8 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -2137,14 +2137,14 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, - } - - int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, -- PreallocMode prealloc, Error **errp) -+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) - { - if (!blk_is_available(blk)) { - error_setg(errp, "No medium inserted"); - return -ENOMEDIUM; - } - -- return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp); -+ return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp); - } - - int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, -diff --git a/block/commit.c b/block/commit.c -index 23c90b3..075ebf8 100644 ---- a/block/commit.c -+++ b/block/commit.c -@@ -155,7 +155,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) - } - - if (base_len < len) { -- ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, NULL); -+ ret = blk_truncate(s->base, len, false, 
PREALLOC_MODE_OFF, 0, NULL); - if (ret) { - goto out; - } -@@ -471,7 +471,7 @@ int bdrv_commit(BlockDriverState *bs) - * grow the backing file image if possible. If not possible, - * we must return an error */ - if (length > backing_length) { -- ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, -+ ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, 0, - &local_err); - if (ret < 0) { - error_report_err(local_err); -diff --git a/block/crypto.c b/block/crypto.c -index fcb4a97..83a8fc0 100644 ---- a/block/crypto.c -+++ b/block/crypto.c -@@ -115,7 +115,7 @@ static ssize_t block_crypto_init_func(QCryptoBlock *block, - * which will be used by the crypto header - */ - return blk_truncate(data->blk, data->size + headerlen, false, -- data->prealloc, errp); -+ data->prealloc, 0, errp); - } - - -diff --git a/block/mirror.c b/block/mirror.c -index 0d32fca..c8028cd 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -886,7 +886,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - if (s->base == blk_bs(s->target)) { - if (s->bdev_length > target_length) { - ret = blk_truncate(s->target, s->bdev_length, false, -- PREALLOC_MODE_OFF, NULL); -+ PREALLOC_MODE_OFF, 0, NULL); - if (ret < 0) { - goto immediate_exit; - } -diff --git a/block/qcow2.c b/block/qcow2.c -index c0fdcb9..86aa74a 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -3497,7 +3497,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) - - /* Okay, now that we have a valid image, let's give it the right size */ - ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation, -- errp); -+ 0, errp); - if (ret < 0) { - error_prepend(errp, "Could not resize image: "); - goto out; -@@ -5347,7 +5347,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, - * Amending image options should ensure that the image has - * exactly the given new values, so pass exact=true here. 
- */ -- ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp); -+ ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, 0, errp); - blk_unref(blk); - if (ret < 0) { - return ret; -diff --git a/block/qed.c b/block/qed.c -index fb6100b..b0fdb8f 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -677,7 +677,7 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, - * The QED format associates file length with allocation status, - * so a new file (which is empty) must have a length of 0. - */ -- ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, errp); -+ ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, 0, errp); - if (ret < 0) { - goto out; - } -diff --git a/block/vdi.c b/block/vdi.c -index e1a11f2..0c7835a 100644 ---- a/block/vdi.c -+++ b/block/vdi.c -@@ -875,7 +875,7 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, - - if (image_type == VDI_TYPE_STATIC) { - ret = blk_truncate(blk, offset + blocks * block_size, false, -- PREALLOC_MODE_OFF, errp); -+ PREALLOC_MODE_OFF, 0, errp); - if (ret < 0) { - error_prepend(errp, "Failed to statically allocate file"); - goto exit; -diff --git a/block/vhdx.c b/block/vhdx.c -index 5dfbb20..21497f7 100644 ---- a/block/vhdx.c -+++ b/block/vhdx.c -@@ -1703,13 +1703,13 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s, - /* All zeroes, so we can just extend the file - the end of the BAT - * is the furthest thing we have written yet */ - ret = blk_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF, -- errp); -+ 0, errp); - if (ret < 0) { - goto exit; - } - } else if (type == VHDX_TYPE_FIXED) { - ret = blk_truncate(blk, data_file_offset + image_size, false, -- PREALLOC_MODE_OFF, errp); -+ PREALLOC_MODE_OFF, 0, errp); - if (ret < 0) { - goto exit; - } -diff --git a/block/vmdk.c b/block/vmdk.c -index 1bbf937..1bd3991 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -2118,7 +2118,7 @@ static int vmdk_init_extent(BlockBackend *blk, - int 
gd_buf_size; - - if (flat) { -- ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, errp); -+ ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp); - goto exit; - } - magic = cpu_to_be32(VMDK4_MAGIC); -@@ -2182,7 +2182,7 @@ static int vmdk_init_extent(BlockBackend *blk, - } - - ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false, -- PREALLOC_MODE_OFF, errp); -+ PREALLOC_MODE_OFF, 0, errp); - if (ret < 0) { - goto exit; - } -@@ -2523,7 +2523,7 @@ static int coroutine_fn vmdk_co_do_create(int64_t size, - /* bdrv_pwrite write padding zeros to align to sector, we don't need that - * for description file */ - if (desc_offset == 0) { -- ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, errp); -+ ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp); - if (ret < 0) { - goto exit; - } -diff --git a/block/vpc.c b/block/vpc.c -index 6df75e2..d5e7dc8 100644 ---- a/block/vpc.c -+++ b/block/vpc.c -@@ -898,7 +898,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, - /* Add footer to total size */ - total_size += HEADER_SIZE; - -- ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, errp); -+ ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp); - if (ret < 0) { - return ret; - } -diff --git a/blockdev.c b/blockdev.c -index 5128c9b..6dde52a 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3055,7 +3055,7 @@ void qmp_block_resize(bool has_device, const char *device, - } - - bdrv_drained_begin(bs); -- ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, errp); -+ ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp); - bdrv_drained_end(bs); - - out: -diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h -index 9bbdbd6..34de7fa 100644 ---- a/include/sysemu/block-backend.h -+++ b/include/sysemu/block-backend.h -@@ -237,7 +237,7 @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int blk_pwrite_compressed(BlockBackend 
*blk, int64_t offset, const void *buf, - int bytes); - int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, -- PreallocMode prealloc, Error **errp); -+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); - int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes); - int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, - int64_t pos, int size); -diff --git a/qemu-img.c b/qemu-img.c -index 6dc881b..a27ad70 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -3939,7 +3939,7 @@ static int img_resize(int argc, char **argv) - * resizing, so pass @exact=true. It is of no use to report - * success when the image has not actually been resized. - */ -- ret = blk_truncate(blk, total_size, true, prealloc, &err); -+ ret = blk_truncate(blk, total_size, true, prealloc, 0, &err); - if (!ret) { - qprintf(quiet, "Image resized.\n"); - } else { -diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c -index 1b7e700..851f07e 100644 ---- a/qemu-io-cmds.c -+++ b/qemu-io-cmds.c -@@ -1715,7 +1715,7 @@ static int truncate_f(BlockBackend *blk, int argc, char **argv) - * exact=true. It is better to err on the "emit more errors" side - * than to be overly permissive. 
- */ -- ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, &local_err); -+ ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, 0, &local_err); - if (ret < 0) { - error_report_err(local_err); - return ret; --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch b/SOURCES/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch deleted file mode 100644 index 9d49cfa..0000000 --- a/SOURCES/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch +++ /dev/null @@ -1,158 +0,0 @@ -From 6cc456c4c1e6557fdc7e138e8ef8171b71609222 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:15 +0100 -Subject: [PATCH 4/6] block-backend: Reorder flush/pdiscard function - definitions - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-5-kwolf@redhat.com> -Patchwork-id: 94598 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/6] block-backend: Reorder flush/pdiscard function definitions -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -Move all variants of the flush/pdiscard functions to a single place and -put the blk_co_*() version first because it is called by all other -variants (and will become static in the next patch). - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Max Reitz -Message-Id: <20200407121259.21350-2-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 564806c529d4e0acad209b1e5b864a8886092f1f) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/block-backend.c | 92 +++++++++++++++++++++++++-------------------------- - 1 file changed, 46 insertions(+), 46 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 8b8f2a8..17b2e87 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -1488,38 +1488,6 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, - blk_aio_write_entry, flags, cb, opaque); - } - --static void blk_aio_flush_entry(void *opaque) --{ -- BlkAioEmAIOCB *acb = opaque; -- BlkRwCo *rwco = &acb->rwco; -- -- rwco->ret = blk_co_flush(rwco->blk); -- blk_aio_complete(acb); --} -- --BlockAIOCB *blk_aio_flush(BlockBackend *blk, -- BlockCompletionFunc *cb, void *opaque) --{ -- return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); --} -- --static void blk_aio_pdiscard_entry(void *opaque) --{ -- BlkAioEmAIOCB *acb = opaque; -- BlkRwCo *rwco = &acb->rwco; -- -- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); -- blk_aio_complete(acb); --} -- --BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, -- int64_t offset, int bytes, -- BlockCompletionFunc *cb, void *opaque) --{ -- return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, -- cb, opaque); --} -- - void blk_aio_cancel(BlockAIOCB *acb) - { - bdrv_aio_cancel(acb); -@@ -1586,6 +1554,37 @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) - return bdrv_co_pdiscard(blk->root, offset, bytes); - } - -+static void blk_aio_pdiscard_entry(void *opaque) -+{ -+ BlkAioEmAIOCB *acb = opaque; -+ BlkRwCo *rwco = &acb->rwco; -+ -+ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); -+ blk_aio_complete(acb); -+} -+ -+BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, -+ int64_t offset, int bytes, -+ BlockCompletionFunc *cb, void *opaque) -+{ -+ return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, -+ cb, opaque); -+} -+ -+static void blk_pdiscard_entry(void *opaque) -+{ -+ BlkRwCo *rwco = opaque; -+ 
QEMUIOVector *qiov = rwco->iobuf; -+ -+ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); -+ aio_wait_kick(); -+} -+ -+int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) -+{ -+ return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); -+} -+ - int blk_co_flush(BlockBackend *blk) - { - blk_wait_while_drained(blk); -@@ -1597,6 +1596,21 @@ int blk_co_flush(BlockBackend *blk) - return bdrv_co_flush(blk_bs(blk)); - } - -+static void blk_aio_flush_entry(void *opaque) -+{ -+ BlkAioEmAIOCB *acb = opaque; -+ BlkRwCo *rwco = &acb->rwco; -+ -+ rwco->ret = blk_co_flush(rwco->blk); -+ blk_aio_complete(acb); -+} -+ -+BlockAIOCB *blk_aio_flush(BlockBackend *blk, -+ BlockCompletionFunc *cb, void *opaque) -+{ -+ return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); -+} -+ - static void blk_flush_entry(void *opaque) - { - BlkRwCo *rwco = opaque; -@@ -2083,20 +2097,6 @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, - return bdrv_truncate(blk->root, offset, exact, prealloc, errp); - } - --static void blk_pdiscard_entry(void *opaque) --{ -- BlkRwCo *rwco = opaque; -- QEMUIOVector *qiov = rwco->iobuf; -- -- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); -- aio_wait_kick(); --} -- --int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) --{ -- return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); --} -- - int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, - int64_t pos, int size) - { --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch b/SOURCES/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch new file mode 100644 index 0000000..52d37d8 --- /dev/null +++ b/SOURCES/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch @@ -0,0 +1,129 @@ +From bf4c15a3debbe68b6eb25c52174843470a9c014f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 11 Jan 2022 15:36:12 +0000 +Subject: [PATCH 3/6] block-backend: 
prevent dangling BDS pointers across + aio_poll() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 109: block-backend: prevent dangling BDS pointers across aio_poll() +RH-Commit: [1/2] da5a59eddff0dc10be7de8e291fa675143d11d73 +RH-Bugzilla: 2021778 2036178 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Kevin Wolf + +The BlockBackend root child can change when aio_poll() is invoked. This +happens when a temporary filter node is removed upon blockjob +completion, for example. + +Functions in block/block-backend.c must be aware of this when using a +blk_bs() pointer across aio_poll() because the BlockDriverState refcnt +may reach 0, resulting in a stale pointer. + +One example is scsi_device_purge_requests(), which calls blk_drain() to +wait for in-flight requests to cancel. If the backup blockjob is active, +then the BlockBackend root child is a temporary filter BDS owned by the +blockjob. The blockjob can complete during bdrv_drained_begin() and the +last reference to the BDS is released when the temporary filter node is +removed. This results in a use-after-free when blk_drain() calls +bdrv_drained_end(bs) on the dangling pointer. + +Explicitly hold a reference to bs across block APIs that invoke +aio_poll(). 
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2021778 +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178 +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220111153613.25453-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 1e3552dbd28359d35967b7c28dc86cde1bc29205) +Signed-off-by: Stefan Hajnoczi +--- + block/block-backend.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 12ef80ea17..23e727199b 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -822,16 +822,22 @@ BlockBackend *blk_by_public(BlockBackendPublic *public) + void blk_remove_bs(BlockBackend *blk) + { + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; +- BlockDriverState *bs; + BdrvChild *root; + + notifier_list_notify(&blk->remove_bs_notifiers, blk); + if (tgm->throttle_state) { +- bs = blk_bs(blk); ++ BlockDriverState *bs = blk_bs(blk); ++ ++ /* ++ * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for ++ * example, if a temporary filter node is removed by a blockjob. 
++ */ ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + throttle_group_detach_aio_context(tgm); + throttle_group_attach_aio_context(tgm, qemu_get_aio_context()); + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + + blk_update_root_state(blk); +@@ -1705,6 +1711,7 @@ void blk_drain(BlockBackend *blk) + BlockDriverState *bs = blk_bs(blk); + + if (bs) { ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + } + +@@ -1714,6 +1721,7 @@ void blk_drain(BlockBackend *blk) + + if (bs) { + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + } + +@@ -2044,10 +2052,13 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + int ret; + + if (bs) { ++ bdrv_ref(bs); ++ + if (update_root_node) { + ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root, + errp); + if (ret < 0) { ++ bdrv_unref(bs); + return ret; + } + } +@@ -2057,6 +2068,8 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + throttle_group_attach_aio_context(tgm, new_context); + bdrv_drained_end(bs); + } ++ ++ bdrv_unref(bs); + } + + blk->ctx = new_context; +@@ -2326,11 +2339,13 @@ void blk_io_limits_disable(BlockBackend *blk) + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + assert(tgm->throttle_state); + if (bs) { ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + } + throttle_group_unregister_tgm(tgm); + if (bs) { + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch b/SOURCES/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch deleted file mode 100644 index 45f506c..0000000 --- a/SOURCES/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch +++ /dev/null @@ -1,130 +0,0 @@ -From aefff389c4d11bd69180db7177135c4645a9b1bd Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:46 +0000 -Subject: [PATCH 13/18] block/backup-top: Don't acquire context while dropping - top - -RH-Author: Sergio Lopez Pascual -Message-id: 
<20200207112749.25073-7-slp@redhat.com> -Patchwork-id: 93759 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/9] block/backup-top: Don't acquire context while dropping top -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -All paths that lead to bdrv_backup_top_drop(), except for the call -from backup_clean(), imply that the BDS AioContext has already been -acquired, so doing it there too can potentially lead to QEMU hanging -on AIO_WAIT_WHILE(). - -An easy way to trigger this situation is by issuing a two actions -transaction, with a proper and a bogus blockdev-backup, so the second -one will trigger a rollback. This will trigger a hang with an stack -trace like this one: - - #0 0x00007fb680c75016 in __GI_ppoll (fds=0x55e74580f7c0, nfds=1, timeout=, - timeout@entry=0x0, sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:39 - #1 0x000055e743386e09 in ppoll (__ss=0x0, __timeout=0x0, __nfds=, __fds=) - at /usr/include/bits/poll2.h:77 - #2 0x000055e743386e09 in qemu_poll_ns - (fds=, nfds=, timeout=) at util/qemu-timer.c:336 - #3 0x000055e743388dc4 in aio_poll (ctx=0x55e7458925d0, blocking=blocking@entry=true) - at util/aio-posix.c:669 - #4 0x000055e743305dea in bdrv_flush (bs=bs@entry=0x55e74593c0d0) at block/io.c:2878 - #5 0x000055e7432be58e in bdrv_close (bs=0x55e74593c0d0) at block.c:4017 - #6 0x000055e7432be58e in bdrv_delete (bs=) at block.c:4262 - #7 0x000055e7432be58e in bdrv_unref (bs=bs@entry=0x55e74593c0d0) at block.c:5644 - #8 0x000055e743316b9b in bdrv_backup_top_drop (bs=bs@entry=0x55e74593c0d0) at block/backup-top.c:273 - #9 0x000055e74331461f in backup_job_create - (job_id=0x0, bs=bs@entry=0x55e7458d5820, target=target@entry=0x55e74589f640, speed=0, sync_mode=MIRROR_SYNC_MODE_FULL, sync_bitmap=sync_bitmap@entry=0x0, bitmap_mode=BITMAP_SYNC_MODE_ON_SUCCESS, compress=false, filter_node_name=0x0, 
on_source_error=BLOCKDEV_ON_ERROR_REPORT, on_target_error=BLOCKDEV_ON_ERROR_REPORT, creation_flags=0, cb=0x0, opaque=0x0, txn=0x0, errp=0x7ffddfd1efb0) at block/backup.c:478 - #10 0x000055e74315bc52 in do_backup_common - (backup=backup@entry=0x55e746c066d0, bs=bs@entry=0x55e7458d5820, target_bs=target_bs@entry=0x55e74589f640, aio_context=aio_context@entry=0x55e7458a91e0, txn=txn@entry=0x0, errp=errp@entry=0x7ffddfd1efb0) - at blockdev.c:3580 - #11 0x000055e74315c37c in do_blockdev_backup - (backup=backup@entry=0x55e746c066d0, txn=0x0, errp=errp@entry=0x7ffddfd1efb0) - at /usr/src/debug/qemu-kvm-4.2.0-2.module+el8.2.0+5135+ed3b2489.x86_64/./qapi/qapi-types-block-core.h:1492 - #12 0x000055e74315c449 in blockdev_backup_prepare (common=0x55e746a8de90, errp=0x7ffddfd1f018) - at blockdev.c:1885 - #13 0x000055e743160152 in qmp_transaction - (dev_list=, has_props=, props=0x55e7467fe2c0, errp=errp@entry=0x7ffddfd1f088) at blockdev.c:2340 - #14 0x000055e743287ff5 in qmp_marshal_transaction - (args=, ret=, errp=0x7ffddfd1f0f8) - at qapi/qapi-commands-transaction.c:44 - #15 0x000055e74333de6c in do_qmp_dispatch - (errp=0x7ffddfd1f0f0, allow_oob=, request=, cmds=0x55e743c28d60 ) at qapi/qmp-dispatch.c:132 - #16 0x000055e74333de6c in qmp_dispatch - (cmds=0x55e743c28d60 , request=, allow_oob=) - at qapi/qmp-dispatch.c:175 - #17 0x000055e74325c061 in monitor_qmp_dispatch (mon=0x55e745908030, req=) - at monitor/qmp.c:145 - #18 0x000055e74325c6fa in monitor_qmp_bh_dispatcher (data=) at monitor/qmp.c:234 - #19 0x000055e743385866 in aio_bh_call (bh=0x55e745807ae0) at util/async.c:117 - #20 0x000055e743385866 in aio_bh_poll (ctx=ctx@entry=0x55e7458067a0) at util/async.c:117 - #21 0x000055e743388c54 in aio_dispatch (ctx=0x55e7458067a0) at util/aio-posix.c:459 - #22 0x000055e743385742 in aio_ctx_dispatch - (source=, callback=, user_data=) at util/async.c:260 - #23 0x00007fb68543e67d in g_main_dispatch (context=0x55e745893a40) at gmain.c:3176 - #24 0x00007fb68543e67d in 
g_main_context_dispatch (context=context@entry=0x55e745893a40) at gmain.c:3829 - #25 0x000055e743387d08 in glib_pollfds_poll () at util/main-loop.c:219 - #26 0x000055e743387d08 in os_host_main_loop_wait (timeout=) at util/main-loop.c:242 - #27 0x000055e743387d08 in main_loop_wait (nonblocking=) at util/main-loop.c:518 - #28 0x000055e74316a3c1 in main_loop () at vl.c:1828 - #29 0x000055e743016a72 in main (argc=, argv=, envp=) - at vl.c:4504 - -Fix this by not acquiring the AioContext there, and ensuring all paths -leading to it have it already acquired (backup_clean()). - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1782111 -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 0abf2581717a19d9749d5c2ff8acd0ac203452c2) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - block/backup-top.c | 5 ----- - block/backup.c | 3 +++ - 2 files changed, 3 insertions(+), 5 deletions(-) - -diff --git a/block/backup-top.c b/block/backup-top.c -index 818d3f2..b8d863f 100644 ---- a/block/backup-top.c -+++ b/block/backup-top.c -@@ -255,9 +255,6 @@ append_failed: - void bdrv_backup_top_drop(BlockDriverState *bs) - { - BDRVBackupTopState *s = bs->opaque; -- AioContext *aio_context = bdrv_get_aio_context(bs); -- -- aio_context_acquire(aio_context); - - bdrv_drained_begin(bs); - -@@ -271,6 +268,4 @@ void bdrv_backup_top_drop(BlockDriverState *bs) - bdrv_drained_end(bs); - - bdrv_unref(bs); -- -- aio_context_release(aio_context); - } -diff --git a/block/backup.c b/block/backup.c -index cf62b1a..1383e21 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -135,8 +135,11 @@ static void backup_abort(Job *job) - static void backup_clean(Job *job) - { - BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); -+ AioContext *aio_context = bdrv_get_aio_context(s->backup_top); - -+ aio_context_acquire(aio_context); - bdrv_backup_top_drop(s->backup_top); -+ aio_context_release(aio_context); - } - - void 
backup_do_checkpoint(BlockJob *job, Error **errp) --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch b/SOURCES/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch deleted file mode 100644 index 745be9f..0000000 --- a/SOURCES/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch +++ /dev/null @@ -1,114 +0,0 @@ -From 1e0582ad34e77a060e2067a35992979c9eae82c9 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:31 +0000 -Subject: [PATCH 11/20] block: bdrv_reopen() with backing file in different - AioContext - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-6-kwolf@redhat.com> -Patchwork-id: 94282 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 05/13] block: bdrv_reopen() with backing file in different AioContext -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -This patch allows bdrv_reopen() (and therefore the x-blockdev-reopen QMP -command) to attach a node as the new backing file even if the node is in -a different AioContext than the parent if one of both nodes can be moved -to the AioContext of the other node. - -Signed-off-by: Kevin Wolf -Tested-by: Peter Krempa -Message-Id: <20200306141413.30705-3-kwolf@redhat.com> -Reviewed-by: Alberto Garcia -Signed-off-by: Kevin Wolf -(cherry picked from commit 1de6b45fb5c1489b450df7d1a4c692bba9678ce6) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 32 ++++++++++++++++++++++++++------ - tests/qemu-iotests/245 | 8 +++----- - 2 files changed, 29 insertions(+), 11 deletions(-) - -diff --git a/block.c b/block.c -index a744bb5..39e4647 100644 ---- a/block.c -+++ b/block.c -@@ -3749,6 +3749,29 @@ static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs, - *shared = cumulative_shared_perms; - } - -+static bool bdrv_reopen_can_attach(BlockDriverState *parent, -+ BdrvChild *child, -+ BlockDriverState *new_child, -+ Error **errp) -+{ -+ AioContext *parent_ctx = bdrv_get_aio_context(parent); -+ AioContext *child_ctx = bdrv_get_aio_context(new_child); -+ GSList *ignore; -+ bool ret; -+ -+ ignore = g_slist_prepend(NULL, child); -+ ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL); -+ g_slist_free(ignore); -+ if (ret) { -+ return ret; -+ } -+ -+ ignore = g_slist_prepend(NULL, child); -+ ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp); -+ g_slist_free(ignore); -+ return ret; -+} -+ - /* - * Take a BDRVReopenState and check if the value of 'backing' in the - * reopen_state->options QDict is valid or not. -@@ -3800,14 +3823,11 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, - } - - /* -- * TODO: before removing the x- prefix from x-blockdev-reopen we -- * should move the new backing file into the right AioContext -- * instead of returning an error. -+ * Check AioContext compatibility so that the bdrv_set_backing_hd() call in -+ * bdrv_reopen_commit() won't fail. 
- */ - if (new_backing_bs) { -- if (bdrv_get_aio_context(new_backing_bs) != bdrv_get_aio_context(bs)) { -- error_setg(errp, "Cannot use a new backing file " -- "with a different AioContext"); -+ if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) { - return -EINVAL; - } - } -diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 -index f69c2fa..919131d 100644 ---- a/tests/qemu-iotests/245 -+++ b/tests/qemu-iotests/245 -@@ -1013,18 +1013,16 @@ class TestBlockdevReopen(iotests.QMPTestCase): - # neither of them can switch to the other AioContext - def test_iothreads_error(self): - self.run_test_iothreads('iothread0', 'iothread1', -- "Cannot use a new backing file with a different AioContext") -+ "Cannot change iothread of active block backend") - - def test_iothreads_compatible_users(self): - self.run_test_iothreads('iothread0', 'iothread0') - - def test_iothreads_switch_backing(self): -- self.run_test_iothreads('iothread0', None, -- "Cannot use a new backing file with a different AioContext") -+ self.run_test_iothreads('iothread0', None) - - def test_iothreads_switch_overlay(self): -- self.run_test_iothreads(None, 'iothread0', -- "Cannot use a new backing file with a different AioContext") -+ self.run_test_iothreads(None, 'iothread0') - - if __name__ == '__main__': - iotests.main(supported_fmts=["qcow2"], --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-curl-HTTP-header-field-names-are-case-insensit.patch b/SOURCES/kvm-block-curl-HTTP-header-field-names-are-case-insensit.patch deleted file mode 100644 index a974a18..0000000 --- a/SOURCES/kvm-block-curl-HTTP-header-field-names-are-case-insensit.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 5e5ca17e1e09cfe9a780c556528bbde23c93fc4e Mon Sep 17 00:00:00 2001 -From: Richard Jones -Date: Thu, 28 May 2020 14:27:37 +0100 -Subject: [PATCH 03/26] block/curl: HTTP header field names are case - insensitive -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: 
Richard Jones -Message-id: <20200528142737.17318-3-rjones@redhat.com> -Patchwork-id: 96895 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] block/curl: HTTP header field names are case insensitive -Bugzilla: 1841038 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Philippe Mathieu-Daudé - -From: David Edmondson - -RFC 7230 section 3.2 indicates that HTTP header field names are case -insensitive. - -Signed-off-by: David Edmondson -Message-Id: <20200224101310.101169-3-david.edmondson@oracle.com> -Reviewed-by: Max Reitz -Signed-off-by: Max Reitz -(cherry picked from commit 69032253c33ae1774233c63cedf36d32242a85fc) -Signed-off-by: Danilo C. L. de Paula ---- - block/curl.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/block/curl.c b/block/curl.c -index f9ffb7f..6e32590 100644 ---- a/block/curl.c -+++ b/block/curl.c -@@ -216,11 +216,12 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) - size_t realsize = size * nmemb; - const char *header = (char *)ptr; - const char *end = header + realsize; -- const char *accept_ranges = "Accept-Ranges:"; -+ const char *accept_ranges = "accept-ranges:"; - const char *bytes = "bytes"; - - if (realsize >= strlen(accept_ranges) -- && strncmp(header, accept_ranges, strlen(accept_ranges)) == 0) { -+ && g_ascii_strncasecmp(header, accept_ranges, -+ strlen(accept_ranges)) == 0) { - - char *p = strchr(header, ':') + 1; - --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-curl-HTTP-header-fields-allow-whitespace-aroun.patch b/SOURCES/kvm-block-curl-HTTP-header-fields-allow-whitespace-aroun.patch deleted file mode 100644 index c09a1e2..0000000 --- a/SOURCES/kvm-block-curl-HTTP-header-fields-allow-whitespace-aroun.patch +++ /dev/null @@ -1,76 +0,0 @@ -From e5ac775de83d3d22f13c74ab198780b8b579f684 Mon Sep 17 00:00:00 2001 -From: Richard Jones -Date: Thu, 28 May 2020 14:27:36 +0100 -Subject: [PATCH 02/26] block/curl: HTTP header fields allow whitespace around - values - 
-RH-Author: Richard Jones -Message-id: <20200528142737.17318-2-rjones@redhat.com> -Patchwork-id: 96894 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/2] block/curl: HTTP header fields allow whitespace around values -Bugzilla: 1841038 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Danilo de Paula - -From: David Edmondson - -RFC 7230 section 3.2 indicates that whitespace is permitted between -the field name and field value and after the field value. - -Signed-off-by: David Edmondson -Message-Id: <20200224101310.101169-2-david.edmondson@oracle.com> -Reviewed-by: Max Reitz -Signed-off-by: Max Reitz -(cherry picked from commit 7788a319399f17476ff1dd43164c869e320820a2) -Signed-off-by: Danilo C. L. de Paula ---- - block/curl.c | 31 +++++++++++++++++++++++++++---- - 1 file changed, 27 insertions(+), 4 deletions(-) - -diff --git a/block/curl.c b/block/curl.c -index f862993..f9ffb7f 100644 ---- a/block/curl.c -+++ b/block/curl.c -@@ -214,11 +214,34 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) - { - BDRVCURLState *s = opaque; - size_t realsize = size * nmemb; -- const char *accept_line = "Accept-Ranges: bytes"; -+ const char *header = (char *)ptr; -+ const char *end = header + realsize; -+ const char *accept_ranges = "Accept-Ranges:"; -+ const char *bytes = "bytes"; - -- if (realsize >= strlen(accept_line) -- && strncmp((char *)ptr, accept_line, strlen(accept_line)) == 0) { -- s->accept_range = true; -+ if (realsize >= strlen(accept_ranges) -+ && strncmp(header, accept_ranges, strlen(accept_ranges)) == 0) { -+ -+ char *p = strchr(header, ':') + 1; -+ -+ /* Skip whitespace between the header name and value. */ -+ while (p < end && *p && g_ascii_isspace(*p)) { -+ p++; -+ } -+ -+ if (end - p >= strlen(bytes) -+ && strncmp(p, bytes, strlen(bytes)) == 0) { -+ -+ /* Check that there is nothing but whitespace after the value. 
*/ -+ p += strlen(bytes); -+ while (p < end && *p && g_ascii_isspace(*p)) { -+ p++; -+ } -+ -+ if (p == end || !*p) { -+ s->accept_range = true; -+ } -+ } - } - - return realsize; --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-file-posix-Fix-problem-with-fallocate-PUNCH_HO.patch b/SOURCES/kvm-block-file-posix-Fix-problem-with-fallocate-PUNCH_HO.patch deleted file mode 100644 index 60b1b0a..0000000 --- a/SOURCES/kvm-block-file-posix-Fix-problem-with-fallocate-PUNCH_HO.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 8c339c3535728179acc94deb5b922aebcfac9ab6 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 3 Jun 2021 16:13:34 -0400 -Subject: [PATCH 2/4] block/file-posix: Fix problem with fallocate(PUNCH_HOLE) - on GPFS - -RH-Author: Thomas Huth -Message-id: <20210603161334.607005-2-thuth@redhat.com> -Patchwork-id: 101673 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/1] block/file-posix: Fix problem with fallocate(PUNCH_HOLE) on GPFS -Bugzilla: 1944861 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz -RH-Acked-by: Cornelia Huck -RH-Acked-by: Laszlo Ersek - -A customer reported that running - - qemu-img convert -t none -O qcow2 -f qcow2 input.qcow2 output.qcow2 - -fails for them with the following error message when the images are -stored on a GPFS file system : - - qemu-img: error while writing sector 0: Invalid argument - -After analyzing the strace output, it seems like the problem is in -handle_aiocb_write_zeroes(): The call to fallocate(FALLOC_FL_PUNCH_HOLE) -returns EINVAL, which can apparently happen if the file system has -a different idea of the granularity of the operation. It's arguably -a bug in GPFS, since the PUNCH_HOLE mode should not result in EINVAL -according to the man-page of fallocate(), but the file system is out -there in production and so we have to deal with it. 
In commit 294682cc3a -("block: workaround for unaligned byte range in fallocate()") we also -already applied the a work-around for the same problem to the earlier -fallocate(FALLOC_FL_ZERO_RANGE) call, so do it now similar with the -PUNCH_HOLE call. But instead of silently catching and returning --ENOTSUP (which causes the caller to fall back to writing zeroes), -let's rather inform the user once about the buggy file system and -try the other fallback instead. - -Signed-off-by: Thomas Huth -Message-Id: <20210527172020.847617-2-thuth@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 73ebf29729d1a40feaa9f8ab8951b6ee6dbfbede) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1944861 -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. de Paula ---- - block/file-posix.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 62a463229f..371572f1b0 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -1587,6 +1587,17 @@ static int handle_aiocb_write_zeroes(void *opaque) - return ret; - } - s->has_fallocate = false; -+ } else if (ret == -EINVAL) { -+ /* -+ * Some file systems like older versions of GPFS do not like un- -+ * aligned byte ranges, and return EINVAL in such a case, though -+ * they should not do it according to the man-page of fallocate(). -+ * Warn about the bad filesystem and try the final fallback instead. -+ */ -+ warn_report_once("Your file system is misbehaving: " -+ "fallocate(FALLOC_FL_PUNCH_HOLE) returned EINVAL. 
" -+ "Please report this bug to your file sytem " -+ "vendor."); - } else if (ret != -ENOTSUP) { - return ret; - } else { --- -2.27.0 - diff --git a/SOURCES/kvm-block-introducing-bdrv_co_delete_file-interface.patch b/SOURCES/kvm-block-introducing-bdrv_co_delete_file-interface.patch deleted file mode 100644 index 9d5e659..0000000 --- a/SOURCES/kvm-block-introducing-bdrv_co_delete_file-interface.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 9581770f48911cbe68cfa1a7fa125df2a0a27d02 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Sun, 31 May 2020 16:40:33 +0100 -Subject: [PATCH 5/7] block: introducing 'bdrv_co_delete_file' interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Maxim Levitsky -Message-id: <20200531164035.34188-2-mlevitsk@redhat.com> -Patchwork-id: 97057 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/3] block: introducing 'bdrv_co_delete_file' interface -Bugzilla: 1827630 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: John Snow -RH-Acked-by: Eric Blake - -From: Daniel Henrique Barboza - -Adding to Block Drivers the capability of being able to clean up -its created files can be useful in certain situations. For the -LUKS driver, for instance, a failure in one of its authentication -steps can leave files in the host that weren't there before. - -This patch adds the 'bdrv_co_delete_file' interface to block -drivers and add it to the 'file' driver in file-posix.c. The -implementation is given by 'raw_co_delete_file'. - -Suggested-by: Daniel P. Berrangé -Signed-off-by: Daniel Henrique Barboza -Message-Id: <20200130213907.2830642-2-danielhb413@gmail.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 9bffae14df879255329473a7bd578643af2d4c9c) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. 
de Paula ---- - block/file-posix.c | 23 +++++++++++++++++++++++ - include/block/block_int.h | 4 ++++ - 2 files changed, 27 insertions(+) - -diff --git a/block/file-posix.c b/block/file-posix.c -index dd18d40..1609598 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -2388,6 +2388,28 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv, - return raw_co_create(&options, errp); - } - -+static int coroutine_fn raw_co_delete_file(BlockDriverState *bs, -+ Error **errp) -+{ -+ struct stat st; -+ int ret; -+ -+ if (!(stat(bs->filename, &st) == 0) || !S_ISREG(st.st_mode)) { -+ error_setg_errno(errp, ENOENT, "%s is not a regular file", -+ bs->filename); -+ return -ENOENT; -+ } -+ -+ ret = unlink(bs->filename); -+ if (ret < 0) { -+ ret = -errno; -+ error_setg_errno(errp, -ret, "Error when deleting file %s", -+ bs->filename); -+ } -+ -+ return ret; -+} -+ - /* - * Find allocation range in @bs around offset @start. - * May change underlying file descriptor's file offset. -@@ -3019,6 +3041,7 @@ BlockDriver bdrv_file = { - .bdrv_co_block_status = raw_co_block_status, - .bdrv_co_invalidate_cache = raw_co_invalidate_cache, - .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes, -+ .bdrv_co_delete_file = raw_co_delete_file, - - .bdrv_co_preadv = raw_co_preadv, - .bdrv_co_pwritev = raw_co_pwritev, -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 529f153..562dca1 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -316,6 +316,10 @@ struct BlockDriver { - */ - int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs); - -+ /* Delete a created file. */ -+ int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs, -+ Error **errp); -+ - /* - * Flushes all data that was already written to the OS all the way down to - * the disk (for example file-posix.c calls fsync()). 
--- -1.8.3.1 - diff --git a/SOURCES/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch b/SOURCES/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch new file mode 100644 index 0000000..c1ee128 --- /dev/null +++ b/SOURCES/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch @@ -0,0 +1,56 @@ +From 4c6eff78f4b31ec4bd7b42440396760d19fde63e Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 18 Jan 2022 17:59:59 +0100 +Subject: [PATCH 6/7] block/io: Update BSC only if want_zero is true + +RH-Author: Hanna Reitz +RH-MergeRequest: 112: block/io: Update BSC only if want_zero is true +RH-Commit: [1/2] a202de1f52110d1e871c3b5b58f2d9e9b5d17570 +RH-Bugzilla: 2041480 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +We update the block-status cache whenever we get new information from a +bdrv_co_block_status() call to the block driver. However, if we have +passed want_zero=false to that call, it may flag areas containing zeroes +as data, and so we would update the block-status cache with wrong +information. + +Therefore, we should not update the cache with want_zero=false. + +Reported-by: Nir Soffer +Fixes: 0bc329fbb00 ("block: block-status cache for data regions") +Reviewed-by: Nir Soffer +Cc: qemu-stable@nongnu.org +Signed-off-by: Hanna Reitz +Message-Id: <20220118170000.49423-2-hreitz@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Eric Blake +(cherry picked from commit 113b727ce788335cf76f65355d670c9bc130fd75) +Signed-off-by: Hanna Reitz +--- + block/io.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/block/io.c b/block/io.c +index bb0a254def..4e4cb556c5 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -2497,8 +2497,12 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, + * non-protocol nodes, and then it is never used. However, filling + * the cache requires an RCU update, so double check here to avoid + * such an update if possible. 
++ * ++ * Check want_zero, because we only want to update the cache when we ++ * have accurate information about what is zero and what is data. + */ +- if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && ++ if (want_zero && ++ ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && + QLIST_EMPTY(&bs->children)) + { + /* +-- +2.27.0 + diff --git a/SOURCES/kvm-block-iscsi-fix-heap-buffer-overflow-in-iscsi_aio_io.patch b/SOURCES/kvm-block-iscsi-fix-heap-buffer-overflow-in-iscsi_aio_io.patch deleted file mode 100644 index fe8c49b..0000000 --- a/SOURCES/kvm-block-iscsi-fix-heap-buffer-overflow-in-iscsi_aio_io.patch +++ /dev/null @@ -1,100 +0,0 @@ -From b9b77159567283628645943b5367d39b558e8faa Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 26 Jan 2021 20:07:59 -0500 -Subject: [PATCH 9/9] block/iscsi:fix heap-buffer-overflow in - iscsi_aio_ioctl_cb -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210126200759.245891-2-jmaloy@redhat.com> -Patchwork-id: 100787 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] block/iscsi:fix heap-buffer-overflow in iscsi_aio_ioctl_cb -Bugzilla: 1912974 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Kevin Wolf -RH-Acked-by: Laszlo Ersek - -From: Chen Qun - -There is an overflow, the source 'datain.data[2]' is 100 bytes, - but the 'ss' is 252 bytes.This may cause a security issue because - we can access a lot of unrelated memory data. - -The len for sbp copy data should take the minimum of mx_sb_len and - sb_len_wr, not the maximum. 
- -If we use iscsi device for VM backend storage, ASAN show stack: - -READ of size 252 at 0xfffd149dcfc4 thread T0 - #0 0xaaad433d0d34 in __asan_memcpy (aarch64-softmmu/qemu-system-aarch64+0x2cb0d34) - #1 0xaaad45f9d6d0 in iscsi_aio_ioctl_cb /qemu/block/iscsi.c:996:9 - #2 0xfffd1af0e2dc (/usr/lib64/iscsi/libiscsi.so.8+0xe2dc) - #3 0xfffd1af0d174 (/usr/lib64/iscsi/libiscsi.so.8+0xd174) - #4 0xfffd1af19fac (/usr/lib64/iscsi/libiscsi.so.8+0x19fac) - #5 0xaaad45f9acc8 in iscsi_process_read /qemu/block/iscsi.c:403:5 - #6 0xaaad4623733c in aio_dispatch_handler /qemu/util/aio-posix.c:467:9 - #7 0xaaad4622f350 in aio_dispatch_handlers /qemu/util/aio-posix.c:510:20 - #8 0xaaad4622f350 in aio_dispatch /qemu/util/aio-posix.c:520 - #9 0xaaad46215944 in aio_ctx_dispatch /qemu/util/async.c:298:5 - #10 0xfffd1bed12f4 in g_main_context_dispatch (/lib64/libglib-2.0.so.0+0x512f4) - #11 0xaaad46227de0 in glib_pollfds_poll /qemu/util/main-loop.c:219:9 - #12 0xaaad46227de0 in os_host_main_loop_wait /qemu/util/main-loop.c:242 - #13 0xaaad46227de0 in main_loop_wait /qemu/util/main-loop.c:518 - #14 0xaaad43d9d60c in qemu_main_loop /qemu/softmmu/vl.c:1662:9 - #15 0xaaad4607a5b0 in main /qemu/softmmu/main.c:49:5 - #16 0xfffd1a460b9c in __libc_start_main (/lib64/libc.so.6+0x20b9c) - #17 0xaaad43320740 in _start (aarch64-softmmu/qemu-system-aarch64+0x2c00740) - -0xfffd149dcfc4 is located 0 bytes to the right of 100-byte region [0xfffd149dcf60,0xfffd149dcfc4) -allocated by thread T0 here: - #0 0xaaad433d1e70 in __interceptor_malloc (aarch64-softmmu/qemu-system-aarch64+0x2cb1e70) - #1 0xfffd1af0e254 (/usr/lib64/iscsi/libiscsi.so.8+0xe254) - #2 0xfffd1af0d174 (/usr/lib64/iscsi/libiscsi.so.8+0xd174) - #3 0xfffd1af19fac (/usr/lib64/iscsi/libiscsi.so.8+0x19fac) - #4 0xaaad45f9acc8 in iscsi_process_read /qemu/block/iscsi.c:403:5 - #5 0xaaad4623733c in aio_dispatch_handler /qemu/util/aio-posix.c:467:9 - #6 0xaaad4622f350 in aio_dispatch_handlers /qemu/util/aio-posix.c:510:20 - #7 0xaaad4622f350 in 
aio_dispatch /qemu/util/aio-posix.c:520 - #8 0xaaad46215944 in aio_ctx_dispatch /qemu/util/async.c:298:5 - #9 0xfffd1bed12f4 in g_main_context_dispatch (/lib64/libglib-2.0.so.0+0x512f4) - #10 0xaaad46227de0 in glib_pollfds_poll /qemu/util/main-loop.c:219:9 - #11 0xaaad46227de0 in os_host_main_loop_wait /qemu/util/main-loop.c:242 - #12 0xaaad46227de0 in main_loop_wait /qemu/util/main-loop.c:518 - #13 0xaaad43d9d60c in qemu_main_loop /qemu/softmmu/vl.c:1662:9 - #14 0xaaad4607a5b0 in main /qemu/softmmu/main.c:49:5 - #15 0xfffd1a460b9c in __libc_start_main (/lib64/libc.so.6+0x20b9c) - #16 0xaaad43320740 in _start (aarch64-softmmu/qemu-system-aarch64+0x2c00740) - -Reported-by: Euler Robot -Signed-off-by: Chen Qun -Reviewed-by: Stefan Hajnoczi -Message-id: 20200418062602.10776-1-kuhn.chenqun@huawei.com -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Peter Maydell - -(cherry picked from ff0507c239a246fd7215b31c5658fc6a3ee1e4c5) -Signed-off-by: Jon Maloy -Signed-off-by: Jon Maloy ---- - block/iscsi.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/block/iscsi.c b/block/iscsi.c -index 0bea2d3a93..06915655b3 100644 ---- a/block/iscsi.c -+++ b/block/iscsi.c -@@ -991,8 +991,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status, - acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE; - - acb->ioh->sb_len_wr = acb->task->datain.size - 2; -- ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ? 
-- acb->ioh->mx_sb_len : acb->ioh->sb_len_wr; -+ ss = MIN(acb->ioh->mx_sb_len, acb->ioh->sb_len_wr); - memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss); - } - --- -2.18.2 - diff --git a/SOURCES/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch b/SOURCES/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch new file mode 100644 index 0000000..324021b --- /dev/null +++ b/SOURCES/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch @@ -0,0 +1,52 @@ +From d5a85fcf996948d1154e88e9ee3b4e8c64ec2694 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:08 +0100 +Subject: [PATCH 2/6] block/nbd: Assert there are no timers when closed + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [2/6] 995795ae9844a7d2b28cb1e57fd7fe81482d0205 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Our two timers must not remain armed beyond nbd_clear_bdrvstate(), or +they will access freed data when they fire. + +This patch is separate from the patches that actually fix the issue +(HEAD^^ and HEAD^) so that you can run the associated regression iotest +(281) on a configuration that reproducibly exposes the bug. 
+ +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 8a39c381e5e407d2fe5500324323f90a8540fa90) + +Conflict: +- block/nbd.c: open_timer was introduced after the 6.2 release (for + nbd's @open-timeout parameter), and has not been backported, so drop + the assertion that it is NULL + +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index b8e5a9b4cc..aab20125d8 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -108,6 +108,9 @@ static void nbd_clear_bdrvstate(BlockDriverState *bs) + + yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); + ++ /* Must not leave timers behind that would access freed data */ ++ assert(!s->reconnect_delay_timer); ++ + object_unref(OBJECT(s->tlscreds)); + qapi_free_SocketAddress(s->saddr); + s->saddr = NULL; +-- +2.27.0 + diff --git a/SOURCES/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch b/SOURCES/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch new file mode 100644 index 0000000..7d1c000 --- /dev/null +++ b/SOURCES/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch @@ -0,0 +1,54 @@ +From 8e23c0f208c6bd5bb64c4f6e4863b93fa6f4e9de Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:06 +0100 +Subject: [PATCH 1/6] block/nbd: Delete reconnect delay timer when done + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [1/6] 70814602a8a43a7c14857d76266d82b1aa5174a9 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +We start the reconnect delay timer to cancel the reconnection attempt +after a while. Once nbd_co_do_establish_connection() has returned, this +attempt is over, and we no longer need the timer. 
+ +Delete it before returning from nbd_reconnect_attempt(), so that it does +not persist beyond the I/O request that was paused for reconnecting; we +do not want it to fire in a drained section, because all sort of things +can happen in such a section (e.g. the AioContext might be changed, and +we do not want the timer to fire in the wrong context; or the BDS might +even be deleted, and so the timer CB would access already-freed data). + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 3ce1fc16bad9c3f8b7b10b451a224d6d76e5c551) +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index 5ef462db1b..b8e5a9b4cc 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -353,6 +353,13 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) + } + + nbd_co_do_establish_connection(s->bs, NULL); ++ ++ /* ++ * The reconnect attempt is done (maybe successfully, maybe not), so ++ * we no longer need this timer. Delete it so it will not outlive ++ * this I/O request (so draining removes all timers). 
++ */ ++ reconnect_delay_timer_del(s); + } + + static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle) +-- +2.27.0 + diff --git a/SOURCES/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch b/SOURCES/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch deleted file mode 100644 index 378ae1a..0000000 --- a/SOURCES/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 4ef2c464a54b0b618d933641ac0a7012e629fed9 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:42 +0000 -Subject: [PATCH 01/20] block/nbd: Fix hang in .bdrv_close() - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-2-mlevitsk@redhat.com> -Patchwork-id: 94224 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/6] block/nbd: Fix hang in .bdrv_close() -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -When nbd_close() is called from a coroutine, the connection_co never -gets to run, and thus nbd_teardown_connection() hangs. - -This is because aio_co_enter() only puts the connection_co into the main -coroutine's wake-up queue, so this main coroutine needs to yield and -wait for connection_co to terminate. - -Suggested-by: Kevin Wolf -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-2-mreitz@redhat.com> -Reviewed-by: Eric Blake -Reviewed-by: Maxim Levitsky -Signed-off-by: Max Reitz -(cherry picked from commit 78c81a3f108870d325b0a39d88711366afe6f703) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. 
de Paula ---- - block/nbd.c | 14 +++++++++++++- - 1 file changed, 13 insertions(+), 1 deletion(-) - -diff --git a/block/nbd.c b/block/nbd.c -index 5f18f78..a73f0d9 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -70,6 +70,7 @@ typedef struct BDRVNBDState { - CoMutex send_mutex; - CoQueue free_sema; - Coroutine *connection_co; -+ Coroutine *teardown_co; - QemuCoSleepState *connection_co_sleep_ns_state; - bool drained; - bool wait_drained_end; -@@ -203,7 +204,15 @@ static void nbd_teardown_connection(BlockDriverState *bs) - qemu_co_sleep_wake(s->connection_co_sleep_ns_state); - } - } -- BDRV_POLL_WHILE(bs, s->connection_co); -+ if (qemu_in_coroutine()) { -+ s->teardown_co = qemu_coroutine_self(); -+ /* connection_co resumes us when it terminates */ -+ qemu_coroutine_yield(); -+ s->teardown_co = NULL; -+ } else { -+ BDRV_POLL_WHILE(bs, s->connection_co); -+ } -+ assert(!s->connection_co); - } - - static bool nbd_client_connecting(BDRVNBDState *s) -@@ -395,6 +404,9 @@ static coroutine_fn void nbd_connection_entry(void *opaque) - s->ioc = NULL; - } - -+ if (s->teardown_co) { -+ aio_co_wake(s->teardown_co); -+ } - aio_wait_kick(); - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch b/SOURCES/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch new file mode 100644 index 0000000..4cd3cce --- /dev/null +++ b/SOURCES/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch @@ -0,0 +1,107 @@ +From c7f63e7bbc5119d92775e20d1ebbf8280c78b732 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:11 +0100 +Subject: [PATCH 5/6] block/nbd: Move s->ioc on AioContext change + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [5/6] 107757b9fbadfb832c75521317108525daa4174e +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +s->ioc must always be attached to the NBD node's AioContext. 
If that +context changes, s->ioc must be attached to the new context. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2033626 +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit e15f3a66c830e3fce99c9d56c493c2f7078a1225) + +Conflict: +- block/nbd.c: open_timer was added after the 6.2 release, so we need + not (and cannot) assert it is NULL here. + +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 41 +++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 41 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index aab20125d8..a3896c7f5f 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -2003,6 +2003,38 @@ static void nbd_cancel_in_flight(BlockDriverState *bs) + nbd_co_establish_connection_cancel(s->conn); + } + ++static void nbd_attach_aio_context(BlockDriverState *bs, ++ AioContext *new_context) ++{ ++ BDRVNBDState *s = bs->opaque; ++ ++ /* ++ * The reconnect_delay_timer is scheduled in I/O paths when the ++ * connection is lost, to cancel the reconnection attempt after a ++ * given time. Once this attempt is done (successfully or not), ++ * nbd_reconnect_attempt() ensures the timer is deleted before the ++ * respective I/O request is resumed. ++ * Since the AioContext can only be changed when a node is drained, ++ * the reconnect_delay_timer cannot be active here. 
++ */ ++ assert(!s->reconnect_delay_timer); ++ ++ if (s->ioc) { ++ qio_channel_attach_aio_context(s->ioc, new_context); ++ } ++} ++ ++static void nbd_detach_aio_context(BlockDriverState *bs) ++{ ++ BDRVNBDState *s = bs->opaque; ++ ++ assert(!s->reconnect_delay_timer); ++ ++ if (s->ioc) { ++ qio_channel_detach_aio_context(s->ioc); ++ } ++} ++ + static BlockDriver bdrv_nbd = { + .format_name = "nbd", + .protocol_name = "nbd", +@@ -2026,6 +2058,9 @@ static BlockDriver bdrv_nbd = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static BlockDriver bdrv_nbd_tcp = { +@@ -2051,6 +2086,9 @@ static BlockDriver bdrv_nbd_tcp = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static BlockDriver bdrv_nbd_unix = { +@@ -2076,6 +2114,9 @@ static BlockDriver bdrv_nbd_unix = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static void bdrv_nbd_init(void) +-- +2.27.0 + diff --git a/SOURCES/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch b/SOURCES/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch deleted file mode 100644 index 43f9ffc..0000000 --- a/SOURCES/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch +++ /dev/null @@ -1,328 +0,0 @@ -From 25c528b30f8774f33e957d14060805398da524d9 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Thu, 26 Mar 2020 20:23:06 +0000 -Subject: [PATCH 1/4] block: pass BlockDriver reference to the .bdrv_co_create - 
-RH-Author: Maxim Levitsky -Message-id: <20200326202307.9264-2-mlevitsk@redhat.com> -Patchwork-id: 94447 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] block: pass BlockDriver reference to the .bdrv_co_create -Bugzilla: 1816007 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz - -This will allow the reuse of a single generic .bdrv_co_create -implementation for several drivers. -No functional changes. - -Signed-off-by: Maxim Levitsky -Message-Id: <20200326011218.29230-2-mlevitsk@redhat.com> -Reviewed-by: Denis V. Lunev -Signed-off-by: Max Reitz -(cherry picked from commit b92902dfeaafbceaf744ab7473f2d070284f6172) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. de Paula ---- - block.c | 3 ++- - block/crypto.c | 3 ++- - block/file-posix.c | 4 +++- - block/file-win32.c | 4 +++- - block/gluster.c | 3 ++- - block/nfs.c | 4 +++- - block/parallels.c | 3 ++- - block/qcow.c | 3 ++- - block/qcow2.c | 4 +++- - block/qed.c | 3 ++- - block/raw-format.c | 4 +++- - block/rbd.c | 3 ++- - block/sheepdog.c | 4 +++- - block/ssh.c | 4 +++- - block/vdi.c | 4 +++- - block/vhdx.c | 3 ++- - block/vmdk.c | 4 +++- - block/vpc.c | 6 ++++-- - include/block/block_int.h | 3 ++- - 19 files changed, 49 insertions(+), 20 deletions(-) - -diff --git a/block.c b/block.c -index ec29b1e..f9a1c5b 100644 ---- a/block.c -+++ b/block.c -@@ -482,7 +482,8 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque) - CreateCo *cco = opaque; - assert(cco->drv); - -- ret = cco->drv->bdrv_co_create_opts(cco->filename, cco->opts, &local_err); -+ ret = cco->drv->bdrv_co_create_opts(cco->drv, -+ cco->filename, cco->opts, &local_err); - error_propagate(&cco->err, local_err); - cco->ret = ret; - } -diff --git a/block/crypto.c b/block/crypto.c -index 2482383..970d463 100644 ---- a/block/crypto.c -+++ b/block/crypto.c -@@ -539,7 +539,8 @@ fail: - return ret; - } - --static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename, -+static int coroutine_fn 
block_crypto_co_create_opts_luks(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/file-posix.c b/block/file-posix.c -index fd29372..a2e0a74 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -2346,7 +2346,9 @@ out: - return result; - } - --static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn raw_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions options; -diff --git a/block/file-win32.c b/block/file-win32.c -index 77e8ff7..1585983 100644 ---- a/block/file-win32.c -+++ b/block/file-win32.c -@@ -588,7 +588,9 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp) - return 0; - } - --static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn raw_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions options; -diff --git a/block/gluster.c b/block/gluster.c -index 4fa4a77..0aa1f2c 100644 ---- a/block/gluster.c -+++ b/block/gluster.c -@@ -1130,7 +1130,8 @@ out: - return ret; - } - --static int coroutine_fn qemu_gluster_co_create_opts(const char *filename, -+static int coroutine_fn qemu_gluster_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/nfs.c b/block/nfs.c -index 9a6311e..cc2413d 100644 ---- a/block/nfs.c -+++ b/block/nfs.c -@@ -662,7 +662,9 @@ out: - return ret; - } - --static int coroutine_fn nfs_file_co_create_opts(const char *url, QemuOpts *opts, -+static int coroutine_fn nfs_file_co_create_opts(BlockDriver *drv, -+ const char *url, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions *create_options; -diff --git a/block/parallels.c b/block/parallels.c -index 7a01997..6d4ed77 100644 ---- a/block/parallels.c -+++ b/block/parallels.c -@@ -609,7 +609,8 @@ exit: - goto out; - 
} - --static int coroutine_fn parallels_co_create_opts(const char *filename, -+static int coroutine_fn parallels_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/qcow.c b/block/qcow.c -index fce8989..8973e4e 100644 ---- a/block/qcow.c -+++ b/block/qcow.c -@@ -934,7 +934,8 @@ exit: - return ret; - } - --static int coroutine_fn qcow_co_create_opts(const char *filename, -+static int coroutine_fn qcow_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, Error **errp) - { - BlockdevCreateOptions *create_options = NULL; -diff --git a/block/qcow2.c b/block/qcow2.c -index 83b1fc0..71067c6 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -3558,7 +3558,9 @@ out: - return ret; - } - --static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions *create_options = NULL; -diff --git a/block/qed.c b/block/qed.c -index d8c4e5f..1af9b3c 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -720,7 +720,8 @@ out: - return ret; - } - --static int coroutine_fn bdrv_qed_co_create_opts(const char *filename, -+static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/raw-format.c b/block/raw-format.c -index 3a76ec7..93b25e1 100644 ---- a/block/raw-format.c -+++ b/block/raw-format.c -@@ -419,7 +419,9 @@ static int raw_has_zero_init_truncate(BlockDriverState *bs) - return bdrv_has_zero_init_truncate(bs->file->bs); - } - --static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn raw_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - return bdrv_create_file(filename, opts, errp); -diff --git a/block/rbd.c b/block/rbd.c -index 027cbcc..8847259 
100644 ---- a/block/rbd.c -+++ b/block/rbd.c -@@ -425,7 +425,8 @@ static int qemu_rbd_co_create(BlockdevCreateOptions *options, Error **errp) - return qemu_rbd_do_create(options, NULL, NULL, errp); - } - --static int coroutine_fn qemu_rbd_co_create_opts(const char *filename, -+static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/sheepdog.c b/block/sheepdog.c -index cfa8433..a8a7e32 100644 ---- a/block/sheepdog.c -+++ b/block/sheepdog.c -@@ -2157,7 +2157,9 @@ out: - return ret; - } - --static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn sd_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions *create_options = NULL; -diff --git a/block/ssh.c b/block/ssh.c -index b4375cf..84e9282 100644 ---- a/block/ssh.c -+++ b/block/ssh.c -@@ -963,7 +963,9 @@ fail: - return ret; - } - --static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn ssh_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions *create_options; -diff --git a/block/vdi.c b/block/vdi.c -index 0142da7..e1a11f2 100644 ---- a/block/vdi.c -+++ b/block/vdi.c -@@ -896,7 +896,9 @@ static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options, - return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp); - } - --static int coroutine_fn vdi_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn vdi_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - QDict *qdict = NULL; -diff --git a/block/vhdx.c b/block/vhdx.c -index f02d261..33e57cd 100644 ---- a/block/vhdx.c -+++ b/block/vhdx.c -@@ -2046,7 +2046,8 @@ delete_and_exit: - return ret; - } - --static int coroutine_fn vhdx_co_create_opts(const char 
*filename, -+static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/vmdk.c b/block/vmdk.c -index 20e909d..eb726f2 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -2588,7 +2588,9 @@ exit: - return blk; - } - --static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - Error *local_err = NULL; -diff --git a/block/vpc.c b/block/vpc.c -index a655502..6df75e2 100644 ---- a/block/vpc.c -+++ b/block/vpc.c -@@ -1089,8 +1089,10 @@ out: - return ret; - } - --static int coroutine_fn vpc_co_create_opts(const char *filename, -- QemuOpts *opts, Error **errp) -+static int coroutine_fn vpc_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, -+ Error **errp) - { - BlockdevCreateOptions *create_options = NULL; - QDict *qdict; -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 96e327b..7ff81be 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -136,7 +136,8 @@ struct BlockDriver { - void (*bdrv_close)(BlockDriverState *bs); - int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts, - Error **errp); -- int coroutine_fn (*bdrv_co_create_opts)(const char *filename, -+ int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp); - int (*bdrv_make_empty)(BlockDriverState *bs); --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch b/SOURCES/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch deleted file mode 100644 index 2c27fd2..0000000 --- a/SOURCES/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch +++ /dev/null @@ -1,78 +0,0 @@ -From ec5408763c49cd0b63ee324bdc38a429ed1adeee Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 
12:34:29 +0000 -Subject: [PATCH 09/20] block/qcow2: Move bitmap reopen into - bdrv_reopen_commit_post - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-4-kwolf@redhat.com> -Patchwork-id: 94280 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 03/13] block/qcow2: Move bitmap reopen into bdrv_reopen_commit_post -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -From: Peter Krempa - -The bitmap code requires writing the 'file' child when the qcow2 driver -is reopened in read-write mode. - -If the 'file' child is being reopened due to a permissions change, the -modification is commited yet when qcow2_reopen_commit is called. This -means that any attempt to write the 'file' child will end with EBADFD -as the original fd was already closed. - -Moving bitmap reopening to the new callback which is called after -permission modifications are commited fixes this as the file descriptor -will be replaced with the correct one. - -The above problem manifests itself when reopening 'qcow2' format layer -which uses a 'file-posix' file child which was opened with the -'auto-read-only' property set. - -Signed-off-by: Peter Krempa -Message-Id: -Signed-off-by: Kevin Wolf -(cherry picked from commit 65eb7c85a3e62529e2bad782e94d5a7b11dd5a92) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/qcow2.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/block/qcow2.c b/block/qcow2.c -index 7c18721..83b1fc0 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -1881,6 +1881,11 @@ fail: - static void qcow2_reopen_commit(BDRVReopenState *state) - { - qcow2_update_options_commit(state->bs, state->opaque); -+ g_free(state->opaque); -+} -+ -+static void qcow2_reopen_commit_post(BDRVReopenState *state) -+{ - if (state->flags & BDRV_O_RDWR) { - Error *local_err = NULL; - -@@ -1895,7 +1900,6 @@ static void qcow2_reopen_commit(BDRVReopenState *state) - bdrv_get_node_name(state->bs)); - } - } -- g_free(state->opaque); - } - - static void qcow2_reopen_abort(BDRVReopenState *state) -@@ -5492,6 +5496,7 @@ BlockDriver bdrv_qcow2 = { - .bdrv_close = qcow2_close, - .bdrv_reopen_prepare = qcow2_reopen_prepare, - .bdrv_reopen_commit = qcow2_reopen_commit, -+ .bdrv_reopen_commit_post = qcow2_reopen_commit_post, - .bdrv_reopen_abort = qcow2_reopen_abort, - .bdrv_join_options = qcow2_join_options, - .bdrv_child_perm = bdrv_format_default_perms, --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch b/SOURCES/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch new file mode 100644 index 0000000..2d8f3b4 --- /dev/null +++ b/SOURCES/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch @@ -0,0 +1,59 @@ +From f4b7133d7aeb1d0b9115d01b5cff4df7f6b24e78 Mon Sep 17 00:00:00 2001 +From: Peter Lieven +Date: Thu, 13 Jan 2022 15:44:25 +0100 +Subject: [PATCH 5/6] block/rbd: fix handling of holes in .bdrv_co_block_status + +RH-Author: Stefano Garzarella +RH-MergeRequest: 110: block/rbd: fix handling of holes in .bdrv_co_block_status +RH-Commit: [1/2] 352656a5c77cc7855b476c3559a10c6aa64a4f58 +RH-Bugzilla: 2037135 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Reitz + +the assumption that we can't hit a hole if we do not diff against a snapshot was wrong. 
+ +We can see a hole in an image if we diff against base if there exists an older snapshot +of the image and we have discarded blocks in the image where the snapshot has data. + +Fix this by simply handling a hole like an unallocated area. There are no callbacks +for unallocated areas so just bail out if we hit a hole. + +Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b +Suggested-by: Ilya Dryomov +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Lieven +Message-Id: <20220113144426.4036493-2-pl@kamp.de> +Reviewed-by: Ilya Dryomov +Reviewed-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +(cherry picked from commit 9e302f64bb407a9bb097b626da97228c2654cfee) +Signed-off-by: Stefano Garzarella +--- + block/rbd.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/block/rbd.c b/block/rbd.c +index def96292e0..20bb896c4a 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -1279,11 +1279,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, + RBDDiffIterateReq *req = opaque; + + assert(req->offs + req->bytes <= offs); +- /* +- * we do not diff against a snapshot so we should never receive a callback +- * for a hole. 
+- */ +- assert(exists); ++ ++ /* treat a hole like an unallocated area and bail out */ ++ if (!exists) { ++ return 0; ++ } + + if (!req->exists && offs > req->offs) { + /* +-- +2.27.0 + diff --git a/SOURCES/kvm-block-rbd-workaround-for-ceph-issue-53784.patch b/SOURCES/kvm-block-rbd-workaround-for-ceph-issue-53784.patch new file mode 100644 index 0000000..7e052f2 --- /dev/null +++ b/SOURCES/kvm-block-rbd-workaround-for-ceph-issue-53784.patch @@ -0,0 +1,103 @@ +From 8c50eedf03d8e62acd387b9aa9369dadcea9324c Mon Sep 17 00:00:00 2001 +From: Peter Lieven +Date: Thu, 13 Jan 2022 15:44:26 +0100 +Subject: [PATCH 6/6] block/rbd: workaround for ceph issue #53784 + +RH-Author: Stefano Garzarella +RH-MergeRequest: 110: block/rbd: fix handling of holes in .bdrv_co_block_status +RH-Commit: [2/2] 1384557462e89bb539d0d25a1a471ad738fb9e89 +RH-Bugzilla: 2037135 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Reitz + +librbd had a bug until early 2022 that affected all versions of ceph that +supported fast-diff. This bug results in reporting of incorrect offsets +if the offset parameter to rbd_diff_iterate2 is not object aligned. + +This patch works around this bug for pre Quincy versions of librbd. 
+ +Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Lieven +Message-Id: <20220113144426.4036493-3-pl@kamp.de> +Reviewed-by: Ilya Dryomov +Reviewed-by: Stefano Garzarella +Tested-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +(cherry picked from commit fc176116cdea816ceb8dd969080b2b95f58edbc0) +Signed-off-by: Stefano Garzarella +--- + block/rbd.c | 42 ++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 40 insertions(+), 2 deletions(-) + +diff --git a/block/rbd.c b/block/rbd.c +index 20bb896c4a..8f183eba2a 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -1320,6 +1320,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + int status, r; + RBDDiffIterateReq req = { .offs = offset }; + uint64_t features, flags; ++ uint64_t head = 0; + + assert(offset + bytes <= s->image_size); + +@@ -1347,7 +1348,43 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + return status; + } + +- r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, ++#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0) ++ /* ++ * librbd had a bug until early 2022 that affected all versions of ceph that ++ * supported fast-diff. This bug results in reporting of incorrect offsets ++ * if the offset parameter to rbd_diff_iterate2 is not object aligned. ++ * Work around this bug by rounding down the offset to object boundaries. ++ * This is OK because we call rbd_diff_iterate2 with whole_object = true. ++ * However, this workaround only works for non cloned images with default ++ * striping. ++ * ++ * See: https://tracker.ceph.com/issues/53784 ++ */ ++ ++ /* check if RBD image has non-default striping enabled */ ++ if (features & RBD_FEATURE_STRIPINGV2) { ++ return status; ++ } ++ ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ /* ++ * check if RBD image is a clone (= has a parent). 
++ * ++ * rbd_get_parent_info is deprecated from Nautilus onwards, but the ++ * replacement rbd_get_parent is not present in Luminous and Mimic. ++ */ ++ if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) { ++ return status; ++ } ++#pragma GCC diagnostic pop ++ ++ head = req.offs & (s->object_size - 1); ++ req.offs -= head; ++ bytes += head; ++#endif ++ ++ r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true, + qemu_rbd_diff_iterate_cb, &req); + if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) { + return status; +@@ -1366,7 +1403,8 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID; + } + +- *pnum = req.bytes; ++ assert(req.bytes > head); ++ *pnum = req.bytes - head; + return status; + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-block-trickle-down-the-fallback-image-creation-funct.patch b/SOURCES/kvm-block-trickle-down-the-fallback-image-creation-funct.patch deleted file mode 100644 index 5ba1521..0000000 --- a/SOURCES/kvm-block-trickle-down-the-fallback-image-creation-funct.patch +++ /dev/null @@ -1,296 +0,0 @@ -From a1f7b929ae1fe6fa424c520c3a5eb497333b0fd9 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Thu, 26 Mar 2020 20:23:07 +0000 -Subject: [PATCH 2/4] block: trickle down the fallback image creation function - use to the block drivers - -RH-Author: Maxim Levitsky -Message-id: <20200326202307.9264-3-mlevitsk@redhat.com> -Patchwork-id: 94446 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] block: trickle down the fallback image creation function use to the block drivers -Bugzilla: 1816007 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz - -Instead of checking the .bdrv_co_create_opts to see if we need the -fallback, just implement the .bdrv_co_create_opts in the drivers that -need it. 
- -This way we don't break various places that need to know if the -underlying protocol/format really supports image creation, and this way -we still allow some drivers to not support image creation. - -Fixes: fd17146cd93d1704cd96d7c2757b325fc7aac6fd -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1816007 - -Note that technically this driver reverts the image creation fallback -for the vxhs driver since I don't have a means to test it, and IMHO it -is better to leave it not supported as it was prior to generic image -creation patches. - -Also drop iscsi_create_opts which was left accidentally. - -Signed-off-by: Maxim Levitsky -Message-Id: <20200326011218.29230-3-mlevitsk@redhat.com> -Reviewed-by: Denis V. Lunev -[mreitz: Fixed alignment, and moved bdrv_co_create_opts_simple() and - bdrv_create_opts_simple from block.h into block_int.h] -Signed-off-by: Max Reitz -(cherry picked from commit 5a5e7f8cd86b7ced0732b1b6e28c82baa65b09c9) - -Contextual conflicts in block.c and include/block/block_int.h - -(conflict in block.c by default shows as functional but -with --diff-algorithm=patience it becomes a contextual conflict) - -... -001/2:[----] [--] 'block: pass BlockDriver reference to the .bdrv_co_create' -002/2:[0014] [FC] 'block: trickle down the fallback image creation function use to the block drivers' -... -002/2: 'meld <(git show 5a5e7f8^\!) <(git show 6d3bca5^\!)' - -So now running: -meld <(git show 5a5e7f8^\! --diff-algorithm=patience) <(git show 6d3bca5^\! --diff-algorithm=patience) - -shows no contextual conflicts -It is mostly due to missing commit f6dc1c31d3801dcbdf0c56574f9ff4f05180810c -Thanks to Max Reitz for helping me with this. - -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 35 ++++++++++++++++++++--------------- - block/file-posix.c | 7 ++++++- - block/iscsi.c | 16 ++++------------ - block/nbd.c | 6 ++++++ - block/nvme.c | 3 +++ - include/block/block.h | 1 + - include/block/block_int.h | 11 +++++++++++ - 7 files changed, 51 insertions(+), 28 deletions(-) - -diff --git a/block.c b/block.c -index f9a1c5b..ba3b40d7 100644 ---- a/block.c -+++ b/block.c -@@ -597,8 +597,15 @@ static int create_file_fallback_zero_first_sector(BlockBackend *blk, - return 0; - } - --static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, -- QemuOpts *opts, Error **errp) -+/** -+ * Simple implementation of bdrv_co_create_opts for protocol drivers -+ * which only support creation via opening a file -+ * (usually existing raw storage device) -+ */ -+int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, -+ Error **errp) - { - BlockBackend *blk; - QDict *options; -@@ -662,11 +669,7 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) - return -ENOENT; - } - -- if (drv->bdrv_co_create_opts) { -- return bdrv_create(drv, filename, opts, errp); -- } else { -- return bdrv_create_file_fallback(filename, drv, opts, errp); -- } -+ return bdrv_create(drv, filename, opts, errp); - } - - /** -@@ -1543,9 +1546,9 @@ QemuOptsList bdrv_runtime_opts = { - }, - }; - --static QemuOptsList fallback_create_opts = { -- .name = "fallback-create-opts", -- .head = QTAILQ_HEAD_INITIALIZER(fallback_create_opts.head), -+QemuOptsList bdrv_create_opts_simple = { -+ .name = "simple-create-opts", -+ .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head), - .desc = { - { - .name = BLOCK_OPT_SIZE, -@@ -5910,13 +5913,15 @@ void bdrv_img_create(const char *filename, const char *fmt, - return; - } - -- create_opts = qemu_opts_append(create_opts, drv->create_opts); -- if (proto_drv->create_opts) { -- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); -- } 
else { -- create_opts = qemu_opts_append(create_opts, &fallback_create_opts); -+ if (!proto_drv->create_opts) { -+ error_setg(errp, "Protocol driver '%s' does not support image creation", -+ proto_drv->format_name); -+ return; - } - -+ create_opts = qemu_opts_append(create_opts, drv->create_opts); -+ create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); -+ - /* Create parameter list with default values */ - opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); - qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); -diff --git a/block/file-posix.c b/block/file-posix.c -index a2e0a74..dd18d40 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -3432,6 +3432,8 @@ static BlockDriver bdrv_host_device = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .mutable_opts = mutable_opts, - .bdrv_co_invalidate_cache = raw_co_invalidate_cache, - .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, -@@ -3558,10 +3560,11 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .mutable_opts = mutable_opts, - .bdrv_co_invalidate_cache = raw_co_invalidate_cache, - -- - .bdrv_co_preadv = raw_co_preadv, - .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_co_flush_to_disk = raw_co_flush_to_disk, -@@ -3690,6 +3693,8 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .mutable_opts = mutable_opts, - - .bdrv_co_preadv = raw_co_preadv, -diff --git 
a/block/iscsi.c b/block/iscsi.c -index b45da65..16b0716 100644 ---- a/block/iscsi.c -+++ b/block/iscsi.c -@@ -2399,18 +2399,6 @@ out_unlock: - return r; - } - --static QemuOptsList iscsi_create_opts = { -- .name = "iscsi-create-opts", -- .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head), -- .desc = { -- { -- .name = BLOCK_OPT_SIZE, -- .type = QEMU_OPT_SIZE, -- .help = "Virtual disk size" -- }, -- { /* end of list */ } -- } --}; - - static const char *const iscsi_strong_runtime_opts[] = { - "transport", -@@ -2434,6 +2422,8 @@ static BlockDriver bdrv_iscsi = { - .bdrv_parse_filename = iscsi_parse_filename, - .bdrv_file_open = iscsi_open, - .bdrv_close = iscsi_close, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_reopen_prepare = iscsi_reopen_prepare, - .bdrv_reopen_commit = iscsi_reopen_commit, - .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, -@@ -2471,6 +2461,8 @@ static BlockDriver bdrv_iser = { - .bdrv_parse_filename = iscsi_parse_filename, - .bdrv_file_open = iscsi_open, - .bdrv_close = iscsi_close, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_reopen_prepare = iscsi_reopen_prepare, - .bdrv_reopen_commit = iscsi_reopen_commit, - .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, -diff --git a/block/nbd.c b/block/nbd.c -index a73f0d9..927915d 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -2030,6 +2030,8 @@ static BlockDriver bdrv_nbd = { - .protocol_name = "nbd", - .instance_size = sizeof(BDRVNBDState), - .bdrv_parse_filename = nbd_parse_filename, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_file_open = nbd_open, - .bdrv_reopen_prepare = nbd_client_reopen_prepare, - .bdrv_co_preadv = nbd_client_co_preadv, -@@ -2055,6 +2057,8 @@ static BlockDriver bdrv_nbd_tcp = { - .protocol_name = "nbd+tcp", - .instance_size = sizeof(BDRVNBDState), - .bdrv_parse_filename = 
nbd_parse_filename, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_file_open = nbd_open, - .bdrv_reopen_prepare = nbd_client_reopen_prepare, - .bdrv_co_preadv = nbd_client_co_preadv, -@@ -2080,6 +2084,8 @@ static BlockDriver bdrv_nbd_unix = { - .protocol_name = "nbd+unix", - .instance_size = sizeof(BDRVNBDState), - .bdrv_parse_filename = nbd_parse_filename, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_file_open = nbd_open, - .bdrv_reopen_prepare = nbd_client_reopen_prepare, - .bdrv_co_preadv = nbd_client_co_preadv, -diff --git a/block/nvme.c b/block/nvme.c -index d41c4bd..7b7c0cc 100644 ---- a/block/nvme.c -+++ b/block/nvme.c -@@ -1333,6 +1333,9 @@ static BlockDriver bdrv_nvme = { - .protocol_name = "nvme", - .instance_size = sizeof(BDRVNVMeState), - -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, -+ - .bdrv_parse_filename = nvme_parse_filename, - .bdrv_file_open = nvme_file_open, - .bdrv_close = nvme_close, -diff --git a/include/block/block.h b/include/block/block.h -index 1df9848..92685d2 100644 ---- a/include/block/block.h -+++ b/include/block/block.h -@@ -293,6 +293,7 @@ BlockDriver *bdrv_find_format(const char *format_name); - int bdrv_create(BlockDriver *drv, const char* filename, - QemuOpts *opts, Error **errp); - int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); -+ - BlockDriverState *bdrv_new(void); - void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - Error **errp); -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 7ff81be..529f153 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -1325,4 +1325,15 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, - - int refresh_total_sectors(BlockDriverState *bs, int64_t hint); - -+/** -+ * Simple implementation of 
bdrv_co_create_opts for protocol drivers -+ * which only support creation via opening a file -+ * (usually existing raw storage device) -+ */ -+int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, -+ Error **errp); -+extern QemuOptsList bdrv_create_opts_simple; -+ - #endif /* BLOCK_INT_H */ --- -1.8.3.1 - diff --git a/SOURCES/kvm-block-truncate-Don-t-make-backing-file-data-visible.patch b/SOURCES/kvm-block-truncate-Don-t-make-backing-file-data-visible.patch deleted file mode 100644 index 114e1b7..0000000 --- a/SOURCES/kvm-block-truncate-Don-t-make-backing-file-data-visible.patch +++ /dev/null @@ -1,94 +0,0 @@ -From d84b9b93755ece6618ed98fa84386beeb1a0e40b Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 8 Jun 2020 15:01:36 +0100 -Subject: [PATCH 08/17] block: truncate: Don't make backing file data visible - -RH-Author: Kevin Wolf -Message-id: <20200608150140.38218-8-kwolf@redhat.com> -Patchwork-id: 97454 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 07/11] block: truncate: Don't make backing file data visible -Bugzilla: 1780574 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz - -When extending the size of an image that has a backing file larger than -its old size, make sure that the backing file data doesn't become -visible in the guest, but the added area is properly zeroed out. - -Consider the following scenario where the overlay is shorter than its -backing file: - - base.qcow2: AAAAAAAA - overlay.qcow2: BBBB - -When resizing (extending) overlay.qcow2, the new blocks should not stay -unallocated and make the additional As from base.qcow2 visible like -before this patch, but zeros should be read. 
- -A similar case happens with the various variants of a commit job when an -intermediate file is short (- for unallocated): - - base.qcow2: A-A-AAAA - mid.qcow2: BB-B - top.qcow2: C--C--C- - -After commit top.qcow2 to mid.qcow2, the following happens: - - mid.qcow2: CB-C00C0 (correct result) - mid.qcow2: CB-C--C- (before this fix) - -Without the fix, blocks that previously read as zeros on top.qcow2 -suddenly turn into A. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200424125448.63318-8-kwolf@redhat.com> -Reviewed-by: Max Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 955c7d6687fefcd903900a1e597fcbc896c661cd) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/io.c | 25 +++++++++++++++++++++++++ - 1 file changed, 25 insertions(+) - -diff --git a/block/io.c b/block/io.c -index 3235ce5..6c70b56 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -3370,6 +3370,31 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact, - goto out; - } - -+ /* -+ * If the image has a backing file that is large enough that it would -+ * provide data for the new area, we cannot leave it unallocated because -+ * then the backing file content would become visible. Instead, zero-fill -+ * the new area. -+ * -+ * Note that if the image has a backing file, but was opened without the -+ * backing file, taking care of keeping things consistent with that backing -+ * file is the user's responsibility. 
-+ */ -+ if (new_bytes && bs->backing) { -+ int64_t backing_len; -+ -+ backing_len = bdrv_getlength(backing_bs(bs)); -+ if (backing_len < 0) { -+ ret = backing_len; -+ error_setg_errno(errp, -ret, "Could not get backing file size"); -+ goto out; -+ } -+ -+ if (backing_len > old_size) { -+ flags |= BDRV_REQ_ZERO_WRITE; -+ } -+ } -+ - if (drv->bdrv_co_truncate) { - if (flags & ~bs->supported_truncate_flags) { - error_setg(errp, "Block driver does not support requested flags"); --- -1.8.3.1 - diff --git a/SOURCES/kvm-block.c-adding-bdrv_co_delete_file.patch b/SOURCES/kvm-block.c-adding-bdrv_co_delete_file.patch deleted file mode 100644 index 91c3cd1..0000000 --- a/SOURCES/kvm-block.c-adding-bdrv_co_delete_file.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 23b92512d7f11b3a38cf24a5c2fe7848f1e550e8 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Sun, 31 May 2020 16:40:34 +0100 -Subject: [PATCH 6/7] block.c: adding bdrv_co_delete_file -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Maxim Levitsky -Message-id: <20200531164035.34188-3-mlevitsk@redhat.com> -Patchwork-id: 97058 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/3] block.c: adding bdrv_co_delete_file -Bugzilla: 1827630 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: John Snow -RH-Acked-by: Eric Blake - -From: Daniel Henrique Barboza - -Using the new 'bdrv_co_delete_file' interface, a pure co_routine function -'bdrv_co_delete_file' inside block.c can can be used in a way similar of -the existing bdrv_create_file to to clean up a created file. - -We're creating a pure co_routine because the only caller of -'bdrv_co_delete_file' will be already in co_routine context, thus there -is no need to add all the machinery to check for qemu_in_coroutine() and -create a separated co_routine to do the job. - -Suggested-by: Daniel P. 
Berrangé -Signed-off-by: Daniel Henrique Barboza -Message-Id: <20200130213907.2830642-3-danielhb413@gmail.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit e1d7f8bb1ec0c6911dcea81641ce6139dbded02d) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. de Paula ---- - block.c | 26 ++++++++++++++++++++++++++ - include/block/block.h | 1 + - 2 files changed, 27 insertions(+) - -diff --git a/block.c b/block.c -index ba3b40d7..d6a05da 100644 ---- a/block.c -+++ b/block.c -@@ -672,6 +672,32 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) - return bdrv_create(drv, filename, opts, errp); - } - -+int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp) -+{ -+ Error *local_err = NULL; -+ int ret; -+ -+ assert(bs != NULL); -+ -+ if (!bs->drv) { -+ error_setg(errp, "Block node '%s' is not opened", bs->filename); -+ return -ENOMEDIUM; -+ } -+ -+ if (!bs->drv->bdrv_co_delete_file) { -+ error_setg(errp, "Driver '%s' does not support image deletion", -+ bs->drv->format_name); -+ return -ENOTSUP; -+ } -+ -+ ret = bs->drv->bdrv_co_delete_file(bs, &local_err); -+ if (ret < 0) { -+ error_propagate(errp, local_err); -+ } -+ -+ return ret; -+} -+ - /** - * Try to get @bs's logical and physical block size. - * On success, store them in @bsz struct and return 0. 
-diff --git a/include/block/block.h b/include/block/block.h -index 92685d2..b2a3074 100644 ---- a/include/block/block.h -+++ b/include/block/block.h -@@ -373,6 +373,7 @@ bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, - int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base, - Error **errp); - void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base); -+int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp); - - - typedef struct BdrvCheckResult { --- -1.8.3.1 - diff --git a/SOURCES/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch b/SOURCES/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch deleted file mode 100644 index 9a69130..0000000 --- a/SOURCES/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch +++ /dev/null @@ -1,176 +0,0 @@ -From dc2654f2319ad6c379e0ba10be143726c6f0e9e0 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:47 +0000 -Subject: [PATCH 14/18] blockdev: Acquire AioContext on dirty bitmap functions - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-8-slp@redhat.com> -Patchwork-id: 93760 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 7/9] blockdev: Acquire AioContext on dirty bitmap functions -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Dirty map addition and removal functions are not acquiring to BDS -AioContext, while they may call to code that expects it to be -acquired. 
- -This may trigger a crash with a stack trace like this one: - - #0 0x00007f0ef146370f in __GI_raise (sig=sig@entry=6) - at ../sysdeps/unix/sysv/linux/raise.c:50 - #1 0x00007f0ef144db25 in __GI_abort () at abort.c:79 - #2 0x0000565022294dce in error_exit - (err=, msg=msg@entry=0x56502243a730 <__func__.16350> "qemu_mutex_unlock_impl") at util/qemu-thread-posix.c:36 - #3 0x00005650222950ba in qemu_mutex_unlock_impl - (mutex=mutex@entry=0x5650244b0240, file=file@entry=0x565022439adf "util/async.c", line=line@entry=526) at util/qemu-thread-posix.c:108 - #4 0x0000565022290029 in aio_context_release - (ctx=ctx@entry=0x5650244b01e0) at util/async.c:526 - #5 0x000056502221cd08 in bdrv_can_store_new_dirty_bitmap - (bs=bs@entry=0x5650244dc820, name=name@entry=0x56502481d360 "bitmap1", granularity=granularity@entry=65536, errp=errp@entry=0x7fff22831718) - at block/dirty-bitmap.c:542 - #6 0x000056502206ae53 in qmp_block_dirty_bitmap_add - (errp=0x7fff22831718, disabled=false, has_disabled=, persistent=, has_persistent=true, granularity=65536, has_granularity=, name=0x56502481d360 "bitmap1", node=) at blockdev.c:2894 - #7 0x000056502206ae53 in qmp_block_dirty_bitmap_add - (node=, name=0x56502481d360 "bitmap1", has_granularity=, granularity=, has_persistent=true, persistent=, has_disabled=false, disabled=false, errp=0x7fff22831718) at blockdev.c:2856 - #8 0x00005650221847a3 in qmp_marshal_block_dirty_bitmap_add - (args=, ret=, errp=0x7fff22831798) - at qapi/qapi-commands-block-core.c:651 - #9 0x0000565022247e6c in do_qmp_dispatch - (errp=0x7fff22831790, allow_oob=, request=, cmds=0x565022b32d60 ) at qapi/qmp-dispatch.c:132 - #10 0x0000565022247e6c in qmp_dispatch - (cmds=0x565022b32d60 , request=, allow_oob=) at qapi/qmp-dispatch.c:175 - #11 0x0000565022166061 in monitor_qmp_dispatch - (mon=0x56502450faa0, req=) at monitor/qmp.c:145 - #12 0x00005650221666fa in monitor_qmp_bh_dispatcher - (data=) at monitor/qmp.c:234 - #13 0x000056502228f866 in aio_bh_call (bh=0x56502440eae0) - 
at util/async.c:117 - #14 0x000056502228f866 in aio_bh_poll (ctx=ctx@entry=0x56502440d7a0) - at util/async.c:117 - #15 0x0000565022292c54 in aio_dispatch (ctx=0x56502440d7a0) - at util/aio-posix.c:459 - #16 0x000056502228f742 in aio_ctx_dispatch - (source=, callback=, user_data=) at util/async.c:260 - #17 0x00007f0ef5ce667d in g_main_dispatch (context=0x56502449aa40) - at gmain.c:3176 - #18 0x00007f0ef5ce667d in g_main_context_dispatch - (context=context@entry=0x56502449aa40) at gmain.c:3829 - #19 0x0000565022291d08 in glib_pollfds_poll () at util/main-loop.c:219 - #20 0x0000565022291d08 in os_host_main_loop_wait - (timeout=) at util/main-loop.c:242 - #21 0x0000565022291d08 in main_loop_wait (nonblocking=) - at util/main-loop.c:518 - #22 0x00005650220743c1 in main_loop () at vl.c:1828 - #23 0x0000565021f20a72 in main - (argc=, argv=, envp=) - at vl.c:4504 - -Fix this by acquiring the AioContext at qmp_block_dirty_bitmap_add() -and qmp_block_dirty_bitmap_add(). - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1782175 -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 91005a495e228ebd7e5e173cd18f952450eef82d) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - blockdev.c | 22 ++++++++++++++++++---- - 1 file changed, 18 insertions(+), 4 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 1dacbc2..d4ef6cd 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -2984,6 +2984,7 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; -+ AioContext *aio_context; - - if (!name || name[0] == '\0') { - error_setg(errp, "Bitmap name cannot be empty"); -@@ -2995,11 +2996,14 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - return; - } - -+ aio_context = bdrv_get_aio_context(bs); -+ aio_context_acquire(aio_context); -+ - if (has_granularity) { - if (granularity < 512 || !is_power_of_2(granularity)) { - error_setg(errp, "Granularity must be power of 2 " - "and at least 512"); -- return; -+ goto out; - } - } else { - /* Default to cluster size, if available: */ -@@ -3017,12 +3021,12 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - if (persistent && - !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) - { -- return; -+ goto out; - } - - bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); - if (bitmap == NULL) { -- return; -+ goto out; - } - - if (disabled) { -@@ -3030,6 +3034,9 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - } - - bdrv_dirty_bitmap_set_persistence(bitmap, persistent); -+ -+out: -+ aio_context_release(aio_context); - } - - static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( -@@ -3038,21 +3045,27 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; -+ AioContext *aio_context; - - bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); - if (!bitmap || !bs) { - return NULL; - } - -+ aio_context = bdrv_get_aio_context(bs); -+ aio_context_acquire(aio_context); -+ - if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, - errp)) { -+ aio_context_release(aio_context); - 
return NULL; - } - - if (bdrv_dirty_bitmap_get_persistence(bitmap) && - bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) - { -- return NULL; -+ aio_context_release(aio_context); -+ return NULL; - } - - if (release) { -@@ -3063,6 +3076,7 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( - *bitmap_bs = bs; - } - -+ aio_context_release(aio_context); - return release ? NULL : bitmap; - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-blockdev-Promote-several-bitmap-functions-to-non-sta.patch b/SOURCES/kvm-blockdev-Promote-several-bitmap-functions-to-non-sta.patch deleted file mode 100644 index 8cb1700..0000000 --- a/SOURCES/kvm-blockdev-Promote-several-bitmap-functions-to-non-sta.patch +++ /dev/null @@ -1,179 +0,0 @@ -From 0c8ba0a96a7d0cbf371f1a5fbee543e8b2cb2595 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 Jun 2020 02:34:13 +0100 -Subject: [PATCH 08/26] blockdev: Promote several bitmap functions to - non-static -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Blake -Message-id: <20200602023420.2133649-6-eblake@redhat.com> -Patchwork-id: 97077 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 05/12] blockdev: Promote several bitmap functions to non-static -Bugzilla: 1779893 1779904 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -The next patch will split blockdev.c, which will require accessing -some previously-static functions from more than one .c file. But part -of promoting a function to public is picking a naming scheme that does -not reek of exposing too many internals (two of the three functions -were named starting with 'do_'). To make future code motion easier, -perform the function rename and non-static promotion into its own -patch. 
- -Signed-off-by: Eric Blake -Reviewed-by: Max Reitz -Message-Id: <20200513011648.166876-5-eblake@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit c6996cf9a6c759c29919642be9a73ac64b38301b) -Signed-off-by: Eric Blake -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 47 +++++++++++++++++++---------------------------- - include/block/block_int.h | 12 ++++++++++++ - 2 files changed, 31 insertions(+), 28 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 86eb115..3958058 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1262,10 +1262,10 @@ out_aio_context: - * - * @return: A bitmap object on success, or NULL on failure. - */ --static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, -- const char *name, -- BlockDriverState **pbs, -- Error **errp) -+BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, -+ const char *name, -+ BlockDriverState **pbs, -+ Error **errp) - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; -@@ -2241,11 +2241,6 @@ static void block_dirty_bitmap_disable_abort(BlkActionState *common) - } - } - --static BdrvDirtyBitmap *do_block_dirty_bitmap_merge( -- const char *node, const char *target, -- BlockDirtyBitmapMergeSourceList *bitmaps, -- HBitmap **backup, Error **errp); -- - static void block_dirty_bitmap_merge_prepare(BlkActionState *common, - Error **errp) - { -@@ -2259,15 +2254,11 @@ static void block_dirty_bitmap_merge_prepare(BlkActionState *common, - - action = common->action->u.block_dirty_bitmap_merge.data; - -- state->bitmap = do_block_dirty_bitmap_merge(action->node, action->target, -- action->bitmaps, &state->backup, -- errp); -+ state->bitmap = block_dirty_bitmap_merge(action->node, action->target, -+ action->bitmaps, &state->backup, -+ errp); - } - --static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( -- const char *node, const char *name, bool release, -- BlockDriverState **bitmap_bs, Error **errp); -- - static void block_dirty_bitmap_remove_prepare(BlkActionState 
*common, - Error **errp) - { -@@ -2281,8 +2272,8 @@ static void block_dirty_bitmap_remove_prepare(BlkActionState *common, - - action = common->action->u.block_dirty_bitmap_remove.data; - -- state->bitmap = do_block_dirty_bitmap_remove(action->node, action->name, -- false, &state->bs, errp); -+ state->bitmap = block_dirty_bitmap_remove(action->node, action->name, -+ false, &state->bs, errp); - if (state->bitmap) { - bdrv_dirty_bitmap_skip_store(state->bitmap, true); - bdrv_dirty_bitmap_set_busy(state->bitmap, true); -@@ -3046,9 +3037,10 @@ out: - aio_context_release(aio_context); - } - --static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( -- const char *node, const char *name, bool release, -- BlockDriverState **bitmap_bs, Error **errp) -+BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, -+ bool release, -+ BlockDriverState **bitmap_bs, -+ Error **errp) - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; -@@ -3090,7 +3082,7 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( - void qmp_block_dirty_bitmap_remove(const char *node, const char *name, - Error **errp) - { -- do_block_dirty_bitmap_remove(node, name, true, NULL, errp); -+ block_dirty_bitmap_remove(node, name, true, NULL, errp); - } - - /** -@@ -3151,10 +3143,9 @@ void qmp_block_dirty_bitmap_disable(const char *node, const char *name, - bdrv_disable_dirty_bitmap(bitmap); - } - --static BdrvDirtyBitmap *do_block_dirty_bitmap_merge( -- const char *node, const char *target, -- BlockDirtyBitmapMergeSourceList *bitmaps, -- HBitmap **backup, Error **errp) -+BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, -+ BlockDirtyBitmapMergeSourceList *bms, -+ HBitmap **backup, Error **errp) - { - BlockDriverState *bs; - BdrvDirtyBitmap *dst, *src, *anon; -@@ -3172,7 +3163,7 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_merge( - return NULL; - } - -- for (lst = bitmaps; lst; lst = lst->next) { -+ for (lst = bms; lst; lst = lst->next) { - switch 
(lst->value->type) { - const char *name, *node; - case QTYPE_QSTRING: -@@ -3217,7 +3208,7 @@ void qmp_block_dirty_bitmap_merge(const char *node, const char *target, - BlockDirtyBitmapMergeSourceList *bitmaps, - Error **errp) - { -- do_block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp); -+ block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp); - } - - BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node, -diff --git a/include/block/block_int.h b/include/block/block_int.h -index cc18e8d..876a83d 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -1341,4 +1341,16 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, - Error **errp); - extern QemuOptsList bdrv_create_opts_simple; - -+BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, -+ const char *name, -+ BlockDriverState **pbs, -+ Error **errp); -+BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, -+ BlockDirtyBitmapMergeSourceList *bms, -+ HBitmap **backup, Error **errp); -+BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, -+ bool release, -+ BlockDriverState **bitmap_bs, -+ Error **errp); -+ - #endif /* BLOCK_INT_H */ --- -1.8.3.1 - diff --git a/SOURCES/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch b/SOURCES/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch deleted file mode 100644 index b2dd453..0000000 --- a/SOURCES/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 24e5eca4218b294bd013e2d85a38345045506bec Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:48 +0000 -Subject: [PATCH 15/18] blockdev: Return bs to the proper context on snapshot - abort - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-9-slp@redhat.com> -Patchwork-id: 93761 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 8/9] blockdev: Return bs to the proper context on snapshot 
abort -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -external_snapshot_abort() calls to bdrv_set_backing_hd(), which -returns state->old_bs to the main AioContext, as it's intended to be -used then the BDS is going to be released. As that's not the case when -aborting an external snapshot, return it to the AioContext it was -before the call. - -This issue can be triggered by issuing a transaction with two actions, -a proper blockdev-snapshot-sync and a bogus one, so the second will -trigger a transaction abort. This results in a crash with an stack -trace like this one: - - #0 0x00007fa1048b28df in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 - #1 0x00007fa10489ccf5 in __GI_abort () at abort.c:79 - #2 0x00007fa10489cbc9 in __assert_fail_base - (fmt=0x7fa104a03300 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=0x557224014d30 "block.c", line=2240, function=) at assert.c:92 - #3 0x00007fa1048aae96 in __GI___assert_fail - (assertion=assertion@entry=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=file@entry=0x557224014d30 "block.c", line=line@entry=2240, function=function@entry=0x5572240b5d60 <__PRETTY_FUNCTION__.31620> "bdrv_replace_child_noperm") at assert.c:101 - #4 0x0000557223e631f8 in bdrv_replace_child_noperm (child=0x557225b9c980, new_bs=new_bs@entry=0x557225c42e40) at block.c:2240 - #5 0x0000557223e68be7 in bdrv_replace_node (from=0x557226951a60, to=0x557225c42e40, errp=0x5572247d6138 ) at block.c:4196 - #6 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1731 - #7 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1717 - #8 0x0000557223d09013 in qmp_transaction (dev_list=, has_props=, props=0x557225cc7d70, 
errp=errp@entry=0x7ffe704c0c98) at blockdev.c:2360 - #9 0x0000557223e32085 in qmp_marshal_transaction (args=, ret=, errp=0x7ffe704c0d08) at qapi/qapi-commands-transaction.c:44 - #10 0x0000557223ee798c in do_qmp_dispatch (errp=0x7ffe704c0d00, allow_oob=, request=, cmds=0x5572247d3cc0 ) at qapi/qmp-dispatch.c:132 - #11 0x0000557223ee798c in qmp_dispatch (cmds=0x5572247d3cc0 , request=, allow_oob=) at qapi/qmp-dispatch.c:175 - #12 0x0000557223e06141 in monitor_qmp_dispatch (mon=0x557225c69ff0, req=) at monitor/qmp.c:120 - #13 0x0000557223e0678a in monitor_qmp_bh_dispatcher (data=) at monitor/qmp.c:209 - #14 0x0000557223f2f366 in aio_bh_call (bh=0x557225b9dc60) at util/async.c:117 - #15 0x0000557223f2f366 in aio_bh_poll (ctx=ctx@entry=0x557225b9c840) at util/async.c:117 - #16 0x0000557223f32754 in aio_dispatch (ctx=0x557225b9c840) at util/aio-posix.c:459 - #17 0x0000557223f2f242 in aio_ctx_dispatch (source=, callback=, user_data=) at util/async.c:260 - #18 0x00007fa10913467d in g_main_dispatch (context=0x557225c28e80) at gmain.c:3176 - #19 0x00007fa10913467d in g_main_context_dispatch (context=context@entry=0x557225c28e80) at gmain.c:3829 - #20 0x0000557223f31808 in glib_pollfds_poll () at util/main-loop.c:219 - #21 0x0000557223f31808 in os_host_main_loop_wait (timeout=) at util/main-loop.c:242 - #22 0x0000557223f31808 in main_loop_wait (nonblocking=) at util/main-loop.c:518 - #23 0x0000557223d13201 in main_loop () at vl.c:1828 - #24 0x0000557223bbfb82 in main (argc=, argv=, envp=) at vl.c:4504 - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1779036 -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 377410f6fb4f6b0d26d4a028c20766fae05de17e) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - blockdev.c | 21 +++++++++++++++++++++ - 1 file changed, 21 insertions(+) - -diff --git a/blockdev.c b/blockdev.c -index d4ef6cd..4cd9a58 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1731,6 +1731,8 @@ static void external_snapshot_abort(BlkActionState *common) - if (state->new_bs) { - if (state->overlay_appended) { - AioContext *aio_context; -+ AioContext *tmp_context; -+ int ret; - - aio_context = bdrv_get_aio_context(state->old_bs); - aio_context_acquire(aio_context); -@@ -1738,6 +1740,25 @@ static void external_snapshot_abort(BlkActionState *common) - bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd() - close state->old_bs; we need it */ - bdrv_set_backing_hd(state->new_bs, NULL, &error_abort); -+ -+ /* -+ * The call to bdrv_set_backing_hd() above returns state->old_bs to -+ * the main AioContext. As we're still going to be using it, return -+ * it to the AioContext it was before. -+ */ -+ tmp_context = bdrv_get_aio_context(state->old_bs); -+ if (aio_context != tmp_context) { -+ aio_context_release(aio_context); -+ aio_context_acquire(tmp_context); -+ -+ ret = bdrv_try_set_aio_context(state->old_bs, -+ aio_context, NULL); -+ assert(ret == 0); -+ -+ aio_context_release(tmp_context); -+ aio_context_acquire(aio_context); -+ } -+ - bdrv_replace_node(state->new_bs, state->old_bs, &error_abort); - bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ - --- -1.8.3.1 - diff --git a/SOURCES/kvm-blockdev-Split-off-basic-bitmap-operations-for-qemu-.patch b/SOURCES/kvm-blockdev-Split-off-basic-bitmap-operations-for-qemu-.patch deleted file mode 100644 index d977922..0000000 --- a/SOURCES/kvm-blockdev-Split-off-basic-bitmap-operations-for-qemu-.patch +++ /dev/null @@ -1,720 +0,0 @@ -From 2afa718d59ef86879a9e34b4601a1f2658afa9ba Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 Jun 2020 02:34:14 +0100 -Subject: [PATCH 09/26] blockdev: Split off basic bitmap operations for - qemu-img - -RH-Author: Eric Blake -Message-id: 
<20200602023420.2133649-7-eblake@redhat.com> -Patchwork-id: 97073 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 06/12] blockdev: Split off basic bitmap operations for qemu-img -Bugzilla: 1779893 1779904 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Upcoming patches want to add some basic bitmap manipulation abilities -to qemu-img. But blockdev.o is too heavyweight to link into qemu-img -(among other things, it would drag in block jobs and transaction -support - qemu-img does offline manipulation, where atomicity is less -important because there are no concurrent modifications to compete -with), so it's time to split off the bare bones of what we will need -into a new file block/monitor/bitmap-qmp-cmds.o. - -This is sufficient to expose 6 QMP commands for use by qemu-img (add, -remove, clear, enable, disable, merge), as well as move the three -helper functions touched in the previous patch. Regarding -MAINTAINERS, the new file is automatically part of block core, but -also makes sense as related to other dirty bitmap files. - -Signed-off-by: Eric Blake -Reviewed-by: Max Reitz -Message-Id: <20200513011648.166876-6-eblake@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit bb4e58c6137e80129b955789dd4b66c1504f20dc) - -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - Makefile.objs - comment context - block/monitor/Makefile.objs - context: a2dde2f2 not backported - blockdev.c - context -Signed-off-by: Eric Blake - -Signed-off-by: Danilo C. L. 
de Paula ---- - MAINTAINERS | 1 + - Makefile.objs | 3 +- - block/monitor/Makefile.objs | 1 + - block/monitor/bitmap-qmp-cmds.c | 321 ++++++++++++++++++++++++++++++++++++++++ - blockdev.c | 284 ----------------------------------- - 5 files changed, 324 insertions(+), 286 deletions(-) - create mode 100644 block/monitor/Makefile.objs - create mode 100644 block/monitor/bitmap-qmp-cmds.c - -diff --git a/MAINTAINERS b/MAINTAINERS -index 3a81ac9..49d5d44 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -1875,6 +1875,7 @@ L: qemu-block@nongnu.org - S: Supported - F: include/qemu/hbitmap.h - F: include/block/dirty-bitmap.h -+F: block/monitor/bitmap-qmp-cmds.c - F: block/dirty-bitmap.c - F: block/qcow2-bitmap.c - F: migration/block-dirty-bitmap.c -diff --git a/Makefile.objs b/Makefile.objs -index 1a8f288..7404ef0 100644 ---- a/Makefile.objs -+++ b/Makefile.objs -@@ -13,9 +13,8 @@ authz-obj-y = authz/ - ####################################################################### - # block-obj-y is code used by both qemu system emulation and qemu-img - --block-obj-y = nbd/ -+block-obj-y = block/ block/monitor/ nbd/ scsi/ - block-obj-y += block.o blockjob.o job.o --block-obj-y += block/ scsi/ - block-obj-y += qemu-io-cmds.o - block-obj-$(CONFIG_REPLICATION) += replication.o - -diff --git a/block/monitor/Makefile.objs b/block/monitor/Makefile.objs -new file mode 100644 -index 0000000..f0c7642 ---- /dev/null -+++ b/block/monitor/Makefile.objs -@@ -0,0 +1 @@ -+block-obj-y += bitmap-qmp-cmds.o -diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c -new file mode 100644 -index 0000000..9f11dee ---- /dev/null -+++ b/block/monitor/bitmap-qmp-cmds.c -@@ -0,0 +1,321 @@ -+/* -+ * QEMU block dirty bitmap QMP commands -+ * -+ * Copyright (c) 2003-2008 Fabrice Bellard -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or -+ * later. See the COPYING file in the top-level directory. 
-+ * -+ * This file incorporates work covered by the following copyright and -+ * permission notice: -+ * -+ * Copyright (c) 2003-2008 Fabrice Bellard -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. -+ */ -+ -+#include "qemu/osdep.h" -+ -+#include "block/block_int.h" -+#include "qapi/qapi-commands-block.h" -+#include "qapi/error.h" -+ -+/** -+ * block_dirty_bitmap_lookup: -+ * Return a dirty bitmap (if present), after validating -+ * the node reference and bitmap names. -+ * -+ * @node: The name of the BDS node to search for bitmaps -+ * @name: The name of the bitmap to search for -+ * @pbs: Output pointer for BDS lookup, if desired. Can be NULL. -+ * @errp: Output pointer for error information. Can be NULL. -+ * -+ * @return: A bitmap object on success, or NULL on failure. 
-+ */ -+BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, -+ const char *name, -+ BlockDriverState **pbs, -+ Error **errp) -+{ -+ BlockDriverState *bs; -+ BdrvDirtyBitmap *bitmap; -+ -+ if (!node) { -+ error_setg(errp, "Node cannot be NULL"); -+ return NULL; -+ } -+ if (!name) { -+ error_setg(errp, "Bitmap name cannot be NULL"); -+ return NULL; -+ } -+ bs = bdrv_lookup_bs(node, node, NULL); -+ if (!bs) { -+ error_setg(errp, "Node '%s' not found", node); -+ return NULL; -+ } -+ -+ bitmap = bdrv_find_dirty_bitmap(bs, name); -+ if (!bitmap) { -+ error_setg(errp, "Dirty bitmap '%s' not found", name); -+ return NULL; -+ } -+ -+ if (pbs) { -+ *pbs = bs; -+ } -+ -+ return bitmap; -+} -+ -+void qmp_block_dirty_bitmap_add(const char *node, const char *name, -+ bool has_granularity, uint32_t granularity, -+ bool has_persistent, bool persistent, -+ bool has_disabled, bool disabled, -+ Error **errp) -+{ -+ BlockDriverState *bs; -+ BdrvDirtyBitmap *bitmap; -+ AioContext *aio_context; -+ -+ if (!name || name[0] == '\0') { -+ error_setg(errp, "Bitmap name cannot be empty"); -+ return; -+ } -+ -+ bs = bdrv_lookup_bs(node, node, errp); -+ if (!bs) { -+ return; -+ } -+ -+ aio_context = bdrv_get_aio_context(bs); -+ aio_context_acquire(aio_context); -+ -+ if (has_granularity) { -+ if (granularity < 512 || !is_power_of_2(granularity)) { -+ error_setg(errp, "Granularity must be power of 2 " -+ "and at least 512"); -+ goto out; -+ } -+ } else { -+ /* Default to cluster size, if available: */ -+ granularity = bdrv_get_default_bitmap_granularity(bs); -+ } -+ -+ if (!has_persistent) { -+ persistent = false; -+ } -+ -+ if (!has_disabled) { -+ disabled = false; -+ } -+ -+ if (persistent && -+ !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) -+ { -+ goto out; -+ } -+ -+ bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); -+ if (bitmap == NULL) { -+ goto out; -+ } -+ -+ if (disabled) { -+ bdrv_disable_dirty_bitmap(bitmap); -+ } -+ -+ 
bdrv_dirty_bitmap_set_persistence(bitmap, persistent); -+ -+out: -+ aio_context_release(aio_context); -+} -+ -+BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, -+ bool release, -+ BlockDriverState **bitmap_bs, -+ Error **errp) -+{ -+ BlockDriverState *bs; -+ BdrvDirtyBitmap *bitmap; -+ AioContext *aio_context; -+ -+ bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); -+ if (!bitmap || !bs) { -+ return NULL; -+ } -+ -+ aio_context = bdrv_get_aio_context(bs); -+ aio_context_acquire(aio_context); -+ -+ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, -+ errp)) { -+ aio_context_release(aio_context); -+ return NULL; -+ } -+ -+ if (bdrv_dirty_bitmap_get_persistence(bitmap) && -+ bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) -+ { -+ aio_context_release(aio_context); -+ return NULL; -+ } -+ -+ if (release) { -+ bdrv_release_dirty_bitmap(bitmap); -+ } -+ -+ if (bitmap_bs) { -+ *bitmap_bs = bs; -+ } -+ -+ aio_context_release(aio_context); -+ return release ? NULL : bitmap; -+} -+ -+void qmp_block_dirty_bitmap_remove(const char *node, const char *name, -+ Error **errp) -+{ -+ block_dirty_bitmap_remove(node, name, true, NULL, errp); -+} -+ -+/** -+ * Completely clear a bitmap, for the purposes of synchronizing a bitmap -+ * immediately after a full backup operation. 
-+ */ -+void qmp_block_dirty_bitmap_clear(const char *node, const char *name, -+ Error **errp) -+{ -+ BdrvDirtyBitmap *bitmap; -+ BlockDriverState *bs; -+ -+ bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); -+ if (!bitmap || !bs) { -+ return; -+ } -+ -+ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, errp)) { -+ return; -+ } -+ -+ bdrv_clear_dirty_bitmap(bitmap, NULL); -+} -+ -+void qmp_block_dirty_bitmap_enable(const char *node, const char *name, -+ Error **errp) -+{ -+ BlockDriverState *bs; -+ BdrvDirtyBitmap *bitmap; -+ -+ bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); -+ if (!bitmap) { -+ return; -+ } -+ -+ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) { -+ return; -+ } -+ -+ bdrv_enable_dirty_bitmap(bitmap); -+} -+ -+void qmp_block_dirty_bitmap_disable(const char *node, const char *name, -+ Error **errp) -+{ -+ BlockDriverState *bs; -+ BdrvDirtyBitmap *bitmap; -+ -+ bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); -+ if (!bitmap) { -+ return; -+ } -+ -+ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) { -+ return; -+ } -+ -+ bdrv_disable_dirty_bitmap(bitmap); -+} -+ -+BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, -+ BlockDirtyBitmapMergeSourceList *bms, -+ HBitmap **backup, Error **errp) -+{ -+ BlockDriverState *bs; -+ BdrvDirtyBitmap *dst, *src, *anon; -+ BlockDirtyBitmapMergeSourceList *lst; -+ Error *local_err = NULL; -+ -+ dst = block_dirty_bitmap_lookup(node, target, &bs, errp); -+ if (!dst) { -+ return NULL; -+ } -+ -+ anon = bdrv_create_dirty_bitmap(bs, bdrv_dirty_bitmap_granularity(dst), -+ NULL, errp); -+ if (!anon) { -+ return NULL; -+ } -+ -+ for (lst = bms; lst; lst = lst->next) { -+ switch (lst->value->type) { -+ const char *name, *node; -+ case QTYPE_QSTRING: -+ name = lst->value->u.local; -+ src = bdrv_find_dirty_bitmap(bs, name); -+ if (!src) { -+ error_setg(errp, "Dirty bitmap '%s' not found", name); -+ dst = NULL; -+ goto out; -+ } -+ 
break; -+ case QTYPE_QDICT: -+ node = lst->value->u.external.node; -+ name = lst->value->u.external.name; -+ src = block_dirty_bitmap_lookup(node, name, NULL, errp); -+ if (!src) { -+ dst = NULL; -+ goto out; -+ } -+ break; -+ default: -+ abort(); -+ } -+ -+ bdrv_merge_dirty_bitmap(anon, src, NULL, &local_err); -+ if (local_err) { -+ error_propagate(errp, local_err); -+ dst = NULL; -+ goto out; -+ } -+ } -+ -+ /* Merge into dst; dst is unchanged on failure. */ -+ bdrv_merge_dirty_bitmap(dst, anon, backup, errp); -+ -+ out: -+ bdrv_release_dirty_bitmap(anon); -+ return dst; -+} -+ -+void qmp_block_dirty_bitmap_merge(const char *node, const char *target, -+ BlockDirtyBitmapMergeSourceList *bitmaps, -+ Error **errp) -+{ -+ block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp); -+} -diff --git a/blockdev.c b/blockdev.c -index 3958058..5128c9b 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1250,53 +1250,6 @@ out_aio_context: - return NULL; - } - --/** -- * block_dirty_bitmap_lookup: -- * Return a dirty bitmap (if present), after validating -- * the node reference and bitmap names. -- * -- * @node: The name of the BDS node to search for bitmaps -- * @name: The name of the bitmap to search for -- * @pbs: Output pointer for BDS lookup, if desired. Can be NULL. -- * @errp: Output pointer for error information. Can be NULL. -- * -- * @return: A bitmap object on success, or NULL on failure. 
-- */ --BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node, -- const char *name, -- BlockDriverState **pbs, -- Error **errp) --{ -- BlockDriverState *bs; -- BdrvDirtyBitmap *bitmap; -- -- if (!node) { -- error_setg(errp, "Node cannot be NULL"); -- return NULL; -- } -- if (!name) { -- error_setg(errp, "Bitmap name cannot be NULL"); -- return NULL; -- } -- bs = bdrv_lookup_bs(node, node, NULL); -- if (!bs) { -- error_setg(errp, "Node '%s' not found", node); -- return NULL; -- } -- -- bitmap = bdrv_find_dirty_bitmap(bs, name); -- if (!bitmap) { -- error_setg(errp, "Dirty bitmap '%s' not found", name); -- return NULL; -- } -- -- if (pbs) { -- *pbs = bs; -- } -- -- return bitmap; --} -- - /* New and old BlockDriverState structs for atomic group operations */ - - typedef struct BlkActionState BlkActionState; -@@ -2974,243 +2927,6 @@ out: - aio_context_release(aio_context); - } - --void qmp_block_dirty_bitmap_add(const char *node, const char *name, -- bool has_granularity, uint32_t granularity, -- bool has_persistent, bool persistent, -- bool has_disabled, bool disabled, -- Error **errp) --{ -- BlockDriverState *bs; -- BdrvDirtyBitmap *bitmap; -- AioContext *aio_context; -- -- if (!name || name[0] == '\0') { -- error_setg(errp, "Bitmap name cannot be empty"); -- return; -- } -- -- bs = bdrv_lookup_bs(node, node, errp); -- if (!bs) { -- return; -- } -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- -- if (has_granularity) { -- if (granularity < 512 || !is_power_of_2(granularity)) { -- error_setg(errp, "Granularity must be power of 2 " -- "and at least 512"); -- goto out; -- } -- } else { -- /* Default to cluster size, if available: */ -- granularity = bdrv_get_default_bitmap_granularity(bs); -- } -- -- if (!has_persistent) { -- persistent = false; -- } -- -- if (!has_disabled) { -- disabled = false; -- } -- -- if (persistent && -- !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) -- { -- goto out; -- } -- -- bitmap 
= bdrv_create_dirty_bitmap(bs, granularity, name, errp); -- if (bitmap == NULL) { -- goto out; -- } -- -- if (disabled) { -- bdrv_disable_dirty_bitmap(bitmap); -- } -- -- bdrv_dirty_bitmap_set_persistence(bitmap, persistent); -- --out: -- aio_context_release(aio_context); --} -- --BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, -- bool release, -- BlockDriverState **bitmap_bs, -- Error **errp) --{ -- BlockDriverState *bs; -- BdrvDirtyBitmap *bitmap; -- AioContext *aio_context; -- -- bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); -- if (!bitmap || !bs) { -- return NULL; -- } -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- -- if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, -- errp)) { -- aio_context_release(aio_context); -- return NULL; -- } -- -- if (bdrv_dirty_bitmap_get_persistence(bitmap) && -- bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) -- { -- aio_context_release(aio_context); -- return NULL; -- } -- -- if (release) { -- bdrv_release_dirty_bitmap(bitmap); -- } -- -- if (bitmap_bs) { -- *bitmap_bs = bs; -- } -- -- aio_context_release(aio_context); -- return release ? NULL : bitmap; --} -- --void qmp_block_dirty_bitmap_remove(const char *node, const char *name, -- Error **errp) --{ -- block_dirty_bitmap_remove(node, name, true, NULL, errp); --} -- --/** -- * Completely clear a bitmap, for the purposes of synchronizing a bitmap -- * immediately after a full backup operation. 
-- */ --void qmp_block_dirty_bitmap_clear(const char *node, const char *name, -- Error **errp) --{ -- BdrvDirtyBitmap *bitmap; -- BlockDriverState *bs; -- -- bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); -- if (!bitmap || !bs) { -- return; -- } -- -- if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, errp)) { -- return; -- } -- -- bdrv_clear_dirty_bitmap(bitmap, NULL); --} -- --void qmp_block_dirty_bitmap_enable(const char *node, const char *name, -- Error **errp) --{ -- BlockDriverState *bs; -- BdrvDirtyBitmap *bitmap; -- -- bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); -- if (!bitmap) { -- return; -- } -- -- if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) { -- return; -- } -- -- bdrv_enable_dirty_bitmap(bitmap); --} -- --void qmp_block_dirty_bitmap_disable(const char *node, const char *name, -- Error **errp) --{ -- BlockDriverState *bs; -- BdrvDirtyBitmap *bitmap; -- -- bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); -- if (!bitmap) { -- return; -- } -- -- if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) { -- return; -- } -- -- bdrv_disable_dirty_bitmap(bitmap); --} -- --BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target, -- BlockDirtyBitmapMergeSourceList *bms, -- HBitmap **backup, Error **errp) --{ -- BlockDriverState *bs; -- BdrvDirtyBitmap *dst, *src, *anon; -- BlockDirtyBitmapMergeSourceList *lst; -- Error *local_err = NULL; -- -- dst = block_dirty_bitmap_lookup(node, target, &bs, errp); -- if (!dst) { -- return NULL; -- } -- -- anon = bdrv_create_dirty_bitmap(bs, bdrv_dirty_bitmap_granularity(dst), -- NULL, errp); -- if (!anon) { -- return NULL; -- } -- -- for (lst = bms; lst; lst = lst->next) { -- switch (lst->value->type) { -- const char *name, *node; -- case QTYPE_QSTRING: -- name = lst->value->u.local; -- src = bdrv_find_dirty_bitmap(bs, name); -- if (!src) { -- error_setg(errp, "Dirty bitmap '%s' not found", name); -- dst = NULL; -- goto out; -- } -- 
break; -- case QTYPE_QDICT: -- node = lst->value->u.external.node; -- name = lst->value->u.external.name; -- src = block_dirty_bitmap_lookup(node, name, NULL, errp); -- if (!src) { -- dst = NULL; -- goto out; -- } -- break; -- default: -- abort(); -- } -- -- bdrv_merge_dirty_bitmap(anon, src, NULL, &local_err); -- if (local_err) { -- error_propagate(errp, local_err); -- dst = NULL; -- goto out; -- } -- } -- -- /* Merge into dst; dst is unchanged on failure. */ -- bdrv_merge_dirty_bitmap(dst, anon, backup, errp); -- -- out: -- bdrv_release_dirty_bitmap(anon); -- return dst; --} -- --void qmp_block_dirty_bitmap_merge(const char *node, const char *target, -- BlockDirtyBitmapMergeSourceList *bitmaps, -- Error **errp) --{ -- block_dirty_bitmap_merge(node, target, bitmaps, NULL, errp); --} -- - BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node, - const char *name, - Error **errp) --- -1.8.3.1 - diff --git a/SOURCES/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch b/SOURCES/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch deleted file mode 100644 index 399a06a..0000000 --- a/SOURCES/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch +++ /dev/null @@ -1,62 +0,0 @@ -From d56b53cd75c4146eae7a06d1cc30ab823a9bde93 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:41 +0000 -Subject: [PATCH 08/18] blockdev: fix coding style issues in - drive_backup_prepare -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-2-slp@redhat.com> -Patchwork-id: 93754 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/9] blockdev: fix coding style issues in drive_backup_prepare -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - 
-Fix a couple of minor coding style issues in drive_backup_prepare. - -Signed-off-by: Sergio Lopez -Reviewed-by: Max Reitz -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 471ded690e19689018535e3f48480507ed073e22) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 8e029e9..553e315 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3620,7 +3620,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, - - if (!backup->has_format) { - backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? -- NULL : (char*) bs->drv->format_name; -+ NULL : (char *) bs->drv->format_name; - } - - /* Early check to avoid creating target */ -@@ -3630,8 +3630,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, - - flags = bs->open_flags | BDRV_O_RDWR; - -- /* See if we have a backing HD we can use to create our new image -- * on top of. */ -+ /* -+ * See if we have a backing HD we can use to create our new image -+ * on top of. 
-+ */ - if (backup->sync == MIRROR_SYNC_MODE_TOP) { - source = backing_bs(bs); - if (!source) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch b/SOURCES/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch deleted file mode 100644 index a94ee75..0000000 --- a/SOURCES/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch +++ /dev/null @@ -1,204 +0,0 @@ -From da4ee4c0d56200042cb86f8ccd2777009bd82df3 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:44 +0000 -Subject: [PATCH 11/18] blockdev: honor bdrv_try_set_aio_context() context - requirements - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-5-slp@redhat.com> -Patchwork-id: 93758 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/9] blockdev: honor bdrv_try_set_aio_context() context requirements -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -bdrv_try_set_aio_context() requires that the old context is held, and -the new context is not held. Fix all the occurrences where it's not -done this way. - -Suggested-by: Max Reitz -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 3ea67e08832775a28d0bd2795f01bc77e7ea1512) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - blockdev.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- - 1 file changed, 60 insertions(+), 8 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 152a0f7..1dacbc2 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1535,6 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common, - DO_UPCAST(ExternalSnapshotState, common, common); - TransactionAction *action = common->action; - AioContext *aio_context; -+ AioContext *old_context; - int ret; - - /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar -@@ -1675,7 +1676,16 @@ static void external_snapshot_prepare(BlkActionState *common, - goto out; - } - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -+ old_context = bdrv_get_aio_context(state->new_bs); -+ aio_context_release(aio_context); -+ aio_context_acquire(old_context); -+ - ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); -+ -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - if (ret < 0) { - goto out; - } -@@ -1775,11 +1785,13 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) - BlockDriverState *target_bs; - BlockDriverState *source = NULL; - AioContext *aio_context; -+ AioContext *old_context; - QDict *options; - Error *local_err = NULL; - int flags; - int64_t size; - bool set_backing_hd = false; -+ int ret; - - assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); - backup = common->action->u.drive_backup.data; -@@ -1868,6 +1880,21 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) - goto out; - } - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ -+ old_context = bdrv_get_aio_context(target_bs); -+ aio_context_release(aio_context); -+ aio_context_acquire(old_context); -+ -+ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -+ if (ret < 0) { -+ bdrv_unref(target_bs); -+ aio_context_release(old_context); -+ return; -+ } -+ -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - if (set_backing_hd) { - bdrv_set_backing_hd(target_bs, source, &local_err); - if (local_err) { -@@ -1947,6 +1974,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - BlockDriverState *bs; - BlockDriverState *target_bs; - AioContext *aio_context; -+ AioContext *old_context; -+ int ret; - - assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); - backup = common->action->u.blockdev_backup.data; -@@ -1961,7 +1990,18 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - return; - } - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ - aio_context = bdrv_get_aio_context(bs); -+ old_context = bdrv_get_aio_context(target_bs); -+ aio_context_acquire(old_context); -+ -+ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -+ if (ret < 0) { -+ aio_context_release(old_context); -+ return; -+ } -+ -+ aio_context_release(old_context); - aio_context_acquire(aio_context); - state->bs = bs; - -@@ -3562,7 +3602,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, - BlockJob *job = NULL; - BdrvDirtyBitmap *bmap = NULL; - int job_flags = JOB_DEFAULT; -- int ret; - - if (!backup->has_speed) { - backup->speed = 0; -@@ -3586,11 +3625,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, - backup->compress = false; - } - -- ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -- if (ret < 0) { -- return NULL; -- } -- - if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) || - (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) { - /* done before desugaring 'incremental' to print the right message */ -@@ -3825,6 +3859,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - BlockDriverState *bs; - BlockDriverState *source, *target_bs; - AioContext *aio_context; -+ AioContext *old_context; - BlockMirrorBackingMode backing_mode; - Error *local_err = NULL; - QDict *options = NULL; -@@ -3937,12 +3972,22 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - (arg->mode == NEW_IMAGE_MODE_EXISTING || - !bdrv_has_zero_init(target_bs))); - -+ -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -+ old_context = bdrv_get_aio_context(target_bs); -+ aio_context_release(aio_context); -+ aio_context_acquire(old_context); -+ - ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); - if (ret < 0) { - bdrv_unref(target_bs); -- goto out; -+ aio_context_release(old_context); -+ return; - } - -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - blockdev_mirror_common(arg->has_job_id ? 
arg->job_id : NULL, bs, target_bs, - arg->has_replaces, arg->replaces, arg->sync, - backing_mode, zero_target, -@@ -3984,6 +4029,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, - BlockDriverState *bs; - BlockDriverState *target_bs; - AioContext *aio_context; -+ AioContext *old_context; - BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; - Error *local_err = NULL; - bool zero_target; -@@ -4001,10 +4047,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, - - zero_target = (sync == MIRROR_SYNC_MODE_FULL); - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -+ old_context = bdrv_get_aio_context(target_bs); - aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -+ aio_context_acquire(old_context); - - ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -+ -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - if (ret < 0) { - goto out; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch b/SOURCES/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch deleted file mode 100644 index c426384..0000000 --- a/SOURCES/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 959955217f745f1ee6cbea97314efe69f2d7dc08 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:43 +0000 -Subject: [PATCH 10/18] blockdev: unify qmp_blockdev_backup and blockdev-backup - transaction paths - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-4-slp@redhat.com> -Patchwork-id: 93756 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/9] blockdev: unify qmp_blockdev_backup and blockdev-backup transaction paths -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Issuing a blockdev-backup from 
qmp_blockdev_backup takes a slightly -different path than when it's issued from a transaction. In the code, -this is manifested as some redundancy between do_blockdev_backup() and -blockdev_backup_prepare(). - -This change unifies both paths, merging do_blockdev_backup() and -blockdev_backup_prepare(), and changing qmp_blockdev_backup() to -create a transaction instead of calling do_backup_common() direcly. - -As a side-effect, now qmp_blockdev_backup() is executed inside a -drained section, as it happens when creating a blockdev-backup -transaction. This change is visible from the user's perspective, as -the job gets paused and immediately resumed before starting the actual -work. - -Signed-off-by: Sergio Lopez -Reviewed-by: Max Reitz -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 5b7bfe515ecbd584b40ff6e41d2fd8b37c7d5139) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 60 +++++++++++++----------------------------------------------- - 1 file changed, 13 insertions(+), 47 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 5e85fc0..152a0f7 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1940,16 +1940,13 @@ typedef struct BlockdevBackupState { - BlockJob *job; - } BlockdevBackupState; - --static BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, -- Error **errp); -- - static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - { - BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); - BlockdevBackup *backup; -- BlockDriverState *bs, *target; -+ BlockDriverState *bs; -+ BlockDriverState *target_bs; - AioContext *aio_context; -- Error *local_err = NULL; - - assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); - backup = common->action->u.blockdev_backup.data; -@@ -1959,8 +1956,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - return; - } - -- target = bdrv_lookup_bs(backup->target, 
backup->target, errp); -- if (!target) { -+ target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); -+ if (!target_bs) { - return; - } - -@@ -1971,13 +1968,10 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - /* Paired with .clean() */ - bdrv_drained_begin(state->bs); - -- state->job = do_blockdev_backup(backup, common->block_job_txn, &local_err); -- if (local_err) { -- error_propagate(errp, local_err); -- goto out; -- } -+ state->job = do_backup_common(qapi_BlockdevBackup_base(backup), -+ bs, target_bs, aio_context, -+ common->block_job_txn, errp); - --out: - aio_context_release(aio_context); - } - -@@ -3695,41 +3689,13 @@ XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp) - return bdrv_get_xdbg_block_graph(errp); - } - --BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, -- Error **errp) -+void qmp_blockdev_backup(BlockdevBackup *backup, Error **errp) - { -- BlockDriverState *bs; -- BlockDriverState *target_bs; -- AioContext *aio_context; -- BlockJob *job; -- -- bs = bdrv_lookup_bs(backup->device, backup->device, errp); -- if (!bs) { -- return NULL; -- } -- -- target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); -- if (!target_bs) { -- return NULL; -- } -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- -- job = do_backup_common(qapi_BlockdevBackup_base(backup), -- bs, target_bs, aio_context, txn, errp); -- -- aio_context_release(aio_context); -- return job; --} -- --void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp) --{ -- BlockJob *job; -- job = do_blockdev_backup(arg, NULL, errp); -- if (job) { -- job_start(&job->job); -- } -+ TransactionAction action = { -+ .type = TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP, -+ .u.blockdev_backup.data = backup, -+ }; -+ blockdev_do_action(&action, errp); - } - - /* Parameter check and block job starting for drive mirroring. 
--- -1.8.3.1 - diff --git a/SOURCES/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch b/SOURCES/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch deleted file mode 100644 index 9ec1975..0000000 --- a/SOURCES/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch +++ /dev/null @@ -1,419 +0,0 @@ -From 4a03ab2a6cc4974d8d43240d1297b09160818af3 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:42 +0000 -Subject: [PATCH 09/18] blockdev: unify qmp_drive_backup and drive-backup - transaction paths - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-3-slp@redhat.com> -Patchwork-id: 93755 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/9] blockdev: unify qmp_drive_backup and drive-backup transaction paths -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Issuing a drive-backup from qmp_drive_backup takes a slightly -different path than when it's issued from a transaction. In the code, -this is manifested as some redundancy between do_drive_backup() and -drive_backup_prepare(). - -This change unifies both paths, merging do_drive_backup() and -drive_backup_prepare(), and changing qmp_drive_backup() to create a -transaction instead of calling do_backup_common() direcly. - -As a side-effect, now qmp_drive_backup() is executed inside a drained -section, as it happens when creating a drive-backup transaction. This -change is visible from the user's perspective, as the job gets paused -and immediately resumed before starting the actual work. - -Also fix tests 141, 185 and 219 to cope with the extra -JOB_STATUS_CHANGE lines. - -Signed-off-by: Sergio Lopez -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 2288ccfac96281c316db942d10e3f921c1373064) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - blockdev.c | 224 ++++++++++++++++++++------------------------- - tests/qemu-iotests/141.out | 2 + - tests/qemu-iotests/185.out | 2 + - tests/qemu-iotests/219 | 7 +- - tests/qemu-iotests/219.out | 8 ++ - 5 files changed, 117 insertions(+), 126 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 553e315..5e85fc0 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1761,39 +1761,128 @@ typedef struct DriveBackupState { - BlockJob *job; - } DriveBackupState; - --static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, -- Error **errp); -+static BlockJob *do_backup_common(BackupCommon *backup, -+ BlockDriverState *bs, -+ BlockDriverState *target_bs, -+ AioContext *aio_context, -+ JobTxn *txn, Error **errp); - - static void drive_backup_prepare(BlkActionState *common, Error **errp) - { - DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); -- BlockDriverState *bs; - DriveBackup *backup; -+ BlockDriverState *bs; -+ BlockDriverState *target_bs; -+ BlockDriverState *source = NULL; - AioContext *aio_context; -+ QDict *options; - Error *local_err = NULL; -+ int flags; -+ int64_t size; -+ bool set_backing_hd = false; - - assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); - backup = common->action->u.drive_backup.data; - -+ if (!backup->has_mode) { -+ backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; -+ } -+ - bs = bdrv_lookup_bs(backup->device, backup->device, errp); - if (!bs) { - return; - } - -+ if (!bs->drv) { -+ error_setg(errp, "Device has no medium"); -+ return; -+ } -+ - aio_context = bdrv_get_aio_context(bs); - aio_context_acquire(aio_context); - - /* Paired with .clean() */ - bdrv_drained_begin(bs); - -- state->bs = bs; -+ if (!backup->has_format) { -+ backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? 
-+ NULL : (char *) bs->drv->format_name; -+ } -+ -+ /* Early check to avoid creating target */ -+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { -+ goto out; -+ } -+ -+ flags = bs->open_flags | BDRV_O_RDWR; -+ -+ /* -+ * See if we have a backing HD we can use to create our new image -+ * on top of. -+ */ -+ if (backup->sync == MIRROR_SYNC_MODE_TOP) { -+ source = backing_bs(bs); -+ if (!source) { -+ backup->sync = MIRROR_SYNC_MODE_FULL; -+ } -+ } -+ if (backup->sync == MIRROR_SYNC_MODE_NONE) { -+ source = bs; -+ flags |= BDRV_O_NO_BACKING; -+ set_backing_hd = true; -+ } -+ -+ size = bdrv_getlength(bs); -+ if (size < 0) { -+ error_setg_errno(errp, -size, "bdrv_getlength failed"); -+ goto out; -+ } -+ -+ if (backup->mode != NEW_IMAGE_MODE_EXISTING) { -+ assert(backup->format); -+ if (source) { -+ bdrv_refresh_filename(source); -+ bdrv_img_create(backup->target, backup->format, source->filename, -+ source->drv->format_name, NULL, -+ size, flags, false, &local_err); -+ } else { -+ bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, -+ size, flags, false, &local_err); -+ } -+ } - -- state->job = do_drive_backup(backup, common->block_job_txn, &local_err); - if (local_err) { - error_propagate(errp, local_err); - goto out; - } - -+ options = qdict_new(); -+ qdict_put_str(options, "discard", "unmap"); -+ qdict_put_str(options, "detect-zeroes", "unmap"); -+ if (backup->format) { -+ qdict_put_str(options, "driver", backup->format); -+ } -+ -+ target_bs = bdrv_open(backup->target, NULL, options, flags, errp); -+ if (!target_bs) { -+ goto out; -+ } -+ -+ if (set_backing_hd) { -+ bdrv_set_backing_hd(target_bs, source, &local_err); -+ if (local_err) { -+ goto unref; -+ } -+ } -+ -+ state->bs = bs; -+ -+ state->job = do_backup_common(qapi_DriveBackup_base(backup), -+ bs, target_bs, aio_context, -+ common->block_job_txn, errp); -+ -+unref: -+ bdrv_unref(target_bs); - out: - aio_context_release(aio_context); - } -@@ -3587,126 +3676,13 @@ static 
BlockJob *do_backup_common(BackupCommon *backup, - return job; - } - --static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, -- Error **errp) --{ -- BlockDriverState *bs; -- BlockDriverState *target_bs; -- BlockDriverState *source = NULL; -- BlockJob *job = NULL; -- AioContext *aio_context; -- QDict *options; -- Error *local_err = NULL; -- int flags; -- int64_t size; -- bool set_backing_hd = false; -- -- if (!backup->has_mode) { -- backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; -- } -- -- bs = bdrv_lookup_bs(backup->device, backup->device, errp); -- if (!bs) { -- return NULL; -- } -- -- if (!bs->drv) { -- error_setg(errp, "Device has no medium"); -- return NULL; -- } -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- -- if (!backup->has_format) { -- backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? -- NULL : (char *) bs->drv->format_name; -- } -- -- /* Early check to avoid creating target */ -- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { -- goto out; -- } -- -- flags = bs->open_flags | BDRV_O_RDWR; -- -- /* -- * See if we have a backing HD we can use to create our new image -- * on top of. 
-- */ -- if (backup->sync == MIRROR_SYNC_MODE_TOP) { -- source = backing_bs(bs); -- if (!source) { -- backup->sync = MIRROR_SYNC_MODE_FULL; -- } -- } -- if (backup->sync == MIRROR_SYNC_MODE_NONE) { -- source = bs; -- flags |= BDRV_O_NO_BACKING; -- set_backing_hd = true; -- } -- -- size = bdrv_getlength(bs); -- if (size < 0) { -- error_setg_errno(errp, -size, "bdrv_getlength failed"); -- goto out; -- } -- -- if (backup->mode != NEW_IMAGE_MODE_EXISTING) { -- assert(backup->format); -- if (source) { -- bdrv_refresh_filename(source); -- bdrv_img_create(backup->target, backup->format, source->filename, -- source->drv->format_name, NULL, -- size, flags, false, &local_err); -- } else { -- bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, -- size, flags, false, &local_err); -- } -- } -- -- if (local_err) { -- error_propagate(errp, local_err); -- goto out; -- } -- -- options = qdict_new(); -- qdict_put_str(options, "discard", "unmap"); -- qdict_put_str(options, "detect-zeroes", "unmap"); -- if (backup->format) { -- qdict_put_str(options, "driver", backup->format); -- } -- -- target_bs = bdrv_open(backup->target, NULL, options, flags, errp); -- if (!target_bs) { -- goto out; -- } -- -- if (set_backing_hd) { -- bdrv_set_backing_hd(target_bs, source, &local_err); -- if (local_err) { -- goto unref; -- } -- } -- -- job = do_backup_common(qapi_DriveBackup_base(backup), -- bs, target_bs, aio_context, txn, errp); -- --unref: -- bdrv_unref(target_bs); --out: -- aio_context_release(aio_context); -- return job; --} -- --void qmp_drive_backup(DriveBackup *arg, Error **errp) -+void qmp_drive_backup(DriveBackup *backup, Error **errp) - { -- -- BlockJob *job; -- job = do_drive_backup(arg, NULL, errp); -- if (job) { -- job_start(&job->job); -- } -+ TransactionAction action = { -+ .type = TRANSACTION_ACTION_KIND_DRIVE_BACKUP, -+ .u.drive_backup.data = backup, -+ }; -+ blockdev_do_action(&action, errp); - } - - BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) 
-diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out -index 3645675..263b680 100644 ---- a/tests/qemu-iotests/141.out -+++ b/tests/qemu-iotests/141.out -@@ -13,6 +13,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m. - Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} - {'execute': 'blockdev-del', 'arguments': {'node-name': 'drv0'}} - {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} - {'execute': 'block-job-cancel', 'arguments': {'device': 'job0'}} -diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out -index 8379ac5..9a3b657 100644 ---- a/tests/qemu-iotests/185.out -+++ b/tests/qemu-iotests/185.out -@@ -65,6 +65,8 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 l - Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 lazy_refcounts=off refcount_bits=16 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "disk"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": 
"disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} - {"return": {}} - { 'execute': 'quit' } - {"return": {}} -diff --git a/tests/qemu-iotests/219 b/tests/qemu-iotests/219 -index e0c5166..655f54d 100755 ---- a/tests/qemu-iotests/219 -+++ b/tests/qemu-iotests/219 -@@ -63,7 +63,7 @@ def test_pause_resume(vm): - # logged immediately - iotests.log(vm.qmp('query-jobs')) - --def test_job_lifecycle(vm, job, job_args, has_ready=False): -+def test_job_lifecycle(vm, job, job_args, has_ready=False, is_mirror=False): - global img_size - - iotests.log('') -@@ -135,6 +135,9 @@ def test_job_lifecycle(vm, job, job_args, has_ready=False): - iotests.log('Waiting for PENDING state...') - iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) - iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) -+ if is_mirror: -+ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) -+ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) - - if not job_args.get('auto-finalize', True): - # PENDING state: -@@ -218,7 +221,7 @@ with iotests.FilePath('disk.img') as disk_path, \ - - for auto_finalize in [True, False]: - for auto_dismiss in [True, False]: -- test_job_lifecycle(vm, 'drive-backup', job_args={ -+ test_job_lifecycle(vm, 'drive-backup', is_mirror=True, job_args={ - 'device': 'drive0-node', - 'target': copy_path, - 'sync': 'full', -diff --git a/tests/qemu-iotests/219.out b/tests/qemu-iotests/219.out -index 8ebd3fe..0ea5d0b 100644 ---- a/tests/qemu-iotests/219.out -+++ b/tests/qemu-iotests/219.out -@@ -135,6 +135,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... 
-+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -@@ -186,6 +188,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... -+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -@@ -245,6 +249,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... 
-+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} -@@ -304,6 +310,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... -+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} --- -1.8.3.1 - diff --git a/SOURCES/kvm-bootp-check-bootp_input-buffer-size.patch b/SOURCES/kvm-bootp-check-bootp_input-buffer-size.patch deleted file mode 100644 index 3362cb0..0000000 --- a/SOURCES/kvm-bootp-check-bootp_input-buffer-size.patch +++ /dev/null @@ -1,52 +0,0 @@ -From a66ab346bf74ebf3ed8fca0dc2e2febfe70069e8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 29 Jul 2021 04:56:28 -0400 -Subject: [PATCH 07/14] bootp: check bootp_input buffer size -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20210708082537.1550263-4-marcandre.lureau@redhat.com> -Patchwork-id: 101820 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 3/8] bootp: check bootp_input buffer size -Bugzilla: 1970819 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -From: Marc-André Lureau - -Fixes: CVE-2021-3592 -Fixes: https://gitlab.freedesktop.org/slirp/libslirp/-/issues/44 - -Signed-off-by: Marc-André Lureau - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1970819 - -(cherry picked from commit 2eca0838eee1da96204545e22cdaed860d9d7c6c) -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - slirp/src/bootp.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c -index 5754327138..5789187166 100644 ---- a/slirp/src/bootp.c -+++ b/slirp/src/bootp.c -@@ -366,9 +366,9 @@ udp_output(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); - - void bootp_input(struct mbuf *m) - { -- struct bootp_t *bp = mtod(m, struct bootp_t *); -+ struct bootp_t *bp = mtod_check(m, sizeof(struct bootp_t)); - -- if (bp->bp_op == BOOTP_REQUEST) { -+ if (bp && bp->bp_op == BOOTP_REQUEST) { - bootp_reply(m->slirp, bp, m_end(m)); - } - } --- -2.27.0 - diff --git a/SOURCES/kvm-bootp-limit-vendor-specific-area-to-input-packet-mem.patch b/SOURCES/kvm-bootp-limit-vendor-specific-area-to-input-packet-mem.patch deleted file mode 100644 index bbf9b03..0000000 --- a/SOURCES/kvm-bootp-limit-vendor-specific-area-to-input-packet-mem.patch +++ /dev/null @@ -1,175 +0,0 @@ -From 8198ae7c21a4d37f7e365058f973867c41d44d21 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 29 Jul 2021 04:56:25 -0400 -Subject: [PATCH 06/14] bootp: limit vendor-specific area to input packet - memory buffer -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: 
<20210708082537.1550263-3-marcandre.lureau@redhat.com> -Patchwork-id: 101821 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 2/8] bootp: limit vendor-specific area to input packet memory buffer -Bugzilla: 1970819 1970835 1970843 1970853 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -From: Marc-André Lureau - -sizeof(bootp_t) currently holds DHCP_OPT_LEN. Remove this optional field -from the structure, to help with the following patch checking for -minimal header size. Modify the bootp_reply() function to take the -buffer boundaries and avoiding potential buffer overflow. - -Related to CVE-2021-3592. - -https://gitlab.freedesktop.org/slirp/libslirp/-/issues/44 - -Signed-off-by: Marc-André Lureau - -(cherry picked from commit f13cad45b25d92760bb0ad67bec0300a4d7d5275) -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - slirp/src/bootp.c | 26 +++++++++++++++----------- - slirp/src/bootp.h | 2 +- - slirp/src/mbuf.c | 5 +++++ - slirp/src/mbuf.h | 1 + - 4 files changed, 22 insertions(+), 12 deletions(-) - -diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c -index 3f9ce2553e..5754327138 100644 ---- a/slirp/src/bootp.c -+++ b/slirp/src/bootp.c -@@ -92,21 +92,22 @@ found: - return bc; - } - --static void dhcp_decode(const struct bootp_t *bp, int *pmsg_type, -+static void dhcp_decode(const struct bootp_t *bp, -+ const uint8_t *bp_end, -+ int *pmsg_type, - struct in_addr *preq_addr) - { -- const uint8_t *p, *p_end; -+ const uint8_t *p; - int len, tag; - - *pmsg_type = 0; - preq_addr->s_addr = htonl(0L); - - p = bp->bp_vend; -- p_end = p + DHCP_OPT_LEN; - if (memcmp(p, rfc1533_cookie, 4) != 0) - return; - p += 4; -- while (p < p_end) { -+ while (p < bp_end) { - tag = p[0]; - if (tag == RFC1533_PAD) { - p++; -@@ -114,10 +115,10 @@ static void dhcp_decode(const struct bootp_t *bp, int *pmsg_type, - break; - } else { - p++; -- if (p >= p_end) -+ if (p >= bp_end) - break; - len = *p++; -- if (p + len > p_end) { -+ if (p + 
len > bp_end) { - break; - } - DPRINTF("dhcp: tag=%d len=%d\n", tag, len); -@@ -144,7 +145,9 @@ static void dhcp_decode(const struct bootp_t *bp, int *pmsg_type, - } - } - --static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) -+static void bootp_reply(Slirp *slirp, -+ const struct bootp_t *bp, -+ const uint8_t *bp_end) - { - BOOTPClient *bc = NULL; - struct mbuf *m; -@@ -157,7 +160,7 @@ static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) - uint8_t client_ethaddr[ETH_ALEN]; - - /* extract exact DHCP msg type */ -- dhcp_decode(bp, &dhcp_msg_type, &preq_addr); -+ dhcp_decode(bp, bp_end, &dhcp_msg_type, &preq_addr); - DPRINTF("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type); - if (preq_addr.s_addr != htonl(0L)) - DPRINTF(" req_addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); -@@ -179,9 +182,10 @@ static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) - return; - } - m->m_data += IF_MAXLINKHDR; -+ m_inc(m, sizeof(struct bootp_t) + DHCP_OPT_LEN); - rbp = (struct bootp_t *)m->m_data; - m->m_data += sizeof(struct udpiphdr); -- memset(rbp, 0, sizeof(struct bootp_t)); -+ memset(rbp, 0, sizeof(struct bootp_t) + DHCP_OPT_LEN); - - if (dhcp_msg_type == DHCPDISCOVER) { - if (preq_addr.s_addr != htonl(0L)) { -@@ -235,7 +239,7 @@ static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) - rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */ - - q = rbp->bp_vend; -- end = (uint8_t *)&rbp[1]; -+ end = rbp->bp_vend + DHCP_OPT_LEN; - memcpy(q, rfc1533_cookie, 4); - q += 4; - -@@ -365,6 +369,6 @@ void bootp_input(struct mbuf *m) - struct bootp_t *bp = mtod(m, struct bootp_t *); - - if (bp->bp_op == BOOTP_REQUEST) { -- bootp_reply(m->slirp, bp); -+ bootp_reply(m->slirp, bp, m_end(m)); - } - } -diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h -index 03ece9bf28..0d20a944a8 100644 ---- a/slirp/src/bootp.h -+++ b/slirp/src/bootp.h -@@ -114,7 +114,7 @@ struct bootp_t { - uint8_t bp_hwaddr[16]; - uint8_t bp_sname[64]; - uint8_t 
bp_file[128]; -- uint8_t bp_vend[DHCP_OPT_LEN]; -+ uint8_t bp_vend[]; - }; - - typedef struct { -diff --git a/slirp/src/mbuf.c b/slirp/src/mbuf.c -index 6d0653ed3d..7db07c088e 100644 ---- a/slirp/src/mbuf.c -+++ b/slirp/src/mbuf.c -@@ -233,3 +233,8 @@ void *mtod_check(struct mbuf *m, size_t len) - - return NULL; - } -+ -+void *m_end(struct mbuf *m) -+{ -+ return m->m_data + m->m_len; -+} -diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h -index 2015e3232f..a9752a36e0 100644 ---- a/slirp/src/mbuf.h -+++ b/slirp/src/mbuf.h -@@ -119,6 +119,7 @@ void m_adj(struct mbuf *, int); - int m_copy(struct mbuf *, struct mbuf *, int, int); - struct mbuf *dtom(Slirp *, void *); - void *mtod_check(struct mbuf *, size_t len); -+void *m_end(struct mbuf *); - - static inline void ifs_init(struct mbuf *ifm) - { --- -2.27.0 - diff --git a/SOURCES/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch b/SOURCES/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch deleted file mode 100644 index 5d21bf8..0000000 --- a/SOURCES/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch +++ /dev/null @@ -1,137 +0,0 @@ -From f756c1c4590a37c533ec0429644a7034ba35dada Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:38 +0100 -Subject: [PATCH 007/116] build: rename CONFIG_LIBCAP to CONFIG_LIBCAP_NG -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-4-dgilbert@redhat.com> -Patchwork-id: 93459 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 003/112] build: rename CONFIG_LIBCAP to CONFIG_LIBCAP_NG -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Paolo Bonzini - -Since we are actually testing for the newer capng library, rename the -symbol to match. - -Reviewed-by: Dr. 
David Alan Gilbert -Signed-off-by: Paolo Bonzini -(cherry picked from commit a358bca24026a377e0804e137a4499e4e041918d) -Signed-off-by: Miroslav Rezanina ---- - configure | 2 +- - qemu-bridge-helper.c | 6 +++--- - scsi/qemu-pr-helper.c | 12 ++++++------ - 3 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/configure b/configure -index 16564f8..7831618 100755 ---- a/configure -+++ b/configure -@@ -6760,7 +6760,7 @@ if test "$l2tpv3" = "yes" ; then - echo "CONFIG_L2TPV3=y" >> $config_host_mak - fi - if test "$cap_ng" = "yes" ; then -- echo "CONFIG_LIBCAP=y" >> $config_host_mak -+ echo "CONFIG_LIBCAP_NG=y" >> $config_host_mak - fi - echo "CONFIG_AUDIO_DRIVERS=$audio_drv_list" >> $config_host_mak - for drv in $audio_drv_list; do -diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c -index 3d50ec0..88b2674 100644 ---- a/qemu-bridge-helper.c -+++ b/qemu-bridge-helper.c -@@ -43,7 +43,7 @@ - - #include "net/tap-linux.h" - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - #include - #endif - -@@ -207,7 +207,7 @@ static int send_fd(int c, int fd) - return sendmsg(c, &msg, 0); - } - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - static int drop_privileges(void) - { - /* clear all capabilities */ -@@ -246,7 +246,7 @@ int main(int argc, char **argv) - int access_allowed, access_denied; - int ret = EXIT_SUCCESS; - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - /* if we're run from an suid binary, immediately drop privileges preserving - * cap_net_admin */ - if (geteuid() == 0 && getuid() != geteuid()) { -diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c -index debb18f..0659cee 100644 ---- a/scsi/qemu-pr-helper.c -+++ b/scsi/qemu-pr-helper.c -@@ -24,7 +24,7 @@ - #include - #include - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - #include - #endif - #include -@@ -70,7 +70,7 @@ static int num_active_sockets = 1; - static int noisy; - static int verbose; - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - static int uid = -1; - static int 
gid = -1; - #endif -@@ -97,7 +97,7 @@ static void usage(const char *name) - " (default '%s')\n" - " -T, --trace [[enable=]][,events=][,file=]\n" - " specify tracing options\n" --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - " -u, --user=USER user to drop privileges to\n" - " -g, --group=GROUP group to drop privileges to\n" - #endif -@@ -827,7 +827,7 @@ static void close_server_socket(void) - num_active_sockets--; - } - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - static int drop_privileges(void) - { - /* clear all capabilities */ -@@ -920,7 +920,7 @@ int main(int argc, char **argv) - pidfile = g_strdup(optarg); - pidfile_specified = true; - break; --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - case 'u': { - unsigned long res; - struct passwd *userinfo = getpwnam(optarg); -@@ -1056,7 +1056,7 @@ int main(int argc, char **argv) - exit(EXIT_FAILURE); - } - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - if (drop_privileges() < 0) { - error_report("Failed to drop privileges: %s", strerror(errno)); - exit(EXIT_FAILURE); --- -1.8.3.1 - diff --git a/SOURCES/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch b/SOURCES/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch deleted file mode 100644 index 5b1b170..0000000 --- a/SOURCES/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch +++ /dev/null @@ -1,2463 +0,0 @@ -From fc2d0dfe60b14992a9b67e7a18394ba6365dc5ed Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 18 Mar 2020 18:10:40 +0000 -Subject: [PATCH 2/2] build-sys: do not make qemu-ga link with pixman -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200318181040.256425-1-marcandre.lureau@redhat.com> -Patchwork-id: 94381 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] build-sys: do not make qemu-ga link with pixman -Bugzilla: 1811670 -RH-Acked-by: Markus Armbruster -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. 
Berrange - -Since commit d52c454aadcdae74506f315ebf8b58bb79a05573 ("contrib: add -vhost-user-gpu"), qemu-ga is linking with pixman. - -This is because the Make-based build-system use a global namespace for -variables, and we rely on "main.o-libs" for different linking targets. - -Note: this kind of variable clashing is hard to fix or prevent -currently. meson should help, as declarations have a linear -dependency and doesn't rely so much on variables and clever tricks. - -Note2: we have a lot of main.c (or other duplicated names!) in -tree. Imho, it would be annoying and a bad workaroud to rename all -those to avoid conflicts like I did here. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1811670 - -Signed-off-by: Marc-André Lureau -Message-Id: <20200311160923.882474-1-marcandre.lureau@redhat.com> -Signed-off-by: Paolo Bonzini - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1811670 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=27330493 - -(cherry picked from commit 5b42bc5ce9ab4a3171819feea5042931817211fd) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. 
de Paula ---- - contrib/vhost-user-gpu/Makefile.objs | 6 +- - contrib/vhost-user-gpu/main.c | 1191 ------------------------------- - contrib/vhost-user-gpu/vhost-user-gpu.c | 1191 +++++++++++++++++++++++++++++++ - 3 files changed, 1194 insertions(+), 1194 deletions(-) - delete mode 100644 contrib/vhost-user-gpu/main.c - create mode 100644 contrib/vhost-user-gpu/vhost-user-gpu.c - -diff --git a/contrib/vhost-user-gpu/Makefile.objs b/contrib/vhost-user-gpu/Makefile.objs -index 6170c91..0929609 100644 ---- a/contrib/vhost-user-gpu/Makefile.objs -+++ b/contrib/vhost-user-gpu/Makefile.objs -@@ -1,7 +1,7 @@ --vhost-user-gpu-obj-y = main.o virgl.o vugbm.o -+vhost-user-gpu-obj-y = vhost-user-gpu.o virgl.o vugbm.o - --main.o-cflags := $(PIXMAN_CFLAGS) $(GBM_CFLAGS) --main.o-libs := $(PIXMAN_LIBS) -+vhost-user-gpu.o-cflags := $(PIXMAN_CFLAGS) $(GBM_CFLAGS) -+vhost-user-gpu.o-libs := $(PIXMAN_LIBS) - - virgl.o-cflags := $(VIRGL_CFLAGS) $(GBM_CFLAGS) - virgl.o-libs := $(VIRGL_LIBS) -diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c -deleted file mode 100644 -index b45d201..0000000 ---- a/contrib/vhost-user-gpu/main.c -+++ /dev/null -@@ -1,1191 +0,0 @@ --/* -- * Virtio vhost-user GPU Device -- * -- * Copyright Red Hat, Inc. 2013-2018 -- * -- * Authors: -- * Dave Airlie -- * Gerd Hoffmann -- * Marc-André Lureau -- * -- * This work is licensed under the terms of the GNU GPL, version 2 or later. -- * See the COPYING file in the top-level directory. 
-- */ --#include "qemu/osdep.h" --#include "qemu/drm.h" --#include "qapi/error.h" --#include "qemu/sockets.h" -- --#include --#include -- --#include "vugpu.h" --#include "hw/virtio/virtio-gpu-bswap.h" --#include "hw/virtio/virtio-gpu-pixman.h" --#include "virgl.h" --#include "vugbm.h" -- --enum { -- VHOST_USER_GPU_MAX_QUEUES = 2, --}; -- --struct virtio_gpu_simple_resource { -- uint32_t resource_id; -- uint32_t width; -- uint32_t height; -- uint32_t format; -- struct iovec *iov; -- unsigned int iov_cnt; -- uint32_t scanout_bitmask; -- pixman_image_t *image; -- struct vugbm_buffer buffer; -- QTAILQ_ENTRY(virtio_gpu_simple_resource) next; --}; -- --static gboolean opt_print_caps; --static int opt_fdnum = -1; --static char *opt_socket_path; --static char *opt_render_node; --static gboolean opt_virgl; -- --static void vg_handle_ctrl(VuDev *dev, int qidx); -- --static const char * --vg_cmd_to_string(int cmd) --{ --#define CMD(cmd) [cmd] = #cmd -- static const char *vg_cmd_str[] = { -- CMD(VIRTIO_GPU_UNDEFINED), -- -- /* 2d commands */ -- CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO), -- CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D), -- CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF), -- CMD(VIRTIO_GPU_CMD_SET_SCANOUT), -- CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH), -- CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D), -- CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING), -- CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING), -- CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO), -- CMD(VIRTIO_GPU_CMD_GET_CAPSET), -- -- /* 3d commands */ -- CMD(VIRTIO_GPU_CMD_CTX_CREATE), -- CMD(VIRTIO_GPU_CMD_CTX_DESTROY), -- CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE), -- CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE), -- CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D), -- CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D), -- CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D), -- CMD(VIRTIO_GPU_CMD_SUBMIT_3D), -- -- /* cursor commands */ -- CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR), -- CMD(VIRTIO_GPU_CMD_MOVE_CURSOR), -- }; --#undef REQ -- -- if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) { -- return 
vg_cmd_str[cmd]; -- } else { -- return "unknown"; -- } --} -- --static int --vg_sock_fd_read(int sock, void *buf, ssize_t buflen) --{ -- int ret; -- -- do { -- ret = read(sock, buf, buflen); -- } while (ret < 0 && (errno == EINTR || errno == EAGAIN)); -- -- g_warn_if_fail(ret == buflen); -- return ret; --} -- --static void --vg_sock_fd_close(VuGpu *g) --{ -- if (g->sock_fd >= 0) { -- close(g->sock_fd); -- g->sock_fd = -1; -- } --} -- --static gboolean --source_wait_cb(gint fd, GIOCondition condition, gpointer user_data) --{ -- VuGpu *g = user_data; -- -- if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) { -- return G_SOURCE_CONTINUE; -- } -- -- /* resume */ -- g->wait_ok = 0; -- vg_handle_ctrl(&g->dev.parent, 0); -- -- return G_SOURCE_REMOVE; --} -- --void --vg_wait_ok(VuGpu *g) --{ -- assert(g->wait_ok == 0); -- g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP, -- source_wait_cb, g); --} -- --static int --vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd) --{ -- ssize_t ret; -- struct iovec iov = { -- .iov_base = (void *)buf, -- .iov_len = buflen, -- }; -- struct msghdr msg = { -- .msg_iov = &iov, -- .msg_iovlen = 1, -- }; -- union { -- struct cmsghdr cmsghdr; -- char control[CMSG_SPACE(sizeof(int))]; -- } cmsgu; -- struct cmsghdr *cmsg; -- -- if (fd != -1) { -- msg.msg_control = cmsgu.control; -- msg.msg_controllen = sizeof(cmsgu.control); -- -- cmsg = CMSG_FIRSTHDR(&msg); -- cmsg->cmsg_len = CMSG_LEN(sizeof(int)); -- cmsg->cmsg_level = SOL_SOCKET; -- cmsg->cmsg_type = SCM_RIGHTS; -- -- *((int *)CMSG_DATA(cmsg)) = fd; -- } -- -- do { -- ret = sendmsg(sock, &msg, 0); -- } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); -- -- g_warn_if_fail(ret == buflen); -- return ret; --} -- --void --vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd) --{ -- if (vg_sock_fd_write(vg->sock_fd, msg, -- VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) { -- vg_sock_fd_close(vg); -- } --} -- --bool --vg_recv_msg(VuGpu *g, uint32_t 
expect_req, uint32_t expect_size, -- gpointer payload) --{ -- uint32_t req, flags, size; -- -- if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 || -- vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 || -- vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) { -- goto err; -- } -- -- g_return_val_if_fail(req == expect_req, false); -- g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false); -- g_return_val_if_fail(size == expect_size, false); -- -- if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) { -- goto err; -- } -- -- return true; -- --err: -- vg_sock_fd_close(g); -- return false; --} -- --static struct virtio_gpu_simple_resource * --virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id) --{ -- struct virtio_gpu_simple_resource *res; -- -- QTAILQ_FOREACH(res, &g->reslist, next) { -- if (res->resource_id == resource_id) { -- return res; -- } -- } -- return NULL; --} -- --void --vg_ctrl_response(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd, -- struct virtio_gpu_ctrl_hdr *resp, -- size_t resp_len) --{ -- size_t s; -- -- if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) { -- resp->flags |= VIRTIO_GPU_FLAG_FENCE; -- resp->fence_id = cmd->cmd_hdr.fence_id; -- resp->ctx_id = cmd->cmd_hdr.ctx_id; -- } -- virtio_gpu_ctrl_hdr_bswap(resp); -- s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len); -- if (s != resp_len) { -- g_critical("%s: response size incorrect %zu vs %zu", -- __func__, s, resp_len); -- } -- vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s); -- vu_queue_notify(&g->dev.parent, cmd->vq); -- cmd->finished = true; --} -- --void --vg_ctrl_response_nodata(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd, -- enum virtio_gpu_ctrl_type type) --{ -- struct virtio_gpu_ctrl_hdr resp = { -- .type = type, -- }; -- -- vg_ctrl_response(g, cmd, &resp, sizeof(resp)); --} -- --void --vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_resp_display_info dpy_info = { {} }; 
-- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_GET_DISPLAY_INFO, -- .size = 0, -- }; -- -- assert(vg->wait_ok == 0); -- -- vg_send_msg(vg, &msg, -1); -- if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) { -- return; -- } -- -- vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info)); --} -- --static void --vg_resource_create_2d(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- pixman_format_code_t pformat; -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_create_2d c2d; -- -- VUGPU_FILL_CMD(c2d); -- virtio_gpu_bswap_32(&c2d, sizeof(c2d)); -- -- if (c2d.resource_id == 0) { -- g_critical("%s: resource id 0 is not allowed", __func__); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- res = virtio_gpu_find_resource(g, c2d.resource_id); -- if (res) { -- g_critical("%s: resource already exists %d", __func__, c2d.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- res = g_new0(struct virtio_gpu_simple_resource, 1); -- res->width = c2d.width; -- res->height = c2d.height; -- res->format = c2d.format; -- res->resource_id = c2d.resource_id; -- -- pformat = virtio_gpu_get_pixman_format(c2d.format); -- if (!pformat) { -- g_critical("%s: host couldn't handle guest format %d", -- __func__, c2d.format); -- g_free(res); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -- return; -- } -- vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height); -- res->image = pixman_image_create_bits(pformat, -- c2d.width, -- c2d.height, -- (uint32_t *)res->buffer.mmap, -- res->buffer.stride); -- if (!res->image) { -- g_critical("%s: resource creation failed %d %d %d", -- __func__, c2d.resource_id, c2d.width, c2d.height); -- g_free(res); -- cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; -- return; -- } -- -- QTAILQ_INSERT_HEAD(&g->reslist, res, next); --} -- --static void --vg_disable_scanout(VuGpu *g, int scanout_id) --{ -- struct virtio_gpu_scanout *scanout 
= &g->scanout[scanout_id]; -- struct virtio_gpu_simple_resource *res; -- -- if (scanout->resource_id == 0) { -- return; -- } -- -- res = virtio_gpu_find_resource(g, scanout->resource_id); -- if (res) { -- res->scanout_bitmask &= ~(1 << scanout_id); -- } -- -- scanout->width = 0; -- scanout->height = 0; -- -- if (g->sock_fd >= 0) { -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_SCANOUT, -- .size = sizeof(VhostUserGpuScanout), -- .payload.scanout.scanout_id = scanout_id, -- }; -- vg_send_msg(g, &msg, -1); -- } --} -- --static void --vg_resource_destroy(VuGpu *g, -- struct virtio_gpu_simple_resource *res) --{ -- int i; -- -- if (res->scanout_bitmask) { -- for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { -- if (res->scanout_bitmask & (1 << i)) { -- vg_disable_scanout(g, i); -- } -- } -- } -- -- vugbm_buffer_destroy(&res->buffer); -- pixman_image_unref(res->image); -- QTAILQ_REMOVE(&g->reslist, res, next); -- g_free(res); --} -- --static void --vg_resource_unref(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_unref unref; -- -- VUGPU_FILL_CMD(unref); -- virtio_gpu_bswap_32(&unref, sizeof(unref)); -- -- res = virtio_gpu_find_resource(g, unref.resource_id); -- if (!res) { -- g_critical("%s: illegal resource specified %d", -- __func__, unref.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- vg_resource_destroy(g, res); --} -- --int --vg_create_mapping_iov(VuGpu *g, -- struct virtio_gpu_resource_attach_backing *ab, -- struct virtio_gpu_ctrl_command *cmd, -- struct iovec **iov) --{ -- struct virtio_gpu_mem_entry *ents; -- size_t esize, s; -- int i; -- -- if (ab->nr_entries > 16384) { -- g_critical("%s: nr_entries is too big (%d > 16384)", -- __func__, ab->nr_entries); -- return -1; -- } -- -- esize = sizeof(*ents) * ab->nr_entries; -- ents = g_malloc(esize); -- s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, -- sizeof(*ab), ents, esize); -- if (s 
!= esize) { -- g_critical("%s: command data size incorrect %zu vs %zu", -- __func__, s, esize); -- g_free(ents); -- return -1; -- } -- -- *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); -- for (i = 0; i < ab->nr_entries; i++) { -- uint64_t len = ents[i].length; -- (*iov)[i].iov_len = ents[i].length; -- (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr); -- if (!(*iov)[i].iov_base || len != ents[i].length) { -- g_critical("%s: resource %d element %d", -- __func__, ab->resource_id, i); -- g_free(*iov); -- g_free(ents); -- *iov = NULL; -- return -1; -- } -- } -- g_free(ents); -- return 0; --} -- --static void --vg_resource_attach_backing(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_attach_backing ab; -- int ret; -- -- VUGPU_FILL_CMD(ab); -- virtio_gpu_bswap_32(&ab, sizeof(ab)); -- -- res = virtio_gpu_find_resource(g, ab.resource_id); -- if (!res) { -- g_critical("%s: illegal resource specified %d", -- __func__, ab.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); -- if (ret != 0) { -- cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; -- return; -- } -- -- res->iov_cnt = ab.nr_entries; --} -- --static void --vg_resource_detach_backing(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_detach_backing detach; -- -- VUGPU_FILL_CMD(detach); -- virtio_gpu_bswap_32(&detach, sizeof(detach)); -- -- res = virtio_gpu_find_resource(g, detach.resource_id); -- if (!res || !res->iov) { -- g_critical("%s: illegal resource specified %d", -- __func__, detach.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- g_free(res->iov); -- res->iov = NULL; -- res->iov_cnt = 0; --} -- --static void --vg_transfer_to_host_2d(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct 
virtio_gpu_simple_resource *res; -- int h; -- uint32_t src_offset, dst_offset, stride; -- int bpp; -- pixman_format_code_t format; -- struct virtio_gpu_transfer_to_host_2d t2d; -- -- VUGPU_FILL_CMD(t2d); -- virtio_gpu_t2d_bswap(&t2d); -- -- res = virtio_gpu_find_resource(g, t2d.resource_id); -- if (!res || !res->iov) { -- g_critical("%s: illegal resource specified %d", -- __func__, t2d.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- if (t2d.r.x > res->width || -- t2d.r.y > res->height || -- t2d.r.width > res->width || -- t2d.r.height > res->height || -- t2d.r.x + t2d.r.width > res->width || -- t2d.r.y + t2d.r.height > res->height) { -- g_critical("%s: transfer bounds outside resource" -- " bounds for resource %d: %d %d %d %d vs %d %d", -- __func__, t2d.resource_id, t2d.r.x, t2d.r.y, -- t2d.r.width, t2d.r.height, res->width, res->height); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -- return; -- } -- -- format = pixman_image_get_format(res->image); -- bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8; -- stride = pixman_image_get_stride(res->image); -- -- if (t2d.offset || t2d.r.x || t2d.r.y || -- t2d.r.width != pixman_image_get_width(res->image)) { -- void *img_data = pixman_image_get_data(res->image); -- for (h = 0; h < t2d.r.height; h++) { -- src_offset = t2d.offset + stride * h; -- dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp); -- -- iov_to_buf(res->iov, res->iov_cnt, src_offset, -- img_data -- + dst_offset, t2d.r.width * bpp); -- } -- } else { -- iov_to_buf(res->iov, res->iov_cnt, 0, -- pixman_image_get_data(res->image), -- pixman_image_get_stride(res->image) -- * pixman_image_get_height(res->image)); -- } --} -- --static void --vg_set_scanout(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res, *ores; -- struct virtio_gpu_scanout *scanout; -- struct virtio_gpu_set_scanout ss; -- int fd; -- -- VUGPU_FILL_CMD(ss); -- virtio_gpu_bswap_32(&ss, sizeof(ss)); -- -- if 
(ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) { -- g_critical("%s: illegal scanout id specified %d", -- __func__, ss.scanout_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; -- return; -- } -- -- if (ss.resource_id == 0) { -- vg_disable_scanout(g, ss.scanout_id); -- return; -- } -- -- /* create a surface for this scanout */ -- res = virtio_gpu_find_resource(g, ss.resource_id); -- if (!res) { -- g_critical("%s: illegal resource specified %d", -- __func__, ss.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- if (ss.r.x > res->width || -- ss.r.y > res->height || -- ss.r.width > res->width || -- ss.r.height > res->height || -- ss.r.x + ss.r.width > res->width || -- ss.r.y + ss.r.height > res->height) { -- g_critical("%s: illegal scanout %d bounds for" -- " resource %d, (%d,%d)+%d,%d vs %d %d", -- __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, -- ss.r.width, ss.r.height, res->width, res->height); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -- return; -- } -- -- scanout = &g->scanout[ss.scanout_id]; -- -- ores = virtio_gpu_find_resource(g, scanout->resource_id); -- if (ores) { -- ores->scanout_bitmask &= ~(1 << ss.scanout_id); -- } -- -- res->scanout_bitmask |= (1 << ss.scanout_id); -- scanout->resource_id = ss.resource_id; -- scanout->x = ss.r.x; -- scanout->y = ss.r.y; -- scanout->width = ss.r.width; -- scanout->height = ss.r.height; -- -- struct vugbm_buffer *buffer = &res->buffer; -- -- if (vugbm_buffer_can_get_dmabuf_fd(buffer)) { -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_DMABUF_SCANOUT, -- .size = sizeof(VhostUserGpuDMABUFScanout), -- .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) { -- .scanout_id = ss.scanout_id, -- .x = ss.r.x, -- .y = ss.r.y, -- .width = ss.r.width, -- .height = ss.r.height, -- .fd_width = buffer->width, -- .fd_height = buffer->height, -- .fd_stride = buffer->stride, -- .fd_drm_fourcc = buffer->format -- } -- }; -- -- if 
(vugbm_buffer_get_dmabuf_fd(buffer, &fd)) { -- vg_send_msg(g, &msg, fd); -- close(fd); -- } -- } else { -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_SCANOUT, -- .size = sizeof(VhostUserGpuScanout), -- .payload.scanout = (VhostUserGpuScanout) { -- .scanout_id = ss.scanout_id, -- .width = scanout->width, -- .height = scanout->height -- } -- }; -- vg_send_msg(g, &msg, -1); -- } --} -- --static void --vg_resource_flush(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_flush rf; -- pixman_region16_t flush_region; -- int i; -- -- VUGPU_FILL_CMD(rf); -- virtio_gpu_bswap_32(&rf, sizeof(rf)); -- -- res = virtio_gpu_find_resource(g, rf.resource_id); -- if (!res) { -- g_critical("%s: illegal resource specified %d\n", -- __func__, rf.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- if (rf.r.x > res->width || -- rf.r.y > res->height || -- rf.r.width > res->width || -- rf.r.height > res->height || -- rf.r.x + rf.r.width > res->width || -- rf.r.y + rf.r.height > res->height) { -- g_critical("%s: flush bounds outside resource" -- " bounds for resource %d: %d %d %d %d vs %d %d\n", -- __func__, rf.resource_id, rf.r.x, rf.r.y, -- rf.r.width, rf.r.height, res->width, res->height); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -- return; -- } -- -- pixman_region_init_rect(&flush_region, -- rf.r.x, rf.r.y, rf.r.width, rf.r.height); -- for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { -- struct virtio_gpu_scanout *scanout; -- pixman_region16_t region, finalregion; -- pixman_box16_t *extents; -- -- if (!(res->scanout_bitmask & (1 << i))) { -- continue; -- } -- scanout = &g->scanout[i]; -- -- pixman_region_init(&finalregion); -- pixman_region_init_rect(®ion, scanout->x, scanout->y, -- scanout->width, scanout->height); -- -- pixman_region_intersect(&finalregion, &flush_region, ®ion); -- -- extents = pixman_region_extents(&finalregion); -- size_t width = 
extents->x2 - extents->x1; -- size_t height = extents->y2 - extents->y1; -- -- if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) { -- VhostUserGpuMsg vmsg = { -- .request = VHOST_USER_GPU_DMABUF_UPDATE, -- .size = sizeof(VhostUserGpuUpdate), -- .payload.update = (VhostUserGpuUpdate) { -- .scanout_id = i, -- .x = extents->x1, -- .y = extents->y1, -- .width = width, -- .height = height, -- } -- }; -- vg_send_msg(g, &vmsg, -1); -- vg_wait_ok(g); -- } else { -- size_t bpp = -- PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8; -- size_t size = width * height * bpp; -- -- void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE + -- sizeof(VhostUserGpuUpdate) + size); -- VhostUserGpuMsg *msg = p; -- msg->request = VHOST_USER_GPU_UPDATE; -- msg->size = sizeof(VhostUserGpuUpdate) + size; -- msg->payload.update = (VhostUserGpuUpdate) { -- .scanout_id = i, -- .x = extents->x1, -- .y = extents->y1, -- .width = width, -- .height = height, -- }; -- pixman_image_t *i = -- pixman_image_create_bits(pixman_image_get_format(res->image), -- msg->payload.update.width, -- msg->payload.update.height, -- p + offsetof(VhostUserGpuMsg, -- payload.update.data), -- width * bpp); -- pixman_image_composite(PIXMAN_OP_SRC, -- res->image, NULL, i, -- extents->x1, extents->y1, -- 0, 0, 0, 0, -- width, height); -- pixman_image_unref(i); -- vg_send_msg(g, msg, -1); -- g_free(msg); -- } -- pixman_region_fini(®ion); -- pixman_region_fini(&finalregion); -- } -- pixman_region_fini(&flush_region); --} -- --static void --vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) --{ -- switch (cmd->cmd_hdr.type) { -- case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: -- vg_get_display_info(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: -- vg_resource_create_2d(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_UNREF: -- vg_resource_unref(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_FLUSH: -- vg_resource_flush(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: -- 
vg_transfer_to_host_2d(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_SET_SCANOUT: -- vg_set_scanout(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: -- vg_resource_attach_backing(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: -- vg_resource_detach_backing(vg, cmd); -- break; -- /* case VIRTIO_GPU_CMD_GET_EDID: */ -- /* break */ -- default: -- g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type); -- cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; -- break; -- } -- if (!cmd->finished) { -- vg_ctrl_response_nodata(vg, cmd, cmd->error ? cmd->error : -- VIRTIO_GPU_RESP_OK_NODATA); -- } --} -- --static void --vg_handle_ctrl(VuDev *dev, int qidx) --{ -- VuGpu *vg = container_of(dev, VuGpu, dev.parent); -- VuVirtq *vq = vu_get_queue(dev, qidx); -- struct virtio_gpu_ctrl_command *cmd = NULL; -- size_t len; -- -- for (;;) { -- if (vg->wait_ok != 0) { -- return; -- } -- -- cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command)); -- if (!cmd) { -- break; -- } -- cmd->vq = vq; -- cmd->error = 0; -- cmd->finished = false; -- -- len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, -- 0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr)); -- if (len != sizeof(cmd->cmd_hdr)) { -- g_warning("%s: command size incorrect %zu vs %zu\n", -- __func__, len, sizeof(cmd->cmd_hdr)); -- } -- -- virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); -- g_debug("%d %s\n", cmd->cmd_hdr.type, -- vg_cmd_to_string(cmd->cmd_hdr.type)); -- -- if (vg->virgl) { -- vg_virgl_process_cmd(vg, cmd); -- } else { -- vg_process_cmd(vg, cmd); -- } -- -- if (!cmd->finished) { -- QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next); -- vg->inflight++; -- } else { -- g_free(cmd); -- } -- } --} -- --static void --update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data) --{ -- struct virtio_gpu_simple_resource *res; -- -- res = virtio_gpu_find_resource(g, resource_id); -- g_return_if_fail(res != NULL); -- g_return_if_fail(pixman_image_get_width(res->image) == 64); -- 
g_return_if_fail(pixman_image_get_height(res->image) == 64); -- g_return_if_fail( -- PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32); -- -- memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t)); --} -- --static void --vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor) --{ -- bool move = cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR; -- -- g_debug("%s move:%d\n", G_STRFUNC, move); -- -- if (move) { -- VhostUserGpuMsg msg = { -- .request = cursor->resource_id ? -- VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE, -- .size = sizeof(VhostUserGpuCursorPos), -- .payload.cursor_pos = { -- .scanout_id = cursor->pos.scanout_id, -- .x = cursor->pos.x, -- .y = cursor->pos.y, -- } -- }; -- vg_send_msg(g, &msg, -1); -- } else { -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_CURSOR_UPDATE, -- .size = sizeof(VhostUserGpuCursorUpdate), -- .payload.cursor_update = { -- .pos = { -- .scanout_id = cursor->pos.scanout_id, -- .x = cursor->pos.x, -- .y = cursor->pos.y, -- }, -- .hot_x = cursor->hot_x, -- .hot_y = cursor->hot_y, -- } -- }; -- if (g->virgl) { -- vg_virgl_update_cursor_data(g, cursor->resource_id, -- msg.payload.cursor_update.data); -- } else { -- update_cursor_data_simple(g, cursor->resource_id, -- msg.payload.cursor_update.data); -- } -- vg_send_msg(g, &msg, -1); -- } --} -- --static void --vg_handle_cursor(VuDev *dev, int qidx) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- VuVirtq *vq = vu_get_queue(dev, qidx); -- VuVirtqElement *elem; -- size_t len; -- struct virtio_gpu_update_cursor cursor; -- -- for (;;) { -- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); -- if (!elem) { -- break; -- } -- g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num); -- -- len = iov_to_buf(elem->out_sg, elem->out_num, -- 0, &cursor, sizeof(cursor)); -- if (len != sizeof(cursor)) { -- g_warning("%s: cursor size incorrect %zu vs %zu\n", -- __func__, len, sizeof(cursor)); -- } else { -- 
virtio_gpu_bswap_32(&cursor, sizeof(cursor)); -- vg_process_cursor_cmd(g, &cursor); -- } -- vu_queue_push(dev, vq, elem, 0); -- vu_queue_notify(dev, vq); -- g_free(elem); -- } --} -- --static void --vg_panic(VuDev *dev, const char *msg) --{ -- g_critical("%s\n", msg); -- exit(1); --} -- --static void --vg_queue_set_started(VuDev *dev, int qidx, bool started) --{ -- VuVirtq *vq = vu_get_queue(dev, qidx); -- -- g_debug("queue started %d:%d\n", qidx, started); -- -- switch (qidx) { -- case 0: -- vu_set_queue_handler(dev, vq, started ? vg_handle_ctrl : NULL); -- break; -- case 1: -- vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL); -- break; -- default: -- break; -- } --} -- --static void --set_gpu_protocol_features(VuGpu *g) --{ -- uint64_t u64; -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES -- }; -- -- assert(g->wait_ok == 0); -- vg_send_msg(g, &msg, -1); -- if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) { -- return; -- } -- -- msg = (VhostUserGpuMsg) { -- .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES, -- .size = sizeof(uint64_t), -- .payload.u64 = 0 -- }; -- vg_send_msg(g, &msg, -1); --} -- --static int --vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- -- switch (msg->request) { -- case VHOST_USER_GPU_SET_SOCKET: { -- g_return_val_if_fail(msg->fd_num == 1, 1); -- g_return_val_if_fail(g->sock_fd == -1, 1); -- g->sock_fd = msg->fds[0]; -- set_gpu_protocol_features(g); -- return 1; -- } -- default: -- return 0; -- } -- -- return 0; --} -- --static uint64_t --vg_get_features(VuDev *dev) --{ -- uint64_t features = 0; -- -- if (opt_virgl) { -- features |= 1 << VIRTIO_GPU_F_VIRGL; -- } -- -- return features; --} -- --static void --vg_set_features(VuDev *dev, uint64_t features) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL); -- -- if (virgl && !g->virgl_inited) { -- if (!vg_virgl_init(g)) 
{ -- vg_panic(dev, "Failed to initialize virgl"); -- } -- g->virgl_inited = true; -- } -- -- g->virgl = virgl; --} -- --static int --vg_get_config(VuDev *dev, uint8_t *config, uint32_t len) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- -- g_return_val_if_fail(len <= sizeof(struct virtio_gpu_config), -1); -- -- if (opt_virgl) { -- g->virtio_config.num_capsets = vg_virgl_get_num_capsets(); -- } -- -- memcpy(config, &g->virtio_config, len); -- -- return 0; --} -- --static int --vg_set_config(VuDev *dev, const uint8_t *data, -- uint32_t offset, uint32_t size, -- uint32_t flags) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- struct virtio_gpu_config *config = (struct virtio_gpu_config *)data; -- -- if (config->events_clear) { -- g->virtio_config.events_read &= ~config->events_clear; -- } -- -- return 0; --} -- --static const VuDevIface vuiface = { -- .set_features = vg_set_features, -- .get_features = vg_get_features, -- .queue_set_started = vg_queue_set_started, -- .process_msg = vg_process_msg, -- .get_config = vg_get_config, -- .set_config = vg_set_config, --}; -- --static void --vg_destroy(VuGpu *g) --{ -- struct virtio_gpu_simple_resource *res, *tmp; -- -- vug_deinit(&g->dev); -- -- vg_sock_fd_close(g); -- -- QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { -- vg_resource_destroy(g, res); -- } -- -- vugbm_device_destroy(&g->gdev); --} -- --static GOptionEntry entries[] = { -- { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, -- "Print capabilities", NULL }, -- { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, -- "Use inherited fd socket", "FDNUM" }, -- { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, -- "Use UNIX socket path", "PATH" }, -- { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node, -- "Specify DRM render node", "PATH" }, -- { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl, -- "Turn virgl rendering on", NULL }, -- { NULL, } --}; -- --int --main(int argc, char *argv[]) --{ -- 
GOptionContext *context; -- GError *error = NULL; -- GMainLoop *loop = NULL; -- int fd; -- VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 }; -- -- QTAILQ_INIT(&g.reslist); -- QTAILQ_INIT(&g.fenceq); -- -- context = g_option_context_new("QEMU vhost-user-gpu"); -- g_option_context_add_main_entries(context, entries, NULL); -- if (!g_option_context_parse(context, &argc, &argv, &error)) { -- g_printerr("Option parsing failed: %s\n", error->message); -- exit(EXIT_FAILURE); -- } -- g_option_context_free(context); -- -- if (opt_print_caps) { -- g_print("{\n"); -- g_print(" \"type\": \"gpu\",\n"); -- g_print(" \"features\": [\n"); -- g_print(" \"render-node\",\n"); -- g_print(" \"virgl\"\n"); -- g_print(" ]\n"); -- g_print("}\n"); -- exit(EXIT_SUCCESS); -- } -- -- g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node); -- if (opt_render_node && g.drm_rnode_fd == -1) { -- g_printerr("Failed to open DRM rendernode.\n"); -- exit(EXIT_FAILURE); -- } -- -- if (g.drm_rnode_fd >= 0) { -- if (!vugbm_device_init(&g.gdev, g.drm_rnode_fd)) { -- g_warning("Failed to init DRM device, using fallback path"); -- } -- } -- -- if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) { -- g_printerr("Please specify either --fd or --socket-path\n"); -- exit(EXIT_FAILURE); -- } -- -- if (opt_socket_path) { -- int lsock = unix_listen(opt_socket_path, &error_fatal); -- if (lsock < 0) { -- g_printerr("Failed to listen on %s.\n", opt_socket_path); -- exit(EXIT_FAILURE); -- } -- fd = accept(lsock, NULL, NULL); -- close(lsock); -- } else { -- fd = opt_fdnum; -- } -- if (fd == -1) { -- g_printerr("Invalid vhost-user socket.\n"); -- exit(EXIT_FAILURE); -- } -- -- if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) { -- g_printerr("Failed to initialize libvhost-user-glib.\n"); -- exit(EXIT_FAILURE); -- } -- -- loop = g_main_loop_new(NULL, FALSE); -- g_main_loop_run(loop); -- g_main_loop_unref(loop); -- -- vg_destroy(&g); -- if (g.drm_rnode_fd >= 0) { -- close(g.drm_rnode_fd); -- } -- 
-- return 0; --} -diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c -new file mode 100644 -index 0000000..b45d201 ---- /dev/null -+++ b/contrib/vhost-user-gpu/vhost-user-gpu.c -@@ -0,0 +1,1191 @@ -+/* -+ * Virtio vhost-user GPU Device -+ * -+ * Copyright Red Hat, Inc. 2013-2018 -+ * -+ * Authors: -+ * Dave Airlie -+ * Gerd Hoffmann -+ * Marc-André Lureau -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+#include "qemu/osdep.h" -+#include "qemu/drm.h" -+#include "qapi/error.h" -+#include "qemu/sockets.h" -+ -+#include -+#include -+ -+#include "vugpu.h" -+#include "hw/virtio/virtio-gpu-bswap.h" -+#include "hw/virtio/virtio-gpu-pixman.h" -+#include "virgl.h" -+#include "vugbm.h" -+ -+enum { -+ VHOST_USER_GPU_MAX_QUEUES = 2, -+}; -+ -+struct virtio_gpu_simple_resource { -+ uint32_t resource_id; -+ uint32_t width; -+ uint32_t height; -+ uint32_t format; -+ struct iovec *iov; -+ unsigned int iov_cnt; -+ uint32_t scanout_bitmask; -+ pixman_image_t *image; -+ struct vugbm_buffer buffer; -+ QTAILQ_ENTRY(virtio_gpu_simple_resource) next; -+}; -+ -+static gboolean opt_print_caps; -+static int opt_fdnum = -1; -+static char *opt_socket_path; -+static char *opt_render_node; -+static gboolean opt_virgl; -+ -+static void vg_handle_ctrl(VuDev *dev, int qidx); -+ -+static const char * -+vg_cmd_to_string(int cmd) -+{ -+#define CMD(cmd) [cmd] = #cmd -+ static const char *vg_cmd_str[] = { -+ CMD(VIRTIO_GPU_UNDEFINED), -+ -+ /* 2d commands */ -+ CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF), -+ CMD(VIRTIO_GPU_CMD_SET_SCANOUT), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH), -+ CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING), -+ CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO), -+ CMD(VIRTIO_GPU_CMD_GET_CAPSET), 
-+ -+ /* 3d commands */ -+ CMD(VIRTIO_GPU_CMD_CTX_CREATE), -+ CMD(VIRTIO_GPU_CMD_CTX_DESTROY), -+ CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE), -+ CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D), -+ CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D), -+ CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D), -+ CMD(VIRTIO_GPU_CMD_SUBMIT_3D), -+ -+ /* cursor commands */ -+ CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR), -+ CMD(VIRTIO_GPU_CMD_MOVE_CURSOR), -+ }; -+#undef REQ -+ -+ if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) { -+ return vg_cmd_str[cmd]; -+ } else { -+ return "unknown"; -+ } -+} -+ -+static int -+vg_sock_fd_read(int sock, void *buf, ssize_t buflen) -+{ -+ int ret; -+ -+ do { -+ ret = read(sock, buf, buflen); -+ } while (ret < 0 && (errno == EINTR || errno == EAGAIN)); -+ -+ g_warn_if_fail(ret == buflen); -+ return ret; -+} -+ -+static void -+vg_sock_fd_close(VuGpu *g) -+{ -+ if (g->sock_fd >= 0) { -+ close(g->sock_fd); -+ g->sock_fd = -1; -+ } -+} -+ -+static gboolean -+source_wait_cb(gint fd, GIOCondition condition, gpointer user_data) -+{ -+ VuGpu *g = user_data; -+ -+ if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) { -+ return G_SOURCE_CONTINUE; -+ } -+ -+ /* resume */ -+ g->wait_ok = 0; -+ vg_handle_ctrl(&g->dev.parent, 0); -+ -+ return G_SOURCE_REMOVE; -+} -+ -+void -+vg_wait_ok(VuGpu *g) -+{ -+ assert(g->wait_ok == 0); -+ g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP, -+ source_wait_cb, g); -+} -+ -+static int -+vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd) -+{ -+ ssize_t ret; -+ struct iovec iov = { -+ .iov_base = (void *)buf, -+ .iov_len = buflen, -+ }; -+ struct msghdr msg = { -+ .msg_iov = &iov, -+ .msg_iovlen = 1, -+ }; -+ union { -+ struct cmsghdr cmsghdr; -+ char control[CMSG_SPACE(sizeof(int))]; -+ } cmsgu; -+ struct cmsghdr *cmsg; -+ -+ if (fd != -1) { -+ msg.msg_control = cmsgu.control; -+ msg.msg_controllen = sizeof(cmsgu.control); -+ -+ cmsg = CMSG_FIRSTHDR(&msg); -+ cmsg->cmsg_len = 
CMSG_LEN(sizeof(int)); -+ cmsg->cmsg_level = SOL_SOCKET; -+ cmsg->cmsg_type = SCM_RIGHTS; -+ -+ *((int *)CMSG_DATA(cmsg)) = fd; -+ } -+ -+ do { -+ ret = sendmsg(sock, &msg, 0); -+ } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); -+ -+ g_warn_if_fail(ret == buflen); -+ return ret; -+} -+ -+void -+vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd) -+{ -+ if (vg_sock_fd_write(vg->sock_fd, msg, -+ VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) { -+ vg_sock_fd_close(vg); -+ } -+} -+ -+bool -+vg_recv_msg(VuGpu *g, uint32_t expect_req, uint32_t expect_size, -+ gpointer payload) -+{ -+ uint32_t req, flags, size; -+ -+ if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 || -+ vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 || -+ vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) { -+ goto err; -+ } -+ -+ g_return_val_if_fail(req == expect_req, false); -+ g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false); -+ g_return_val_if_fail(size == expect_size, false); -+ -+ if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) { -+ goto err; -+ } -+ -+ return true; -+ -+err: -+ vg_sock_fd_close(g); -+ return false; -+} -+ -+static struct virtio_gpu_simple_resource * -+virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id) -+{ -+ struct virtio_gpu_simple_resource *res; -+ -+ QTAILQ_FOREACH(res, &g->reslist, next) { -+ if (res->resource_id == resource_id) { -+ return res; -+ } -+ } -+ return NULL; -+} -+ -+void -+vg_ctrl_response(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd, -+ struct virtio_gpu_ctrl_hdr *resp, -+ size_t resp_len) -+{ -+ size_t s; -+ -+ if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) { -+ resp->flags |= VIRTIO_GPU_FLAG_FENCE; -+ resp->fence_id = cmd->cmd_hdr.fence_id; -+ resp->ctx_id = cmd->cmd_hdr.ctx_id; -+ } -+ virtio_gpu_ctrl_hdr_bswap(resp); -+ s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len); -+ if (s != resp_len) { -+ g_critical("%s: response size incorrect %zu vs %zu", -+ 
__func__, s, resp_len); -+ } -+ vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s); -+ vu_queue_notify(&g->dev.parent, cmd->vq); -+ cmd->finished = true; -+} -+ -+void -+vg_ctrl_response_nodata(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd, -+ enum virtio_gpu_ctrl_type type) -+{ -+ struct virtio_gpu_ctrl_hdr resp = { -+ .type = type, -+ }; -+ -+ vg_ctrl_response(g, cmd, &resp, sizeof(resp)); -+} -+ -+void -+vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_resp_display_info dpy_info = { {} }; -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_GET_DISPLAY_INFO, -+ .size = 0, -+ }; -+ -+ assert(vg->wait_ok == 0); -+ -+ vg_send_msg(vg, &msg, -1); -+ if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) { -+ return; -+ } -+ -+ vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info)); -+} -+ -+static void -+vg_resource_create_2d(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ pixman_format_code_t pformat; -+ struct virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_create_2d c2d; -+ -+ VUGPU_FILL_CMD(c2d); -+ virtio_gpu_bswap_32(&c2d, sizeof(c2d)); -+ -+ if (c2d.resource_id == 0) { -+ g_critical("%s: resource id 0 is not allowed", __func__); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ res = virtio_gpu_find_resource(g, c2d.resource_id); -+ if (res) { -+ g_critical("%s: resource already exists %d", __func__, c2d.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ res = g_new0(struct virtio_gpu_simple_resource, 1); -+ res->width = c2d.width; -+ res->height = c2d.height; -+ res->format = c2d.format; -+ res->resource_id = c2d.resource_id; -+ -+ pformat = virtio_gpu_get_pixman_format(c2d.format); -+ if (!pformat) { -+ g_critical("%s: host couldn't handle guest format %d", -+ __func__, c2d.format); -+ g_free(res); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -+ return; -+ } -+ 
vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height); -+ res->image = pixman_image_create_bits(pformat, -+ c2d.width, -+ c2d.height, -+ (uint32_t *)res->buffer.mmap, -+ res->buffer.stride); -+ if (!res->image) { -+ g_critical("%s: resource creation failed %d %d %d", -+ __func__, c2d.resource_id, c2d.width, c2d.height); -+ g_free(res); -+ cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; -+ return; -+ } -+ -+ QTAILQ_INSERT_HEAD(&g->reslist, res, next); -+} -+ -+static void -+vg_disable_scanout(VuGpu *g, int scanout_id) -+{ -+ struct virtio_gpu_scanout *scanout = &g->scanout[scanout_id]; -+ struct virtio_gpu_simple_resource *res; -+ -+ if (scanout->resource_id == 0) { -+ return; -+ } -+ -+ res = virtio_gpu_find_resource(g, scanout->resource_id); -+ if (res) { -+ res->scanout_bitmask &= ~(1 << scanout_id); -+ } -+ -+ scanout->width = 0; -+ scanout->height = 0; -+ -+ if (g->sock_fd >= 0) { -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_SCANOUT, -+ .size = sizeof(VhostUserGpuScanout), -+ .payload.scanout.scanout_id = scanout_id, -+ }; -+ vg_send_msg(g, &msg, -1); -+ } -+} -+ -+static void -+vg_resource_destroy(VuGpu *g, -+ struct virtio_gpu_simple_resource *res) -+{ -+ int i; -+ -+ if (res->scanout_bitmask) { -+ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { -+ if (res->scanout_bitmask & (1 << i)) { -+ vg_disable_scanout(g, i); -+ } -+ } -+ } -+ -+ vugbm_buffer_destroy(&res->buffer); -+ pixman_image_unref(res->image); -+ QTAILQ_REMOVE(&g->reslist, res, next); -+ g_free(res); -+} -+ -+static void -+vg_resource_unref(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_unref unref; -+ -+ VUGPU_FILL_CMD(unref); -+ virtio_gpu_bswap_32(&unref, sizeof(unref)); -+ -+ res = virtio_gpu_find_resource(g, unref.resource_id); -+ if (!res) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, unref.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ 
return; -+ } -+ vg_resource_destroy(g, res); -+} -+ -+int -+vg_create_mapping_iov(VuGpu *g, -+ struct virtio_gpu_resource_attach_backing *ab, -+ struct virtio_gpu_ctrl_command *cmd, -+ struct iovec **iov) -+{ -+ struct virtio_gpu_mem_entry *ents; -+ size_t esize, s; -+ int i; -+ -+ if (ab->nr_entries > 16384) { -+ g_critical("%s: nr_entries is too big (%d > 16384)", -+ __func__, ab->nr_entries); -+ return -1; -+ } -+ -+ esize = sizeof(*ents) * ab->nr_entries; -+ ents = g_malloc(esize); -+ s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, -+ sizeof(*ab), ents, esize); -+ if (s != esize) { -+ g_critical("%s: command data size incorrect %zu vs %zu", -+ __func__, s, esize); -+ g_free(ents); -+ return -1; -+ } -+ -+ *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); -+ for (i = 0; i < ab->nr_entries; i++) { -+ uint64_t len = ents[i].length; -+ (*iov)[i].iov_len = ents[i].length; -+ (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr); -+ if (!(*iov)[i].iov_base || len != ents[i].length) { -+ g_critical("%s: resource %d element %d", -+ __func__, ab->resource_id, i); -+ g_free(*iov); -+ g_free(ents); -+ *iov = NULL; -+ return -1; -+ } -+ } -+ g_free(ents); -+ return 0; -+} -+ -+static void -+vg_resource_attach_backing(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_attach_backing ab; -+ int ret; -+ -+ VUGPU_FILL_CMD(ab); -+ virtio_gpu_bswap_32(&ab, sizeof(ab)); -+ -+ res = virtio_gpu_find_resource(g, ab.resource_id); -+ if (!res) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, ab.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); -+ if (ret != 0) { -+ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; -+ return; -+ } -+ -+ res->iov_cnt = ab.nr_entries; -+} -+ -+static void -+vg_resource_detach_backing(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct 
virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_detach_backing detach; -+ -+ VUGPU_FILL_CMD(detach); -+ virtio_gpu_bswap_32(&detach, sizeof(detach)); -+ -+ res = virtio_gpu_find_resource(g, detach.resource_id); -+ if (!res || !res->iov) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, detach.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ g_free(res->iov); -+ res->iov = NULL; -+ res->iov_cnt = 0; -+} -+ -+static void -+vg_transfer_to_host_2d(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res; -+ int h; -+ uint32_t src_offset, dst_offset, stride; -+ int bpp; -+ pixman_format_code_t format; -+ struct virtio_gpu_transfer_to_host_2d t2d; -+ -+ VUGPU_FILL_CMD(t2d); -+ virtio_gpu_t2d_bswap(&t2d); -+ -+ res = virtio_gpu_find_resource(g, t2d.resource_id); -+ if (!res || !res->iov) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, t2d.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ if (t2d.r.x > res->width || -+ t2d.r.y > res->height || -+ t2d.r.width > res->width || -+ t2d.r.height > res->height || -+ t2d.r.x + t2d.r.width > res->width || -+ t2d.r.y + t2d.r.height > res->height) { -+ g_critical("%s: transfer bounds outside resource" -+ " bounds for resource %d: %d %d %d %d vs %d %d", -+ __func__, t2d.resource_id, t2d.r.x, t2d.r.y, -+ t2d.r.width, t2d.r.height, res->width, res->height); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -+ return; -+ } -+ -+ format = pixman_image_get_format(res->image); -+ bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8; -+ stride = pixman_image_get_stride(res->image); -+ -+ if (t2d.offset || t2d.r.x || t2d.r.y || -+ t2d.r.width != pixman_image_get_width(res->image)) { -+ void *img_data = pixman_image_get_data(res->image); -+ for (h = 0; h < t2d.r.height; h++) { -+ src_offset = t2d.offset + stride * h; -+ dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp); -+ 
-+ iov_to_buf(res->iov, res->iov_cnt, src_offset, -+ img_data -+ + dst_offset, t2d.r.width * bpp); -+ } -+ } else { -+ iov_to_buf(res->iov, res->iov_cnt, 0, -+ pixman_image_get_data(res->image), -+ pixman_image_get_stride(res->image) -+ * pixman_image_get_height(res->image)); -+ } -+} -+ -+static void -+vg_set_scanout(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res, *ores; -+ struct virtio_gpu_scanout *scanout; -+ struct virtio_gpu_set_scanout ss; -+ int fd; -+ -+ VUGPU_FILL_CMD(ss); -+ virtio_gpu_bswap_32(&ss, sizeof(ss)); -+ -+ if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) { -+ g_critical("%s: illegal scanout id specified %d", -+ __func__, ss.scanout_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; -+ return; -+ } -+ -+ if (ss.resource_id == 0) { -+ vg_disable_scanout(g, ss.scanout_id); -+ return; -+ } -+ -+ /* create a surface for this scanout */ -+ res = virtio_gpu_find_resource(g, ss.resource_id); -+ if (!res) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, ss.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ if (ss.r.x > res->width || -+ ss.r.y > res->height || -+ ss.r.width > res->width || -+ ss.r.height > res->height || -+ ss.r.x + ss.r.width > res->width || -+ ss.r.y + ss.r.height > res->height) { -+ g_critical("%s: illegal scanout %d bounds for" -+ " resource %d, (%d,%d)+%d,%d vs %d %d", -+ __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, -+ ss.r.width, ss.r.height, res->width, res->height); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -+ return; -+ } -+ -+ scanout = &g->scanout[ss.scanout_id]; -+ -+ ores = virtio_gpu_find_resource(g, scanout->resource_id); -+ if (ores) { -+ ores->scanout_bitmask &= ~(1 << ss.scanout_id); -+ } -+ -+ res->scanout_bitmask |= (1 << ss.scanout_id); -+ scanout->resource_id = ss.resource_id; -+ scanout->x = ss.r.x; -+ scanout->y = ss.r.y; -+ scanout->width = ss.r.width; -+ scanout->height = 
ss.r.height; -+ -+ struct vugbm_buffer *buffer = &res->buffer; -+ -+ if (vugbm_buffer_can_get_dmabuf_fd(buffer)) { -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_DMABUF_SCANOUT, -+ .size = sizeof(VhostUserGpuDMABUFScanout), -+ .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) { -+ .scanout_id = ss.scanout_id, -+ .x = ss.r.x, -+ .y = ss.r.y, -+ .width = ss.r.width, -+ .height = ss.r.height, -+ .fd_width = buffer->width, -+ .fd_height = buffer->height, -+ .fd_stride = buffer->stride, -+ .fd_drm_fourcc = buffer->format -+ } -+ }; -+ -+ if (vugbm_buffer_get_dmabuf_fd(buffer, &fd)) { -+ vg_send_msg(g, &msg, fd); -+ close(fd); -+ } -+ } else { -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_SCANOUT, -+ .size = sizeof(VhostUserGpuScanout), -+ .payload.scanout = (VhostUserGpuScanout) { -+ .scanout_id = ss.scanout_id, -+ .width = scanout->width, -+ .height = scanout->height -+ } -+ }; -+ vg_send_msg(g, &msg, -1); -+ } -+} -+ -+static void -+vg_resource_flush(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_flush rf; -+ pixman_region16_t flush_region; -+ int i; -+ -+ VUGPU_FILL_CMD(rf); -+ virtio_gpu_bswap_32(&rf, sizeof(rf)); -+ -+ res = virtio_gpu_find_resource(g, rf.resource_id); -+ if (!res) { -+ g_critical("%s: illegal resource specified %d\n", -+ __func__, rf.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ if (rf.r.x > res->width || -+ rf.r.y > res->height || -+ rf.r.width > res->width || -+ rf.r.height > res->height || -+ rf.r.x + rf.r.width > res->width || -+ rf.r.y + rf.r.height > res->height) { -+ g_critical("%s: flush bounds outside resource" -+ " bounds for resource %d: %d %d %d %d vs %d %d\n", -+ __func__, rf.resource_id, rf.r.x, rf.r.y, -+ rf.r.width, rf.r.height, res->width, res->height); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -+ return; -+ } -+ -+ pixman_region_init_rect(&flush_region, -+ rf.r.x, 
rf.r.y, rf.r.width, rf.r.height); -+ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { -+ struct virtio_gpu_scanout *scanout; -+ pixman_region16_t region, finalregion; -+ pixman_box16_t *extents; -+ -+ if (!(res->scanout_bitmask & (1 << i))) { -+ continue; -+ } -+ scanout = &g->scanout[i]; -+ -+ pixman_region_init(&finalregion); -+ pixman_region_init_rect(®ion, scanout->x, scanout->y, -+ scanout->width, scanout->height); -+ -+ pixman_region_intersect(&finalregion, &flush_region, ®ion); -+ -+ extents = pixman_region_extents(&finalregion); -+ size_t width = extents->x2 - extents->x1; -+ size_t height = extents->y2 - extents->y1; -+ -+ if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) { -+ VhostUserGpuMsg vmsg = { -+ .request = VHOST_USER_GPU_DMABUF_UPDATE, -+ .size = sizeof(VhostUserGpuUpdate), -+ .payload.update = (VhostUserGpuUpdate) { -+ .scanout_id = i, -+ .x = extents->x1, -+ .y = extents->y1, -+ .width = width, -+ .height = height, -+ } -+ }; -+ vg_send_msg(g, &vmsg, -1); -+ vg_wait_ok(g); -+ } else { -+ size_t bpp = -+ PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8; -+ size_t size = width * height * bpp; -+ -+ void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE + -+ sizeof(VhostUserGpuUpdate) + size); -+ VhostUserGpuMsg *msg = p; -+ msg->request = VHOST_USER_GPU_UPDATE; -+ msg->size = sizeof(VhostUserGpuUpdate) + size; -+ msg->payload.update = (VhostUserGpuUpdate) { -+ .scanout_id = i, -+ .x = extents->x1, -+ .y = extents->y1, -+ .width = width, -+ .height = height, -+ }; -+ pixman_image_t *i = -+ pixman_image_create_bits(pixman_image_get_format(res->image), -+ msg->payload.update.width, -+ msg->payload.update.height, -+ p + offsetof(VhostUserGpuMsg, -+ payload.update.data), -+ width * bpp); -+ pixman_image_composite(PIXMAN_OP_SRC, -+ res->image, NULL, i, -+ extents->x1, extents->y1, -+ 0, 0, 0, 0, -+ width, height); -+ pixman_image_unref(i); -+ vg_send_msg(g, msg, -1); -+ g_free(msg); -+ } -+ pixman_region_fini(®ion); -+ pixman_region_fini(&finalregion); -+ 
} -+ pixman_region_fini(&flush_region); -+} -+ -+static void -+vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) -+{ -+ switch (cmd->cmd_hdr.type) { -+ case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: -+ vg_get_display_info(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: -+ vg_resource_create_2d(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_UNREF: -+ vg_resource_unref(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_FLUSH: -+ vg_resource_flush(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: -+ vg_transfer_to_host_2d(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_SET_SCANOUT: -+ vg_set_scanout(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: -+ vg_resource_attach_backing(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: -+ vg_resource_detach_backing(vg, cmd); -+ break; -+ /* case VIRTIO_GPU_CMD_GET_EDID: */ -+ /* break */ -+ default: -+ g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type); -+ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; -+ break; -+ } -+ if (!cmd->finished) { -+ vg_ctrl_response_nodata(vg, cmd, cmd->error ? 
cmd->error : -+ VIRTIO_GPU_RESP_OK_NODATA); -+ } -+} -+ -+static void -+vg_handle_ctrl(VuDev *dev, int qidx) -+{ -+ VuGpu *vg = container_of(dev, VuGpu, dev.parent); -+ VuVirtq *vq = vu_get_queue(dev, qidx); -+ struct virtio_gpu_ctrl_command *cmd = NULL; -+ size_t len; -+ -+ for (;;) { -+ if (vg->wait_ok != 0) { -+ return; -+ } -+ -+ cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command)); -+ if (!cmd) { -+ break; -+ } -+ cmd->vq = vq; -+ cmd->error = 0; -+ cmd->finished = false; -+ -+ len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, -+ 0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr)); -+ if (len != sizeof(cmd->cmd_hdr)) { -+ g_warning("%s: command size incorrect %zu vs %zu\n", -+ __func__, len, sizeof(cmd->cmd_hdr)); -+ } -+ -+ virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); -+ g_debug("%d %s\n", cmd->cmd_hdr.type, -+ vg_cmd_to_string(cmd->cmd_hdr.type)); -+ -+ if (vg->virgl) { -+ vg_virgl_process_cmd(vg, cmd); -+ } else { -+ vg_process_cmd(vg, cmd); -+ } -+ -+ if (!cmd->finished) { -+ QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next); -+ vg->inflight++; -+ } else { -+ g_free(cmd); -+ } -+ } -+} -+ -+static void -+update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data) -+{ -+ struct virtio_gpu_simple_resource *res; -+ -+ res = virtio_gpu_find_resource(g, resource_id); -+ g_return_if_fail(res != NULL); -+ g_return_if_fail(pixman_image_get_width(res->image) == 64); -+ g_return_if_fail(pixman_image_get_height(res->image) == 64); -+ g_return_if_fail( -+ PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32); -+ -+ memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t)); -+} -+ -+static void -+vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor) -+{ -+ bool move = cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR; -+ -+ g_debug("%s move:%d\n", G_STRFUNC, move); -+ -+ if (move) { -+ VhostUserGpuMsg msg = { -+ .request = cursor->resource_id ? 
-+ VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE, -+ .size = sizeof(VhostUserGpuCursorPos), -+ .payload.cursor_pos = { -+ .scanout_id = cursor->pos.scanout_id, -+ .x = cursor->pos.x, -+ .y = cursor->pos.y, -+ } -+ }; -+ vg_send_msg(g, &msg, -1); -+ } else { -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_CURSOR_UPDATE, -+ .size = sizeof(VhostUserGpuCursorUpdate), -+ .payload.cursor_update = { -+ .pos = { -+ .scanout_id = cursor->pos.scanout_id, -+ .x = cursor->pos.x, -+ .y = cursor->pos.y, -+ }, -+ .hot_x = cursor->hot_x, -+ .hot_y = cursor->hot_y, -+ } -+ }; -+ if (g->virgl) { -+ vg_virgl_update_cursor_data(g, cursor->resource_id, -+ msg.payload.cursor_update.data); -+ } else { -+ update_cursor_data_simple(g, cursor->resource_id, -+ msg.payload.cursor_update.data); -+ } -+ vg_send_msg(g, &msg, -1); -+ } -+} -+ -+static void -+vg_handle_cursor(VuDev *dev, int qidx) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ VuVirtq *vq = vu_get_queue(dev, qidx); -+ VuVirtqElement *elem; -+ size_t len; -+ struct virtio_gpu_update_cursor cursor; -+ -+ for (;;) { -+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); -+ if (!elem) { -+ break; -+ } -+ g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num); -+ -+ len = iov_to_buf(elem->out_sg, elem->out_num, -+ 0, &cursor, sizeof(cursor)); -+ if (len != sizeof(cursor)) { -+ g_warning("%s: cursor size incorrect %zu vs %zu\n", -+ __func__, len, sizeof(cursor)); -+ } else { -+ virtio_gpu_bswap_32(&cursor, sizeof(cursor)); -+ vg_process_cursor_cmd(g, &cursor); -+ } -+ vu_queue_push(dev, vq, elem, 0); -+ vu_queue_notify(dev, vq); -+ g_free(elem); -+ } -+} -+ -+static void -+vg_panic(VuDev *dev, const char *msg) -+{ -+ g_critical("%s\n", msg); -+ exit(1); -+} -+ -+static void -+vg_queue_set_started(VuDev *dev, int qidx, bool started) -+{ -+ VuVirtq *vq = vu_get_queue(dev, qidx); -+ -+ g_debug("queue started %d:%d\n", qidx, started); -+ -+ switch (qidx) { -+ case 0: -+ vu_set_queue_handler(dev, vq, 
started ? vg_handle_ctrl : NULL); -+ break; -+ case 1: -+ vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL); -+ break; -+ default: -+ break; -+ } -+} -+ -+static void -+set_gpu_protocol_features(VuGpu *g) -+{ -+ uint64_t u64; -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES -+ }; -+ -+ assert(g->wait_ok == 0); -+ vg_send_msg(g, &msg, -1); -+ if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) { -+ return; -+ } -+ -+ msg = (VhostUserGpuMsg) { -+ .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES, -+ .size = sizeof(uint64_t), -+ .payload.u64 = 0 -+ }; -+ vg_send_msg(g, &msg, -1); -+} -+ -+static int -+vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ -+ switch (msg->request) { -+ case VHOST_USER_GPU_SET_SOCKET: { -+ g_return_val_if_fail(msg->fd_num == 1, 1); -+ g_return_val_if_fail(g->sock_fd == -1, 1); -+ g->sock_fd = msg->fds[0]; -+ set_gpu_protocol_features(g); -+ return 1; -+ } -+ default: -+ return 0; -+ } -+ -+ return 0; -+} -+ -+static uint64_t -+vg_get_features(VuDev *dev) -+{ -+ uint64_t features = 0; -+ -+ if (opt_virgl) { -+ features |= 1 << VIRTIO_GPU_F_VIRGL; -+ } -+ -+ return features; -+} -+ -+static void -+vg_set_features(VuDev *dev, uint64_t features) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL); -+ -+ if (virgl && !g->virgl_inited) { -+ if (!vg_virgl_init(g)) { -+ vg_panic(dev, "Failed to initialize virgl"); -+ } -+ g->virgl_inited = true; -+ } -+ -+ g->virgl = virgl; -+} -+ -+static int -+vg_get_config(VuDev *dev, uint8_t *config, uint32_t len) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ -+ g_return_val_if_fail(len <= sizeof(struct virtio_gpu_config), -1); -+ -+ if (opt_virgl) { -+ g->virtio_config.num_capsets = vg_virgl_get_num_capsets(); -+ } -+ -+ memcpy(config, &g->virtio_config, len); -+ -+ return 0; -+} -+ -+static int -+vg_set_config(VuDev *dev, const 
uint8_t *data, -+ uint32_t offset, uint32_t size, -+ uint32_t flags) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ struct virtio_gpu_config *config = (struct virtio_gpu_config *)data; -+ -+ if (config->events_clear) { -+ g->virtio_config.events_read &= ~config->events_clear; -+ } -+ -+ return 0; -+} -+ -+static const VuDevIface vuiface = { -+ .set_features = vg_set_features, -+ .get_features = vg_get_features, -+ .queue_set_started = vg_queue_set_started, -+ .process_msg = vg_process_msg, -+ .get_config = vg_get_config, -+ .set_config = vg_set_config, -+}; -+ -+static void -+vg_destroy(VuGpu *g) -+{ -+ struct virtio_gpu_simple_resource *res, *tmp; -+ -+ vug_deinit(&g->dev); -+ -+ vg_sock_fd_close(g); -+ -+ QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { -+ vg_resource_destroy(g, res); -+ } -+ -+ vugbm_device_destroy(&g->gdev); -+} -+ -+static GOptionEntry entries[] = { -+ { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, -+ "Print capabilities", NULL }, -+ { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, -+ "Use inherited fd socket", "FDNUM" }, -+ { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, -+ "Use UNIX socket path", "PATH" }, -+ { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node, -+ "Specify DRM render node", "PATH" }, -+ { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl, -+ "Turn virgl rendering on", NULL }, -+ { NULL, } -+}; -+ -+int -+main(int argc, char *argv[]) -+{ -+ GOptionContext *context; -+ GError *error = NULL; -+ GMainLoop *loop = NULL; -+ int fd; -+ VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 }; -+ -+ QTAILQ_INIT(&g.reslist); -+ QTAILQ_INIT(&g.fenceq); -+ -+ context = g_option_context_new("QEMU vhost-user-gpu"); -+ g_option_context_add_main_entries(context, entries, NULL); -+ if (!g_option_context_parse(context, &argc, &argv, &error)) { -+ g_printerr("Option parsing failed: %s\n", error->message); -+ exit(EXIT_FAILURE); -+ } -+ g_option_context_free(context); -+ -+ if (opt_print_caps) 
{ -+ g_print("{\n"); -+ g_print(" \"type\": \"gpu\",\n"); -+ g_print(" \"features\": [\n"); -+ g_print(" \"render-node\",\n"); -+ g_print(" \"virgl\"\n"); -+ g_print(" ]\n"); -+ g_print("}\n"); -+ exit(EXIT_SUCCESS); -+ } -+ -+ g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node); -+ if (opt_render_node && g.drm_rnode_fd == -1) { -+ g_printerr("Failed to open DRM rendernode.\n"); -+ exit(EXIT_FAILURE); -+ } -+ -+ if (g.drm_rnode_fd >= 0) { -+ if (!vugbm_device_init(&g.gdev, g.drm_rnode_fd)) { -+ g_warning("Failed to init DRM device, using fallback path"); -+ } -+ } -+ -+ if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) { -+ g_printerr("Please specify either --fd or --socket-path\n"); -+ exit(EXIT_FAILURE); -+ } -+ -+ if (opt_socket_path) { -+ int lsock = unix_listen(opt_socket_path, &error_fatal); -+ if (lsock < 0) { -+ g_printerr("Failed to listen on %s.\n", opt_socket_path); -+ exit(EXIT_FAILURE); -+ } -+ fd = accept(lsock, NULL, NULL); -+ close(lsock); -+ } else { -+ fd = opt_fdnum; -+ } -+ if (fd == -1) { -+ g_printerr("Invalid vhost-user socket.\n"); -+ exit(EXIT_FAILURE); -+ } -+ -+ if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) { -+ g_printerr("Failed to initialize libvhost-user-glib.\n"); -+ exit(EXIT_FAILURE); -+ } -+ -+ loop = g_main_loop_new(NULL, FALSE); -+ g_main_loop_run(loop); -+ g_main_loop_unref(loop); -+ -+ vg_destroy(&g); -+ if (g.drm_rnode_fd >= 0) { -+ close(g.drm_rnode_fd); -+ } -+ -+ return 0; -+} --- -1.8.3.1 - diff --git a/SOURCES/kvm-cadence_gem-switch-to-use-qemu_receive_packet-for-lo.patch b/SOURCES/kvm-cadence_gem-switch-to-use-qemu_receive_packet-for-lo.patch deleted file mode 100644 index 32d5377..0000000 --- a/SOURCES/kvm-cadence_gem-switch-to-use-qemu_receive_packet-for-lo.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 6f1ebcfdb92d12ef2caae0b63a3a380265cba1fa Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 29 Jun 2021 03:42:46 -0400 -Subject: [PATCH 8/9] cadence_gem: switch to use 
qemu_receive_packet() for - loopback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210629034247.3286477-9-jmaloy@redhat.com> -Patchwork-id: 101793 -O-Subject: [RHEL-8.4.0.z qemu-kvm PATCH v2 8/9] cadence_gem: switch to use qemu_receive_packet() for loopback -Bugzilla: 1932917 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Alexander Bulekov - -This patch switches to use qemu_receive_packet() which can detect -reentrancy and return early. - -This is intended to address CVE-2021-3416. - -Cc: Prasad J Pandit -Cc: qemu-stable@nongnu.org -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Alexander Bulekov -Signed-off-by: Jason Wang - -(cherry picked from commit e73adfbeec9d4e008630c814759052ed945c3fed) -Conflict: upstream commit 24d62fd5028e ("net: cadence_gem: Move tx/rx -packet buffert to CadenceGEMState") is missing in this version, so -we stick to using the original stack variable tx_packet in the calls. - -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/cadence_gem.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c -index b8be73dc55..be7c91123b 100644 ---- a/hw/net/cadence_gem.c -+++ b/hw/net/cadence_gem.c -@@ -1225,8 +1225,8 @@ static void gem_transmit(CadenceGEMState *s) - /* Send the packet somewhere */ - if (s->phy_loop || (s->regs[GEM_NWCTRL] & - GEM_NWCTRL_LOCALLOOP)) { -- gem_receive(qemu_get_queue(s->nic), tx_packet, -- total_bytes); -+ qemu_receive_packet(qemu_get_queue(s->nic), tx_packet, -+ total_bytes); - } else { - qemu_send_packet(qemu_get_queue(s->nic), tx_packet, - total_bytes); --- -2.27.0 - diff --git a/SOURCES/kvm-compat-disable-edid-for-virtio-gpu-ccw.patch b/SOURCES/kvm-compat-disable-edid-for-virtio-gpu-ccw.patch deleted file mode 100644 index e000534..0000000 --- a/SOURCES/kvm-compat-disable-edid-for-virtio-gpu-ccw.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 8f9f4d8d52ebb7878543ac0b84cc372477041e33 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Wed, 1 Apr 2020 16:13:50 -0400 -Subject: [PATCH 2/2] compat: disable 'edid' for virtio-gpu-ccw - -RH-Author: Cornelia Huck -Message-id: <20200401161350.20462-1-cohuck@redhat.com> -Patchwork-id: 94523 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2] compat: disable 'edid' for virtio-gpu-ccw -Bugzilla: 1816793 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Markus Armbruster -RH-Acked-by: Dr. David Alan Gilbert - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1816793 -Branch: rhel-av-8.2.1 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27629804 -Upstream: downstream only -Tested: verified that for a virtio-gpu-ccw device 'edid' is false with - a s390-ccw-virtio-rhel7.6.0 machine and true with a - s390-ccw-virtio-rhel8.2.0 (s390x does not have the 8.0 or 8.1 - machine types) - -hw_compat_rhel_8_0 copied the original upstream version of -disabling 'edid' for virtio-gpu-pci only (not following later -changes). 
Switch it to virtio-gpu-device, following upstream -02501fc39381 ("compat: disable edid on correct virtio-gpu device"). - -Signed-off-by: Cornelia Huck -Signed-off-by: Jon Maloy ---- - hw/core/machine.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index e0e0eec8bf..5a025d1af2 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -72,7 +72,7 @@ GlobalProperty hw_compat_rhel_8_0[] = { - /* hw_compat_rhel_8_0 from hw_compat_4_0 */ - { "virtio-vga", "edid", "false" }, - /* hw_compat_rhel_8_0 from hw_compat_4_0 */ -- { "virtio-gpu-pci", "edid", "false" }, -+ { "virtio-gpu-device", "edid", "false" }, - /* hw_compat_rhel_8_0 from hw_compat_4_0 */ - { "virtio-device", "use-started", "false" }, - /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ --- -2.18.2 - diff --git a/SOURCES/kvm-config-enable-VFIO_CCW.patch b/SOURCES/kvm-config-enable-VFIO_CCW.patch deleted file mode 100644 index 44af9cf..0000000 --- a/SOURCES/kvm-config-enable-VFIO_CCW.patch +++ /dev/null @@ -1,39 +0,0 @@ -From f3e80771c921560a58c30020781fa01a54be8eb0 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 23 Jun 2020 09:25:43 -0400 -Subject: [PATCH 09/12] config: enable VFIO_CCW - -RH-Author: Cornelia Huck -Message-id: <20200623092543.358315-10-cohuck@redhat.com> -Patchwork-id: 97699 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 9/9] config: enable VFIO_CCW -Bugzilla: 1660916 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth - -Enable vfio-ccw in RHEL builds. - -Upstream: n/a - -Signed-off-by: Cornelia Huck -Signed-off-by: Danilo C. L. 
de Paula ---- - default-configs/s390x-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/default-configs/s390x-rh-devices.mak b/default-configs/s390x-rh-devices.mak -index c3c73fe752..08a15f3e01 100644 ---- a/default-configs/s390x-rh-devices.mak -+++ b/default-configs/s390x-rh-devices.mak -@@ -9,6 +9,7 @@ CONFIG_SCSI=y - CONFIG_TERMINAL3270=y - CONFIG_VFIO=y - CONFIG_VFIO_AP=y -+CONFIG_VFIO_CCW=y - CONFIG_VFIO_PCI=y - CONFIG_VHOST_USER=y - CONFIG_VIRTIO_CCW=y --- -2.27.0 - diff --git a/SOURCES/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch b/SOURCES/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch deleted file mode 100644 index 4212f1c..0000000 --- a/SOURCES/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 548de8acbf0137b6e49a14b63682badfff037d23 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:44 +0100 -Subject: [PATCH 073/116] contrib/libvhost-user: Protect slave fd with mutex -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-70-dgilbert@redhat.com> -Patchwork-id: 93523 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 069/112] contrib/libvhost-user: Protect slave fd with mutex -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -In future patches we'll be performing commands on the slave-fd driven -by commands on queues, since those queues will be driven by individual -threads we need to make sure they don't attempt to use the slave-fd -for multiple commands in parallel. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit c25c02b9e6a196be87a818f459c426556b24770d) -Signed-off-by: Miroslav Rezanina ---- - contrib/libvhost-user/libvhost-user.c | 24 ++++++++++++++++++++---- - contrib/libvhost-user/libvhost-user.h | 3 +++ - 2 files changed, 23 insertions(+), 4 deletions(-) - -diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c -index ec27b78..63e4106 100644 ---- a/contrib/libvhost-user/libvhost-user.c -+++ b/contrib/libvhost-user/libvhost-user.c -@@ -392,26 +392,37 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) - return vu_message_write(dev, conn_fd, vmsg); - } - -+/* -+ * Processes a reply on the slave channel. -+ * Entered with slave_mutex held and releases it before exit. -+ * Returns true on success. -+ */ - static bool - vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg) - { - VhostUserMsg msg_reply; -+ bool result = false; - - if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { -- return true; -+ result = true; -+ goto out; - } - - if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) { -- return false; -+ goto out; - } - - if (msg_reply.request != vmsg->request) { - DPRINT("Received unexpected msg type. Expected %d received %d", - vmsg->request, msg_reply.request); -- return false; -+ goto out; - } - -- return msg_reply.payload.u64 == 0; -+ result = msg_reply.payload.u64 == 0; -+ -+out: -+ pthread_mutex_unlock(&dev->slave_mutex); -+ return result; - } - - /* Kick the log_call_fd if required. 
*/ -@@ -1105,10 +1116,13 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, - return false; - } - -+ pthread_mutex_lock(&dev->slave_mutex); - if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { -+ pthread_mutex_unlock(&dev->slave_mutex); - return false; - } - -+ /* Also unlocks the slave_mutex */ - return vu_process_message_reply(dev, &vmsg); - } - -@@ -1628,6 +1642,7 @@ vu_deinit(VuDev *dev) - close(dev->slave_fd); - dev->slave_fd = -1; - } -+ pthread_mutex_destroy(&dev->slave_mutex); - - if (dev->sock != -1) { - close(dev->sock); -@@ -1663,6 +1678,7 @@ vu_init(VuDev *dev, - dev->remove_watch = remove_watch; - dev->iface = iface; - dev->log_call_fd = -1; -+ pthread_mutex_init(&dev->slave_mutex, NULL); - dev->slave_fd = -1; - dev->max_queues = max_queues; - -diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h -index 46b6007..1844b6f 100644 ---- a/contrib/libvhost-user/libvhost-user.h -+++ b/contrib/libvhost-user/libvhost-user.h -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include "standard-headers/linux/virtio_ring.h" - - /* Based on qemu/hw/virtio/vhost-user.c */ -@@ -355,6 +356,8 @@ struct VuDev { - VuVirtq *vq; - VuDevInflightInfo inflight_info; - int log_call_fd; -+ /* Must be held while using slave_fd */ -+ pthread_mutex_t slave_mutex; - int slave_fd; - uint64_t log_size; - uint8_t *log_table; --- -1.8.3.1 - diff --git a/SOURCES/kvm-crypto.c-cleanup-created-file-when-block_crypto_co_c.patch b/SOURCES/kvm-crypto.c-cleanup-created-file-when-block_crypto_co_c.patch deleted file mode 100644 index 891b866..0000000 --- a/SOURCES/kvm-crypto.c-cleanup-created-file-when-block_crypto_co_c.patch +++ /dev/null @@ -1,98 +0,0 @@ -From 043decff5812c1f46ed44dd0f82099e3b8bb6a28 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Sun, 31 May 2020 16:40:35 +0100 -Subject: [PATCH 7/7] crypto.c: cleanup created file when - block_crypto_co_create_opts_luks fails - -RH-Author: Maxim Levitsky -Message-id: 
<20200531164035.34188-4-mlevitsk@redhat.com> -Patchwork-id: 97060 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 3/3] crypto.c: cleanup created file when block_crypto_co_create_opts_luks fails -Bugzilla: 1827630 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: John Snow -RH-Acked-by: Eric Blake - -From: Daniel Henrique Barboza - -When using a non-UTF8 secret to create a volume using qemu-img, the -following error happens: - -$ qemu-img create -f luks --object secret,id=vol_1_encrypt0,file=vol_resize_pool.vol_1.secret.qzVQrI -o key-secret=vol_1_encrypt0 /var/tmp/pool_target/vol_1 10240K - -Formatting '/var/tmp/pool_target/vol_1', fmt=luks size=10485760 key-secret=vol_1_encrypt0 -qemu-img: /var/tmp/pool_target/vol_1: Data from secret vol_1_encrypt0 is not valid UTF-8 - -However, the created file '/var/tmp/pool_target/vol_1' is left behind in the -file system after the failure. This behavior can be observed when creating -the volume using Libvirt, via 'virsh vol-create', and then getting "volume -target path already exist" errors when trying to re-create the volume. - -The volume file is created inside block_crypto_co_create_opts_luks(), in -block/crypto.c. If the bdrv_create_file() call is successful but any -succeeding step fails*, the existing 'fail' label does not take into -account the created file, leaving it behind. - -This patch changes block_crypto_co_create_opts_luks() to delete -'filename' in case of failure. A failure in this point means that -the volume is now truncated/corrupted, so even if 'filename' was an -existing volume before calling qemu-img, it is now unusable. Deleting -the file it is not much worse than leaving it in the filesystem in -this scenario, and we don't have to deal with checking the file -pre-existence in the code. - -* in our case, block_crypto_co_create_generic calls qcrypto_block_create, -which calls qcrypto_block_luks_create, and this function fails when -calling qcrypto_secret_lookup_as_utf8. 
- -Reported-by: Srikanth Aithal -Suggested-by: Kevin Wolf -Signed-off-by: Daniel Henrique Barboza -Message-Id: <20200130213907.2830642-4-danielhb413@gmail.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 1bba30da24e1124ceeb0693c81382a0d77e20ca5) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. de Paula ---- - block/crypto.c | 18 ++++++++++++++++++ - 1 file changed, 18 insertions(+) - -diff --git a/block/crypto.c b/block/crypto.c -index 970d463..5e3b15c 100644 ---- a/block/crypto.c -+++ b/block/crypto.c -@@ -30,6 +30,7 @@ - #include "qapi/error.h" - #include "qemu/module.h" - #include "qemu/option.h" -+#include "qemu/cutils.h" - #include "crypto.h" - - typedef struct BlockCrypto BlockCrypto; -@@ -597,6 +598,23 @@ static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv, - - ret = 0; - fail: -+ /* -+ * If an error occurred, delete 'filename'. Even if the file existed -+ * beforehand, it has been truncated and corrupted in the process. -+ */ -+ if (ret && bs) { -+ Error *local_delete_err = NULL; -+ int r_del = bdrv_co_delete_file(bs, &local_delete_err); -+ /* -+ * ENOTSUP will happen if the block driver doesn't support -+ * the 'bdrv_co_delete_file' interface. This is a predictable -+ * scenario and shouldn't be reported back to the user. 
-+ */ -+ if ((r_del < 0) && (r_del != -ENOTSUP)) { -+ error_report_err(local_delete_err); -+ } -+ } -+ - bdrv_unref(bs); - qapi_free_QCryptoBlockCreateOptions(create_opts); - qobject_unref(cryptoopts); --- -1.8.3.1 - diff --git a/SOURCES/kvm-doc-Add-the-SGX-numa-description.patch b/SOURCES/kvm-doc-Add-the-SGX-numa-description.patch new file mode 100644 index 0000000..0bed8a6 --- /dev/null +++ b/SOURCES/kvm-doc-Add-the-SGX-numa-description.patch @@ -0,0 +1,77 @@ +From e8377e3f4d540e2594a50985523e87d1f3cabbc7 Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:08 -0400 +Subject: [PATCH 3/7] doc: Add the SGX numa description + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [3/5] 41c74688c9662b966c243566a837135ff52341c4 +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +Add the SGX numa reference command and how to check if +SGX numa is support or not with multiple EPC sections. + +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-5-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit d1889b36098c79e2e6ac90faf3d0dc5ec0057677) +Signed-off-by: Paul Lai +--- + docs/system/i386/sgx.rst | 31 +++++++++++++++++++++++++++---- + 1 file changed, 27 insertions(+), 4 deletions(-) + +diff --git a/docs/system/i386/sgx.rst b/docs/system/i386/sgx.rst +index f8fade5ac2..0f0a73f758 100644 +--- a/docs/system/i386/sgx.rst ++++ b/docs/system/i386/sgx.rst +@@ -141,8 +141,7 @@ To launch a SGX guest: + |qemu_system_x86| \\ + -cpu host,+sgx-provisionkey \\ + -object memory-backend-epc,id=mem1,size=64M,prealloc=on \\ +- -object memory-backend-epc,id=mem2,size=28M \\ +- -M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2 ++ -M sgx-epc.0.memdev=mem1,sgx-epc.0.node=0 + + Utilizing SGX in the guest requires a kernel/OS with SGX support. 
+ The support can be determined in guest by:: +@@ -152,8 +151,32 @@ The support can be determined in guest by:: + and SGX epc info by:: + + $ dmesg | grep sgx +- [ 1.242142] sgx: EPC section 0x180000000-0x181bfffff +- [ 1.242319] sgx: EPC section 0x181c00000-0x1837fffff ++ [ 0.182807] sgx: EPC section 0x140000000-0x143ffffff ++ [ 0.183695] sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0. ++ ++To launch a SGX numa guest: ++ ++.. parsed-literal:: ++ ++ |qemu_system_x86| \\ ++ -cpu host,+sgx-provisionkey \\ ++ -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \\ ++ -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \\ ++ -numa node,nodeid=0,cpus=0-1,memdev=node0 \\ ++ -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \\ ++ -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \\ ++ -numa node,nodeid=1,cpus=2-3,memdev=node1 \\ ++ -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 ++ ++and SGX epc numa info by:: ++ ++ $ dmesg | grep sgx ++ [ 0.369937] sgx: EPC section 0x180000000-0x183ffffff ++ [ 0.370259] sgx: EPC section 0x184000000-0x185bfffff ++ ++ $ dmesg | grep SRAT ++ [ 0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x180000000-0x183ffffff] ++ [ 0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x184000000-0x185bfffff] + + References + ---------- +-- +2.27.0 + diff --git a/SOURCES/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch b/SOURCES/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch deleted file mode 100644 index a6177c6..0000000 --- a/SOURCES/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch +++ /dev/null @@ -1,56 +0,0 @@ -From f01178897c8f5ff98692a22059dd65e35677eaa3 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Mon, 10 Feb 2020 17:33:58 +0000 -Subject: [PATCH 18/18] docs/arm-cpu-features: Make kvm-no-adjvtime comment - clearer -MIME-Version: 1.0 -Content-Type: 
text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200210173358.16896-3-drjones@redhat.com> -Patchwork-id: 93772 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] docs/arm-cpu-features: Make kvm-no-adjvtime comment clearer -Bugzilla: 1801320 -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan -RH-Acked-by: Philippe Mathieu-Daudé - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1801320 - -Author: Philippe Mathieu-Daudé -Date: Fri, 07 Feb 2020 14:04:28 +0000 - - docs/arm-cpu-features: Make kvm-no-adjvtime comment clearer - - The bold text sounds like 'knock knock'. Only bolding the - second 'not' makes it easier to read. - - Fixes: dea101a1ae - Signed-off-by: Philippe Mathieu-Daudé - Reviewed-by: Andrew Jones - Message-id: 20200206225148.23923-1-philmd@redhat.com - Signed-off-by: Peter Maydell - -(cherry picked from commit fa3236a970b6ea5be3fa3ad258f1a75920ca1ebb) -Signed-off-by: Danilo C. L. de Paula ---- - docs/arm-cpu-features.rst | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst -index 45d1eb6..48d5054 100644 ---- a/docs/arm-cpu-features.rst -+++ b/docs/arm-cpu-features.rst -@@ -185,7 +185,7 @@ the list of KVM VCPU features and their descriptions. - - kvm-no-adjvtime By default kvm-no-adjvtime is disabled. This - means that by default the virtual time -- adjustment is enabled (vtime is *not not* -+ adjustment is enabled (vtime is not *not* - adjusted). 
- - When virtual time adjustment is enabled each --- -1.8.3.1 - diff --git a/SOURCES/kvm-dp8393x-switch-to-use-qemu_receive_packet-for-loopba.patch b/SOURCES/kvm-dp8393x-switch-to-use-qemu_receive_packet-for-loopba.patch deleted file mode 100644 index 77e99eb..0000000 --- a/SOURCES/kvm-dp8393x-switch-to-use-qemu_receive_packet-for-loopba.patch +++ /dev/null @@ -1,53 +0,0 @@ -From a6f0bef82cdd84844a06dac1e6d279d95824d827 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 29 Jun 2021 03:42:41 -0400 -Subject: [PATCH 3/9] dp8393x: switch to use qemu_receive_packet() for loopback - packet -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210629034247.3286477-4-jmaloy@redhat.com> -Patchwork-id: 101789 -O-Subject: [RHEL-8.4.0.z qemu-kvm PATCH v2 3/9] dp8393x: switch to use qemu_receive_packet() for loopback packet -Bugzilla: 1932917 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Jason Wang - -This patch switches to use qemu_receive_packet() which can detect -reentrancy and return early. - -This is intended to address CVE-2021-3416. - -Cc: Prasad J Pandit -Cc: qemu-stable@nongnu.org -Reviewed-by: Philippe Mathieu-Daudé - -(cherry picked from commit 331d2ac9ea307c990dc86e6493e8f0c48d14bb33) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/dp8393x.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c -index 3d991af163..6d55b5de64 100644 ---- a/hw/net/dp8393x.c -+++ b/hw/net/dp8393x.c -@@ -482,7 +482,7 @@ static void dp8393x_do_transmit_packets(dp8393xState *s) - s->regs[SONIC_TCR] |= SONIC_TCR_CRSL; - if (nc->info->can_receive(nc)) { - s->loopback_packet = 1; -- nc->info->receive(nc, s->tx_buffer, tx_len); -+ qemu_receive_packet(nc, s->tx_buffer, tx_len); - } - } else { - /* Transmit packet */ --- -2.27.0 - diff --git a/SOURCES/kvm-e1000-fail-early-for-evil-descriptor.patch b/SOURCES/kvm-e1000-fail-early-for-evil-descriptor.patch deleted file mode 100644 index e599b7c..0000000 --- a/SOURCES/kvm-e1000-fail-early-for-evil-descriptor.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 7bd3000cf22a91e6bc6afc1e7adbf0ae1b731104 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 13 Apr 2021 22:45:17 -0400 -Subject: [PATCH 2/5] e1000: fail early for evil descriptor - -RH-Author: Jon Maloy -Message-id: <20210413224517.3841507-2-jmaloy@redhat.com> -Patchwork-id: 101473 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/1] e1000: fail early for evil descriptor -Bugzilla: 1930092 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Stefan Hajnoczi - -From: Jason Wang - -During procss_tx_desc(), driver can try to chain data descriptor with -legacy descriptor, when will lead underflow for the following -calculation in process_tx_desc() for bytes: - - if (tp->size + bytes > msh) - bytes = msh - tp->size; - -This will lead a infinite loop. So check and fail early if tp->size if -greater or equal to msh. - -Reported-by: Alexander Bulekov -Reported-by: Cheolwoo Myung -Reported-by: Ruhr-University Bochum -Cc: Prasad J Pandit -Cc: qemu-stable@nongnu.org -Signed-off-by: Jason Wang - -(cherry picked from commit 3de46e6fc489c52c9431a8a832ad8170a7569bd8) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/e1000.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index fc73fdd6fa..fe56bccd52 100644 ---- a/hw/net/e1000.c -+++ b/hw/net/e1000.c -@@ -671,6 +671,9 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) - msh = tp->tso_props.hdr_len + tp->tso_props.mss; - do { - bytes = split_size; -+ if (tp->size >= msh) { -+ goto eop; -+ } - if (tp->size + bytes > msh) - bytes = msh - tp->size; - -@@ -696,6 +699,7 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) - tp->size += split_size; - } - -+eop: - if (!(txd_lower & E1000_TXD_CMD_EOP)) - return; - if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) { --- -2.27.0 - diff --git a/SOURCES/kvm-e1000-switch-to-use-qemu_receive_packet-for-loopback.patch b/SOURCES/kvm-e1000-switch-to-use-qemu_receive_packet-for-loopback.patch deleted file mode 100644 index 05ff372..0000000 --- a/SOURCES/kvm-e1000-switch-to-use-qemu_receive_packet-for-loopback.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 128b97f6049144af3c1a41ceb8e8583419edcd69 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 29 Jun 2021 03:42:40 -0400 -Subject: [PATCH 2/9] e1000: switch to use qemu_receive_packet() for loopback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210629034247.3286477-3-jmaloy@redhat.com> -Patchwork-id: 101784 -O-Subject: [RHEL-8.4.0.z qemu-kvm PATCH v2 2/9] e1000: switch to use qemu_receive_packet() for loopback -Bugzilla: 1932917 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Jason Wang - -This patch switches to use qemu_receive_packet() which can detect -reentrancy and return early. - -This is intended to address CVE-2021-3416. 
- -Cc: Prasad J Pandit -Cc: qemu-stable@nongnu.org -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Jason Wang - -(cherry picked from commit 1caff0340f49c93d535c6558a5138d20d475315c) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/e1000.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index fe56bccd52..8680b7d46b 100644 ---- a/hw/net/e1000.c -+++ b/hw/net/e1000.c -@@ -547,7 +547,7 @@ e1000_send_packet(E1000State *s, const uint8_t *buf, int size) - - NetClientState *nc = qemu_get_queue(s->nic); - if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) { -- nc->info->receive(nc, buf, size); -+ qemu_receive_packet(nc, buf, size); - } else { - qemu_send_packet(nc, buf, size); - } --- -2.27.0 - diff --git a/SOURCES/kvm-enable-ramfb.patch b/SOURCES/kvm-enable-ramfb.patch deleted file mode 100644 index fa2fe11..0000000 --- a/SOURCES/kvm-enable-ramfb.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 441128e2f13a56d4949b70971edd2f6902772959 Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Wed, 3 Jun 2020 15:15:56 +0100 -Subject: [PATCH 01/17] enable ramfb - -RH-Author: Gerd Hoffmann -Message-id: <20200603151556.1195-2-kraxel@redhat.com> -Patchwork-id: 97097 -O-Subject: [RHEL-AV-8.2.0.z qemu-kvm PATCH 1/1] enable ramfb -Bugzilla: 1841068 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi - ---- - hw/vfio/pci.c | 5 ----- - hw/display/Makefile.objs | 5 ++--- - 2 files changed, 2 insertions(+), 8 deletions(-) - -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/display/Makefile.objs | 5 ++--- - hw/vfio/pci.c | 5 ----- - 2 files changed, 2 insertions(+), 8 deletions(-) - -diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs -index 3d0cda1..f2182e3 100644 ---- a/hw/display/Makefile.objs -+++ b/hw/display/Makefile.objs -@@ -1,9 +1,8 @@ - common-obj-$(CONFIG_DDC) += i2c-ddc.o - common-obj-$(CONFIG_EDID) += edid-generate.o edid-region.o - --# Disabled for Red Hat Enterprise Linux --#common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o --#common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o -+common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o -+common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o - - common-obj-$(CONFIG_ADS7846) += ads7846.o - common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index d717520..f191904 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3249,7 +3249,6 @@ static const TypeInfo vfio_pci_dev_info = { - }, - }; - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static Property vfio_pci_dev_nohotplug_properties[] = { - DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), - DEFINE_PROP_END_OF_LIST(), -@@ -3269,15 +3268,11 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { - .instance_size = sizeof(VFIOPCIDevice), - .class_init = vfio_pci_nohotplug_dev_class_init, - }; --#endif - - static void register_vfio_pci_dev_type(void) - { - type_register_static(&vfio_pci_dev_info); -- --#if 0 /* Disabled for Red Hat Enterprise Linux */ - type_register_static(&vfio_pci_nohotplug_dev_info); --#endif - } - - type_init(register_vfio_pci_dev_type) --- -1.8.3.1 - diff --git a/SOURCES/kvm-error-Document-Error-API-usage-rules.patch b/SOURCES/kvm-error-Document-Error-API-usage-rules.patch deleted file mode 100644 index fb9f1b0..0000000 --- a/SOURCES/kvm-error-Document-Error-API-usage-rules.patch +++ /dev/null @@ -1,154 +0,0 @@ -From b2ac3e491eb7f18a421e2b1132e527d484681767 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= 
-Date: Wed, 16 Dec 2020 16:06:09 -0500 -Subject: [PATCH 08/14] error: Document Error API usage rules -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201216160615.324213-5-marcandre.lureau@redhat.com> -Patchwork-id: 100477 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 04/10] error: Document Error API usage rules -Bugzilla: 1859494 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi - -From: Markus Armbruster - -This merely codifies existing practice, with one exception: the rule -advising against returning void, where existing practice is mixed. - -When the Error API was created, we adopted the (unwritten) rule to -return void when the function returns no useful value on success, -unlike GError, which recommends to return true on success and false on -error then. - -When a function returns a distinct error value, say false, a checked -call that passes the error up looks like - - if (!frobnicate(..., errp)) { - handle the error... - } - -When it returns void, we need - - Error *err = NULL; - - frobnicate(..., &err); - if (err) { - handle the error... - error_propagate(errp, err); - } - -Not only is this more verbose, it also creates an Error object even -when @errp is null, &error_abort or &error_fatal. - -People got tired of the additional boilerplate, and started to ignore -the unwritten rule. The result is confusion among developers about -the preferred usage. - -Make the rule advising against returning void official by putting it -in writing. This will hopefully reduce confusion. - -Update the examples accordingly. - -The remainder of this series will update a substantial amount of code -to honor the rule. 
- -Signed-off-by: Markus Armbruster -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Greg Kurz -Message-Id: <20200707160613.848843-4-armbru@redhat.com> - -(cherry picked from commit e3fe3988d7851cac30abffae06d2f555ff7bee62) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. de Paula ---- - include/qapi/error.h | 52 +++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 46 insertions(+), 6 deletions(-) - -diff --git a/include/qapi/error.h b/include/qapi/error.h -index 3351fe76368..08d48e74836 100644 ---- a/include/qapi/error.h -+++ b/include/qapi/error.h -@@ -15,6 +15,33 @@ - /* - * Error reporting system loosely patterned after Glib's GError. - * -+ * = Rules = -+ * -+ * - Functions that use Error to report errors have an Error **errp -+ * parameter. It should be the last parameter, except for functions -+ * taking variable arguments. -+ * -+ * - You may pass NULL to not receive the error, &error_abort to abort -+ * on error, &error_fatal to exit(1) on error, or a pointer to a -+ * variable containing NULL to receive the error. -+ * -+ * - Separation of concerns: the function is responsible for detecting -+ * errors and failing cleanly; handling the error is its caller's -+ * job. Since the value of @errp is about handling the error, the -+ * function should not examine it. -+ * -+ * - On success, the function should not touch *errp. On failure, it -+ * should set a new error, e.g. with error_setg(errp, ...), or -+ * propagate an existing one, e.g. with error_propagate(errp, ...). -+ * -+ * - Whenever practical, also return a value that indicates success / -+ * failure. This can make the error checking more concise, and can -+ * avoid useless error object creation and destruction. Note that -+ * we still have many functions returning void. 
We recommend -+ * • bool-valued functions return true on success / false on failure, -+ * • pointer-valued functions return non-null / null pointer, and -+ * • integer-valued functions return non-negative / negative. -+ * - * = Creating errors = - * - * Create an error: -@@ -95,14 +122,13 @@ - * Create a new error and pass it to the caller: - * error_setg(errp, "situation normal, all fouled up"); - * -- * Call a function and receive an error from it: -- * Error *err = NULL; -- * foo(arg, &err); -- * if (err) { -+ * Call a function, receive an error from it, and pass it to the caller -+ * - when the function returns a value that indicates failure, say -+ * false: -+ * if (!foo(arg, errp)) { - * handle the error... - * } -- * -- * Receive an error and pass it on to the caller: -+ * - when it does not, say because it is a void function: - * Error *err = NULL; - * foo(arg, &err); - * if (err) { -@@ -120,6 +146,20 @@ - * foo(arg, errp); - * for readability. - * -+ * Receive an error, and handle it locally -+ * - when the function returns a value that indicates failure, say -+ * false: -+ * Error *err = NULL; -+ * if (!foo(arg, &err)) { -+ * handle the error... -+ * } -+ * - when it does not, say because it is a void function: -+ * Error *err = NULL; -+ * foo(arg, &err); -+ * if (err) { -+ * handle the error... 
-+ * } -+ * - * Receive and accumulate multiple errors (first one wins): - * Error *err = NULL, *local_err = NULL; - * foo(arg, &err); --- -2.27.0 - diff --git a/SOURCES/kvm-error-Fix-examples-in-error.h-s-big-comment.patch b/SOURCES/kvm-error-Fix-examples-in-error.h-s-big-comment.patch deleted file mode 100644 index ee14eb5..0000000 --- a/SOURCES/kvm-error-Fix-examples-in-error.h-s-big-comment.patch +++ /dev/null @@ -1,85 +0,0 @@ -From fe7dd779a9674dc54ffe296247ae6559f2b55b22 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 16 Dec 2020 16:06:07 -0500 -Subject: [PATCH 06/14] error: Fix examples in error.h's big comment -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201216160615.324213-3-marcandre.lureau@redhat.com> -Patchwork-id: 100473 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 02/10] error: Fix examples in error.h's big comment -Bugzilla: 1859494 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi - -From: Markus Armbruster - -Mark a bad example more clearly. Fix the error_propagate_prepend() -example. Add a missing declaration and a second error pileup example. - -Signed-off-by: Markus Armbruster -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Greg Kurz -Message-Id: <20200707160613.848843-2-armbru@redhat.com> - -(cherry picked from commit 47ff5ac81e8bb3096500de7b132051691d533d36) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. 
de Paula ---- - include/qapi/error.h | 16 ++++++++++++++-- - 1 file changed, 14 insertions(+), 2 deletions(-) - -diff --git a/include/qapi/error.h b/include/qapi/error.h -index 3f95141a01a..83c38f9a188 100644 ---- a/include/qapi/error.h -+++ b/include/qapi/error.h -@@ -24,7 +24,7 @@ - * "charm, top, bottom.\n"); - * - * Do *not* contract this to -- * error_setg(&err, "invalid quark\n" -+ * error_setg(&err, "invalid quark\n" // WRONG! - * "Valid quarks are up, down, strange, charm, top, bottom."); - * - * Report an error to the current monitor if we have one, else stderr: -@@ -52,7 +52,8 @@ - * where Error **errp is a parameter, by convention the last one. - * - * Pass an existing error to the caller with the message modified: -- * error_propagate_prepend(errp, err); -+ * error_propagate_prepend(errp, err, -+ * "Could not frobnicate '%s': ", name); - * - * Avoid - * error_propagate(errp, err); -@@ -108,12 +109,23 @@ - * } - * - * Do *not* "optimize" this to -+ * Error *err = NULL; - * foo(arg, &err); - * bar(arg, &err); // WRONG! - * if (err) { - * handle the error... - * } - * because this may pass a non-null err to bar(). -+ * -+ * Likewise, do *not* -+ * Error *err = NULL; -+ * if (cond1) { -+ * error_setg(&err, ...); -+ * } -+ * if (cond2) { -+ * error_setg(&err, ...); // WRONG! -+ * } -+ * because this may pass a non-null err to error_setg(). 
- */ - - #ifndef ERROR_H --- -2.27.0 - diff --git a/SOURCES/kvm-error-Improve-error.h-s-big-comment.patch b/SOURCES/kvm-error-Improve-error.h-s-big-comment.patch deleted file mode 100644 index 0ad4367..0000000 --- a/SOURCES/kvm-error-Improve-error.h-s-big-comment.patch +++ /dev/null @@ -1,146 +0,0 @@ -From 439c11850165fd838e367aa6d4fff4af951a5bd9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 16 Dec 2020 16:06:08 -0500 -Subject: [PATCH 07/14] error: Improve error.h's big comment -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201216160615.324213-4-marcandre.lureau@redhat.com> -Patchwork-id: 100474 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 03/10] error: Improve error.h's big comment -Bugzilla: 1859494 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi - -From: Markus Armbruster - -Add headlines to the big comment. - -Explain examples for NULL, &error_abort and &error_fatal argument -better. - -Tweak rationale for error_propagate_prepend(). - -Signed-off-by: Markus Armbruster -Message-Id: <20200707160613.848843-3-armbru@redhat.com> -Reviewed-by: Eric Blake -Reviewed-by: Greg Kurz - -(cherry picked from commit 9aac7d486cc792191c25c30851f501624b0c2751) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. de Paula ---- - include/qapi/error.h | 51 +++++++++++++++++++++++++++++++------------- - 1 file changed, 36 insertions(+), 15 deletions(-) - -diff --git a/include/qapi/error.h b/include/qapi/error.h -index 83c38f9a188..3351fe76368 100644 ---- a/include/qapi/error.h -+++ b/include/qapi/error.h -@@ -15,6 +15,8 @@ - /* - * Error reporting system loosely patterned after Glib's GError. - * -+ * = Creating errors = -+ * - * Create an error: - * error_setg(&err, "situation normal, all fouled up"); - * -@@ -27,6 +29,8 @@ - * error_setg(&err, "invalid quark\n" // WRONG! 
- * "Valid quarks are up, down, strange, charm, top, bottom."); - * -+ * = Reporting and destroying errors = -+ * - * Report an error to the current monitor if we have one, else stderr: - * error_report_err(err); - * This frees the error object. -@@ -40,6 +44,30 @@ - * error_free(err); - * Note that this loses hints added with error_append_hint(). - * -+ * Call a function ignoring errors: -+ * foo(arg, NULL); -+ * This is more concise than -+ * Error *err = NULL; -+ * foo(arg, &err); -+ * error_free(err); // don't do this -+ * -+ * Call a function aborting on errors: -+ * foo(arg, &error_abort); -+ * This is more concise and fails more nicely than -+ * Error *err = NULL; -+ * foo(arg, &err); -+ * assert(!err); // don't do this -+ * -+ * Call a function treating errors as fatal: -+ * foo(arg, &error_fatal); -+ * This is more concise than -+ * Error *err = NULL; -+ * foo(arg, &err); -+ * if (err) { // don't do this -+ * error_report_err(err); -+ * exit(1); -+ * } -+ * - * Handle an error without reporting it (just for completeness): - * error_free(err); - * -@@ -47,6 +75,11 @@ - * reporting it (primarily useful in testsuites): - * error_free_or_abort(&err); - * -+ * = Passing errors around = -+ * -+ * Errors get passed to the caller through the conventional @errp -+ * parameter. -+ * - * Pass an existing error to the caller: - * error_propagate(errp, err); - * where Error **errp is a parameter, by convention the last one. -@@ -54,11 +87,10 @@ - * Pass an existing error to the caller with the message modified: - * error_propagate_prepend(errp, err, - * "Could not frobnicate '%s': ", name); -- * -- * Avoid -- * error_propagate(errp, err); -+ * This is more concise than -+ * error_propagate(errp, err); // don't do this - * error_prepend(errp, "Could not frobnicate '%s': ", name); -- * because this fails to prepend when @errp is &error_fatal. -+ * and works even when @errp is &error_fatal. 
- * - * Create a new error and pass it to the caller: - * error_setg(errp, "situation normal, all fouled up"); -@@ -70,15 +102,6 @@ - * handle the error... - * } - * -- * Call a function ignoring errors: -- * foo(arg, NULL); -- * -- * Call a function aborting on errors: -- * foo(arg, &error_abort); -- * -- * Call a function treating errors as fatal: -- * foo(arg, &error_fatal); -- * - * Receive an error and pass it on to the caller: - * Error *err = NULL; - * foo(arg, &err); -@@ -86,8 +109,6 @@ - * handle the error... - * error_propagate(errp, err); - * } -- * where Error **errp is a parameter, by convention the last one. -- * - * Do *not* "optimize" this to - * foo(arg, errp); - * if (*errp) { // WRONG! --- -2.27.0 - diff --git a/SOURCES/kvm-error-New-macro-ERRP_GUARD.patch b/SOURCES/kvm-error-New-macro-ERRP_GUARD.patch deleted file mode 100644 index d67ad7c..0000000 --- a/SOURCES/kvm-error-New-macro-ERRP_GUARD.patch +++ /dev/null @@ -1,305 +0,0 @@ -From 46c3298774b976cc6a1cd834751e644fb482b08e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 16 Dec 2020 16:06:10 -0500 -Subject: [PATCH 09/14] error: New macro ERRP_GUARD() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201216160615.324213-6-marcandre.lureau@redhat.com> -Patchwork-id: 100476 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 05/10] error: New macro ERRP_GUARD() -Bugzilla: 1859494 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi - -From: Vladimir Sementsov-Ogievskiy - -Introduce a new ERRP_GUARD() macro, to be used at start of functions -with an errp OUT parameter. - -It has three goals: - -1. Fix issue with error_fatal and error_prepend/error_append_hint: the -user can't see this additional information, because exit() happens in -error_setg earlier than information is added. [Reported by Greg Kurz] - -2. 
Fix issue with error_abort and error_propagate: when we wrap -error_abort by local_err+error_propagate, the resulting coredump will -refer to error_propagate and not to the place where error happened. -(the macro itself doesn't fix the issue, but it allows us to [3.] drop -the local_err+error_propagate pattern, which will definitely fix the -issue) [Reported by Kevin Wolf] - -3. Drop local_err+error_propagate pattern, which is used to workaround -void functions with errp parameter, when caller wants to know resulting -status. (Note: actually these functions could be merely updated to -return int error code). - -To achieve these goals, later patches will add invocations -of this macro at the start of functions with either use -error_prepend/error_append_hint (solving 1) or which use -local_err+error_propagate to check errors, switching those -functions to use *errp instead (solving 2 and 3). - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Paul Durrant -Reviewed-by: Greg Kurz -Reviewed-by: Eric Blake -[Merge comments properly with recent commit "error: Document Error API -usage rules", and edit for clarity. Put ERRP_AUTO_PROPAGATE() before -its helpers, and touch up style. Tweak commit message.] -Signed-off-by: Markus Armbruster -Message-Id: <20200707165037.1026246-2-armbru@redhat.com> - -(cherry picked from commit ae7c80a7bd73685437bf6ba9d7c26098351f4166) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. de Paula ---- - include/qapi/error.h | 158 +++++++++++++++++++++++++++++++++++++------ - 1 file changed, 139 insertions(+), 19 deletions(-) - -diff --git a/include/qapi/error.h b/include/qapi/error.h -index 08d48e74836..e658790acfc 100644 ---- a/include/qapi/error.h -+++ b/include/qapi/error.h -@@ -30,6 +30,10 @@ - * job. Since the value of @errp is about handling the error, the - * function should not examine it. - * -+ * - The function may pass @errp to functions it calls to pass on -+ * their errors to its caller. 
If it dereferences @errp to check -+ * for errors, it must use ERRP_GUARD(). -+ * - * - On success, the function should not touch *errp. On failure, it - * should set a new error, e.g. with error_setg(errp, ...), or - * propagate an existing one, e.g. with error_propagate(errp, ...). -@@ -45,15 +49,17 @@ - * = Creating errors = - * - * Create an error: -- * error_setg(&err, "situation normal, all fouled up"); -+ * error_setg(errp, "situation normal, all fouled up"); -+ * where @errp points to the location to receive the error. - * - * Create an error and add additional explanation: -- * error_setg(&err, "invalid quark"); -- * error_append_hint(&err, "Valid quarks are up, down, strange, " -+ * error_setg(errp, "invalid quark"); -+ * error_append_hint(errp, "Valid quarks are up, down, strange, " - * "charm, top, bottom.\n"); -+ * This may require use of ERRP_GUARD(); more on that below. - * - * Do *not* contract this to -- * error_setg(&err, "invalid quark\n" // WRONG! -+ * error_setg(errp, "invalid quark\n" // WRONG! - * "Valid quarks are up, down, strange, charm, top, bottom."); - * - * = Reporting and destroying errors = -@@ -107,18 +113,6 @@ - * Errors get passed to the caller through the conventional @errp - * parameter. - * -- * Pass an existing error to the caller: -- * error_propagate(errp, err); -- * where Error **errp is a parameter, by convention the last one. -- * -- * Pass an existing error to the caller with the message modified: -- * error_propagate_prepend(errp, err, -- * "Could not frobnicate '%s': ", name); -- * This is more concise than -- * error_propagate(errp, err); // don't do this -- * error_prepend(errp, "Could not frobnicate '%s': ", name); -- * and works even when @errp is &error_fatal. -- * - * Create a new error and pass it to the caller: - * error_setg(errp, "situation normal, all fouled up"); - * -@@ -129,18 +123,26 @@ - * handle the error... 
- * } - * - when it does not, say because it is a void function: -+ * ERRP_GUARD(); -+ * foo(arg, errp); -+ * if (*errp) { -+ * handle the error... -+ * } -+ * More on ERRP_GUARD() below. -+ * -+ * Code predating ERRP_GUARD() still exists, and looks like this: - * Error *err = NULL; - * foo(arg, &err); - * if (err) { - * handle the error... -- * error_propagate(errp, err); -+ * error_propagate(errp, err); // deprecated - * } -- * Do *not* "optimize" this to -+ * Avoid in new code. Do *not* "optimize" it to - * foo(arg, errp); - * if (*errp) { // WRONG! - * handle the error... - * } -- * because errp may be NULL! -+ * because errp may be NULL without the ERRP_GUARD() guard. - * - * But when all you do with the error is pass it on, please use - * foo(arg, errp); -@@ -160,6 +162,19 @@ - * handle the error... - * } - * -+ * Pass an existing error to the caller: -+ * error_propagate(errp, err); -+ * This is rarely needed. When @err is a local variable, use of -+ * ERRP_GUARD() commonly results in more readable code. -+ * -+ * Pass an existing error to the caller with the message modified: -+ * error_propagate_prepend(errp, err, -+ * "Could not frobnicate '%s': ", name); -+ * This is more concise than -+ * error_propagate(errp, err); // don't do this -+ * error_prepend(errp, "Could not frobnicate '%s': ", name); -+ * and works even when @errp is &error_fatal. -+ * - * Receive and accumulate multiple errors (first one wins): - * Error *err = NULL, *local_err = NULL; - * foo(arg, &err); -@@ -187,6 +202,69 @@ - * error_setg(&err, ...); // WRONG! - * } - * because this may pass a non-null err to error_setg(). -+ * -+ * = Why, when and how to use ERRP_GUARD() = -+ * -+ * Without ERRP_GUARD(), use of the @errp parameter is restricted: -+ * - It must not be dereferenced, because it may be null. -+ * - It should not be passed to error_prepend() or -+ * error_append_hint(), because that doesn't work with &error_fatal. -+ * ERRP_GUARD() lifts these restrictions. 
-+ * -+ * To use ERRP_GUARD(), add it right at the beginning of the function. -+ * @errp can then be used without worrying about the argument being -+ * NULL or &error_fatal. -+ * -+ * Using it when it's not needed is safe, but please avoid cluttering -+ * the source with useless code. -+ * -+ * = Converting to ERRP_GUARD() = -+ * -+ * To convert a function to use ERRP_GUARD(): -+ * -+ * 0. If the Error ** parameter is not named @errp, rename it to -+ * @errp. -+ * -+ * 1. Add an ERRP_GUARD() invocation, by convention right at the -+ * beginning of the function. This makes @errp safe to use. -+ * -+ * 2. Replace &err by errp, and err by *errp. Delete local variable -+ * @err. -+ * -+ * 3. Delete error_propagate(errp, *errp), replace -+ * error_propagate_prepend(errp, *errp, ...) by error_prepend(errp, ...) -+ * -+ * 4. Ensure @errp is valid at return: when you destroy *errp, set -+ * errp = NULL. -+ * -+ * Example: -+ * -+ * bool fn(..., Error **errp) -+ * { -+ * Error *err = NULL; -+ * -+ * foo(arg, &err); -+ * if (err) { -+ * handle the error... -+ * error_propagate(errp, err); -+ * return false; -+ * } -+ * ... -+ * } -+ * -+ * becomes -+ * -+ * bool fn(..., Error **errp) -+ * { -+ * ERRP_GUARD(); -+ * -+ * foo(arg, errp); -+ * if (*errp) { -+ * handle the error... -+ * return false; -+ * } -+ * ... -+ * } - */ - - #ifndef ERROR_H -@@ -287,6 +365,7 @@ void error_setg_win32_internal(Error **errp, - * the error object. - * Else, move the error object from @local_err to *@dst_errp. - * On return, @local_err is invalid. -+ * Please use ERRP_GUARD() instead when possible. - * Please don't error_propagate(&error_fatal, ...), use - * error_report_err() and exit(), because that's more obvious. - */ -@@ -298,6 +377,7 @@ void error_propagate(Error **dst_errp, Error *local_err); - * Behaves like - * error_prepend(&local_err, fmt, ...); - * error_propagate(dst_errp, local_err); -+ * Please use ERRP_GUARD() and error_prepend() instead when possible. 
- */ - void error_propagate_prepend(Error **dst_errp, Error *local_err, - const char *fmt, ...); -@@ -395,6 +475,46 @@ void error_set_internal(Error **errp, - ErrorClass err_class, const char *fmt, ...) - GCC_FMT_ATTR(6, 7); - -+/* -+ * Make @errp parameter easier to use regardless of argument value -+ * -+ * This macro is for use right at the beginning of a function that -+ * takes an Error **errp parameter to pass errors to its caller. The -+ * parameter must be named @errp. -+ * -+ * It must be used when the function dereferences @errp or passes -+ * @errp to error_prepend(), error_vprepend(), or error_append_hint(). -+ * It is safe to use even when it's not needed, but please avoid -+ * cluttering the source with useless code. -+ * -+ * If @errp is NULL or &error_fatal, rewrite it to point to a local -+ * Error variable, which will be automatically propagated to the -+ * original @errp on function exit. -+ * -+ * Note: &error_abort is not rewritten, because that would move the -+ * abort from the place where the error is created to the place where -+ * it's propagated. -+ */ -+#define ERRP_GUARD() \ -+ g_auto(ErrorPropagator) _auto_errp_prop = {.errp = errp}; \ -+ do { \ -+ if (!errp || errp == &error_fatal) { \ -+ errp = &_auto_errp_prop.local_err; \ -+ } \ -+ } while (0) -+ -+typedef struct ErrorPropagator { -+ Error *local_err; -+ Error **errp; -+} ErrorPropagator; -+ -+static inline void error_propagator_cleanup(ErrorPropagator *prop) -+{ -+ error_propagate(prop->errp, prop->local_err); -+} -+ -+G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(ErrorPropagator, error_propagator_cleanup); -+ - /* - * Special error destination to abort on error. - * See error_setg() and error_propagate() for details. 
--- -2.27.0 - diff --git a/SOURCES/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch b/SOURCES/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch deleted file mode 100644 index 5d44708..0000000 --- a/SOURCES/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 5770fe43fe1e15e6f53cfd3705605e8645b95a98 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 13 Mar 2020 17:17:08 +0000 -Subject: [PATCH 20/20] exec/rom_reset: Free rom data during inmigrate skip -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200313171708.242774-1-dgilbert@redhat.com> -Patchwork-id: 94292 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] exec/rom_reset: Free rom data during inmigrate skip -Bugzilla: 1809380 -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Paolo Bonzini - -From: "Dr. David Alan Gilbert" - -bz: https://bugzilla.redhat.com/show_bug.cgi?id=1809380 -brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27249921 -branch: rhel-av-8.2.0 -upstream: Posted and with review-by, not merged yet - -Commit 355477f8c73e9 skips rom reset when we're an incoming migration -so as not to overwrite shared ram in the ignore-shared migration -optimisation. -However, it's got an unexpected side effect that because it skips -freeing the ROM data, when rom_reset gets called later on, after -migration (e.g. during a reboot), the ROM does get reset to the original -file contents. Because of seabios/x86's weird reboot process -this confuses a reboot into hanging after a migration. - -Fixes: 355477f8c73e9 ("migration: do not rom_reset() during incoming migration") -https://bugzilla.redhat.com/show_bug.cgi?id=1809380 - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/core/loader.c | 25 ++++++++++++++++--------- - 1 file changed, 16 insertions(+), 9 deletions(-) - -diff --git a/hw/core/loader.c b/hw/core/loader.c -index 5099f27..375b29b 100644 ---- a/hw/core/loader.c -+++ b/hw/core/loader.c -@@ -1118,19 +1118,26 @@ static void rom_reset(void *unused) - { - Rom *rom; - -- /* -- * We don't need to fill in the RAM with ROM data because we'll fill -- * the data in during the next incoming migration in all cases. Note -- * that some of those RAMs can actually be modified by the guest on ARM -- * so this is probably the only right thing to do here. -- */ -- if (runstate_check(RUN_STATE_INMIGRATE)) -- return; -- - QTAILQ_FOREACH(rom, &roms, next) { - if (rom->fw_file) { - continue; - } -+ /* -+ * We don't need to fill in the RAM with ROM data because we'll fill -+ * the data in during the next incoming migration in all cases. Note -+ * that some of those RAMs can actually be modified by the guest. -+ */ -+ if (runstate_check(RUN_STATE_INMIGRATE)) { -+ if (rom->data && rom->isrom) { -+ /* -+ * Free it so that a rom_reset after migration doesn't -+ * overwrite a potentially modified 'rom'. 
-+ */ -+ rom_free_data(rom); -+ } -+ continue; -+ } -+ - if (rom->data == NULL) { - continue; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-file-posix-Allow-byte-aligned-O_DIRECT-with-NFS.patch b/SOURCES/kvm-file-posix-Allow-byte-aligned-O_DIRECT-with-NFS.patch deleted file mode 100644 index aa47108..0000000 --- a/SOURCES/kvm-file-posix-Allow-byte-aligned-O_DIRECT-with-NFS.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 4e553943c8fe4924d194884b4719c5459210c686 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 26 Jan 2021 17:21:03 -0500 -Subject: [PATCH 8/9] file-posix: Allow byte-aligned O_DIRECT with NFS - -RH-Author: Kevin Wolf -Message-id: <20210126172103.136060-3-kwolf@redhat.com> -Patchwork-id: 100785 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 2/2] file-posix: Allow byte-aligned O_DIRECT with NFS -Bugzilla: 1834281 -RH-Acked-by: Markus Armbruster -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -Since commit a6b257a08e3 ('file-posix: Handle undetectable alignment'), -we assume that if we open a file with O_DIRECT and alignment probing -returns 1, we just couldn't find out the real alignment requirement -because some filesystems make the requirement only for allocated blocks. -In this case, a safe default of 4k is used. - -This is too strict for NFS, which does actually allow byte-aligned -requests even with O_DIRECT. Because we can't distinguish both cases -with generic code, let's just look at the file system magic and disable -s->needs_alignment for NFS. This way, O_DIRECT can still be used on NFS -for images that are not aligned to 4k. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Message-Id: <20200716142601.111237-3-kwolf@redhat.com> -Reviewed-by: Max Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 5edc85571e7b7269dce408735eba7507f18ac666) -Signed-off-by: Kevin Wolf -Signed-off-by: Jon Maloy ---- - block/file-posix.c | 26 +++++++++++++++++++++++++- - 1 file changed, 25 insertions(+), 1 deletion(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index adafbfa1be..2d834fbdf6 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -61,10 +61,12 @@ - #include - #include - #include -+#include - #include - #include - #include - #include -+#include - #include - #ifdef __s390__ - #include -@@ -298,6 +300,28 @@ static int probe_physical_blocksize(int fd, unsigned int *blk_size) - #endif - } - -+/* -+ * Returns true if no alignment restrictions are necessary even for files -+ * opened with O_DIRECT. -+ * -+ * raw_probe_alignment() probes the required alignment and assume that 1 means -+ * the probing failed, so it falls back to a safe default of 4k. This can be -+ * avoided if we know that byte alignment is okay for the file. -+ */ -+static bool dio_byte_aligned(int fd) -+{ -+#ifdef __linux__ -+ struct statfs buf; -+ int ret; -+ -+ ret = fstatfs(fd, &buf); -+ if (ret == 0 && buf.f_type == NFS_SUPER_MAGIC) { -+ return true; -+ } -+#endif -+ return false; -+} -+ - /* Check if read is allowed with given memory buffer and length. - * - * This function is used to check O_DIRECT memory buffer and request alignment. 
-@@ -602,7 +626,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, - - s->has_discard = true; - s->has_write_zeroes = true; -- if ((bs->open_flags & BDRV_O_NOCACHE) != 0) { -+ if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { - s->needs_alignment = true; - } - --- -2.18.2 - diff --git a/SOURCES/kvm-file-posix-Drop-hdev_co_create_opts.patch b/SOURCES/kvm-file-posix-Drop-hdev_co_create_opts.patch deleted file mode 100644 index ea2edbd..0000000 --- a/SOURCES/kvm-file-posix-Drop-hdev_co_create_opts.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 3d3509c010129bd15eb1f5ec1a7b9eedcdbf23f6 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:44 +0000 -Subject: [PATCH 03/20] file-posix: Drop hdev_co_create_opts() - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-4-mlevitsk@redhat.com> -Patchwork-id: 94225 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/6] file-posix: Drop hdev_co_create_opts() -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -The generic fallback implementation effectively does the same. - -Reviewed-by: Maxim Levitsky -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-4-mreitz@redhat.com> -Signed-off-by: Max Reitz -(cherry picked from commit 87ca3b8fa615b278b33cabf9ed22b3f44b5214ba) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. 
de Paula ---- - block/file-posix.c | 67 ------------------------------------------------------ - 1 file changed, 67 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 1b805bd..fd29372 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -3418,67 +3418,6 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, - return raw_do_pwrite_zeroes(bs, offset, bytes, flags, true); - } - --static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts, -- Error **errp) --{ -- int fd; -- int ret = 0; -- struct stat stat_buf; -- int64_t total_size = 0; -- bool has_prefix; -- -- /* This function is used by both protocol block drivers and therefore either -- * of these prefixes may be given. -- * The return value has to be stored somewhere, otherwise this is an error -- * due to -Werror=unused-value. */ -- has_prefix = -- strstart(filename, "host_device:", &filename) || -- strstart(filename, "host_cdrom:" , &filename); -- -- (void)has_prefix; -- -- ret = raw_normalize_devicepath(&filename, errp); -- if (ret < 0) { -- return ret; -- } -- -- /* Read out options */ -- total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), -- BDRV_SECTOR_SIZE); -- -- fd = qemu_open(filename, O_WRONLY | O_BINARY); -- if (fd < 0) { -- ret = -errno; -- error_setg_errno(errp, -ret, "Could not open device"); -- return ret; -- } -- -- if (fstat(fd, &stat_buf) < 0) { -- ret = -errno; -- error_setg_errno(errp, -ret, "Could not stat device"); -- } else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) { -- error_setg(errp, -- "The given file is neither a block nor a character device"); -- ret = -ENODEV; -- } else if (lseek(fd, 0, SEEK_END) < total_size) { -- error_setg(errp, "Device is too small"); -- ret = -ENOSPC; -- } -- -- if (!ret && total_size) { -- uint8_t buf[BDRV_SECTOR_SIZE] = { 0 }; -- int64_t zero_size = MIN(BDRV_SECTOR_SIZE, total_size); -- if (lseek(fd, 0, SEEK_SET) == -1) { -- ret = -errno; -- } else { 
-- ret = qemu_write_full(fd, buf, zero_size); -- ret = ret == zero_size ? 0 : -errno; -- } -- } -- qemu_close(fd); -- return ret; --} -- - static BlockDriver bdrv_host_device = { - .format_name = "host_device", - .protocol_name = "host_device", -@@ -3491,8 +3430,6 @@ static BlockDriver bdrv_host_device = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -- .bdrv_co_create_opts = hdev_co_create_opts, -- .create_opts = &raw_create_opts, - .mutable_opts = mutable_opts, - .bdrv_co_invalidate_cache = raw_co_invalidate_cache, - .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, -@@ -3619,8 +3556,6 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -- .bdrv_co_create_opts = hdev_co_create_opts, -- .create_opts = &raw_create_opts, - .mutable_opts = mutable_opts, - .bdrv_co_invalidate_cache = raw_co_invalidate_cache, - -@@ -3753,8 +3688,6 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -- .bdrv_co_create_opts = hdev_co_create_opts, -- .create_opts = &raw_create_opts, - .mutable_opts = mutable_opts, - - .bdrv_co_preadv = raw_co_preadv, --- -1.8.3.1 - diff --git a/SOURCES/kvm-file-posix-Handle-EINVAL-fallocate-return-value.patch b/SOURCES/kvm-file-posix-Handle-EINVAL-fallocate-return-value.patch deleted file mode 100644 index ac7b859..0000000 --- a/SOURCES/kvm-file-posix-Handle-EINVAL-fallocate-return-value.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 94d99b13b48e922861570f043490efc966b3b445 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 25 Jun 2021 17:41:04 -0400 -Subject: [PATCH 4/4] file-posix: Handle `EINVAL` fallocate return value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf 
-Message-id: <20210625174104.44313-3-kwolf@redhat.com> -Patchwork-id: 101778 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 2/2] file-posix: Handle `EINVAL` fallocate return value -Bugzilla: 1970912 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Antoine Damhet - -The `detect-zeroes=unmap` option may issue unaligned -`FALLOC_FL_PUNCH_HOLE` requests, raw block devices can (and will) return -`EINVAL`, qemu should then write the zeroes to the blockdev instead of -issuing an `IO_ERROR`. - -The problem can be reprodced like this: - -$ qemu-io -c 'write -P 0 42 1234' --image-opts driver=host_device,filename=/dev/loop0,detect-zeroes=unmap -write failed: Invalid argument - -Signed-off-by: Antoine Damhet -Message-Id: <20200717135603.51180-1-antoine.damhet@blade-group.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit bae127d4dcf6158c5042e2eee9582430839a9967) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/file-posix.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 837edcf027..6cd19e6c9a 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -1632,7 +1632,11 @@ static int handle_aiocb_write_zeroes_unmap(void *opaque) - #ifdef CONFIG_FALLOCATE_PUNCH_HOLE - int ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - aiocb->aio_offset, aiocb->aio_nbytes); -- if (ret != -ENOTSUP) { -+ switch (ret) { -+ case -ENOTSUP: -+ case -EINVAL: -+ break; -+ default: - return ret; - } - #endif --- -2.27.0 - diff --git a/SOURCES/kvm-file-posix-Mitigate-file-fragmentation-with-extent-s.patch b/SOURCES/kvm-file-posix-Mitigate-file-fragmentation-with-extent-s.patch deleted file mode 100644 index e8639f3..0000000 --- a/SOURCES/kvm-file-posix-Mitigate-file-fragmentation-with-extent-s.patch +++ /dev/null @@ -1,466 +0,0 @@ -From 7ee01b5ccb7fc660dafaf3fdb1578649d17fbddf Mon Sep 17 00:00:00 2001 -From: Kevin Wolf 
-Date: Wed, 26 May 2021 09:05:52 -0400 -Subject: [PATCH 1/4] file-posix: Mitigate file fragmentation with extent size - hints - -RH-Author: Kevin Wolf -Message-id: <20210526090552.155820-2-kwolf@redhat.com> -Patchwork-id: 101638 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/1] file-posix: Mitigate file fragmentation with extent size hints -Bugzilla: 1877163 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -Especially when O_DIRECT is used with image files so that the page cache -indirection can't cause a merge of allocating requests, the file will -fragment on the file system layer, with a potentially very small -fragment size (this depends on the requests the guest sent). - -On Linux, fragmentation can be reduced by setting an extent size hint -when creating the file (at least on XFS, it can't be set any more after -the first extent has been allocated), basically giving raw files a -"cluster size" for allocation. - -This adds a create option to set the extent size hint, and changes the -default from not setting a hint to setting it to 1 MB. The main reason -why qcow2 defaults to smaller cluster sizes is that COW becomes more -expensive, which is not an issue with raw files, so we can choose a -larger size. The tradeoff here is only potentially wasted disk space. - -For qcow2 (or other image formats) over file-posix, the advantage should -even be greater because they grow sequentially without leaving holes, so -there won't be wasted space. Setting even larger extent size hints for -such images may make sense. This can be done with the new option, but -let's keep the default conservative for now. - -The effect is very visible with a test that intentionally creates a -badly fragmented file with qemu-img bench (the time difference while -creating the file is already remarkable) and then looks at the number of -extents and the time a simple "qemu-img map" takes. 
- -Without an extent size hint: - - $ ./qemu-img create -f raw -o extent_size_hint=0 ~/tmp/test.raw 10G - Formatting '/home/kwolf/tmp/test.raw', fmt=raw size=10737418240 extent_size_hint=0 - $ ./qemu-img bench -f raw -t none -n -w ~/tmp/test.raw -c 1000000 -S 8192 -o 0 - Sending 1000000 write requests, 4096 bytes each, 64 in parallel (starting at offset 0, step size 8192) - Run completed in 25.848 seconds. - $ ./qemu-img bench -f raw -t none -n -w ~/tmp/test.raw -c 1000000 -S 8192 -o 4096 - Sending 1000000 write requests, 4096 bytes each, 64 in parallel (starting at offset 4096, step size 8192) - Run completed in 19.616 seconds. - $ filefrag ~/tmp/test.raw - /home/kwolf/tmp/test.raw: 2000000 extents found - $ time ./qemu-img map ~/tmp/test.raw - Offset Length Mapped to File - 0 0x1e8480000 0 /home/kwolf/tmp/test.raw - - real 0m1,279s - user 0m0,043s - sys 0m1,226s - -With the new default extent size hint of 1 MB: - - $ ./qemu-img create -f raw -o extent_size_hint=1M ~/tmp/test.raw 10G - Formatting '/home/kwolf/tmp/test.raw', fmt=raw size=10737418240 extent_size_hint=1048576 - $ ./qemu-img bench -f raw -t none -n -w ~/tmp/test.raw -c 1000000 -S 8192 -o 0 - Sending 1000000 write requests, 4096 bytes each, 64 in parallel (starting at offset 0, step size 8192) - Run completed in 11.833 seconds. - $ ./qemu-img bench -f raw -t none -n -w ~/tmp/test.raw -c 1000000 -S 8192 -o 4096 - Sending 1000000 write requests, 4096 bytes each, 64 in parallel (starting at offset 4096, step size 8192) - Run completed in 10.155 seconds. 
- $ filefrag ~/tmp/test.raw - /home/kwolf/tmp/test.raw: 178 extents found - $ time ./qemu-img map ~/tmp/test.raw - Offset Length Mapped to File - 0 0x1e8480000 0 /home/kwolf/tmp/test.raw - - real 0m0,061s - user 0m0,040s - sys 0m0,014s - -Signed-off-by: Kevin Wolf -Message-Id: <20200707142329.48303-1-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit ffa244c84a1a30dff69ecc80b0137a2b6d428ecb) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/file-posix.c | 44 ++++++++++++++++++++++++++++++++ - include/block/block_int.h | 1 + - qapi/block-core.json | 11 +++++--- - tests/qemu-iotests/082.out | 16 ++++++++++++ - tests/qemu-iotests/106 | 7 +++-- - tests/qemu-iotests/175 | 6 ++--- - tests/qemu-iotests/243 | 6 ++--- - tests/qemu-iotests/common.filter | 1 + - 8 files changed, 80 insertions(+), 12 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 2d834fbdf6..62a463229f 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -30,6 +30,7 @@ - #include "block/block_int.h" - #include "qemu/module.h" - #include "qemu/option.h" -+#include "qemu/units.h" - #include "trace.h" - #include "block/thread-pool.h" - #include "qemu/iov.h" -@@ -2289,6 +2290,14 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) - if (!file_opts->has_preallocation) { - file_opts->preallocation = PREALLOC_MODE_OFF; - } -+ if (!file_opts->has_extent_size_hint) { -+ file_opts->extent_size_hint = 1 * MiB; -+ } -+ if (file_opts->extent_size_hint > UINT32_MAX) { -+ result = -EINVAL; -+ error_setg(errp, "Extent size hint is too large"); -+ goto out; -+ } - - /* Create file */ - fd = qemu_open(file_opts->filename, O_RDWR | O_CREAT | O_BINARY, 0644); -@@ -2346,6 +2355,27 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) - } - #endif - } -+#ifdef FS_IOC_FSSETXATTR -+ /* -+ * Try to set the extent size hint. 
Failure is not fatal, and a warning is -+ * only printed if the option was explicitly specified. -+ */ -+ { -+ struct fsxattr attr; -+ result = ioctl(fd, FS_IOC_FSGETXATTR, &attr); -+ if (result == 0) { -+ attr.fsx_xflags |= FS_XFLAG_EXTSIZE; -+ attr.fsx_extsize = file_opts->extent_size_hint; -+ result = ioctl(fd, FS_IOC_FSSETXATTR, &attr); -+ } -+ if (result < 0 && file_opts->has_extent_size_hint && -+ file_opts->extent_size_hint) -+ { -+ warn_report("Failed to set extent size hint: %s", -+ strerror(errno)); -+ } -+ } -+#endif - - /* Resize and potentially preallocate the file to the desired - * final size */ -@@ -2381,6 +2411,8 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv, - { - BlockdevCreateOptions options; - int64_t total_size = 0; -+ int64_t extent_size_hint = 0; -+ bool has_extent_size_hint = false; - bool nocow = false; - PreallocMode prealloc; - char *buf = NULL; -@@ -2392,6 +2424,11 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv, - /* Read out options */ - total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), - BDRV_SECTOR_SIZE); -+ if (qemu_opt_get(opts, BLOCK_OPT_EXTENT_SIZE_HINT)) { -+ has_extent_size_hint = true; -+ extent_size_hint = -+ qemu_opt_get_size_del(opts, BLOCK_OPT_EXTENT_SIZE_HINT, -1); -+ } - nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false); - buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); - prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, -@@ -2411,6 +2448,8 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv, - .preallocation = prealloc, - .has_nocow = true, - .nocow = nocow, -+ .has_extent_size_hint = has_extent_size_hint, -+ .extent_size_hint = extent_size_hint, - }, - }; - return raw_co_create(&options, errp); -@@ -2902,6 +2941,11 @@ static QemuOptsList raw_create_opts = { - #endif - ", full)" - }, -+ { -+ .name = BLOCK_OPT_EXTENT_SIZE_HINT, -+ .type = QEMU_OPT_SIZE, -+ .help = "Extent size hint for the image file, 0 to disable" -+ }, - { /* end of list */ } - 
} - }; -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 41f13ecbed..4b23da2eb0 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -53,6 +53,7 @@ - #define BLOCK_OPT_ADAPTER_TYPE "adapter_type" - #define BLOCK_OPT_REDUNDANCY "redundancy" - #define BLOCK_OPT_NOCOW "nocow" -+#define BLOCK_OPT_EXTENT_SIZE_HINT "extent_size_hint" - #define BLOCK_OPT_OBJECT_SIZE "object_size" - #define BLOCK_OPT_REFCOUNT_BITS "refcount_bits" - #define BLOCK_OPT_DATA_FILE "data_file" -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 289320902d..c7aa919fa3 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -4272,14 +4272,17 @@ - # falloc (if defined CONFIG_POSIX_FALLOCATE), - # full (if defined CONFIG_POSIX)) - # @nocow Turn off copy-on-write (valid only on btrfs; default: off) -+# @extent-size-hint: Extent size hint to add to the image file; 0 for not -+# adding an extent size hint (default: 1 MB, since 5.1) - # - # Since: 2.12 - ## - { 'struct': 'BlockdevCreateOptionsFile', -- 'data': { 'filename': 'str', -- 'size': 'size', -- '*preallocation': 'PreallocMode', -- '*nocow': 'bool' } } -+ 'data': { 'filename': 'str', -+ 'size': 'size', -+ '*preallocation': 'PreallocMode', -+ '*nocow': 'bool', -+ '*extent-size-hint': 'size'} } - - ## - # @BlockdevCreateOptionsGluster: -diff --git a/tests/qemu-iotests/082.out b/tests/qemu-iotests/082.out -index 9d4ed4dc9d..7a87946fa2 100644 ---- a/tests/qemu-iotests/082.out -+++ b/tests/qemu-iotests/082.out -@@ -59,6 +59,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. 
(Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -82,6 +83,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -105,6 +107,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -128,6 +131,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. 
(Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -151,6 +155,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -174,6 +179,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -197,6 +203,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. 
(Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -220,6 +227,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -339,6 +347,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -362,6 +371,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. 
(Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -385,6 +395,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -408,6 +419,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -431,6 +443,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. 
(Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -454,6 +467,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -477,6 +491,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. (Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -@@ -500,6 +515,7 @@ Supported options: - encrypt.ivgen-hash-alg= - Name of IV generator hash algorithm - encrypt.key-secret= - ID of secret providing qcow AES key or LUKS passphrase - encryption= - Encrypt the image with format 'aes'. 
(Deprecated in favor of encrypt.format=aes) -+ extent_size_hint= - Extent size hint for the image file, 0 to disable - lazy_refcounts= - Postpone refcount updates - nocow= - Turn off copy-on-write (valid only on btrfs) - preallocation= - Preallocation mode (allowed values: off, metadata, falloc, full) -diff --git a/tests/qemu-iotests/106 b/tests/qemu-iotests/106 -index ac47eaa0f5..ee6f51d08b 100755 ---- a/tests/qemu-iotests/106 -+++ b/tests/qemu-iotests/106 -@@ -51,7 +51,10 @@ for create_mode in off falloc full; do - echo - echo "--- create_mode=$create_mode growth_mode=$growth_mode ---" - -- IMGOPTS="preallocation=$create_mode" _make_test_img ${CREATION_SIZE}K -+ # Our calculation below assumes kilobytes as unit for the actual size. -+ # Disable the extent size hint because it would give us a result in -+ # megabytes. -+ IMGOPTS="preallocation=$create_mode,extent_size_hint=0" _make_test_img ${CREATION_SIZE}K - $QEMU_IMG resize -f "$IMGFMT" --preallocation=$growth_mode "$TEST_IMG" +${GROWTH_SIZE}K - - expected_size=0 -@@ -98,7 +101,7 @@ for growth_mode in falloc full; do - # plain int. We should use the correct type for the result, and - # this tests we do. 
- -- _make_test_img 2G -+ _make_test_img -o "extent_size_hint=0" 2G - $QEMU_IMG resize -f "$IMGFMT" --preallocation=$growth_mode "$TEST_IMG" +${GROWTH_SIZE}K - - actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size') -diff --git a/tests/qemu-iotests/175 b/tests/qemu-iotests/175 -index 55db2803ed..8a8494aeb6 100755 ---- a/tests/qemu-iotests/175 -+++ b/tests/qemu-iotests/175 -@@ -89,20 +89,20 @@ min_blocks=$(stat -c '%b' "$TEST_DIR/empty") - - echo - echo "== creating image with default preallocation ==" --_make_test_img $size | _filter_imgfmt -+_make_test_img -o extent_size_hint=0 $size | _filter_imgfmt - stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $size - - for mode in off full falloc; do - echo - echo "== creating image with preallocation $mode ==" -- IMGOPTS=preallocation=$mode _make_test_img $size | _filter_imgfmt -+ IMGOPTS="preallocation=$mode,extent_size_hint=0" _make_test_img $size | _filter_imgfmt - stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $size - done - - for new_size in 4096 1048576; do - echo - echo "== resize empty image with block_resize ==" -- _make_test_img 0 | _filter_imgfmt -+ _make_test_img -o extent_size_hint=0 0 | _filter_imgfmt - _block_resize $TEST_IMG $new_size >/dev/null - stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $new_size - done -diff --git a/tests/qemu-iotests/243 b/tests/qemu-iotests/243 -index e563761307..104c7256c4 100755 ---- a/tests/qemu-iotests/243 -+++ b/tests/qemu-iotests/243 -@@ -47,7 +47,7 @@ for mode in off metadata falloc full; do - echo "=== preallocation=$mode ===" - echo - -- IMGOPTS="preallocation=$mode" _make_test_img 64M -+ IMGOPTS="preallocation=$mode,extent_size_hint=0" _make_test_img 64M - - printf "File size: " - du -b $TEST_IMG | cut -f1 -@@ -64,7 +64,7 @@ for mode in off metadata falloc full; do - echo "=== External data file: preallocation=$mode ===" - echo - -- 
IMGOPTS="data_file=$TEST_IMG.data,preallocation=$mode" _make_test_img 64M -+ IMGOPTS="data_file=$TEST_IMG.data,preallocation=$mode,extent_size_hint=0" _make_test_img 64M - - echo -n "qcow2 file size: " - du -b $TEST_IMG | cut -f1 -@@ -75,7 +75,7 @@ for mode in off metadata falloc full; do - echo -n "qcow2 disk usage: " - [ $(du -B1 $TEST_IMG | cut -f1) -lt 1048576 ] && echo "low" || echo "high" - echo -n "data disk usage: " -- [ $(du -B1 $TEST_IMG.data | cut -f1) -lt 1048576 ] && echo "low" || echo "high" -+ [ $(du -B1 $TEST_IMG.data | cut -f1) -lt 2097152 ] && echo "low" || echo "high" - - done - -diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter -index c8e8663665..f29c1d3238 100644 ---- a/tests/qemu-iotests/common.filter -+++ b/tests/qemu-iotests/common.filter -@@ -146,6 +146,7 @@ _filter_img_create() - -e "s# refcount_bits=[0-9]\\+##g" \ - -e "s# key-secret=[a-zA-Z0-9]\\+##g" \ - -e "s# iter-time=[0-9]\\+##g" \ -+ -e "s# extent_size_hint=[0-9]\\+##g" \ - -e "s# force_size=\\(on\\|off\\)##g" - } - --- -2.27.0 - diff --git a/SOURCES/kvm-file-posix-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch b/SOURCES/kvm-file-posix-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch deleted file mode 100644 index efdf16b..0000000 --- a/SOURCES/kvm-file-posix-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 55bfda3a0e077b822f57e8ed901f0cee848bc471 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 8 Jun 2020 15:01:35 +0100 -Subject: [PATCH 07/17] file-posix: Support BDRV_REQ_ZERO_WRITE for truncate - -RH-Author: Kevin Wolf -Message-id: <20200608150140.38218-7-kwolf@redhat.com> -Patchwork-id: 97452 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 06/11] file-posix: Support BDRV_REQ_ZERO_WRITE for truncate -Bugzilla: 1780574 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz - -For regular files, we always get BDRV_REQ_ZERO_WRITE behaviour from the -OS, so we can advertise the flag and just 
ignore it. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Alberto Garcia -Reviewed-by: Max Reitz -Message-Id: <20200424125448.63318-7-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 2f0c6e7a650de133eccd94e9bb6cf7b2070f07f1) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/file-posix.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 7551e8d..adafbfa 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -674,6 +674,10 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, - #endif - - bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; -+ if (S_ISREG(st.st_mode)) { -+ /* When extending regular files, we get zeros from the OS */ -+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; -+ } - ret = 0; - fail: - if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-glib-compat-add-g_unix_get_passwd_entry_qemu.patch b/SOURCES/kvm-glib-compat-add-g_unix_get_passwd_entry_qemu.patch deleted file mode 100644 index 551b2eb..0000000 --- a/SOURCES/kvm-glib-compat-add-g_unix_get_passwd_entry_qemu.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 15331267d11713906361ddd767c3e04ae46d9a83 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 29 Jul 2021 04:55:50 -0400 -Subject: [PATCH 01/14] glib-compat: add g_unix_get_passwd_entry_qemu() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20210609100615.2501448-2-marcandre.lureau@redhat.com> -Patchwork-id: 101687 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/4] glib-compat: add g_unix_get_passwd_entry_qemu() -Bugzilla: 1967716 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Michal Privoznik - -From: Marc-André Lureau - -The glib function was introduced in 2.64. 
It's a safer version of -getpwnam, and also simpler to use than getpwnam_r. - -Currently, it's only use by the next patch in qemu-ga, which doesn't -(well well...) need the thread safety guarantees. Since the fallback -version is still unsafe, I would rather keep the _qemu postfix, to make -sure it's not being misused by mistake. When/if necessary, we can -implement a safer fallback and drop the _qemu suffix. - -Signed-off-by: Marc-André Lureau -Reviewed-by: Michal Privoznik -*fix checkpatch warnings about newlines before/after block comments -Signed-off-by: Michael Roth - -(cherry picked from commit 6d593ab451c490b0ca941c6a519894231634751e) -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - include/glib-compat.h | 28 ++++++++++++++++++++++++++++ - 1 file changed, 28 insertions(+) - -diff --git a/include/glib-compat.h b/include/glib-compat.h -index 0b0ec76299..695a96f7ea 100644 ---- a/include/glib-compat.h -+++ b/include/glib-compat.h -@@ -30,6 +30,11 @@ - #pragma GCC diagnostic ignored "-Wdeprecated-declarations" - - #include -+#if defined(G_OS_UNIX) -+#include -+#include -+#include -+#endif - - /* - * Note that because of the GLIB_VERSION_MAX_ALLOWED constant above, allowing -@@ -72,6 +77,29 @@ - gint g_poll_fixed(GPollFD *fds, guint nfds, gint timeout); - #endif - -+#if defined(G_OS_UNIX) -+/* -+ * Note: The fallback implementation is not MT-safe, and it returns a copy of -+ * the libc passwd (must be g_free() after use) but not the content. Because of -+ * these important differences the caller must be aware of, it's not #define for -+ * GLib API substitution. 
-+ */ -+static inline struct passwd * -+g_unix_get_passwd_entry_qemu(const gchar *user_name, GError **error) -+{ -+#if GLIB_CHECK_VERSION(2, 64, 0) -+ return g_unix_get_passwd_entry(user_name, error); -+#else -+ struct passwd *p = getpwnam(user_name); -+ if (!p) { -+ g_set_error_literal(error, G_UNIX_ERROR, 0, g_strerror(errno)); -+ return NULL; -+ } -+ return (struct passwd *)g_memdup(p, sizeof(*p)); -+#endif -+} -+#endif /* G_OS_UNIX */ -+ - #pragma GCC diagnostic pop - - #endif --- -2.27.0 - diff --git a/SOURCES/kvm-hmat-acpi-Build-Memory-Proximity-Domain-Attributes-S.patch b/SOURCES/kvm-hmat-acpi-Build-Memory-Proximity-Domain-Attributes-S.patch deleted file mode 100644 index e34f576..0000000 --- a/SOURCES/kvm-hmat-acpi-Build-Memory-Proximity-Domain-Attributes-S.patch +++ /dev/null @@ -1,275 +0,0 @@ -From a0816e4374759048cb24b9b3549a093a2ccb6240 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 21 May 2020 23:56:50 +0100 -Subject: [PATCH 07/12] hmat acpi: Build Memory Proximity Domain Attributes - Structure(s) - -RH-Author: plai@redhat.com -Message-id: <20200521235655.27141-7-plai@redhat.com> -Patchwork-id: 96734 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 06/11] hmat acpi: Build Memory Proximity Domain Attributes Structure(s) -Bugzilla: 1600217 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -From: Liu Jingqi - -HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table -(HMAT). The specification references below link: -http://www.uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf - -It describes the memory attributes, such as memory side cache -attributes and bandwidth and latency details, related to the -Memory Proximity Domain. The software is -expected to use this information as hint for optimization. - -This structure describes Memory Proximity Domain Attributes by memory -subsystem and its associativity with processor proximity domain as well as -hint for memory usage. 
- -In the linux kernel, the codes in drivers/acpi/hmat/hmat.c parse and report -the platform's HMAT tables. - -Acked-by: Markus Armbruster -Reviewed-by: Igor Mammedov -Reviewed-by: Daniel Black -Reviewed-by: Jonathan Cameron -Signed-off-by: Liu Jingqi -Signed-off-by: Tao Xu -Message-Id: <20191213011929.2520-5-tao3.xu@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit e6f123c3b81241be33f1b763d0ff8b36d1ae9c1e) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. de Paula ---- - hw/acpi/Kconfig | 7 ++-- - hw/acpi/Makefile.objs | 1 + - hw/acpi/hmat.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++ - hw/acpi/hmat.h | 42 ++++++++++++++++++++++ - hw/i386/acpi-build.c | 5 +++ - 5 files changed, 152 insertions(+), 2 deletions(-) - create mode 100644 hw/acpi/hmat.c - create mode 100644 hw/acpi/hmat.h - -diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig -index 12e3f1e..54209c6 100644 ---- a/hw/acpi/Kconfig -+++ b/hw/acpi/Kconfig -@@ -7,6 +7,7 @@ config ACPI_X86 - select ACPI_NVDIMM - select ACPI_CPU_HOTPLUG - select ACPI_MEMORY_HOTPLUG -+ select ACPI_HMAT - - config ACPI_X86_ICH - bool -@@ -23,6 +24,10 @@ config ACPI_NVDIMM - bool - depends on ACPI - -+config ACPI_HMAT -+ bool -+ depends on ACPI -+ - config ACPI_PCI - bool - depends on ACPI && PCI -@@ -33,5 +38,3 @@ config ACPI_VMGENID - depends on PC - - config ACPI_HW_REDUCED -- bool -- depends on ACPI -diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs -index 655a9c1..517bd88 100644 ---- a/hw/acpi/Makefile.objs -+++ b/hw/acpi/Makefile.objs -@@ -7,6 +7,7 @@ common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o - common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o - common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o - common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o -+common-obj-$(CONFIG_ACPI_HMAT) += hmat.o - common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o - - common-obj-y += acpi_interface.o -diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c -new file 
mode 100644 -index 0000000..9ff7930 ---- /dev/null -+++ b/hw/acpi/hmat.c -@@ -0,0 +1,99 @@ -+/* -+ * HMAT ACPI Implementation -+ * -+ * Copyright(C) 2019 Intel Corporation. -+ * -+ * Author: -+ * Liu jingqi -+ * Tao Xu -+ * -+ * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table -+ * (HMAT) -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, see -+ */ -+ -+#include "qemu/osdep.h" -+#include "sysemu/numa.h" -+#include "hw/acpi/hmat.h" -+ -+/* -+ * ACPI 6.3: -+ * 5.2.27.3 Memory Proximity Domain Attributes Structure: Table 5-145 -+ */ -+static void build_hmat_mpda(GArray *table_data, uint16_t flags, -+ uint32_t initiator, uint32_t mem_node) -+{ -+ -+ /* Memory Proximity Domain Attributes Structure */ -+ /* Type */ -+ build_append_int_noprefix(table_data, 0, 2); -+ /* Reserved */ -+ build_append_int_noprefix(table_data, 0, 2); -+ /* Length */ -+ build_append_int_noprefix(table_data, 40, 4); -+ /* Flags */ -+ build_append_int_noprefix(table_data, flags, 2); -+ /* Reserved */ -+ build_append_int_noprefix(table_data, 0, 2); -+ /* Proximity Domain for the Attached Initiator */ -+ build_append_int_noprefix(table_data, initiator, 4); -+ /* Proximity Domain for the Memory */ -+ build_append_int_noprefix(table_data, mem_node, 4); -+ /* Reserved */ -+ build_append_int_noprefix(table_data, 0, 4); -+ /* -+ * Reserved: -+ * Previously defined as the Start Address 
of the System Physical -+ * Address Range. Deprecated since ACPI Spec 6.3. -+ */ -+ build_append_int_noprefix(table_data, 0, 8); -+ /* -+ * Reserved: -+ * Previously defined as the Range Length of the region in bytes. -+ * Deprecated since ACPI Spec 6.3. -+ */ -+ build_append_int_noprefix(table_data, 0, 8); -+} -+ -+/* Build HMAT sub table structures */ -+static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) -+{ -+ uint16_t flags; -+ int i; -+ -+ for (i = 0; i < numa_state->num_nodes; i++) { -+ flags = 0; -+ -+ if (numa_state->nodes[i].initiator < MAX_NODES) { -+ flags |= HMAT_PROXIMITY_INITIATOR_VALID; -+ } -+ -+ build_hmat_mpda(table_data, flags, numa_state->nodes[i].initiator, i); -+ } -+} -+ -+void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state) -+{ -+ int hmat_start = table_data->len; -+ -+ /* reserve space for HMAT header */ -+ acpi_data_push(table_data, 40); -+ -+ hmat_build_table_structs(table_data, numa_state); -+ -+ build_header(linker, table_data, -+ (void *)(table_data->data + hmat_start), -+ "HMAT", table_data->len - hmat_start, 2, NULL, NULL); -+} -diff --git a/hw/acpi/hmat.h b/hw/acpi/hmat.h -new file mode 100644 -index 0000000..437dbc6 ---- /dev/null -+++ b/hw/acpi/hmat.h -@@ -0,0 +1,42 @@ -+/* -+ * HMAT ACPI Implementation Header -+ * -+ * Copyright(C) 2019 Intel Corporation. -+ * -+ * Author: -+ * Liu jingqi -+ * Tao Xu -+ * -+ * HMAT is defined in ACPI 6.3: 5.2.27 Heterogeneous Memory Attribute Table -+ * (HMAT) -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, see -+ */ -+ -+#ifndef HMAT_H -+#define HMAT_H -+ -+#include "hw/acpi/aml-build.h" -+ -+/* -+ * ACPI 6.3: 5.2.27.3 Memory Proximity Domain Attributes Structure, -+ * Table 5-145, Field "flag", Bit [0]: set to 1 to indicate that data in -+ * the Proximity Domain for the Attached Initiator field is valid. -+ * Other bits reserved. -+ */ -+#define HMAT_PROXIMITY_INITIATOR_VALID 0x1 -+ -+void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state); -+ -+#endif -diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 6400189..b1f8c55 100644 ---- a/hw/i386/acpi-build.c -+++ b/hw/i386/acpi-build.c -@@ -67,6 +67,7 @@ - #include "hw/i386/intel_iommu.h" - - #include "hw/acpi/ipmi.h" -+#include "hw/acpi/hmat.h" - - /* These are used to size the ACPI tables for -M pc-i440fx-1.7 and - * -M pc-i440fx-2.0. 
Even if the actual amount of AML generated grows -@@ -2837,6 +2838,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) - acpi_add_table(table_offsets, tables_blob); - build_slit(tables_blob, tables->linker, machine); - } -+ if (machine->numa_state->hmat_enabled) { -+ acpi_add_table(table_offsets, tables_blob); -+ build_hmat(tables_blob, tables->linker, machine->numa_state); -+ } - } - if (acpi_get_mcfg(&mcfg)) { - acpi_add_table(table_offsets, tables_blob); --- -1.8.3.1 - diff --git a/SOURCES/kvm-hmat-acpi-Build-Memory-Side-Cache-Information-Struct.patch b/SOURCES/kvm-hmat-acpi-Build-Memory-Side-Cache-Information-Struct.patch deleted file mode 100644 index 01ef4ce..0000000 --- a/SOURCES/kvm-hmat-acpi-Build-Memory-Side-Cache-Information-Struct.patch +++ /dev/null @@ -1,137 +0,0 @@ -From d00453667cb972dc2fe1242081d3b39313a6a925 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 21 May 2020 23:56:52 +0100 -Subject: [PATCH 09/12] hmat acpi: Build Memory Side Cache Information - Structure(s) - -RH-Author: plai@redhat.com -Message-id: <20200521235655.27141-9-plai@redhat.com> -Patchwork-id: 96741 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 08/11] hmat acpi: Build Memory Side Cache Information Structure(s) -Bugzilla: 1600217 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -From: Liu Jingqi - -This structure describes memory side cache information for memory -proximity domains if the memory side cache is present and the -physical device forms the memory side cache. -The software could use this information to effectively place -the data in memory to maximize the performance of the system -memory that use the memory side cache. - -Acked-by: Markus Armbruster -Reviewed-by: Igor Mammedov -Reviewed-by: Daniel Black -Reviewed-by: Jonathan Cameron -Signed-off-by: Liu Jingqi -Signed-off-by: Tao Xu -Message-Id: <20191213011929.2520-7-tao3.xu@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit a9c2b841af002db6e21e1297c9026b63fc22c875) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. de Paula ---- - hw/acpi/hmat.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 68 insertions(+), 1 deletion(-) - -diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c -index 4635d45..7c24bb5 100644 ---- a/hw/acpi/hmat.c -+++ b/hw/acpi/hmat.c -@@ -143,14 +143,62 @@ static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb, - g_free(entry_list); - } - -+/* ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure: Table 5-147 */ -+static void build_hmat_cache(GArray *table_data, uint8_t total_levels, -+ NumaHmatCacheOptions *hmat_cache) -+{ -+ /* -+ * Cache Attributes: Bits [3:0] – Total Cache Levels -+ * for this Memory Proximity Domain -+ */ -+ uint32_t cache_attr = total_levels; -+ -+ /* Bits [7:4] : Cache Level described in this structure */ -+ cache_attr |= (uint32_t) hmat_cache->level << 4; -+ -+ /* Bits [11:8] - Cache Associativity */ -+ cache_attr |= (uint32_t) hmat_cache->associativity << 8; -+ -+ /* Bits [15:12] - Write Policy */ -+ cache_attr |= (uint32_t) hmat_cache->policy << 12; -+ -+ /* Bits [31:16] - Cache Line size in bytes */ -+ cache_attr |= (uint32_t) hmat_cache->line << 16; -+ -+ /* Type */ -+ build_append_int_noprefix(table_data, 2, 2); -+ /* Reserved */ -+ build_append_int_noprefix(table_data, 0, 2); -+ /* Length */ -+ build_append_int_noprefix(table_data, 32, 4); -+ /* Proximity Domain for the Memory */ -+ build_append_int_noprefix(table_data, hmat_cache->node_id, 4); -+ /* Reserved */ -+ build_append_int_noprefix(table_data, 0, 4); -+ /* Memory Side Cache Size */ -+ build_append_int_noprefix(table_data, hmat_cache->size, 8); -+ /* Cache Attributes */ -+ build_append_int_noprefix(table_data, cache_attr, 4); -+ /* Reserved */ -+ build_append_int_noprefix(table_data, 0, 2); -+ /* -+ * Number of SMBIOS handles (n) -+ * Linux kernel uses Memory Side Cache Information Structure -+ * 
without SMBIOS entries for now, so set Number of SMBIOS handles -+ * as 0. -+ */ -+ build_append_int_noprefix(table_data, 0, 2); -+} -+ - /* Build HMAT sub table structures */ - static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) - { - uint16_t flags; - uint32_t num_initiator = 0; - uint32_t initiator_list[MAX_NODES]; -- int i, hierarchy, type; -+ int i, hierarchy, type, cache_level, total_levels; - HMAT_LB_Info *hmat_lb; -+ NumaHmatCacheOptions *hmat_cache; - - for (i = 0; i < numa_state->num_nodes; i++) { - flags = 0; -@@ -184,6 +232,25 @@ static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) - } - } - } -+ -+ /* -+ * ACPI 6.3: 5.2.27.5 Memory Side Cache Information Structure: -+ * Table 5-147 -+ */ -+ for (i = 0; i < numa_state->num_nodes; i++) { -+ total_levels = 0; -+ for (cache_level = 1; cache_level < HMAT_LB_LEVELS; cache_level++) { -+ if (numa_state->hmat_cache[i][cache_level]) { -+ total_levels++; -+ } -+ } -+ for (cache_level = 0; cache_level <= total_levels; cache_level++) { -+ hmat_cache = numa_state->hmat_cache[i][cache_level]; -+ if (hmat_cache) { -+ build_hmat_cache(table_data, total_levels, hmat_cache); -+ } -+ } -+ } - } - - void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state) --- -1.8.3.1 - diff --git a/SOURCES/kvm-hmat-acpi-Build-System-Locality-Latency-and-Bandwidt.patch b/SOURCES/kvm-hmat-acpi-Build-System-Locality-Latency-and-Bandwidt.patch deleted file mode 100644 index a7120d7..0000000 --- a/SOURCES/kvm-hmat-acpi-Build-System-Locality-Latency-and-Bandwidt.patch +++ /dev/null @@ -1,173 +0,0 @@ -From f55b8b251c323856087baf2380d93fbf2da15db7 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 21 May 2020 23:56:51 +0100 -Subject: [PATCH 08/12] hmat acpi: Build System Locality Latency and Bandwidth - Information Structure(s) - -RH-Author: plai@redhat.com -Message-id: <20200521235655.27141-8-plai@redhat.com> -Patchwork-id: 96733 -O-Subject: [RHEL8.2.1 AV 
qemu-kvm PATCH 07/11] hmat acpi: Build System Locality Latency and Bandwidth Information Structure(s) -Bugzilla: 1600217 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -From: Liu Jingqi - -This structure describes the memory access latency and bandwidth -information from various memory access initiator proximity domains. -The latency and bandwidth numbers represented in this structure -correspond to rated latency and bandwidth for the platform. -The software could use this information as hint for optimization. - -Acked-by: Markus Armbruster -Reviewed-by: Igor Mammedov -Signed-off-by: Liu Jingqi -Signed-off-by: Tao Xu -Message-Id: <20191213011929.2520-6-tao3.xu@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 4586a2cb833f80b19c80ebe364a005ac2fa0974a) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. de Paula ---- - hw/acpi/hmat.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 103 insertions(+), 1 deletion(-) - -diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c -index 9ff7930..4635d45 100644 ---- a/hw/acpi/hmat.c -+++ b/hw/acpi/hmat.c -@@ -25,6 +25,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/units.h" - #include "sysemu/numa.h" - #include "hw/acpi/hmat.h" - -@@ -67,11 +68,89 @@ static void build_hmat_mpda(GArray *table_data, uint16_t flags, - build_append_int_noprefix(table_data, 0, 8); - } - -+/* -+ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information -+ * Structure: Table 5-146 -+ */ -+static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb, -+ uint32_t num_initiator, uint32_t num_target, -+ uint32_t *initiator_list) -+{ -+ int i, index; -+ HMAT_LB_Data *lb_data; -+ uint16_t *entry_list; -+ uint32_t base; -+ /* Length in bytes for entire structure */ -+ uint32_t lb_length -+ = 32 /* Table length upto and including Entry Base Unit */ -+ + 4 * num_initiator /* Initiator Proximity Domain List 
*/ -+ + 4 * num_target /* Target Proximity Domain List */ -+ + 2 * num_initiator * num_target; /* Latency or Bandwidth Entries */ -+ -+ /* Type */ -+ build_append_int_noprefix(table_data, 1, 2); -+ /* Reserved */ -+ build_append_int_noprefix(table_data, 0, 2); -+ /* Length */ -+ build_append_int_noprefix(table_data, lb_length, 4); -+ /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */ -+ assert(!(hmat_lb->hierarchy >> 4)); -+ build_append_int_noprefix(table_data, hmat_lb->hierarchy, 1); -+ /* Data Type */ -+ build_append_int_noprefix(table_data, hmat_lb->data_type, 1); -+ /* Reserved */ -+ build_append_int_noprefix(table_data, 0, 2); -+ /* Number of Initiator Proximity Domains (s) */ -+ build_append_int_noprefix(table_data, num_initiator, 4); -+ /* Number of Target Proximity Domains (t) */ -+ build_append_int_noprefix(table_data, num_target, 4); -+ /* Reserved */ -+ build_append_int_noprefix(table_data, 0, 4); -+ -+ /* Entry Base Unit */ -+ if (hmat_lb->data_type <= HMAT_LB_DATA_WRITE_LATENCY) { -+ /* Convert latency base from nanoseconds to picosecond */ -+ base = hmat_lb->base * 1000; -+ } else { -+ /* Convert bandwidth base from Byte to Megabyte */ -+ base = hmat_lb->base / MiB; -+ } -+ build_append_int_noprefix(table_data, base, 8); -+ -+ /* Initiator Proximity Domain List */ -+ for (i = 0; i < num_initiator; i++) { -+ build_append_int_noprefix(table_data, initiator_list[i], 4); -+ } -+ -+ /* Target Proximity Domain List */ -+ for (i = 0; i < num_target; i++) { -+ build_append_int_noprefix(table_data, i, 4); -+ } -+ -+ /* Latency or Bandwidth Entries */ -+ entry_list = g_malloc0(num_initiator * num_target * sizeof(uint16_t)); -+ for (i = 0; i < hmat_lb->list->len; i++) { -+ lb_data = &g_array_index(hmat_lb->list, HMAT_LB_Data, i); -+ index = lb_data->initiator * num_target + lb_data->target; -+ -+ entry_list[index] = (uint16_t)(lb_data->data / hmat_lb->base); -+ } -+ -+ for (i = 0; i < num_initiator * num_target; i++) { -+ 
build_append_int_noprefix(table_data, entry_list[i], 2); -+ } -+ -+ g_free(entry_list); -+} -+ - /* Build HMAT sub table structures */ - static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) - { - uint16_t flags; -- int i; -+ uint32_t num_initiator = 0; -+ uint32_t initiator_list[MAX_NODES]; -+ int i, hierarchy, type; -+ HMAT_LB_Info *hmat_lb; - - for (i = 0; i < numa_state->num_nodes; i++) { - flags = 0; -@@ -82,6 +161,29 @@ static void hmat_build_table_structs(GArray *table_data, NumaState *numa_state) - - build_hmat_mpda(table_data, flags, numa_state->nodes[i].initiator, i); - } -+ -+ for (i = 0; i < numa_state->num_nodes; i++) { -+ if (numa_state->nodes[i].has_cpu) { -+ initiator_list[num_initiator++] = i; -+ } -+ } -+ -+ /* -+ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information -+ * Structure: Table 5-146 -+ */ -+ for (hierarchy = HMAT_LB_MEM_MEMORY; -+ hierarchy <= HMAT_LB_MEM_CACHE_3RD_LEVEL; hierarchy++) { -+ for (type = HMAT_LB_DATA_ACCESS_LATENCY; -+ type <= HMAT_LB_DATA_WRITE_BANDWIDTH; type++) { -+ hmat_lb = numa_state->hmat_lb[hierarchy][type]; -+ -+ if (hmat_lb && hmat_lb->list->len) { -+ build_hmat_lb(table_data, hmat_lb, num_initiator, -+ numa_state->num_nodes, initiator_list); -+ } -+ } -+ } - } - - void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *numa_state) --- -1.8.3.1 - diff --git a/SOURCES/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch b/SOURCES/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch deleted file mode 100644 index f01dec2..0000000 --- a/SOURCES/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch +++ /dev/null @@ -1,100 +0,0 @@ -From cebc614e5ddd1f770c4d6dc26c066791f36e56df Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:02 +0000 -Subject: [PATCH 05/18] hmp: Allow using qdev ID for qemu-io command - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-5-kwolf@redhat.com> -Patchwork-id: 93750 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm 
PATCH v2 4/6] hmp: Allow using qdev ID for qemu-io command -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -In order to issue requests on an existing BlockBackend with the -'qemu-io' HMP command, allow specifying the BlockBackend not only with a -BlockBackend name, but also with a qdev ID/QOM path for a device that -owns the (possibly anonymous) BlockBackend. - -Because qdev names could be conflicting with BlockBackend and node -names, introduce a -d option to explicitly address a device. If the -option is not given, a BlockBackend or a node is addressed. - -Signed-off-by: Kevin Wolf -(cherry picked from commit 89b6fc45614bb45dcd58f1590415afe5c2791abd) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - hmp-commands.hx | 8 +++++--- - monitor/hmp-cmds.c | 28 ++++++++++++++++++---------- - 2 files changed, 23 insertions(+), 13 deletions(-) - -diff --git a/hmp-commands.hx b/hmp-commands.hx -index cfcc044..dc23185 100644 ---- a/hmp-commands.hx -+++ b/hmp-commands.hx -@@ -1875,9 +1875,11 @@ ETEXI - - { - .name = "qemu-io", -- .args_type = "device:B,command:s", -- .params = "[device] \"[command]\"", -- .help = "run a qemu-io command on a block device", -+ .args_type = "qdev:-d,device:B,command:s", -+ .params = "[-d] [device] \"[command]\"", -+ .help = "run a qemu-io command on a block device\n\t\t\t" -+ "-d: [device] is a device ID rather than a " -+ "drive ID or node name", - .cmd = hmp_qemu_io, - }, - -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index b2551c1..5f8941d 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -2468,23 +2468,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) - { - BlockBackend *blk; - BlockBackend *local_blk = NULL; -+ bool qdev = qdict_get_try_bool(qdict, "qdev", false); - const char* device = qdict_get_str(qdict, "device"); - const char* command = qdict_get_str(qdict, "command"); - Error *err = NULL; - int ret; - -- blk = 
blk_by_name(device); -- if (!blk) { -- BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); -- if (bs) { -- blk = local_blk = blk_new(bdrv_get_aio_context(bs), -- 0, BLK_PERM_ALL); -- ret = blk_insert_bs(blk, bs, &err); -- if (ret < 0) { -+ if (qdev) { -+ blk = blk_by_qdev_id(device, &err); -+ if (!blk) { -+ goto fail; -+ } -+ } else { -+ blk = blk_by_name(device); -+ if (!blk) { -+ BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); -+ if (bs) { -+ blk = local_blk = blk_new(bdrv_get_aio_context(bs), -+ 0, BLK_PERM_ALL); -+ ret = blk_insert_bs(blk, bs, &err); -+ if (ret < 0) { -+ goto fail; -+ } -+ } else { - goto fail; - } -- } else { -- goto fail; - } - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-hw-arm-smmu-Introduce-SMMUTLBEntry-for-PTW-and-IOTLB.patch b/SOURCES/kvm-hw-arm-smmu-Introduce-SMMUTLBEntry-for-PTW-and-IOTLB.patch deleted file mode 100644 index 75788c5..0000000 --- a/SOURCES/kvm-hw-arm-smmu-Introduce-SMMUTLBEntry-for-PTW-and-IOTLB.patch +++ /dev/null @@ -1,222 +0,0 @@ -From 602f17920e422e2b8d3ce485e56066a97b74e723 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Tue, 12 Jan 2021 14:36:29 -0500 -Subject: [PATCH 05/17] hw/arm/smmu: Introduce SMMUTLBEntry for PTW and IOTLB - value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210112143638.374060-5-eperezma@redhat.com> -Patchwork-id: 100597 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 04/13] hw/arm/smmu: Introduce SMMUTLBEntry for PTW and IOTLB value -Bugzilla: 1843852 -RH-Acked-by: Xiao Wang -RH-Acked-by: Peter Xu -RH-Acked-by: Auger Eric - -From: Eric Auger - -Introduce a specialized SMMUTLBEntry to store the result of -the PTW and cache in the IOTLB. This structure extends the -generic IOMMUTLBEntry struct with the level of the entry and -the granule size. - -Those latter will be useful when implementing range invalidation. 
- -Signed-off-by: Eric Auger -Reviewed-by: Peter Maydell -Message-id: 20200728150815.11446-5-eric.auger@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit a7550158556b7fc2f2baaecf9092499c6687b160) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/smmu-common.c | 32 +++++++++++++++++--------------- - hw/arm/smmuv3.c | 10 +++++----- - include/hw/arm/smmu-common.h | 12 +++++++++--- - 3 files changed, 31 insertions(+), 23 deletions(-) - -diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c -index 0b89c9fbbbc..06e9e38b007 100644 ---- a/hw/arm/smmu-common.c -+++ b/hw/arm/smmu-common.c -@@ -64,11 +64,11 @@ SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova) - return key; - } - --IOMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, -- hwaddr iova) -+SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, -+ hwaddr iova) - { - SMMUIOTLBKey key = smmu_get_iotlb_key(cfg->asid, iova); -- IOMMUTLBEntry *entry = g_hash_table_lookup(bs->iotlb, &key); -+ SMMUTLBEntry *entry = g_hash_table_lookup(bs->iotlb, &key); - - if (entry) { - cfg->iotlb_hits++; -@@ -86,7 +86,7 @@ IOMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, - return entry; - } - --void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, IOMMUTLBEntry *entry) -+void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new) - { - SMMUIOTLBKey *key = g_new0(SMMUIOTLBKey, 1); - -@@ -94,9 +94,9 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, IOMMUTLBEntry *entry) - smmu_iotlb_inv_all(bs); - } - -- *key = smmu_get_iotlb_key(cfg->asid, entry->iova); -- trace_smmu_iotlb_insert(cfg->asid, entry->iova); -- g_hash_table_insert(bs->iotlb, key, entry); -+ *key = smmu_get_iotlb_key(cfg->asid, new->entry.iova); -+ trace_smmu_iotlb_insert(cfg->asid, new->entry.iova); -+ g_hash_table_insert(bs->iotlb, key, new); - } - - inline void smmu_iotlb_inv_all(SMMUState *s) -@@ -217,7 +217,7 @@ SMMUTransTableInfo 
*select_tt(SMMUTransCfg *cfg, dma_addr_t iova) - * @cfg: translation config - * @iova: iova to translate - * @perm: access type -- * @tlbe: IOMMUTLBEntry (out) -+ * @tlbe: SMMUTLBEntry (out) - * @info: handle to an error info - * - * Return 0 on success, < 0 on error. In case of error, @info is filled -@@ -227,7 +227,7 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova) - */ - static int smmu_ptw_64(SMMUTransCfg *cfg, - dma_addr_t iova, IOMMUAccessFlags perm, -- IOMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) -+ SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) - { - dma_addr_t baseaddr, indexmask; - int stage = cfg->stage; -@@ -247,8 +247,8 @@ static int smmu_ptw_64(SMMUTransCfg *cfg, - baseaddr = extract64(tt->ttb, 0, 48); - baseaddr &= ~indexmask; - -- tlbe->iova = iova; -- tlbe->addr_mask = (1 << granule_sz) - 1; -+ tlbe->entry.iova = iova; -+ tlbe->entry.addr_mask = (1 << granule_sz) - 1; - - while (level <= 3) { - uint64_t subpage_size = 1ULL << level_shift(level, granule_sz); -@@ -299,14 +299,16 @@ static int smmu_ptw_64(SMMUTransCfg *cfg, - goto error; - } - -- tlbe->translated_addr = gpa + (iova & mask); -- tlbe->perm = PTE_AP_TO_PERM(ap); -+ tlbe->entry.translated_addr = gpa + (iova & mask); -+ tlbe->entry.perm = PTE_AP_TO_PERM(ap); -+ tlbe->level = level; -+ tlbe->granule = granule_sz; - return 0; - } - info->type = SMMU_PTW_ERR_TRANSLATION; - - error: -- tlbe->perm = IOMMU_NONE; -+ tlbe->entry.perm = IOMMU_NONE; - return -EINVAL; - } - -@@ -322,7 +324,7 @@ error: - * return 0 on success - */ - inline int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, -- IOMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) -+ SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) - { - if (!cfg->aa64) { - /* -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 34dea4df4da..ad8212779d3 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -614,7 +614,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, - SMMUTranslationStatus 
status; - SMMUState *bs = ARM_SMMU(s); - uint64_t page_mask, aligned_addr; -- IOMMUTLBEntry *cached_entry = NULL; -+ SMMUTLBEntry *cached_entry = NULL; - SMMUTransTableInfo *tt; - SMMUTransCfg *cfg = NULL; - IOMMUTLBEntry entry = { -@@ -664,7 +664,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, - - cached_entry = smmu_iotlb_lookup(bs, cfg, aligned_addr); - if (cached_entry) { -- if ((flag & IOMMU_WO) && !(cached_entry->perm & IOMMU_WO)) { -+ if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) { - status = SMMU_TRANS_ERROR; - if (event.record_trans_faults) { - event.type = SMMU_EVT_F_PERMISSION; -@@ -677,7 +677,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, - goto epilogue; - } - -- cached_entry = g_new0(IOMMUTLBEntry, 1); -+ cached_entry = g_new0(SMMUTLBEntry, 1); - - if (smmu_ptw(cfg, aligned_addr, flag, cached_entry, &ptw_info)) { - g_free(cached_entry); -@@ -731,9 +731,9 @@ epilogue: - switch (status) { - case SMMU_TRANS_SUCCESS: - entry.perm = flag; -- entry.translated_addr = cached_entry->translated_addr + -+ entry.translated_addr = cached_entry->entry.translated_addr + - (addr & page_mask); -- entry.addr_mask = cached_entry->addr_mask; -+ entry.addr_mask = cached_entry->entry.addr_mask; - trace_smmuv3_translate_success(mr->parent_obj.name, sid, addr, - entry.translated_addr, entry.perm); - break; -diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h -index bceba40885c..277923bdc0a 100644 ---- a/include/hw/arm/smmu-common.h -+++ b/include/hw/arm/smmu-common.h -@@ -52,6 +52,12 @@ typedef struct SMMUTransTableInfo { - uint8_t granule_sz; /* granule page shift */ - } SMMUTransTableInfo; - -+typedef struct SMMUTLBEntry { -+ IOMMUTLBEntry entry; -+ uint8_t level; -+ uint8_t granule; -+} SMMUTLBEntry; -+ - /* - * Generic structure populated by derived SMMU devices - * after decoding the configuration information and used as -@@ -140,7 +146,7 @@ static inline uint16_t 
smmu_get_sid(SMMUDevice *sdev) - * pair, according to @cfg translation config - */ - int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, -- IOMMUTLBEntry *tlbe, SMMUPTWEventInfo *info); -+ SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info); - - /** - * select_tt - compute which translation table shall be used according to -@@ -153,8 +159,8 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid); - - #define SMMU_IOTLB_MAX_SIZE 256 - --IOMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, hwaddr iova); --void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, IOMMUTLBEntry *entry); -+SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, hwaddr iova); -+void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *entry); - SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova); - void smmu_iotlb_inv_all(SMMUState *s); - void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid); --- -2.27.0 - diff --git a/SOURCES/kvm-hw-arm-smmu-Introduce-smmu_get_iotlb_key.patch b/SOURCES/kvm-hw-arm-smmu-Introduce-smmu_get_iotlb_key.patch deleted file mode 100644 index 6500b41..0000000 --- a/SOURCES/kvm-hw-arm-smmu-Introduce-smmu_get_iotlb_key.patch +++ /dev/null @@ -1,166 +0,0 @@ -From 7833c0bf8321cb39614ee889cf3e3a64511c0aa5 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Tue, 12 Jan 2021 14:36:28 -0500 -Subject: [PATCH 04/17] hw/arm/smmu: Introduce smmu_get_iotlb_key() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210112143638.374060-4-eperezma@redhat.com> -Patchwork-id: 100596 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 03/13] hw/arm/smmu: Introduce smmu_get_iotlb_key() -Bugzilla: 1843852 -RH-Acked-by: Xiao Wang -RH-Acked-by: Peter Xu -RH-Acked-by: Auger Eric - -From: Eric Auger - -Introduce the smmu_get_iotlb_key() helper and the -SMMU_IOTLB_ASID() macro. 
Also move smmu_get_iotlb_key and -smmu_iotlb_key_hash in the IOTLB related code section. - -Signed-off-by: Eric Auger -Reviewed-by: Peter Maydell -Message-id: 20200728150815.11446-4-eric.auger@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 60a61f1b31fc03080aadb63c9b1006f8b1972adb) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/smmu-common.c | 66 ++++++++++++++++++++---------------- - hw/arm/smmu-internal.h | 1 + - include/hw/arm/smmu-common.h | 1 + - 3 files changed, 38 insertions(+), 30 deletions(-) - -diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c -index 8e01505dbee..0b89c9fbbbc 100644 ---- a/hw/arm/smmu-common.c -+++ b/hw/arm/smmu-common.c -@@ -32,10 +32,42 @@ - - /* IOTLB Management */ - -+static guint smmu_iotlb_key_hash(gconstpointer v) -+{ -+ SMMUIOTLBKey *key = (SMMUIOTLBKey *)v; -+ uint32_t a, b, c; -+ -+ /* Jenkins hash */ -+ a = b = c = JHASH_INITVAL + sizeof(*key); -+ a += key->asid; -+ b += extract64(key->iova, 0, 32); -+ c += extract64(key->iova, 32, 32); -+ -+ __jhash_mix(a, b, c); -+ __jhash_final(a, b, c); -+ -+ return c; -+} -+ -+static gboolean smmu_iotlb_key_equal(gconstpointer v1, gconstpointer v2) -+{ -+ const SMMUIOTLBKey *k1 = v1; -+ const SMMUIOTLBKey *k2 = v2; -+ -+ return (k1->asid == k2->asid) && (k1->iova == k2->iova); -+} -+ -+SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova) -+{ -+ SMMUIOTLBKey key = {.asid = asid, .iova = iova}; -+ -+ return key; -+} -+ - IOMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, - hwaddr iova) - { -- SMMUIOTLBKey key = {.asid = cfg->asid, .iova = iova}; -+ SMMUIOTLBKey key = smmu_get_iotlb_key(cfg->asid, iova); - IOMMUTLBEntry *entry = g_hash_table_lookup(bs->iotlb, &key); - - if (entry) { -@@ -62,8 +94,7 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, IOMMUTLBEntry *entry) - smmu_iotlb_inv_all(bs); - } - -- key->asid = cfg->asid; -- key->iova = entry->iova; -+ *key = smmu_get_iotlb_key(cfg->asid, 
entry->iova); - trace_smmu_iotlb_insert(cfg->asid, entry->iova); - g_hash_table_insert(bs->iotlb, key, entry); - } -@@ -80,12 +111,12 @@ static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value, - uint16_t asid = *(uint16_t *)user_data; - SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key; - -- return iotlb_key->asid == asid; -+ return SMMU_IOTLB_ASID(*iotlb_key) == asid; - } - - inline void smmu_iotlb_inv_iova(SMMUState *s, uint16_t asid, dma_addr_t iova) - { -- SMMUIOTLBKey key = {.asid = asid, .iova = iova}; -+ SMMUIOTLBKey key = smmu_get_iotlb_key(asid, iova); - - trace_smmu_iotlb_inv_iova(asid, iova); - g_hash_table_remove(s->iotlb, &key); -@@ -382,31 +413,6 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) - return NULL; - } - --static guint smmu_iotlb_key_hash(gconstpointer v) --{ -- SMMUIOTLBKey *key = (SMMUIOTLBKey *)v; -- uint32_t a, b, c; -- -- /* Jenkins hash */ -- a = b = c = JHASH_INITVAL + sizeof(*key); -- a += key->asid; -- b += extract64(key->iova, 0, 32); -- c += extract64(key->iova, 32, 32); -- -- __jhash_mix(a, b, c); -- __jhash_final(a, b, c); -- -- return c; --} -- --static gboolean smmu_iotlb_key_equal(gconstpointer v1, gconstpointer v2) --{ -- const SMMUIOTLBKey *k1 = v1; -- const SMMUIOTLBKey *k2 = v2; -- -- return (k1->asid == k2->asid) && (k1->iova == k2->iova); --} -- - /* Unmap the whole notifier's range */ - static void smmu_unmap_notifier_range(IOMMUNotifier *n) - { -diff --git a/hw/arm/smmu-internal.h b/hw/arm/smmu-internal.h -index 7794d6d3947..3104f768cd2 100644 ---- a/hw/arm/smmu-internal.h -+++ b/hw/arm/smmu-internal.h -@@ -96,4 +96,5 @@ uint64_t iova_level_offset(uint64_t iova, int inputsize, - MAKE_64BIT_MASK(0, gsz - 3); - } - -+#define SMMU_IOTLB_ASID(key) ((key).asid) - #endif -diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h -index a28650c9350..bceba40885c 100644 ---- a/include/hw/arm/smmu-common.h -+++ b/include/hw/arm/smmu-common.h -@@ -155,6 +155,7 @@ IOMMUMemoryRegion 
*smmu_iommu_mr(SMMUState *s, uint32_t sid); - - IOMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, hwaddr iova); - void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, IOMMUTLBEntry *entry); -+SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova); - void smmu_iotlb_inv_all(SMMUState *s); - void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid); - void smmu_iotlb_inv_iova(SMMUState *s, uint16_t asid, dma_addr_t iova); --- -2.27.0 - diff --git a/SOURCES/kvm-hw-arm-smmu-common-Add-IOTLB-helpers.patch b/SOURCES/kvm-hw-arm-smmu-common-Add-IOTLB-helpers.patch deleted file mode 100644 index ebe3d15..0000000 --- a/SOURCES/kvm-hw-arm-smmu-common-Add-IOTLB-helpers.patch +++ /dev/null @@ -1,181 +0,0 @@ -From fbfa584e58a560f27081043ad8e90ee9022421c0 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Tue, 12 Jan 2021 14:36:27 -0500 -Subject: [PATCH 03/17] hw/arm/smmu-common: Add IOTLB helpers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210112143638.374060-3-eperezma@redhat.com> -Patchwork-id: 100595 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 02/13] hw/arm/smmu-common: Add IOTLB helpers -Bugzilla: 1843852 -RH-Acked-by: Xiao Wang -RH-Acked-by: Peter Xu -RH-Acked-by: Auger Eric - -From: Eric Auger - -Add two helpers: one to lookup for a given IOTLB entry and -one to insert a new entry. We also move the tracing there. - -Signed-off-by: Eric Auger -Reviewed-by: Peter Maydell -Message-id: 20200728150815.11446-3-eric.auger@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 6808bca939b8722d98165319ba42366ca80de907) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/arm/smmu-common.c | 36 ++++++++++++++++++++++++++++++++++++ - hw/arm/smmuv3.c | 26 ++------------------------ - hw/arm/trace-events | 5 +++-- - include/hw/arm/smmu-common.h | 2 ++ - 4 files changed, 43 insertions(+), 26 deletions(-) - -diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c -index d2ba8b224ba..8e01505dbee 100644 ---- a/hw/arm/smmu-common.c -+++ b/hw/arm/smmu-common.c -@@ -32,6 +32,42 @@ - - /* IOTLB Management */ - -+IOMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, -+ hwaddr iova) -+{ -+ SMMUIOTLBKey key = {.asid = cfg->asid, .iova = iova}; -+ IOMMUTLBEntry *entry = g_hash_table_lookup(bs->iotlb, &key); -+ -+ if (entry) { -+ cfg->iotlb_hits++; -+ trace_smmu_iotlb_lookup_hit(cfg->asid, iova, -+ cfg->iotlb_hits, cfg->iotlb_misses, -+ 100 * cfg->iotlb_hits / -+ (cfg->iotlb_hits + cfg->iotlb_misses)); -+ } else { -+ cfg->iotlb_misses++; -+ trace_smmu_iotlb_lookup_miss(cfg->asid, iova, -+ cfg->iotlb_hits, cfg->iotlb_misses, -+ 100 * cfg->iotlb_hits / -+ (cfg->iotlb_hits + cfg->iotlb_misses)); -+ } -+ return entry; -+} -+ -+void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, IOMMUTLBEntry *entry) -+{ -+ SMMUIOTLBKey *key = g_new0(SMMUIOTLBKey, 1); -+ -+ if (g_hash_table_size(bs->iotlb) >= SMMU_IOTLB_MAX_SIZE) { -+ smmu_iotlb_inv_all(bs); -+ } -+ -+ key->asid = cfg->asid; -+ key->iova = entry->iova; -+ trace_smmu_iotlb_insert(cfg->asid, entry->iova); -+ g_hash_table_insert(bs->iotlb, key, entry); -+} -+ - inline void smmu_iotlb_inv_all(SMMUState *s) - { - trace_smmu_iotlb_inv_all(); -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index e2fbb8357ea..34dea4df4da 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -624,7 +624,6 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, - .addr_mask = ~(hwaddr)0, - .perm = IOMMU_NONE, - }; -- SMMUIOTLBKey key, *new_key; - - qemu_mutex_lock(&s->mutex); - -@@ -663,16 +662,8 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr 
addr, - page_mask = (1ULL << (tt->granule_sz)) - 1; - aligned_addr = addr & ~page_mask; - -- key.asid = cfg->asid; -- key.iova = aligned_addr; -- -- cached_entry = g_hash_table_lookup(bs->iotlb, &key); -+ cached_entry = smmu_iotlb_lookup(bs, cfg, aligned_addr); - if (cached_entry) { -- cfg->iotlb_hits++; -- trace_smmu_iotlb_cache_hit(cfg->asid, aligned_addr, -- cfg->iotlb_hits, cfg->iotlb_misses, -- 100 * cfg->iotlb_hits / -- (cfg->iotlb_hits + cfg->iotlb_misses)); - if ((flag & IOMMU_WO) && !(cached_entry->perm & IOMMU_WO)) { - status = SMMU_TRANS_ERROR; - if (event.record_trans_faults) { -@@ -686,16 +677,6 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, - goto epilogue; - } - -- cfg->iotlb_misses++; -- trace_smmu_iotlb_cache_miss(cfg->asid, addr & ~page_mask, -- cfg->iotlb_hits, cfg->iotlb_misses, -- 100 * cfg->iotlb_hits / -- (cfg->iotlb_hits + cfg->iotlb_misses)); -- -- if (g_hash_table_size(bs->iotlb) >= SMMU_IOTLB_MAX_SIZE) { -- smmu_iotlb_inv_all(bs); -- } -- - cached_entry = g_new0(IOMMUTLBEntry, 1); - - if (smmu_ptw(cfg, aligned_addr, flag, cached_entry, &ptw_info)) { -@@ -741,10 +722,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, - } - status = SMMU_TRANS_ERROR; - } else { -- new_key = g_new0(SMMUIOTLBKey, 1); -- new_key->asid = cfg->asid; -- new_key->iova = aligned_addr; -- g_hash_table_insert(bs->iotlb, new_key, cached_entry); -+ smmu_iotlb_insert(bs, cfg, cached_entry); - status = SMMU_TRANS_SUCCESS; - } - -diff --git a/hw/arm/trace-events b/hw/arm/trace-events -index 0acedcedc6f..b808a1bfc19 100644 ---- a/hw/arm/trace-events -+++ b/hw/arm/trace-events -@@ -14,6 +14,9 @@ smmu_iotlb_inv_all(void) "IOTLB invalidate all" - smmu_iotlb_inv_asid(uint16_t asid) "IOTLB invalidate asid=%d" - smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64 - smmu_inv_notifiers_mr(const char *name) "iommu mr=%s" -+smmu_iotlb_lookup_hit(uint16_t asid, uint64_t addr, uint32_t 
hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" -+smmu_iotlb_lookup_miss(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" -+smmu_iotlb_insert(uint16_t asid, uint64_t addr) "IOTLB ++ asid=%d addr=0x%"PRIx64 - - # smmuv3.c - smmuv3_read_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)" -@@ -46,8 +49,6 @@ smmuv3_cmdq_tlbi_nh_va(int vmid, int asid, uint64_t addr, bool leaf) "vmid =%d a - smmuv3_cmdq_tlbi_nh_vaa(int vmid, uint64_t addr) "vmid =%d addr=0x%"PRIx64 - smmuv3_cmdq_tlbi_nh(void) "" - smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d" --smmu_iotlb_cache_hit(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" --smmu_iotlb_cache_miss(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" - smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid %d" - smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s" - smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s" -diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h -index 1f37844e5c9..a28650c9350 100644 ---- a/include/hw/arm/smmu-common.h -+++ b/include/hw/arm/smmu-common.h -@@ -153,6 +153,8 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid); - - #define SMMU_IOTLB_MAX_SIZE 256 - -+IOMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, hwaddr iova); -+void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, IOMMUTLBEntry *entry); - void smmu_iotlb_inv_all(SMMUState *s); - void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid); - void smmu_iotlb_inv_iova(SMMUState *s, uint16_t asid, dma_addr_t iova); --- -2.27.0 - diff --git 
a/SOURCES/kvm-hw-arm-smmu-common-Factorize-some-code-in-smmu_ptw_6.patch b/SOURCES/kvm-hw-arm-smmu-common-Factorize-some-code-in-smmu_ptw_6.patch deleted file mode 100644 index d973b13..0000000 --- a/SOURCES/kvm-hw-arm-smmu-common-Factorize-some-code-in-smmu_ptw_6.patch +++ /dev/null @@ -1,124 +0,0 @@ -From 79718d8c67c9c54fa86a77f66aa8784aca7651d5 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Tue, 12 Jan 2021 14:36:26 -0500 -Subject: [PATCH 02/17] hw/arm/smmu-common: Factorize some code in - smmu_ptw_64() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210112143638.374060-2-eperezma@redhat.com> -Patchwork-id: 100594 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 01/13] hw/arm/smmu-common: Factorize some code in smmu_ptw_64() -Bugzilla: 1843852 -RH-Acked-by: Xiao Wang -RH-Acked-by: Peter Xu -RH-Acked-by: Auger Eric - -From: Eric Auger - -Page and block PTE decoding can share some code. Let's -first handle table PTE and factorize some code shared by -page and block PTEs. - -Signed-off-by: Eric Auger -Reviewed-by: Peter Maydell -Message-id: 20200728150815.11446-2-eric.auger@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 1733837d7cdb207653a849a5f1fa78de878c6ac1) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/arm/smmu-common.c | 48 ++++++++++++++++---------------------------- - 1 file changed, 17 insertions(+), 31 deletions(-) - -diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c -index 245817d23e9..d2ba8b224ba 100644 ---- a/hw/arm/smmu-common.c -+++ b/hw/arm/smmu-common.c -@@ -187,7 +187,7 @@ static int smmu_ptw_64(SMMUTransCfg *cfg, - uint64_t subpage_size = 1ULL << level_shift(level, granule_sz); - uint64_t mask = subpage_size - 1; - uint32_t offset = iova_level_offset(iova, inputsize, level, granule_sz); -- uint64_t pte; -+ uint64_t pte, gpa; - dma_addr_t pte_addr = baseaddr + offset * sizeof(pte); - uint8_t ap; - -@@ -200,56 +200,42 @@ static int smmu_ptw_64(SMMUTransCfg *cfg, - if (is_invalid_pte(pte) || is_reserved_pte(pte, level)) { - trace_smmu_ptw_invalid_pte(stage, level, baseaddr, - pte_addr, offset, pte); -- info->type = SMMU_PTW_ERR_TRANSLATION; -- goto error; -+ break; - } - -- if (is_page_pte(pte, level)) { -- uint64_t gpa = get_page_pte_address(pte, granule_sz); -+ if (is_table_pte(pte, level)) { -+ ap = PTE_APTABLE(pte); - -- ap = PTE_AP(pte); - if (is_permission_fault(ap, perm)) { - info->type = SMMU_PTW_ERR_PERMISSION; - goto error; - } -- -- tlbe->translated_addr = gpa + (iova & mask); -- tlbe->perm = PTE_AP_TO_PERM(ap); -+ baseaddr = get_table_pte_address(pte, granule_sz); -+ level++; -+ continue; -+ } else if (is_page_pte(pte, level)) { -+ gpa = get_page_pte_address(pte, granule_sz); - trace_smmu_ptw_page_pte(stage, level, iova, - baseaddr, pte_addr, pte, gpa); -- return 0; -- } -- if (is_block_pte(pte, level)) { -+ } else { - uint64_t block_size; -- hwaddr gpa = get_block_pte_address(pte, level, granule_sz, -- &block_size); -- -- ap = PTE_AP(pte); -- if (is_permission_fault(ap, perm)) { -- info->type = SMMU_PTW_ERR_PERMISSION; -- goto error; -- } - -+ gpa = get_block_pte_address(pte, level, granule_sz, -+ &block_size); - trace_smmu_ptw_block_pte(stage, level, baseaddr, - pte_addr, pte, iova, gpa, - block_size >> 20); -- 
-- tlbe->translated_addr = gpa + (iova & mask); -- tlbe->perm = PTE_AP_TO_PERM(ap); -- return 0; - } -- -- /* table pte */ -- ap = PTE_APTABLE(pte); -- -+ ap = PTE_AP(pte); - if (is_permission_fault(ap, perm)) { - info->type = SMMU_PTW_ERR_PERMISSION; - goto error; - } -- baseaddr = get_table_pte_address(pte, granule_sz); -- level++; -- } - -+ tlbe->translated_addr = gpa + (iova & mask); -+ tlbe->perm = PTE_AP_TO_PERM(ap); -+ return 0; -+ } - info->type = SMMU_PTW_ERR_TRANSLATION; - - error: --- -2.27.0 - diff --git a/SOURCES/kvm-hw-arm-smmu-common-Manage-IOTLB-block-entries.patch b/SOURCES/kvm-hw-arm-smmu-common-Manage-IOTLB-block-entries.patch deleted file mode 100644 index e118225..0000000 --- a/SOURCES/kvm-hw-arm-smmu-common-Manage-IOTLB-block-entries.patch +++ /dev/null @@ -1,274 +0,0 @@ -From 4770f43dab482e4585d3555933a473cf24e796db Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Tue, 12 Jan 2021 14:36:30 -0500 -Subject: [PATCH 06/17] hw/arm/smmu-common: Manage IOTLB block entries -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210112143638.374060-6-eperezma@redhat.com> -Patchwork-id: 100598 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 05/13] hw/arm/smmu-common: Manage IOTLB block entries -Bugzilla: 1843852 -RH-Acked-by: Xiao Wang -RH-Acked-by: Peter Xu -RH-Acked-by: Auger Eric - -From: Eric Auger - -At the moment each entry in the IOTLB corresponds to a page sized -mapping (4K, 16K or 64K), even if the page belongs to a mapped -block. In case of block mapping this unefficiently consumes IOTLB -entries. - -Change the value of the entry so that it reflects the actual -mapping it belongs to (block or page start address and size). - -Also the level/tg of the entry is encoded in the key. In subsequent -patches we will enable range invalidation. This latter is able -to provide the level/tg of the entry. 
- -Encoding the level/tg directly in the key will allow to invalidate -using g_hash_table_remove() when num_pages equals to 1. - -Signed-off-by: Eric Auger -Reviewed-by: Peter Maydell -Message-id: 20200728150815.11446-6-eric.auger@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 9e54dee71fcfaae69f87b8e1f51485a832266a39) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/smmu-common.c | 67 ++++++++++++++++++++++++++---------- - hw/arm/smmu-internal.h | 7 ++++ - hw/arm/smmuv3.c | 6 ++-- - hw/arm/trace-events | 2 +- - include/hw/arm/smmu-common.h | 10 ++++-- - 5 files changed, 67 insertions(+), 25 deletions(-) - -diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c -index 06e9e38b007..8007edeaaa2 100644 ---- a/hw/arm/smmu-common.c -+++ b/hw/arm/smmu-common.c -@@ -39,7 +39,7 @@ static guint smmu_iotlb_key_hash(gconstpointer v) - - /* Jenkins hash */ - a = b = c = JHASH_INITVAL + sizeof(*key); -- a += key->asid; -+ a += key->asid + key->level + key->tg; - b += extract64(key->iova, 0, 32); - c += extract64(key->iova, 32, 32); - -@@ -51,24 +51,41 @@ static guint smmu_iotlb_key_hash(gconstpointer v) - - static gboolean smmu_iotlb_key_equal(gconstpointer v1, gconstpointer v2) - { -- const SMMUIOTLBKey *k1 = v1; -- const SMMUIOTLBKey *k2 = v2; -+ SMMUIOTLBKey *k1 = (SMMUIOTLBKey *)v1, *k2 = (SMMUIOTLBKey *)v2; - -- return (k1->asid == k2->asid) && (k1->iova == k2->iova); -+ return (k1->asid == k2->asid) && (k1->iova == k2->iova) && -+ (k1->level == k2->level) && (k1->tg == k2->tg); - } - --SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova) -+SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova, -+ uint8_t tg, uint8_t level) - { -- SMMUIOTLBKey key = {.asid = asid, .iova = iova}; -+ SMMUIOTLBKey key = {.asid = asid, .iova = iova, .tg = tg, .level = level}; - - return key; - } - - SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, -- hwaddr iova) -+ SMMUTransTableInfo *tt, hwaddr iova) - { 
-- SMMUIOTLBKey key = smmu_get_iotlb_key(cfg->asid, iova); -- SMMUTLBEntry *entry = g_hash_table_lookup(bs->iotlb, &key); -+ uint8_t tg = (tt->granule_sz - 10) / 2; -+ uint8_t inputsize = 64 - tt->tsz; -+ uint8_t stride = tt->granule_sz - 3; -+ uint8_t level = 4 - (inputsize - 4) / stride; -+ SMMUTLBEntry *entry = NULL; -+ -+ while (level <= 3) { -+ uint64_t subpage_size = 1ULL << level_shift(level, tt->granule_sz); -+ uint64_t mask = subpage_size - 1; -+ SMMUIOTLBKey key; -+ -+ key = smmu_get_iotlb_key(cfg->asid, iova & ~mask, tg, level); -+ entry = g_hash_table_lookup(bs->iotlb, &key); -+ if (entry) { -+ break; -+ } -+ level++; -+ } - - if (entry) { - cfg->iotlb_hits++; -@@ -89,13 +106,14 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, - void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new) - { - SMMUIOTLBKey *key = g_new0(SMMUIOTLBKey, 1); -+ uint8_t tg = (new->granule - 10) / 2; - - if (g_hash_table_size(bs->iotlb) >= SMMU_IOTLB_MAX_SIZE) { - smmu_iotlb_inv_all(bs); - } - -- *key = smmu_get_iotlb_key(cfg->asid, new->entry.iova); -- trace_smmu_iotlb_insert(cfg->asid, new->entry.iova); -+ *key = smmu_get_iotlb_key(cfg->asid, new->entry.iova, tg, new->level); -+ trace_smmu_iotlb_insert(cfg->asid, new->entry.iova, tg, new->level); - g_hash_table_insert(bs->iotlb, key, new); - } - -@@ -114,12 +132,26 @@ static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value, - return SMMU_IOTLB_ASID(*iotlb_key) == asid; - } - --inline void smmu_iotlb_inv_iova(SMMUState *s, uint16_t asid, dma_addr_t iova) -+static gboolean smmu_hash_remove_by_asid_iova(gpointer key, gpointer value, -+ gpointer user_data) - { -- SMMUIOTLBKey key = smmu_get_iotlb_key(asid, iova); -+ SMMUTLBEntry *iter = (SMMUTLBEntry *)value; -+ IOMMUTLBEntry *entry = &iter->entry; -+ SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data; -+ SMMUIOTLBKey iotlb_key = *(SMMUIOTLBKey *)key; -+ -+ if (info->asid >= 0 && info->asid != 
SMMU_IOTLB_ASID(iotlb_key)) { -+ return false; -+ } -+ return (info->iova & ~entry->addr_mask) == entry->iova; -+} -+ -+inline void smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova) -+{ -+ SMMUIOTLBPageInvInfo info = {.asid = asid, .iova = iova}; - - trace_smmu_iotlb_inv_iova(asid, iova); -- g_hash_table_remove(s->iotlb, &key); -+ g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid_iova, &info); - } - - inline void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid) -@@ -247,9 +279,6 @@ static int smmu_ptw_64(SMMUTransCfg *cfg, - baseaddr = extract64(tt->ttb, 0, 48); - baseaddr &= ~indexmask; - -- tlbe->entry.iova = iova; -- tlbe->entry.addr_mask = (1 << granule_sz) - 1; -- - while (level <= 3) { - uint64_t subpage_size = 1ULL << level_shift(level, granule_sz); - uint64_t mask = subpage_size - 1; -@@ -299,7 +328,9 @@ static int smmu_ptw_64(SMMUTransCfg *cfg, - goto error; - } - -- tlbe->entry.translated_addr = gpa + (iova & mask); -+ tlbe->entry.translated_addr = gpa; -+ tlbe->entry.iova = iova & ~mask; -+ tlbe->entry.addr_mask = mask; - tlbe->entry.perm = PTE_AP_TO_PERM(ap); - tlbe->level = level; - tlbe->granule = granule_sz; -diff --git a/hw/arm/smmu-internal.h b/hw/arm/smmu-internal.h -index 3104f768cd2..55147f29be4 100644 ---- a/hw/arm/smmu-internal.h -+++ b/hw/arm/smmu-internal.h -@@ -97,4 +97,11 @@ uint64_t iova_level_offset(uint64_t iova, int inputsize, - } - - #define SMMU_IOTLB_ASID(key) ((key).asid) -+ -+typedef struct SMMUIOTLBPageInvInfo { -+ int asid; -+ uint64_t iova; -+ uint64_t mask; -+} SMMUIOTLBPageInvInfo; -+ - #endif -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index ad8212779d3..067c9480a03 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -662,7 +662,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, - page_mask = (1ULL << (tt->granule_sz)) - 1; - aligned_addr = addr & ~page_mask; - -- cached_entry = smmu_iotlb_lookup(bs, cfg, aligned_addr); -+ cached_entry = smmu_iotlb_lookup(bs, cfg, 
tt, aligned_addr); - if (cached_entry) { - if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) { - status = SMMU_TRANS_ERROR; -@@ -732,7 +732,7 @@ epilogue: - case SMMU_TRANS_SUCCESS: - entry.perm = flag; - entry.translated_addr = cached_entry->entry.translated_addr + -- (addr & page_mask); -+ (addr & cached_entry->entry.addr_mask); - entry.addr_mask = cached_entry->entry.addr_mask; - trace_smmuv3_translate_success(mr->parent_obj.name, sid, addr, - entry.translated_addr, entry.perm); -@@ -960,7 +960,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) - - trace_smmuv3_cmdq_tlbi_nh_vaa(vmid, addr); - smmuv3_inv_notifiers_iova(bs, -1, addr); -- smmu_iotlb_inv_all(bs); -+ smmu_iotlb_inv_iova(bs, -1, addr); - break; - } - case SMMU_CMD_TLBI_NH_VA: -diff --git a/hw/arm/trace-events b/hw/arm/trace-events -index b808a1bfc19..f74d3e920f1 100644 ---- a/hw/arm/trace-events -+++ b/hw/arm/trace-events -@@ -16,7 +16,7 @@ smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d addr - smmu_inv_notifiers_mr(const char *name) "iommu mr=%s" - smmu_iotlb_lookup_hit(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" - smmu_iotlb_lookup_miss(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" --smmu_iotlb_insert(uint16_t asid, uint64_t addr) "IOTLB ++ asid=%d addr=0x%"PRIx64 -+smmu_iotlb_insert(uint16_t asid, uint64_t addr, uint8_t tg, uint8_t level) "IOTLB ++ asid=%d addr=0x%"PRIx64" tg=%d level=%d" - - # smmuv3.c - smmuv3_read_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)" -diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h -index 277923bdc0a..bbf3abc41fd 100644 ---- a/include/hw/arm/smmu-common.h -+++ b/include/hw/arm/smmu-common.h -@@ -97,6 +97,8 @@ typedef struct SMMUPciBus { - typedef 
struct SMMUIOTLBKey { - uint64_t iova; - uint16_t asid; -+ uint8_t tg; -+ uint8_t level; - } SMMUIOTLBKey; - - typedef struct SMMUState { -@@ -159,12 +161,14 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid); - - #define SMMU_IOTLB_MAX_SIZE 256 - --SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, hwaddr iova); -+SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, -+ SMMUTransTableInfo *tt, hwaddr iova); - void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *entry); --SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova); -+SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova, -+ uint8_t tg, uint8_t level); - void smmu_iotlb_inv_all(SMMUState *s); - void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid); --void smmu_iotlb_inv_iova(SMMUState *s, uint16_t asid, dma_addr_t iova); -+void smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova); - - /* Unmap the range of all the notifiers registered to any IOMMU mr */ - void smmu_inv_notifiers_all(SMMUState *s); --- -2.27.0 - diff --git a/SOURCES/kvm-hw-arm-smmuv3-Fix-potential-integer-overflow-CID-143.patch b/SOURCES/kvm-hw-arm-smmuv3-Fix-potential-integer-overflow-CID-143.patch deleted file mode 100644 index 79e75d8..0000000 --- a/SOURCES/kvm-hw-arm-smmuv3-Fix-potential-integer-overflow-CID-143.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 69d71311d3d70282dec3d1f19f9e4b90c7b7c6b9 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Tue, 12 Jan 2021 14:36:33 -0500 -Subject: [PATCH 09/17] hw/arm/smmuv3: Fix potential integer overflow (CID - 1432363) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210112143638.374060-9-eperezma@redhat.com> -Patchwork-id: 100601 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 08/13] hw/arm/smmuv3: Fix potential integer overflow (CID 1432363) -Bugzilla: 1843852 -RH-Acked-by: Xiao Wang -RH-Acked-by: Peter Xu -RH-Acked-by: Auger Eric - -From: 
Philippe Mathieu-Daudé - -Use the BIT_ULL() macro to ensure we use 64-bit arithmetic. -This fixes the following Coverity issue (OVERFLOW_BEFORE_WIDEN): - - CID 1432363 (#1 of 1): Unintentional integer overflow: - - overflow_before_widen: - Potentially overflowing expression 1 << scale with type int - (32 bits, signed) is evaluated using 32-bit arithmetic, and - then used in a context that expects an expression of type - hwaddr (64 bits, unsigned). - -Signed-off-by: Philippe Mathieu-Daudé -Acked-by: Eric Auger -Message-id: 20201030144617.1535064-1-philmd@redhat.com -Reviewed-by: Peter Maydell -Signed-off-by: Peter Maydell -(cherry picked from commit 744a790ec01a30033309e6a2155df4d61061e184) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/smmuv3.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index f4d5d9d8222..a418fab2aa6 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -17,6 +17,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/bitops.h" - #include "hw/irq.h" - #include "hw/sysbus.h" - #include "migration/vmstate.h" -@@ -847,7 +848,7 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd) - scale = CMD_SCALE(cmd); - num = CMD_NUM(cmd); - ttl = CMD_TTL(cmd); -- num_pages = (num + 1) * (1 << (scale)); -+ num_pages = (num + 1) * BIT_ULL(scale); - } - - if (type == SMMU_CMD_TLBI_NH_VA) { --- -2.27.0 - diff --git a/SOURCES/kvm-hw-arm-smmuv3-Get-prepared-for-range-invalidation.patch b/SOURCES/kvm-hw-arm-smmuv3-Get-prepared-for-range-invalidation.patch deleted file mode 100644 index fd52e0c..0000000 --- a/SOURCES/kvm-hw-arm-smmuv3-Get-prepared-for-range-invalidation.patch +++ /dev/null @@ -1,255 +0,0 @@ -From 3f027ac56449e51a61e76c18b97fd341d302dc80 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Tue, 12 Jan 2021 14:36:32 -0500 -Subject: [PATCH 08/17] hw/arm/smmuv3: Get prepared for range invalidation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210112143638.374060-8-eperezma@redhat.com> -Patchwork-id: 100600 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 07/13] hw/arm/smmuv3: Get prepared for range invalidation -Bugzilla: 1843852 -RH-Acked-by: Xiao Wang -RH-Acked-by: Peter Xu -RH-Acked-by: Auger Eric - -From: Eric Auger - -Enhance the smmu_iotlb_inv_iova() helper with range invalidation. -This uses the new fields passed in the NH_VA and NH_VAA commands: -the size of the range, the level and the granule. - -As NH_VA and NH_VAA both use those fields, their decoding and -handling is factorized in a new smmuv3_s1_range_inval() helper. - -Signed-off-by: Eric Auger -Reviewed-by: Peter Maydell -Message-id: 20200728150815.11446-8-eric.auger@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit d52915616c059ed273caa2d496b58e5d215c5962) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/smmu-common.c | 25 +++++++++++--- - hw/arm/smmuv3-internal.h | 4 +++ - hw/arm/smmuv3.c | 64 +++++++++++++++++++++++------------- - hw/arm/trace-events | 4 +-- - include/hw/arm/smmu-common.h | 3 +- - 5 files changed, 69 insertions(+), 31 deletions(-) - -diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c -index 8007edeaaa2..9780404f002 100644 ---- a/hw/arm/smmu-common.c -+++ b/hw/arm/smmu-common.c -@@ -143,15 +143,30 @@ static gboolean smmu_hash_remove_by_asid_iova(gpointer key, gpointer value, - if (info->asid >= 0 && info->asid != SMMU_IOTLB_ASID(iotlb_key)) { - return false; - } -- return (info->iova & ~entry->addr_mask) == entry->iova; -+ return ((info->iova & ~entry->addr_mask) == entry->iova) || -+ ((entry->iova & ~info->mask) == info->iova); - } - --inline void smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova) -+inline void -+smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova, -+ uint8_t tg, uint64_t num_pages, uint8_t ttl) - { -- SMMUIOTLBPageInvInfo info = {.asid = asid, .iova = iova}; -+ 
if (ttl && (num_pages == 1)) { -+ SMMUIOTLBKey key = smmu_get_iotlb_key(asid, iova, tg, ttl); - -- trace_smmu_iotlb_inv_iova(asid, iova); -- g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid_iova, &info); -+ g_hash_table_remove(s->iotlb, &key); -+ } else { -+ /* if tg is not set we use 4KB range invalidation */ -+ uint8_t granule = tg ? tg * 2 + 10 : 12; -+ -+ SMMUIOTLBPageInvInfo info = { -+ .asid = asid, .iova = iova, -+ .mask = (num_pages * 1 << granule) - 1}; -+ -+ g_hash_table_foreach_remove(s->iotlb, -+ smmu_hash_remove_by_asid_iova, -+ &info); -+ } - } - - inline void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid) -diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h -index d190181ef1b..a4ec2c591cd 100644 ---- a/hw/arm/smmuv3-internal.h -+++ b/hw/arm/smmuv3-internal.h -@@ -298,6 +298,8 @@ enum { /* Command completion notification */ - }; - - #define CMD_TYPE(x) extract32((x)->word[0], 0 , 8) -+#define CMD_NUM(x) extract32((x)->word[0], 12 , 5) -+#define CMD_SCALE(x) extract32((x)->word[0], 20 , 5) - #define CMD_SSEC(x) extract32((x)->word[0], 10, 1) - #define CMD_SSV(x) extract32((x)->word[0], 11, 1) - #define CMD_RESUME_AC(x) extract32((x)->word[0], 12, 1) -@@ -310,6 +312,8 @@ enum { /* Command completion notification */ - #define CMD_RESUME_STAG(x) extract32((x)->word[2], 0 , 16) - #define CMD_RESP(x) extract32((x)->word[2], 11, 2) - #define CMD_LEAF(x) extract32((x)->word[2], 0 , 1) -+#define CMD_TTL(x) extract32((x)->word[2], 8 , 2) -+#define CMD_TG(x) extract32((x)->word[2], 10, 2) - #define CMD_STE_RANGE(x) extract32((x)->word[2], 0 , 5) - #define CMD_ADDR(x) ({ \ - uint64_t high = (uint64_t)(x)->word[3]; \ -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index ae2b769f891..f4d5d9d8222 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -773,42 +773,49 @@ epilogue: - * @n: notifier to be called - * @asid: address space ID or negative value if we don't care - * @iova: iova -+ * @tg: translation granule (if communicated 
through range invalidation) -+ * @num_pages: number of @granule sized pages (if tg != 0), otherwise 1 - */ - static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, - IOMMUNotifier *n, -- int asid, -- dma_addr_t iova) -+ int asid, dma_addr_t iova, -+ uint8_t tg, uint64_t num_pages) - { - SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu); -- SMMUEventInfo event = {.inval_ste_allowed = true}; -- SMMUTransTableInfo *tt; -- SMMUTransCfg *cfg; - IOMMUTLBEntry entry; -+ uint8_t granule = tg; - -- cfg = smmuv3_get_config(sdev, &event); -- if (!cfg) { -- return; -- } -+ if (!tg) { -+ SMMUEventInfo event = {.inval_ste_allowed = true}; -+ SMMUTransCfg *cfg = smmuv3_get_config(sdev, &event); -+ SMMUTransTableInfo *tt; - -- if (asid >= 0 && cfg->asid != asid) { -- return; -- } -+ if (!cfg) { -+ return; -+ } - -- tt = select_tt(cfg, iova); -- if (!tt) { -- return; -+ if (asid >= 0 && cfg->asid != asid) { -+ return; -+ } -+ -+ tt = select_tt(cfg, iova); -+ if (!tt) { -+ return; -+ } -+ granule = tt->granule_sz; - } - - entry.target_as = &address_space_memory; - entry.iova = iova; -- entry.addr_mask = (1 << tt->granule_sz) - 1; -+ entry.addr_mask = num_pages * (1 << granule) - 1; - entry.perm = IOMMU_NONE; - - memory_region_notify_one(n, &entry); - } - --/* invalidate an asid/iova tuple in all mr's */ --static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) -+/* invalidate an asid/iova range tuple in all mr's */ -+static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova, -+ uint8_t tg, uint64_t num_pages) - { - SMMUDevice *sdev; - -@@ -816,28 +823,39 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) - IOMMUMemoryRegion *mr = &sdev->iommu; - IOMMUNotifier *n; - -- trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, iova); -+ trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, iova, -+ tg, num_pages); - - IOMMU_NOTIFIER_FOREACH(n, mr) { -- smmuv3_notify_iova(mr, n, asid, iova); -+ 
smmuv3_notify_iova(mr, n, asid, iova, tg, num_pages); - } - } - } - - static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd) - { -+ uint8_t scale = 0, num = 0, ttl = 0; - dma_addr_t addr = CMD_ADDR(cmd); - uint8_t type = CMD_TYPE(cmd); - uint16_t vmid = CMD_VMID(cmd); - bool leaf = CMD_LEAF(cmd); -+ uint8_t tg = CMD_TG(cmd); -+ hwaddr num_pages = 1; - int asid = -1; - -+ if (tg) { -+ scale = CMD_SCALE(cmd); -+ num = CMD_NUM(cmd); -+ ttl = CMD_TTL(cmd); -+ num_pages = (num + 1) * (1 << (scale)); -+ } -+ - if (type == SMMU_CMD_TLBI_NH_VA) { - asid = CMD_ASID(cmd); - } -- trace_smmuv3_s1_range_inval(vmid, asid, addr, leaf); -- smmuv3_inv_notifiers_iova(s, asid, addr); -- smmu_iotlb_inv_iova(s, asid, addr); -+ trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf); -+ smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages); -+ smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl); - } - - static int smmuv3_cmdq_consume(SMMUv3State *s) -diff --git a/hw/arm/trace-events b/hw/arm/trace-events -index c219fe9e828..3d905e0f7d0 100644 ---- a/hw/arm/trace-events -+++ b/hw/arm/trace-events -@@ -45,11 +45,11 @@ smmuv3_cmdq_cfgi_ste_range(int start, int end) "start=0x%d - end=0x%d" - smmuv3_cmdq_cfgi_cd(uint32_t sid) "streamid = %d" - smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache HIT for sid %d (hits=%d, misses=%d, hit rate=%d)" - smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid %d (hits=%d, misses=%d, hit rate=%d)" --smmuv3_s1_range_inval(int vmid, int asid, uint64_t addr, bool leaf) "vmid =%d asid =%d addr=0x%"PRIx64" leaf=%d" -+smmuv3_s1_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf) "vmid =%d asid =%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d" - smmuv3_cmdq_tlbi_nh(void) "" - smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d" - smmuv3_config_cache_inv(uint32_t sid) 
"Config cache INV for sid %d" - smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s" - smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s" --smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint64_t iova) "iommu mr=%s asid=%d iova=0x%"PRIx64 -+smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64 - -diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h -index bbf3abc41fd..13489a1ac0d 100644 ---- a/include/hw/arm/smmu-common.h -+++ b/include/hw/arm/smmu-common.h -@@ -168,7 +168,8 @@ SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova, - uint8_t tg, uint8_t level); - void smmu_iotlb_inv_all(SMMUState *s); - void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid); --void smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova); -+void smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova, -+ uint8_t tg, uint64_t num_pages, uint8_t ttl); - - /* Unmap the range of all the notifiers registered to any IOMMU mr */ - void smmu_inv_notifiers_all(SMMUState *s); --- -2.27.0 - diff --git a/SOURCES/kvm-hw-arm-smmuv3-Introduce-smmuv3_s1_range_inval-helper.patch b/SOURCES/kvm-hw-arm-smmuv3-Introduce-smmuv3_s1_range_inval-helper.patch deleted file mode 100644 index e77c403..0000000 --- a/SOURCES/kvm-hw-arm-smmuv3-Introduce-smmuv3_s1_range_inval-helper.patch +++ /dev/null @@ -1,115 +0,0 @@ -From c4ae2dbb8ee406f0a015b35fb76b3d6d131900d6 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Tue, 12 Jan 2021 14:36:31 -0500 -Subject: [PATCH 07/17] hw/arm/smmuv3: Introduce smmuv3_s1_range_inval() helper -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210112143638.374060-7-eperezma@redhat.com> -Patchwork-id: 100599 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 06/13] hw/arm/smmuv3: Introduce 
smmuv3_s1_range_inval() helper -Bugzilla: 1843852 -RH-Acked-by: Xiao Wang -RH-Acked-by: Peter Xu -RH-Acked-by: Auger Eric - -From: Eric Auger - -Let's introduce an helper for S1 IOVA range invalidation. -This will be used for NH_VA and NH_VAA commands. It decodes -the same fields, trace, calls the UNMAP notifiers and -invalidate the corresponding IOTLB entries. - -At the moment, we do not support 3.2 range invalidation yet. -So it reduces to a single IOVA invalidation. - -Note the leaf bit now is also decoded for the CMD_TLBI_NH_VAA -command. At the moment it is only used for tracing. - -Signed-off-by: Eric Auger -Reviewed-by: Peter Maydell -Message-id: 20200728150815.11446-7-eric.auger@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit c0f9ef70377cfcbd0fa6559d5dc729a930d71b7c) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/smmuv3.c | 36 +++++++++++++++++------------------- - hw/arm/trace-events | 3 +-- - 2 files changed, 18 insertions(+), 21 deletions(-) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 067c9480a03..ae2b769f891 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -824,6 +824,22 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) - } - } - -+static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd) -+{ -+ dma_addr_t addr = CMD_ADDR(cmd); -+ uint8_t type = CMD_TYPE(cmd); -+ uint16_t vmid = CMD_VMID(cmd); -+ bool leaf = CMD_LEAF(cmd); -+ int asid = -1; -+ -+ if (type == SMMU_CMD_TLBI_NH_VA) { -+ asid = CMD_ASID(cmd); -+ } -+ trace_smmuv3_s1_range_inval(vmid, asid, addr, leaf); -+ smmuv3_inv_notifiers_iova(s, asid, addr); -+ smmu_iotlb_inv_iova(s, asid, addr); -+} -+ - static int smmuv3_cmdq_consume(SMMUv3State *s) - { - SMMUState *bs = ARM_SMMU(s); -@@ -954,27 +970,9 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) - smmu_iotlb_inv_all(bs); - break; - case SMMU_CMD_TLBI_NH_VAA: -- { -- dma_addr_t addr = CMD_ADDR(&cmd); -- uint16_t vmid = CMD_VMID(&cmd); -- 
-- trace_smmuv3_cmdq_tlbi_nh_vaa(vmid, addr); -- smmuv3_inv_notifiers_iova(bs, -1, addr); -- smmu_iotlb_inv_iova(bs, -1, addr); -- break; -- } - case SMMU_CMD_TLBI_NH_VA: -- { -- uint16_t asid = CMD_ASID(&cmd); -- uint16_t vmid = CMD_VMID(&cmd); -- dma_addr_t addr = CMD_ADDR(&cmd); -- bool leaf = CMD_LEAF(&cmd); -- -- trace_smmuv3_cmdq_tlbi_nh_va(vmid, asid, addr, leaf); -- smmuv3_inv_notifiers_iova(bs, asid, addr); -- smmu_iotlb_inv_iova(bs, asid, addr); -+ smmuv3_s1_range_inval(bs, &cmd); - break; -- } - case SMMU_CMD_TLBI_EL3_ALL: - case SMMU_CMD_TLBI_EL3_VA: - case SMMU_CMD_TLBI_EL2_ALL: -diff --git a/hw/arm/trace-events b/hw/arm/trace-events -index f74d3e920f1..c219fe9e828 100644 ---- a/hw/arm/trace-events -+++ b/hw/arm/trace-events -@@ -45,8 +45,7 @@ smmuv3_cmdq_cfgi_ste_range(int start, int end) "start=0x%d - end=0x%d" - smmuv3_cmdq_cfgi_cd(uint32_t sid) "streamid = %d" - smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache HIT for sid %d (hits=%d, misses=%d, hit rate=%d)" - smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid %d (hits=%d, misses=%d, hit rate=%d)" --smmuv3_cmdq_tlbi_nh_va(int vmid, int asid, uint64_t addr, bool leaf) "vmid =%d asid =%d addr=0x%"PRIx64" leaf=%d" --smmuv3_cmdq_tlbi_nh_vaa(int vmid, uint64_t addr) "vmid =%d addr=0x%"PRIx64 -+smmuv3_s1_range_inval(int vmid, int asid, uint64_t addr, bool leaf) "vmid =%d asid =%d addr=0x%"PRIx64" leaf=%d" - smmuv3_cmdq_tlbi_nh(void) "" - smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d" - smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid %d" --- -2.27.0 - diff --git a/SOURCES/kvm-hw-arm-virt-Add-8.6-machine-type.patch b/SOURCES/kvm-hw-arm-virt-Add-8.6-machine-type.patch new file mode 100644 index 0000000..f3c5492 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Add-8.6-machine-type.patch @@ -0,0 +1,57 @@ +From a154eb35d738aecf552d57d99499facce1c834ba Mon Sep 17 00:00:00 2001 +From: 
Eric Auger +Date: Mon, 20 Dec 2021 15:24:24 +0100 +Subject: [PATCH 4/6] hw/arm/virt: Add 8.6 machine type + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [4/5] d0df3e796d3e9a6ca2af1e3b33fc6021bcac5d09 +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Add 8.6 machine type. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 6a4173b6c3..c9c17b9d45 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3228,17 +3228,23 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + ++static void rhel860_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 6, 0) ++ + static void rhel850_virt_options(MachineClass *mc) + { + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + +- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ rhel860_virt_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); + mc->smp_props.prefer_sockets = true; + vmc->no_cpu_topology = true; + vmc->no_tcg_its = true; + } +-DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) ++DEFINE_RHEL_MACHINE(8, 5, 0) + + static void rhel840_virt_options(MachineClass *mc) + { +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch b/SOURCES/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch new file mode 100644 index 0000000..679f436 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch @@ -0,0 +1,86 @@ +From 1b4a8daf695a81f18ba70bea91b199da215da4e1 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 5 Jan 2022 16:17:10 +0100 +Subject: [PATCH 5/6] hw/arm/virt: Check 
no_tcg_its and minor style changes + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [5/5] 57e77446ff5a1a7efe152b2c907c0a0ca5487ab7 +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Truly allow TCG ITS instantiation according to the no_tcg_its +class flag. Otherwise it is always set to false. + +We also take benefit of this patch to do some minor non +functional style changes to be closer to the upstream code. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c9c17b9d45..dbf0a6d62f 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3157,6 +3157,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "in ACPI table header." + "The string may be up to 6 bytes in size"); + ++ + object_class_property_add_str(oc, "x-oem-table-id", + virt_get_oem_table_id, + virt_set_oem_table_id); +@@ -3164,6 +3165,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Override the default value of field OEM Table ID " + "in ACPI table header." 
+ "The string may be up to 8 bytes in size"); ++ + } + + static void rhel_virt_instance_init(Object *obj) +@@ -3188,24 +3190,32 @@ static void rhel_virt_instance_init(Object *obj) + } else { + /* Default allows ITS instantiation */ + vms->its = true; ++ ++ if (vmc->no_tcg_its) { ++ vms->tcg_its = false; ++ } else { ++ vms->tcg_its = true; ++ } + } + + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; + ++ /* The default root bus is attached to iommu by default */ ++ vms->default_bus_bypass_iommu = false; ++ + /* Default disallows RAS instantiation and is non-configurable for RHEL */ + vms->ras = false; + + /* MTE is disabled by default and non-configurable for RHEL */ + vms->mte = false; + +- vms->default_bus_bypass_iommu = false; + vms->irqmap = a15irqmap; + + virt_flash_create(vms); ++ + vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); +- + } + + static const TypeInfo rhel_machine_info = { +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch b/SOURCES/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch new file mode 100644 index 0000000..734756d --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch @@ -0,0 +1,78 @@ +From 8d5b57798d079307a98f6be5e1f6d28d1937a2fe Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 15:50:44 +0100 +Subject: [PATCH 1/6] hw/arm/virt: Register "iommu" as a class property + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [1/5] 74b01bb90213493db700d5bdf81dd99892571972 +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Register the "iommu" option as a class property. This mirrors what +was done in upstream commit b91def7b ("arm/virt: Register +most properties as class properties"). 
+ +While we are at it we also move the "x-oem-id" and "x-oem-table-id" +registrations at the very end of the rhel_machine_class_init() +function. This makes our life easier when comparing with upstream. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e8941afd01..684ffce52e 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3131,6 +3131,18 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set GIC version. " + "Valid values are 2, 3, host and max"); + ++ object_class_property_add_str(oc, "iommu", virt_get_iommu, virt_set_iommu); ++ object_class_property_set_description(oc, "iommu", ++ "Set the IOMMU type. " ++ "Valid values are none and smmuv3"); ++ ++ object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ virt_get_default_bus_bypass_iommu, ++ virt_set_default_bus_bypass_iommu); ++ object_class_property_set_description(oc, "default_bus_bypass_iommu", ++ "Set on/off to enable/disable " ++ "bypass_iommu for default root bus"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); +@@ -3146,10 +3158,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Override the default value of field OEM Table ID " + "in ACPI table header." + "The string may be up to 8 bytes in size"); +- object_class_property_add_bool(oc, "default_bus_bypass_iommu", +- virt_get_default_bus_bypass_iommu, +- virt_set_default_bus_bypass_iommu); +- + } + + static void rhel_virt_instance_init(Object *obj) +@@ -3183,10 +3191,6 @@ static void rhel_virt_instance_init(Object *obj) + + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; +- object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu); +- object_property_set_description(obj, "iommu", +- "Set the IOMMU type. 
" +- "Valid values are none and smmuv3"); + + /* Default disallows RAS instantiation and is non-configurable for RHEL */ + vms->ras = false; +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-arm-virt-Register-its-as-a-class-property.patch b/SOURCES/kvm-hw-arm-virt-Register-its-as-a-class-property.patch new file mode 100644 index 0000000..91b353a --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Register-its-as-a-class-property.patch @@ -0,0 +1,57 @@ +From 07e2094cd86c1be349c0bdda69acd1857afacb66 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 16:04:59 +0100 +Subject: [PATCH 2/6] hw/arm/virt: Register "its" as a class property + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [2/5] 4ddfa57495578127770f93689c4d9f111a12b91c +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Register "its" as a class property. This mirrors what was done +in commit 27edeeaafe43 ("virt: Register "its" as class property"). 
+ +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 684ffce52e..d679391eb0 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3143,6 +3143,12 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set on/off to enable/disable " + "bypass_iommu for default root bus"); + ++ object_class_property_add_bool(oc, "its", virt_get_its, ++ virt_set_its); ++ object_class_property_set_description(oc, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); +@@ -3182,11 +3188,6 @@ static void rhel_virt_instance_init(Object *obj) + } else { + /* Default allows ITS instantiation */ + vms->its = true; +- object_property_add_bool(obj, "its", virt_get_its, +- virt_set_its); +- object_property_set_description(obj, "its", +- "Set on/off to enable/disable " +- "ITS instantiation"); + } + + /* Default disallows iommu instantiation */ +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch b/SOURCES/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch new file mode 100644 index 0000000..25e20ea --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch @@ -0,0 +1,46 @@ +From e896ba2bfbb613576ec3fbe5b948a326ac06193d Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 15:58:38 +0100 +Subject: [PATCH 3/6] hw/arm/virt: Rename default_bus_bypass_iommu + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [3/5] 3ed0425391dab7cf14c6e66fc1b2430be1152d6c +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Rename "default_bus_bypass_iommu" into "default-bus-bypass-iommu". 
+This mirrors what was done in upstream commit: +9dad363a223 ("hw/arm/virt: Rename default_bus_bypass_iommu") + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index d679391eb0..6a4173b6c3 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3136,10 +3136,10 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set the IOMMU type. " + "Valid values are none and smmuv3"); + +- object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ object_class_property_add_bool(oc, "default-bus-bypass-iommu", + virt_get_default_bus_bypass_iommu, + virt_set_default_bus_bypass_iommu); +- object_class_property_set_description(oc, "default_bus_bypass_iommu", ++ object_class_property_set_description(oc, "default-bus-bypass-iommu", + "Set on/off to enable/disable " + "bypass_iommu for default root bus"); + +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-ehci-check-return-value-of-usb_packet_map.patch b/SOURCES/kvm-hw-ehci-check-return-value-of-usb_packet_map.patch deleted file mode 100644 index 3e3ed87..0000000 --- a/SOURCES/kvm-hw-ehci-check-return-value-of-usb_packet_map.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 6955223aa15ab6ea53322218ec03fb3dc2b776f8 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Thu, 14 Jan 2021 00:07:05 -0500 -Subject: [PATCH 16/17] hw: ehci: check return value of 'usb_packet_map' - -RH-Author: Jon Maloy -Message-id: <20210114000705.945169-2-jmaloy@redhat.com> -Patchwork-id: 100634 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] hw: ehci: check return value of 'usb_packet_map' -Bugzilla: 1898628 -RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: Li Qiang - -If 'usb_packet_map' fails, we should stop to process the usb -request. 
- -Signed-off-by: Li Qiang -Message-Id: <20200812161727.29412-1-liq3ea@163.com> -Signed-off-by: Gerd Hoffmann - -(cherry picked from commit 2fdb42d840400d58f2e706ecca82c142b97bcbd6) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - hw/usb/hcd-ehci.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c -index 56ab2f457f4..024b1ed6b67 100644 ---- a/hw/usb/hcd-ehci.c -+++ b/hw/usb/hcd-ehci.c -@@ -1374,7 +1374,10 @@ static int ehci_execute(EHCIPacket *p, const char *action) - spd = (p->pid == USB_TOKEN_IN && NLPTR_TBIT(p->qtd.altnext) == 0); - usb_packet_setup(&p->packet, p->pid, ep, 0, p->qtdaddr, spd, - (p->qtd.token & QTD_TOKEN_IOC) != 0); -- usb_packet_map(&p->packet, &p->sgl); -+ if (usb_packet_map(&p->packet, &p->sgl)) { -+ qemu_sglist_destroy(&p->sgl); -+ return -1; -+ } - p->async = EHCI_ASYNC_INITIALIZED; - } - -@@ -1453,7 +1456,10 @@ static int ehci_process_itd(EHCIState *ehci, - if (ep && ep->type == USB_ENDPOINT_XFER_ISOC) { - usb_packet_setup(&ehci->ipacket, pid, ep, 0, addr, false, - (itd->transact[i] & ITD_XACT_IOC) != 0); -- usb_packet_map(&ehci->ipacket, &ehci->isgl); -+ if (usb_packet_map(&ehci->ipacket, &ehci->isgl)) { -+ qemu_sglist_destroy(&ehci->isgl); -+ return -1; -+ } - usb_handle_packet(dev, &ehci->ipacket); - usb_packet_unmap(&ehci->ipacket, &ehci->isgl); - } else { --- -2.27.0 - diff --git a/SOURCES/kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch b/SOURCES/kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch deleted file mode 100644 index 650555c..0000000 --- a/SOURCES/kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch +++ /dev/null @@ -1,80 +0,0 @@ -From dad4f9beaa3fd1eec1e0dd46c3d5cd2f444c0f48 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 13 Apr 2021 20:05:51 -0400 -Subject: [PATCH 1/7] hw/intc/arm_gic: Fix interrupt ID in GICD_SGIR register -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210413200551.3825495-2-jmaloy@redhat.com> -Patchwork-id: 101471 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/1] hw/intc/arm_gic: Fix interrupt ID in GICD_SGIR register -Bugzilla: 1925430 -RH-Acked-by: Andrew Jones -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Philippe Mathieu-Daudé - -From: Philippe Mathieu-Daudé - -Per the ARM Generic Interrupt Controller Architecture specification -(document "ARM IHI 0048B.b (ID072613)"), the SGIINTID field is 4 bit, -not 10: - - - 4.3 Distributor register descriptions - - 4.3.15 Software Generated Interrupt Register, GICD_SG - - - Table 4-21 GICD_SGIR bit assignments - - The Interrupt ID of the SGI to forward to the specified CPU - interfaces. The value of this field is the Interrupt ID, in - the range 0-15, for example a value of 0b0011 specifies - Interrupt ID 3. - -Correct the irq mask to fix an undefined behavior (which eventually -lead to a heap-buffer-overflow, see [Buglink]): - - $ echo 'writel 0x8000f00 0xff4affb0' | qemu-system-aarch64 -M virt,accel=qtest -qtest stdio - [I 1612088147.116987] OPENED - [R +0.278293] writel 0x8000f00 0xff4affb0 - ../hw/intc/arm_gic.c:1498:13: runtime error: index 944 out of bounds for type 'uint8_t [16][8]' - SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior ../hw/intc/arm_gic.c:1498:13 - -This fixes a security issue when running with KVM on Arm with -kernel-irqchip=off. (The default is kernel-irqchip=on, which is -unaffected, and which is also the correct choice for performance.) 
- -Cc: qemu-stable@nongnu.org -Fixes: CVE-2021-20221 -Fixes: 9ee6e8bb853 ("ARMv7 support.") -Buglink: https://bugs.launchpad.net/qemu/+bug/1913916 -Buglink: https://bugs.launchpad.net/qemu/+bug/1913917 -Reported-by: Alexander Bulekov -Signed-off-by: Philippe Mathieu-Daudé -Message-id: 20210131103401.217160-1-f4bug@amsat.org -Reviewed-by: Peter Maydell -Signed-off-by: Peter Maydell - -(cherry picked from commit edfe2eb4360cde4ed5d95bda7777edcb3510f76a) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - hw/intc/arm_gic.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c -index 1d7da7baa2..df355f4d11 100644 ---- a/hw/intc/arm_gic.c -+++ b/hw/intc/arm_gic.c -@@ -1455,7 +1455,7 @@ static void gic_dist_writel(void *opaque, hwaddr offset, - int target_cpu; - - cpu = gic_get_current_cpu(s); -- irq = value & 0x3ff; -+ irq = value & 0xf; - switch ((value >> 24) & 3) { - case 0: - mask = (value >> 16) & ALL_CPU_MASK; --- -2.27.0 - diff --git a/SOURCES/kvm-hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch b/SOURCES/kvm-hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch deleted file mode 100644 index cf9f6ab..0000000 --- a/SOURCES/kvm-hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch +++ /dev/null @@ -1,62 +0,0 @@ -From d48034cc2b331313995c1d19060decc0e5ca1356 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Thu, 14 Jan 2021 01:35:41 -0500 -Subject: [PATCH 17/17] hw/net/e1000e: advance desc_offset in case of null - descriptor -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210114013541.956735-2-jmaloy@redhat.com> -Patchwork-id: 100638 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] hw/net/e1000e: advance desc_offset in case of null descriptor -Bugzilla: 1903070 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Thomas Huth - -From: Prasad J Pandit - -While receiving 
packets via e1000e_write_packet_to_guest() routine, -'desc_offset' is advanced only when RX descriptor is processed. And -RX descriptor is not processed if it has NULL buffer address. -This may lead to an infinite loop condition. Increament 'desc_offset' -to process next descriptor in the ring to avoid infinite loop. - -Reported-by: Cheol-woo Myung <330cjfdn@gmail.com> -Signed-off-by: Prasad J Pandit -Signed-off-by: Jason Wang - -(cherry picked from c2cb511634012344e3d0fe49a037a33b12d8a98a) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/e1000e_core.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c -index 9b76f82db5b..166054f2e3f 100644 ---- a/hw/net/e1000e_core.c -+++ b/hw/net/e1000e_core.c -@@ -1596,13 +1596,13 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, - (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); - } - } -- desc_offset += desc_size; -- if (desc_offset >= total_size) { -- is_last = true; -- } - } else { /* as per intel docs; skip descriptors with null buf addr */ - trace_e1000e_rx_null_descriptor(); - } -+ desc_offset += desc_size; -+ if (desc_offset >= total_size) { -+ is_last = true; -+ } - - e1000e_write_rx_descr(core, desc, is_last ? core->rx_pkt : NULL, - rss_info, do_ps ? 
ps_hdr_len : 0, &bastate.written); --- -2.27.0 - diff --git a/SOURCES/kvm-hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch b/SOURCES/kvm-hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch deleted file mode 100644 index 228bdff..0000000 --- a/SOURCES/kvm-hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 94ca0eddc117b57da009dacb19740fc8ae00143a Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Mon, 28 Sep 2020 18:27:35 -0400 -Subject: [PATCH] hw/net/net_tx_pkt: fix assertion failure in - net_tx_pkt_add_raw_fragment() - -RH-Author: Jon Maloy -Message-id: <20200928182735.1008839-2-jmaloy@redhat.com> -Patchwork-id: 98497 -O-Subject: [RHEL-8.0.0 qemu-kvm PATCH 1/1] hw/net/net_tx_pkt: fix assertion failure in net_tx_pkt_add_raw_fragment() -Bugzilla: 1860994 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Xiao Wang -RH-Acked-by: Thomas Huth -RH-Acked-by: Stefan Hajnoczi - -From: Mauro Matteo Cascella - -An assertion failure issue was found in the code that processes network packets -while adding data fragments into the packet context. It could be abused by a -malicious guest to abort the QEMU process on the host. This patch replaces the -affected assert() with a conditional statement, returning false if the current -data fragment exceeds max_raw_frags. - -Reported-by: Alexander Bulekov -Reported-by: Ziming Zhang -Reviewed-by: Dmitry Fleytman -Signed-off-by: Mauro Matteo Cascella -Signed-off-by: Jason Wang - -(cherry picked from commit 035e69b063835a5fd23cacabd63690a3d84532a8) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/net_tx_pkt.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c -index 162f802dd77..54d4c3bbd02 100644 ---- a/hw/net/net_tx_pkt.c -+++ b/hw/net/net_tx_pkt.c -@@ -379,7 +379,10 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, - hwaddr mapped_len = 0; - struct iovec *ventry; - assert(pkt); -- assert(pkt->max_raw_frags > pkt->raw_frags); -+ -+ if (pkt->raw_frags >= pkt->max_raw_frags) { -+ return false; -+ } - - if (!len) { - return true; --- -2.27.0 - diff --git a/SOURCES/kvm-hw-pci-pcie-Forbid-hot-plug-if-it-s-disabled-on-the-.patch b/SOURCES/kvm-hw-pci-pcie-Forbid-hot-plug-if-it-s-disabled-on-the-.patch deleted file mode 100644 index 2f4f6dd..0000000 --- a/SOURCES/kvm-hw-pci-pcie-Forbid-hot-plug-if-it-s-disabled-on-the-.patch +++ /dev/null @@ -1,77 +0,0 @@ -From fe8a9f211fba3588d60507b3d2f48c41d8ee3c79 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Mon, 4 May 2020 21:25:04 +0100 -Subject: [PATCH 1/9] hw/pci/pcie: Forbid hot-plug if it's disabled on the slot - -RH-Author: Julia Suvorova -Message-id: <20200504212505.15977-2-jusual@redhat.com> -Patchwork-id: 96257 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/2] hw/pci/pcie: Forbid hot-plug if it's disabled on the slot -Bugzilla: 1820531 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Igor Mammedov -RH-Acked-by: Peter Xu - -Raise an error when trying to hot-plug/unplug a device through QMP to a device -with disabled hot-plug capability. This makes the device behaviour more -consistent and provides an explanation of the failure in the case of -asynchronous unplug. - -Signed-off-by: Julia Suvorova -Message-Id: <20200427182440.92433-2-jusual@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Marcel Apfelbaum -(cherry picked from commit 0501e1aa1d32a6e02dd06a79bba97fbe9d557cb5) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/pci/pcie.c | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c -index 0eb3a2a..6b48d04 100644 ---- a/hw/pci/pcie.c -+++ b/hw/pci/pcie.c -@@ -415,6 +415,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - { - PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); - uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; -+ uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); - PCIDevice *pci_dev = PCI_DEVICE(dev); - - /* Don't send event when device is enabled during qemu machine creation: -@@ -430,6 +431,13 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - return; - } - -+ /* Check if hot-plug is disabled on the slot */ -+ if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { -+ error_setg(errp, "Hot-plug failed: unsupported by the port device '%s'", -+ DEVICE(hotplug_pdev)->id); -+ return; -+ } -+ - /* To enable multifunction hot-plug, we just ensure the function - * 0 added last. When function 0 is added, we set the sltsta and - * inform OS via event notification. 
-@@ -470,6 +478,17 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, - Error *local_err = NULL; - PCIDevice *pci_dev = PCI_DEVICE(dev); - PCIBus *bus = pci_get_bus(pci_dev); -+ PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); -+ uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; -+ uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); -+ -+ /* Check if hot-unplug is disabled on the slot */ -+ if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { -+ error_setg(errp, "Hot-unplug failed: " -+ "unsupported by the port device '%s'", -+ DEVICE(hotplug_pdev)->id); -+ return; -+ } - - pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &local_err); - if (local_err) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch b/SOURCES/kvm-hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch deleted file mode 100644 index 0c44c77..0000000 --- a/SOURCES/kvm-hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 035f8aaabf2c31cd6206bff6da23a12fee69d1b7 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Tue, 16 Jun 2020 12:25:36 -0400 -Subject: [PATCH 1/3] hw/pci/pcie: Move hot plug capability check to pre_plug - callback - -RH-Author: Julia Suvorova -Message-id: <20200616122536.1027685-1-jusual@redhat.com> -Patchwork-id: 97548 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/1] hw/pci/pcie: Move hot plug capability check to pre_plug callback -Bugzilla: 1820531 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Auger Eric -RH-Acked-by: Sergio Lopez Pascual - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1820531 -BRANCH: rhel-av-8.2.1 -UPSTREAM: merged -BREW: 29422092 - -Check for hot plug capability earlier to avoid removing devices attached -during the initialization process. 
- -Run qemu with an unattached drive: - -drive file=$FILE,if=none,id=drive0 \ - -device pcie-root-port,id=rp0,slot=3,bus=pcie.0,hotplug=off -Hotplug a block device: - device_add virtio-blk-pci,id=blk0,drive=drive0,bus=rp0 -If hotplug fails on plug_cb, drive0 will be deleted. - -Fixes: 0501e1aa1d32a6 ("hw/pci/pcie: Forbid hot-plug if it's disabled on the slot") - -Acked-by: Igor Mammedov -Signed-off-by: Julia Suvorova -Message-Id: <20200604125947.881210-1-jusual@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 0dabc0f6544f2c0310546f6d6cf3b68979580a9c) -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/pci/pcie.c | 19 +++++++++++-------- - 1 file changed, 11 insertions(+), 8 deletions(-) - -diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c -index abc99b6eff..1386dd228c 100644 ---- a/hw/pci/pcie.c -+++ b/hw/pci/pcie.c -@@ -407,6 +407,17 @@ static void pcie_cap_slot_plug_common(PCIDevice *hotplug_dev, DeviceState *dev, - void pcie_cap_slot_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp) - { -+ PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); -+ uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; -+ uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); -+ -+ /* Check if hot-plug is disabled on the slot */ -+ if (dev->hotplugged && (sltcap & PCI_EXP_SLTCAP_HPC) == 0) { -+ error_setg(errp, "Hot-plug failed: unsupported by the port device '%s'", -+ DEVICE(hotplug_pdev)->id); -+ return; -+ } -+ - pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, errp); - } - -@@ -415,7 +426,6 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - { - PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); - uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; -- uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); - PCIDevice *pci_dev = PCI_DEVICE(dev); - - /* Don't send event when device is enabled during qemu machine creation: -@@ -431,13 +441,6 
@@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - return; - } - -- /* Check if hot-plug is disabled on the slot */ -- if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { -- error_setg(errp, "Hot-plug failed: unsupported by the port device '%s'", -- DEVICE(hotplug_pdev)->id); -- return; -- } -- - /* To enable multifunction hot-plug, we just ensure the function - * 0 added last. When function 0 is added, we set the sltsta and - * inform OS via event notification. --- -2.27.0 - diff --git a/SOURCES/kvm-hw-pci-pcie-Replace-PCI_DEVICE-casts-with-existing-v.patch b/SOURCES/kvm-hw-pci-pcie-Replace-PCI_DEVICE-casts-with-existing-v.patch deleted file mode 100644 index 51a587f..0000000 --- a/SOURCES/kvm-hw-pci-pcie-Replace-PCI_DEVICE-casts-with-existing-v.patch +++ /dev/null @@ -1,62 +0,0 @@ -From f98a1fdad0aa53337925ac46b73a3e6ad36f6295 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Mon, 4 May 2020 21:25:05 +0100 -Subject: [PATCH 2/9] hw/pci/pcie: Replace PCI_DEVICE() casts with existing - variable - -RH-Author: Julia Suvorova -Message-id: <20200504212505.15977-3-jusual@redhat.com> -Patchwork-id: 96259 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] hw/pci/pcie: Replace PCI_DEVICE() casts with existing variable -Bugzilla: 1820531 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Igor Mammedov -RH-Acked-by: Peter Xu - -A little cleanup is possible because of hotplug_pdev introduction. - -Signed-off-by: Julia Suvorova -Message-Id: <20200427182440.92433-3-jusual@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Marcel Apfelbaum -(cherry picked from commit 6a1e073378353eb6ac0565e0dc649b3db76ed5dc) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/pci/pcie.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c -index 6b48d04..abc99b6 100644 ---- a/hw/pci/pcie.c -+++ b/hw/pci/pcie.c -@@ -449,7 +449,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNKSTA_DLLLA); - } -- pcie_cap_slot_event(PCI_DEVICE(hotplug_dev), -+ pcie_cap_slot_event(hotplug_pdev, - PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); - } - } -@@ -490,7 +490,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, - return; - } - -- pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &local_err); -+ pcie_cap_slot_plug_common(hotplug_pdev, dev, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; -@@ -509,7 +509,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, - return; - } - -- pcie_cap_slot_push_attention_button(PCI_DEVICE(hotplug_dev)); -+ pcie_cap_slot_push_attention_button(hotplug_pdev); - } - - /* pci express slot for pci express root/downstream port --- -1.8.3.1 - diff --git a/SOURCES/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch b/SOURCES/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch deleted file mode 100644 index 0f0f126..0000000 --- a/SOURCES/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch +++ /dev/null @@ -1,262 +0,0 @@ -From e6c3fbfc82863180007569cf2a9132c28a47bf1f Mon Sep 17 00:00:00 2001 -From: "Daniel P. Berrange" -Date: Mon, 20 Jan 2020 16:13:08 +0000 -Subject: [PATCH 01/18] hw/smbios: set new default SMBIOS fields for Windows - driver support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. 
Berrange -Message-id: <20200120161308.584989-2-berrange@redhat.com> -Patchwork-id: 93422 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] hw/smbios: set new default SMBIOS fields for Windows driver support -Bugzilla: 1782529 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Igor Mammedov -RH-Acked-by: Laszlo Ersek - -For Windows driver support, we have to follow this doc in order to -enable Windows to automatically determine the right drivers to install -for a given guest / host combination: - - https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer - -Out of the choices available, it was decided that the Windows drivers -will be written to expect use of the scheme documented as "HardwareID-6" -against Windows 10. This uses SMBIOS System (Type 1) and Base Board -(Type 2) tables and will match on - - System Manufacturer = Red Hat - System SKU Number = 8.2.0 - Baseboard Manufacturer = Red Hat - Baseboard Product = RHEL-AV - -The new SMBIOS fields will be tied to machine type and only reported for -pc-q35-8.2.0 machine and later. - -The old SMBIOS fields, previously reported by all machines were: - - System Manufacturer: Red Hat - System Product Name: KVM - System Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) - System Family: Red Hat Enterprise Linux - Baseboard Manufacturer: Red Hat - Baseboard Product Name: KVM - Baseboard Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) - Chassis Manufacturer: Red Hat - Chassis Product Name: KVM - Chassis Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) - Processor Manufacturer: Red Hat - Processor Product Name: KVM - Processor Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) - -This information will continue to be reported for all machines, except -where it conflicts with the requirement of the new SMBIOS data. IOW, -the "Baseboard Product Name" will change to "RHEL-AV" for pc-q35-8.2.0 -machine types and later. 
- -Management applications MUST NEVER override the 4 new SMBIOS fields that -are used for Windows driver matching, with differing values. Aside from -this, they are free to override any other field, including those from -the old SMBIOS field data. - -In particular if a management application wants to report its own -product name and version, it is recommended to use "System product" -and "System version" as identifying fields, as these avoid a clash with -the new SMBIOS fields used for Windows drivers. - -Note that until now the Baseboard (type 2) table has only been generated -by QEMU if explicitly asked for on the CLI. This patch makes it always -present for new machine types. - -Signed-off-by: Daniel P. Berrangé -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/virt.c | 2 +- - hw/i386/pc_piix.c | 2 ++ - hw/i386/pc_q35.c | 8 ++++++++ - hw/smbios/smbios.c | 45 +++++++++++++++++++++++++++++++++++++++++--- - include/hw/firmware/smbios.h | 5 ++++- - include/hw/i386/pc.h | 3 +++ - 6 files changed, 60 insertions(+), 5 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d30d38c..2dcf6e7 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1423,7 +1423,7 @@ static void virt_build_smbios(VirtMachineState *vms) - - smbios_set_defaults("QEMU", product, - vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, -- true, SMBIOS_ENTRY_POINT_30); -+ true, NULL, NULL, SMBIOS_ENTRY_POINT_30); - - smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, - &smbios_anchor, &smbios_anchor_len); -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index bd7fdb9..2ac94d5 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, - smbios_set_defaults("Red Hat", "KVM", - mc->desc, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, -+ pcmc->smbios_stream_product, -+ pcmc->smbios_stream_version, - SMBIOS_ENTRY_POINT_21); - } - -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 7531d8e..e975643 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) - smbios_set_defaults("Red Hat", "KVM", - mc->desc, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, -+ pcmc->smbios_stream_product, -+ pcmc->smbios_stream_version, - SMBIOS_ENTRY_POINT_21); - } - -@@ -565,8 +567,11 @@ static void pc_q35_init_rhel820(MachineState *machine) - - static void pc_q35_machine_rhel820_options(MachineClass *m) - { -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_q35_machine_rhel_options(m); - m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; -+ pcmc->smbios_stream_product = "RHEL-AV"; -+ pcmc->smbios_stream_version = "8.2.0"; - } - - DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, -@@ -579,9 +584,12 @@ static void pc_q35_init_rhel810(MachineState *machine) - - static void pc_q35_machine_rhel810_options(MachineClass *m) - { -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_q35_machine_rhel820_options(m); - m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; - m->alias = NULL; -+ pcmc->smbios_stream_product = NULL; -+ pcmc->smbios_stream_version = NULL; - compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); - compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); 
- } -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index e6e9355..d65c149 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -57,6 +57,9 @@ static bool smbios_legacy = true; - static bool smbios_uuid_encoded = true; - /* end: legacy structures & constants for <= 2.0 machines */ - -+/* Set to true for modern Windows 10 HardwareID-6 compat */ -+static bool smbios_type2_required; -+ - - uint8_t *smbios_tables; - size_t smbios_tables_len; -@@ -532,7 +535,7 @@ static void smbios_build_type_1_table(void) - - static void smbios_build_type_2_table(void) - { -- SMBIOS_BUILD_TABLE_PRE(2, 0x200, false); /* optional */ -+ SMBIOS_BUILD_TABLE_PRE(2, 0x200, smbios_type2_required); - - SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); - SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -753,7 +756,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) - - void smbios_set_defaults(const char *manufacturer, const char *product, - const char *version, bool legacy_mode, -- bool uuid_encoded, SmbiosEntryPointType ep_type) -+ bool uuid_encoded, -+ const char *stream_product, -+ const char *stream_version, -+ SmbiosEntryPointType ep_type) - { - smbios_have_defaults = true; - smbios_legacy = legacy_mode; -@@ -774,12 +780,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, - g_free(smbios_entries); - } - -+ /* -+ * If @stream_product & @stream_version are non-NULL, then -+ * we're following rules for new Windows driver support. 
-+ * The data we have to report is defined in this doc: -+ * -+ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer -+ * -+ * The Windows drivers are written to expect use of the -+ * scheme documented as "HardwareID-6" against Windows 10, -+ * which uses SMBIOS System (Type 1) and Base Board (Type 2) -+ * tables and will match on -+ * -+ * System Manufacturer = Red Hat (@manufacturer) -+ * System SKU Number = 8.2.0 (@stream_version) -+ * Baseboard Manufacturer = Red Hat (@manufacturer) -+ * Baseboard Product = RHEL-AV (@stream_product) -+ * -+ * NB, SKU must be changed with each RHEL-AV release -+ * -+ * Other fields can be freely used by applications using -+ * QEMU. For example apps can use the "System product" -+ * and "System version" to identify themselves. -+ * -+ * We get 'System Manufacturer' and 'Baseboard Manufacturer' -+ */ - SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type1.product, product); - SMBIOS_SET_DEFAULT(type1.version, version); - SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); -+ if (stream_version != NULL) { -+ SMBIOS_SET_DEFAULT(type1.sku, stream_version); -+ } - SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); -- SMBIOS_SET_DEFAULT(type2.product, product); -+ if (stream_product != NULL) { -+ SMBIOS_SET_DEFAULT(type2.product, stream_product); -+ smbios_type2_required = true; -+ } else { -+ SMBIOS_SET_DEFAULT(type2.product, product); -+ } - SMBIOS_SET_DEFAULT(type2.version, version); - SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type3.version, version); -diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 02a0ced..67e38a1 100644 ---- a/include/hw/firmware/smbios.h -+++ b/include/hw/firmware/smbios.h -@@ -267,7 +267,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); - void smbios_set_cpuid(uint32_t version, uint32_t features); - void smbios_set_defaults(const char 
*manufacturer, const char *product, - const char *version, bool legacy_mode, -- bool uuid_encoded, SmbiosEntryPointType ep_type); -+ bool uuid_encoded, -+ const char *stream_product, -+ const char *stream_version, -+ SmbiosEntryPointType ep_type); - uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); - void smbios_get_tables(MachineState *ms, - const struct smbios_phys_mem_area *mem_array, -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 2e362c8..b9f29ba 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -109,6 +109,9 @@ typedef struct PCMachineClass { - bool smbios_defaults; - bool smbios_legacy_mode; - bool smbios_uuid_encoded; -+ /* New fields needed for Windows HardwareID-6 matching */ -+ const char *smbios_stream_product; -+ const char *smbios_stream_version; - - /* RAM / address space compat: */ - bool gigabyte_align; --- -1.8.3.1 - diff --git a/SOURCES/kvm-i386-Add-2nd-Generation-AMD-EPYC-processors.patch b/SOURCES/kvm-i386-Add-2nd-Generation-AMD-EPYC-processors.patch deleted file mode 100644 index b2cc438..0000000 --- a/SOURCES/kvm-i386-Add-2nd-Generation-AMD-EPYC-processors.patch +++ /dev/null @@ -1,199 +0,0 @@ -From 1bee5a77b3f999d2933a440021737d0720b32269 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 29 Jul 2020 18:56:21 -0400 -Subject: [PATCH 1/4] i386: Add 2nd Generation AMD EPYC processors - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200729185621.152427-2-dgilbert@redhat.com> -Patchwork-id: 98078 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/1] i386: Add 2nd Generation AMD EPYC processors -Bugzilla: 1780385 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Maxim Levitsky - -From: "Moger, Babu" - -Adds the support for 2nd Gen AMD EPYC Processors. The model display -name will be EPYC-Rome. - -Adds the following new feature bits on top of the feature bits from the -first generation EPYC models. -perfctr-core : core performance counter extensions support. 
Enables the VM to - use extended performance counter support. It enables six - programmable counters instead of four counters. -clzero : instruction zeroes out the 64 byte cache line specified in RAX. -xsaveerptr : XSAVE, XSAVE, FXSAVEOPT, XSAVEC, XSAVES always save error - pointers and FXRSTOR, XRSTOR, XRSTORS always restore error - pointers. -wbnoinvd : Write back and do not invalidate cache -ibpb : Indirect Branch Prediction Barrier -amd-stibp : Single Thread Indirect Branch Predictor -clwb : Cache Line Write Back and Retain -xsaves : XSAVES, XRSTORS and IA32_XSS support -rdpid : Read Processor ID instruction support -umip : User-Mode Instruction Prevention support - -The Reference documents are available at -https://developer.amd.com/wp-content/resources/55803_0.54-PUB.pdf -https://www.amd.com/system/files/TechDocs/24594.pdf - -Depends on following kernel commits: -40bc47b08b6e ("kvm: x86: Enumerate support for CLZERO instruction") -504ce1954fba ("KVM: x86: Expose XSAVEERPTR to the guest") -6d61e3c32248 ("kvm: x86: Expose RDPID in KVM_GET_SUPPORTED_CPUID") -52297436199d ("kvm: svm: Update svm_xsaves_supported") - -Signed-off-by: Babu Moger -Message-Id: <157314966312.23828.17684821666338093910.stgit@naples-babu.amd.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 143c30d4d346831a09e59e9af45afdca0331e819) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 102 +++++++++++++++++++++++++++++++++++++++++++++- - target/i386/cpu.h | 2 + - 2 files changed, 103 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index a343de0c9d..ff39fc9905 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1133,7 +1133,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "clzero", NULL, "xsaveerptr", NULL, - NULL, NULL, NULL, NULL, - NULL, "wbnoinvd", NULL, NULL, -- "ibpb", NULL, NULL, NULL, -+ "ibpb", NULL, NULL, "amd-stibp", - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, -@@ -1803,6 +1803,56 @@ static CPUCaches epyc_cache_info = { - }, - }; - -+static CPUCaches epyc_rome_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 16 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 16384, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = true, -+ }, -+}; -+ - /* The following VMX features are not supported by KVM and are left out in the - * CPU definitions: - * -@@ -4024,6 +4074,56 @@ static X86CPUDefinition builtin_x86_defs[] = { - .model_id = "Hygon Dhyana Processor", - .cache_info = 
&epyc_cache_info, - }, -+ { -+ .name = "EPYC-Rome", -+ .level = 0xd, -+ .vendor = CPUID_VENDOR_AMD, -+ .family = 23, -+ .model = 49, -+ .stepping = 0, -+ .features[FEAT_1_EDX] = -+ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | -+ CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | -+ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | -+ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | -+ CPUID_VME | CPUID_FP87, -+ .features[FEAT_1_ECX] = -+ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | -+ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | -+ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | -+ CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | -+ CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, -+ .features[FEAT_8000_0001_EDX] = -+ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | -+ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | -+ CPUID_EXT2_SYSCALL, -+ .features[FEAT_8000_0001_ECX] = -+ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | -+ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | -+ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | -+ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, -+ .features[FEAT_8000_0008_EBX] = -+ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | -+ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | -+ CPUID_8000_0008_EBX_STIBP, -+ .features[FEAT_7_0_EBX] = -+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | -+ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED | -+ CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | -+ CPUID_7_0_EBX_SHA_NI | CPUID_7_0_EBX_CLWB, -+ .features[FEAT_7_0_ECX] = -+ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_RDPID, -+ .features[FEAT_XSAVE] = -+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | -+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, -+ .features[FEAT_6_EAX] = -+ CPUID_6_EAX_ARAT, -+ .features[FEAT_SVM] = -+ CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, -+ .xlevel = 
0x8000001E, -+ .model_id = "AMD EPYC-Rome Processor", -+ .cache_info = &epyc_rome_cache_info, -+ }, - }; - - /* KVM-specific features that are automatically added/removed -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 7bfbf2a5e5..f3da25cb8a 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -792,6 +792,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; - #define CPUID_8000_0008_EBX_WBNOINVD (1U << 9) - /* Indirect Branch Prediction Barrier */ - #define CPUID_8000_0008_EBX_IBPB (1U << 12) -+/* Single Thread Indirect Branch Predictors */ -+#define CPUID_8000_0008_EBX_STIBP (1U << 15) - - #define CPUID_XSAVE_XSAVEOPT (1U << 0) - #define CPUID_XSAVE_XSAVEC (1U << 1) --- -2.27.0 - diff --git a/SOURCES/kvm-i386-Add-MSR-feature-bit-for-MDS-NO.patch b/SOURCES/kvm-i386-Add-MSR-feature-bit-for-MDS-NO.patch deleted file mode 100644 index 823ff0c..0000000 --- a/SOURCES/kvm-i386-Add-MSR-feature-bit-for-MDS-NO.patch +++ /dev/null @@ -1,46 +0,0 @@ -From cdafcc1d68110ed172c09c9e6bba42ee15b5a6df Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Fri, 15 May 2020 18:02:40 +0100 -Subject: [PATCH 13/17] i386: Add MSR feature bit for MDS-NO - -RH-Author: plai@redhat.com -Message-id: <20200515180243.17488-2-plai@redhat.com> -Patchwork-id: 96609 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 1/4] i386: Add MSR feature bit for MDS-NO -Bugzilla: 1769912 -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Dr. David Alan Gilbert - -From: Cathy Zhang - -Define MSR_ARCH_CAP_MDS_NO in the IA32_ARCH_CAPABILITIES MSR to allow -CPU models to report the feature when host supports it. - -Signed-off-by: Cathy Zhang -Reviewed-by: Xiaoyao Li -Reviewed-by: Tao Xu -Message-Id: <1571729728-23284-2-git-send-email-cathy.zhang@intel.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 77b168d221191156c47fcd8d1c47329dfdb9439e) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 4441061..60304cc 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -839,6 +839,7 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; - #define MSR_ARCH_CAP_RSBA (1U << 2) - #define MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY (1U << 3) - #define MSR_ARCH_CAP_SSB_NO (1U << 4) -+#define MSR_ARCH_CAP_MDS_NO (1U << 5) - - #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) - --- -1.8.3.1 - diff --git a/SOURCES/kvm-i386-Add-macro-for-stibp.patch b/SOURCES/kvm-i386-Add-macro-for-stibp.patch deleted file mode 100644 index 17dd149..0000000 --- a/SOURCES/kvm-i386-Add-macro-for-stibp.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 00f916987589f114f42ce20b138c00c47b9e4df7 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Fri, 15 May 2020 18:02:41 +0100 -Subject: [PATCH 14/17] i386: Add macro for stibp - -RH-Author: plai@redhat.com -Message-id: <20200515180243.17488-3-plai@redhat.com> -Patchwork-id: 96610 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 2/4] i386: Add macro for stibp -Bugzilla: 1769912 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -From: Cathy Zhang - -stibp feature is already added through the following commit. -https://github.com/qemu/qemu/commit/0e8916582991b9fd0b94850a8444b8b80d0a0955 - -Add a macro for it to allow CPU models to report it when host supports. - -Signed-off-by: Cathy Zhang -Reviewed-by: Xiaoyao Li -Reviewed-by: Tao Xu -Message-Id: <1571729728-23284-3-git-send-email-cathy.zhang@intel.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 5af514d0cb314f43bc53f2aefb437f6451d64d0c) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.h | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 60304cc..e77d101 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -772,6 +772,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; - #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) - /* Speculation Control */ - #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) -+/* Single Thread Indirect Branch Predictors */ -+#define CPUID_7_0_EDX_STIBP (1U << 27) - /* Arch Capabilities */ - #define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) - /* Core Capability */ --- -1.8.3.1 - diff --git a/SOURCES/kvm-i386-Add-new-CPU-model-Cooperlake.patch b/SOURCES/kvm-i386-Add-new-CPU-model-Cooperlake.patch deleted file mode 100644 index 289d1e3..0000000 --- a/SOURCES/kvm-i386-Add-new-CPU-model-Cooperlake.patch +++ /dev/null @@ -1,108 +0,0 @@ -From cf62577aed781b2515ea97b9f42285c2f608a7bf Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Fri, 15 May 2020 18:02:42 +0100 -Subject: [PATCH 16/17] i386: Add new CPU model Cooperlake - -RH-Author: plai@redhat.com -Message-id: <20200515180243.17488-4-plai@redhat.com> -Patchwork-id: 96608 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 3/4] i386: Add new CPU model Cooperlake -Bugzilla: 1769912 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -From: Cathy Zhang - -Cooper Lake is intel's successor to Cascade Lake, the new -CPU model inherits features from Cascadelake-Server, while -add one platform associated new feature: AVX512_BF16. Meanwhile, -add STIBP for speculative execution. - -Signed-off-by: Cathy Zhang -Reviewed-by: Xiaoyao Li -Reviewed-by: Tao Xu -Message-Id: <1571729728-23284-4-git-send-email-cathy.zhang@intel.com> -Reviewed-by: Bruce Rogers -Signed-off-by: Eduardo Habkost -(cherry picked from commit 22a866b6166db5caa4abaa6e656c2a431fa60726) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 60 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 0f0a2db..996a74f 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -3161,6 +3161,66 @@ static X86CPUDefinition builtin_x86_defs[] = { - } - }, - { -+ .name = "Cooperlake", -+ .level = 0xd, -+ .vendor = CPUID_VENDOR_INTEL, -+ .family = 6, -+ .model = 85, -+ .stepping = 10, -+ .features[FEAT_1_EDX] = -+ CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | -+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | -+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | -+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | -+ CPUID_DE | CPUID_FP87, -+ .features[FEAT_1_ECX] = -+ CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | -+ CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | -+ CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | -+ CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | -+ CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | -+ CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, -+ .features[FEAT_8000_0001_EDX] = -+ CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP | -+ CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, -+ .features[FEAT_8000_0001_ECX] = -+ CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, -+ .features[FEAT_7_0_EBX] = -+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | -+ CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | -+ CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | -+ CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | -+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | -+ CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | -+ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | -+ CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, -+ .features[FEAT_7_0_ECX] = -+ CPUID_7_0_ECX_PKU | -+ CPUID_7_0_ECX_AVX512VNNI, -+ .features[FEAT_7_0_EDX] = -+ 
CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_STIBP | -+ CPUID_7_0_EDX_SPEC_CTRL_SSBD | CPUID_7_0_EDX_ARCH_CAPABILITIES, -+ .features[FEAT_ARCH_CAPABILITIES] = -+ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | -+ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO, -+ .features[FEAT_7_1_EAX] = -+ CPUID_7_1_EAX_AVX512_BF16, -+ /* -+ * Missing: XSAVES (not supported by some Linux versions, -+ * including v4.1 to v4.12). -+ * KVM doesn't yet expose any XSAVES state save component, -+ * and the only one defined in Skylake (processor tracing) -+ * probably will block migration anyway. -+ */ -+ .features[FEAT_XSAVE] = -+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | -+ CPUID_XSAVE_XGETBV1, -+ .features[FEAT_6_EAX] = -+ CPUID_6_EAX_ARAT, -+ .xlevel = 0x80000008, -+ .model_id = "Intel Xeon Processor (Cooperlake)", -+ }, -+ { - .name = "Icelake-Client", - .level = 0xd, - .vendor = CPUID_VENDOR_INTEL, --- -1.8.3.1 - diff --git a/SOURCES/kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch b/SOURCES/kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch deleted file mode 100644 index 5c335f8..0000000 --- a/SOURCES/kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch +++ /dev/null @@ -1,213 +0,0 @@ -From 4daa8dca77edec191dfe0ae4a0a9fc70f8f63607 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 24 Feb 2021 11:30:37 -0500 -Subject: [PATCH 4/4] i386: Add the support for AMD EPYC 3rd generation - processors - -RH-Author: Dr. David Alan Gilbert -Message-id: <20210224113037.15599-5-dgilbert@redhat.com> -Patchwork-id: 101202 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 4/4] i386: Add the support for AMD EPYC 3rd generation processors -Bugzilla: 1790620 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Peter Xu - -From: Babu Moger - -Adds the support for AMD 3rd generation processors. The model -display for the new processor will be EPYC-Milan. 
- -Adds the following new feature bits on top of the feature bits from -the first and second generation EPYC models. - -pcid : Process context identifiers support -ibrs : Indirect Branch Restricted Speculation -ssbd : Speculative Store Bypass Disable -erms : Enhanced REP MOVSB/STOSB support -fsrm : Fast Short REP MOVSB support -invpcid : Invalidate processor context ID -pku : Protection keys support -svme-addr-chk : SVM instructions address check for #GP handling - -Depends on the following kernel commits: -14c2bf81fcd2 ("KVM: SVM: Fix #GP handling for doubly-nested virtualization") -3b9c723ed7cf ("KVM: SVM: Add support for SVM instruction address check change") -4aa2691dcbd3 ("8ce1c461188799d863398dd2865d KVM: x86: Factor out x86 instruction emulation with decoding") -4407a797e941 ("KVM: SVM: Enable INVPCID feature on AMD") -9715092f8d7e ("KVM: X86: Move handling of INVPCID types to x86") -3f3393b3ce38 ("KVM: X86: Rename and move the function vmx_handle_memory_failure to x86.c") -830bd71f2c06 ("KVM: SVM: Remove set_cr_intercept, clr_cr_intercept and is_cr_intercept") -4c44e8d6c193 ("KVM: SVM: Add new intercept word in vmcb_control_area") -c62e2e94b9d4 ("KVM: SVM: Modify 64 bit intercept field to two 32 bit vectors") -9780d51dc2af ("KVM: SVM: Modify intercept_exceptions to generic intercepts") -30abaa88382c ("KVM: SVM: Change intercept_dr to generic intercepts") -03bfeeb988a9 ("KVM: SVM: Change intercept_cr to generic intercepts") -c45ad7229d13 ("KVM: SVM: Introduce vmcb_(set_intercept/clr_intercept/_is_intercept)") -a90c1ed9f11d ("(pcid) KVM: nSVM: Remove unused field") -fa44b82eb831 ("KVM: x86: Move MPK feature detection to common code") -38f3e775e9c2 ("x86/Kconfig: Update config and kernel doc for MPK feature on AMD") -37486135d3a7 ("KVM: x86: Fix pkru save/restore when guest CR4.PKE=0, move it to x86.c") - -Signed-off-by: Babu Moger -Message-Id: <161290460478.11352.8933244555799318236.stgit@bmoger-ubuntu> -Signed-off-by: Eduardo Habkost -(cherry picked from 
commit 623972ceae091b31331ae4a1dc94fe5cbb891937) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 107 +++++++++++++++++++++++++++++++++++++++++++++- - target/i386/cpu.h | 4 ++ - 2 files changed, 110 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 7227c803c3..d5b0d4b7f0 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1133,7 +1133,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "clzero", NULL, "xsaveerptr", NULL, - NULL, NULL, NULL, NULL, - NULL, "wbnoinvd", NULL, NULL, -- "ibpb", NULL, NULL, "amd-stibp", -+ "ibpb", NULL, "ibrs", "amd-stibp", - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, -@@ -1853,6 +1853,56 @@ static CPUCaches epyc_rome_cache_info = { - }, - }; - -+static CPUCaches epyc_milan_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 32 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 32768, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = true, -+ }, -+}; -+ - /* The following VMX features are not supported by KVM and are left out in the - * CPU definitions: - * -@@ -4124,6 +4174,61 @@ static 
X86CPUDefinition builtin_x86_defs[] = { - .model_id = "AMD EPYC-Rome Processor", - .cache_info = &epyc_rome_cache_info, - }, -+ { -+ .name = "EPYC-Milan", -+ .level = 0xd, -+ .vendor = CPUID_VENDOR_AMD, -+ .family = 25, -+ .model = 1, -+ .stepping = 1, -+ .features[FEAT_1_EDX] = -+ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | -+ CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | -+ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | -+ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | -+ CPUID_VME | CPUID_FP87, -+ .features[FEAT_1_ECX] = -+ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | -+ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | -+ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | -+ CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | -+ CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | -+ CPUID_EXT_PCID, -+ .features[FEAT_8000_0001_EDX] = -+ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | -+ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | -+ CPUID_EXT2_SYSCALL, -+ .features[FEAT_8000_0001_ECX] = -+ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | -+ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | -+ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | -+ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, -+ .features[FEAT_8000_0008_EBX] = -+ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | -+ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | -+ CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | -+ CPUID_8000_0008_EBX_AMD_SSBD, -+ .features[FEAT_7_0_EBX] = -+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | -+ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED | -+ CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | -+ CPUID_7_0_EBX_SHA_NI | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_ERMS | -+ CPUID_7_0_EBX_INVPCID, -+ .features[FEAT_7_0_ECX] = -+ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_RDPID | 
CPUID_7_0_ECX_PKU, -+ .features[FEAT_7_0_EDX] = -+ CPUID_7_0_EDX_FSRM, -+ .features[FEAT_XSAVE] = -+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | -+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, -+ .features[FEAT_6_EAX] = -+ CPUID_6_EAX_ARAT, -+ .features[FEAT_SVM] = -+ CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_SVME_ADDR_CHK, -+ .xlevel = 0x8000001E, -+ .model_id = "AMD EPYC-Milan Processor", -+ .cache_info = &epyc_milan_cache_info, -+ }, - }; - - /* KVM-specific features that are automatically added/removed -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index e1b67910c2..7a3aa40201 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -800,8 +800,12 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; - #define CPUID_8000_0008_EBX_WBNOINVD (1U << 9) - /* Indirect Branch Prediction Barrier */ - #define CPUID_8000_0008_EBX_IBPB (1U << 12) -+/* Indirect Branch Restricted Speculation */ -+#define CPUID_8000_0008_EBX_IBRS (1U << 14) - /* Single Thread Indirect Branch Predictors */ - #define CPUID_8000_0008_EBX_STIBP (1U << 15) -+/* Speculative Store Bypass Disable */ -+#define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) - - #define CPUID_XSAVE_XSAVEOPT (1U << 0) - #define CPUID_XSAVE_XSAVEC (1U << 1) --- -2.27.0 - diff --git a/SOURCES/kvm-i386-Mask-SVM-features-if-nested-SVM-is-disabled.patch b/SOURCES/kvm-i386-Mask-SVM-features-if-nested-SVM-is-disabled.patch deleted file mode 100644 index 17251bf..0000000 --- a/SOURCES/kvm-i386-Mask-SVM-features-if-nested-SVM-is-disabled.patch +++ /dev/null @@ -1,82 +0,0 @@ -From d3b9c1891a6d05308dd5ea119d2c32c8f98a25da Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Tue, 30 Jun 2020 23:40:15 -0400 -Subject: [PATCH 1/4] i386: Mask SVM features if nested SVM is disabled - -RH-Author: Eduardo Habkost -Message-id: <20200630234015.166253-2-ehabkost@redhat.com> -Patchwork-id: 97852 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/1] i386: Mask SVM features if nested SVM is disabled -Bugzilla: 1835390 -RH-Acked-by: Igor Mammedov 
-RH-Acked-by: Bandan Das -RH-Acked-by: Dr. David Alan Gilbert - -QEMU incorrectly validates FEAT_SVM feature flags against -GET_SUPPORTED_CPUID even if SVM features are being masked out by -cpu_x86_cpuid(). This can make QEMU print warnings on most AMD -CPU models, even when SVM nesting is disabled (which is the -default). - -This bug was never detected before because of a Linux KVM bug: -until Linux v5.6, KVM was not filtering out SVM features in -GET_SUPPORTED_CPUID when nested was disabled. This KVM bug was -fixed in Linux v5.7-rc1, on Linux commit a50718cc3f43 ("KVM: -nSVM: Expose SVM features to L1 iff nested is enabled"). - -Fix the problem by adding a CPUID_EXT3_SVM dependency to all -FEAT_SVM feature flags in the feature_dependencies table. - -Reported-by: Yanan Fu -Signed-off-by: Eduardo Habkost -Message-Id: <20200623230116.277409-1-ehabkost@redhat.com> -[Fix testcase. - Paolo] -Signed-off-by: Paolo Bonzini -(cherry picked from commit 730319aef0fcb94f11a4a2d32656437fdde7efdd) -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 4 ++++ - tests/test-x86-cpuid-compat.c | 4 ++-- - 2 files changed, 6 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 7d7b016bb7..a343de0c9d 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1477,6 +1477,10 @@ static FeatureDep feature_dependencies[] = { - .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_VMFUNC }, - .to = { FEAT_VMX_VMFUNC, ~0ull }, - }, -+ { -+ .from = { FEAT_8000_0001_ECX, CPUID_EXT3_SVM }, -+ .to = { FEAT_SVM, ~0ull }, -+ }, - }; - - typedef struct X86RegisterInfo32 { -diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c -index e7c075ed98..983aa0719a 100644 ---- a/tests/test-x86-cpuid-compat.c -+++ b/tests/test-x86-cpuid-compat.c -@@ -256,7 +256,7 @@ int main(int argc, char **argv) - "-cpu 486,+invtsc", "xlevel", 0x80000007); - /* CPUID[8000_000A].EDX: */ - add_cpuid_test("x86/cpuid/auto-xlevel/486/npt", -- "-cpu 486,+npt", "xlevel", 0x8000000A); -+ "-cpu 486,+svm,+npt", "xlevel", 0x8000000A); - /* CPUID[C000_0001].EDX: */ - add_cpuid_test("x86/cpuid/auto-xlevel2/phenom/xstore", - "-cpu phenom,+xstore", "xlevel2", 0xC0000001); -@@ -349,7 +349,7 @@ int main(int argc, char **argv) - "-machine pc-i440fx-2.4 -cpu SandyBridge,", - "xlevel", 0x80000008); - add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", -- "-machine pc-i440fx-2.4 -cpu SandyBridge,+npt", -+ "-machine pc-i440fx-2.4 -cpu SandyBridge,+svm,+npt", - "xlevel", 0x80000008); - #endif - --- -2.27.0 - diff --git a/SOURCES/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch b/SOURCES/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch deleted file mode 100644 index 5d62ace..0000000 --- a/SOURCES/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 4543a3c19816bd07f27eb900f20ae609df03703c Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Mon, 23 Dec 2019 21:10:31 +0000 -Subject: [PATCH 1/2] i386: Remove cpu64-rhel6 CPU model - -RH-Author: 
Eduardo Habkost -Message-id: <20191223211031.26503-1-ehabkost@redhat.com> -Patchwork-id: 93213 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] i386: Remove cpu64-rhel6 CPU model -Bugzilla: 1741345 -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Laszlo Ersek - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1741345 -BRANCH: rhel-av-8.2.0 -Upstream: not applicable -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25525975 - -We don't provide rhel6 machine types anymore, so we don't need to -provide compatibility with RHEl6. cpu64-rhel6 was documented as -deprecated and scheduled for removal in 8.2, so now it's time to -remove it. - -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 26 +------------------------- - 1 file changed, 1 insertion(+), 25 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 790db77..6dce6f2 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1829,12 +1829,7 @@ static CPUCaches epyc_cache_info = { - - static X86CPUDefinition builtin_x86_defs[] = { - { -- /* qemu64 is the default CPU model for all *-rhel7.* machine-types. -- * The default on RHEL-6 was cpu64-rhel6. -- * libvirt assumes that qemu64 is the default for _all_ machine-types, -- * so we should try to keep qemu64 and cpu64-rhel6 as similar as -- * possible. 
-- */ -+ /* qemu64 is the default CPU model for all machine-types */ - .name = "qemu64", - .level = 0xd, - .vendor = CPUID_VENDOR_AMD, -@@ -2135,25 +2130,6 @@ static X86CPUDefinition builtin_x86_defs[] = { - .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", - }, - { -- .name = "cpu64-rhel6", -- .level = 4, -- .vendor = CPUID_VENDOR_AMD, -- .family = 6, -- .model = 13, -- .stepping = 3, -- .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | -- CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -- CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | -- CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | -- CPUID_PSE | CPUID_DE | CPUID_FP87, -- .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, -- .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, -- .features[FEAT_8000_0001_ECX] = CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | -- CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, -- .xlevel = 0x8000000A, -- .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", -- }, -- { - .name = "Conroe", - .level = 10, - .vendor = CPUID_VENDOR_INTEL, --- -1.8.3.1 - diff --git a/SOURCES/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch b/SOURCES/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch deleted file mode 100644 index 1027341..0000000 --- a/SOURCES/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch +++ /dev/null @@ -1,95 +0,0 @@ -From ccda4494b0ea4b81b6b0c3e539a0bcf7e673c68c Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Thu, 5 Dec 2019 21:56:50 +0000 -Subject: [PATCH 01/18] i386: Resolve CPU models to v1 by default - -RH-Author: Eduardo Habkost -Message-id: <20191205225650.772600-2-ehabkost@redhat.com> -Patchwork-id: 92907 -O-Subject: [RHEL-AV-8.1.1 qemu-kvm PATCH 1/1] i386: Resolve CPU models to v1 by default -Bugzilla: 1787291 1779078 1779078 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Igor Mammedov -RH-Acked-by: Paolo Bonzini - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1779078 -Brew: 
https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25187823 -Upstream: submitted, Message-Id: <20191205223339.764534-1-ehabkost@redhat.com> - -When using `query-cpu-definitions` using `-machine none`, -QEMU is resolving all CPU models to their latest versions. The -actual CPU model version being used by another machine type (e.g. -`pc-q35-4.0`) might be different. - -In theory, this was OK because the correct CPU model -version is returned when using the correct `-machine` argument. - -Except that in practice, this breaks libvirt expectations: -libvirt always use `-machine none` when checking if a CPU model -is runnable, because runnability is not expected to be affected -when the machine type is changed. - -For example, when running on a Haswell host without TSX, -Haswell-v4 is runnable, but Haswell-v1 is not. On those hosts, -`query-cpu-definitions` says Haswell is runnable if using -`-machine none`, but Haswell is actually not runnable using any -of the `pc-*` machine types (because they resolve Haswell to -Haswell-v1). In other words, we're breaking the "runnability -guarantee" we promised to not break for a few releases (see -qemu-deprecated.texi). - -To address this issue, change the default CPU model version to v1 -on all machine types, so we make `query-cpu-definitions` output -when using `-machine none` match the results when using `pc-*`. -This will change in the future (the plan is to always return the -latest CPU model version if using `-machine none`), but only -after giving libvirt the opportunity to adapt. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1779078 -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. 
de Paula ---- - qemu-deprecated.texi | 7 +++++++ - target/i386/cpu.c | 8 +++++++- - 2 files changed, 14 insertions(+), 1 deletion(-) - -diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi -index 4b4b742..534ebe9 100644 ---- a/qemu-deprecated.texi -+++ b/qemu-deprecated.texi -@@ -374,6 +374,13 @@ guarantees must resolve the CPU model aliases using te - ``alias-of'' field returned by the ``query-cpu-definitions'' QMP - command. - -+While those guarantees are kept, the return value of -+``query-cpu-definitions'' will have existing CPU model aliases -+point to a version that doesn't break runnability guarantees -+(specifically, version 1 of those CPU models). In future QEMU -+versions, aliases will point to newer CPU model versions -+depending on the machine type, so management software must -+resolve CPU model aliases before starting a virtual machine. - - @node Recently removed features - @appendix Recently removed features -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 6dce6f2..863192c 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -3926,7 +3926,13 @@ static PropValue tcg_default_props[] = { - }; - - --X86CPUVersion default_cpu_version = CPU_VERSION_LATEST; -+/* -+ * We resolve CPU model aliases using -v1 when using "-machine -+ * none", but this is just for compatibility while libvirt isn't -+ * adapted to resolve CPU model versions before creating VMs. -+ * See "Runnability guarantee of CPU models" at * qemu-deprecated.texi. 
-+ */ -+X86CPUVersion default_cpu_version = 1; - - void x86_cpu_set_default_version(X86CPUVersion version) - { --- -1.8.3.1 - diff --git a/SOURCES/kvm-ide-atapi-check-logical-block-address-and-read-size-.patch b/SOURCES/kvm-ide-atapi-check-logical-block-address-and-read-size-.patch deleted file mode 100644 index 706bd8b..0000000 --- a/SOURCES/kvm-ide-atapi-check-logical-block-address-and-read-size-.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 0453588f95294ed5ce912cb8b810a322bf9d91e0 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Thu, 25 Feb 2021 19:43:02 -0500 -Subject: [PATCH] ide: atapi: check logical block address and read size - (CVE-2020-29443) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210225194302.3137699-2-jmaloy@redhat.com> -Patchwork-id: 101208 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 1/1] ide: atapi: check logical block address and read size (CVE-2020-29443) -Bugzilla: 1917451 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Danilo de Paula -RH-Acked-by: Paolo Bonzini - -From: Prasad J Pandit - -While processing ATAPI cmd_read/cmd_read_cd commands, -Logical Block Address (LBA) maybe invalid OR closer to the last block, -leading to an OOB access issues. Add range check to avoid it. - -Fixes: CVE-2020-29443 -Reported-by: Wenxiang Qian -Suggested-by: Paolo Bonzini -Reviewed-by: Paolo Bonzini -Signed-off-by: Prasad J Pandit -Message-Id: <20210118115130.457044-1-ppandit@redhat.com> -Signed-off-by: Paolo Bonzini - -(cherry picked from commit b8d7f1bc59276fec85e4d09f1567613a3e14d31e) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ide/atapi.c | 30 ++++++++++++++++++++++++------ - 1 file changed, 24 insertions(+), 6 deletions(-) - -diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c -index 17a9d635d8..d064935c8d 100644 ---- a/hw/ide/atapi.c -+++ b/hw/ide/atapi.c -@@ -320,6 +320,8 @@ static void ide_atapi_cmd_reply(IDEState *s, int size, int max_size) - static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors, - int sector_size) - { -+ assert(0 <= lba && lba < (s->nb_sectors >> 2)); -+ - s->lba = lba; - s->packet_transfer_size = nb_sectors * sector_size; - s->elementary_transfer_size = 0; -@@ -418,6 +420,8 @@ eot: - static void ide_atapi_cmd_read_dma(IDEState *s, int lba, int nb_sectors, - int sector_size) - { -+ assert(0 <= lba && lba < (s->nb_sectors >> 2)); -+ - s->lba = lba; - s->packet_transfer_size = nb_sectors * sector_size; - s->io_buffer_size = 0; -@@ -971,35 +975,49 @@ static void cmd_prevent_allow_medium_removal(IDEState *s, uint8_t* buf) - - static void cmd_read(IDEState *s, uint8_t* buf) - { -- int nb_sectors, lba; -+ unsigned int nb_sectors, lba; -+ -+ /* Total logical sectors of ATAPI_SECTOR_SIZE(=2048) bytes */ -+ uint64_t total_sectors = s->nb_sectors >> 2; - - if (buf[0] == GPCMD_READ_10) { - nb_sectors = lduw_be_p(buf + 7); - } else { - nb_sectors = ldl_be_p(buf + 6); - } -- -- lba = ldl_be_p(buf + 2); - if (nb_sectors == 0) { - ide_atapi_cmd_ok(s); - return; - } - -+ lba = ldl_be_p(buf + 2); -+ if (lba >= total_sectors || lba + nb_sectors - 1 >= total_sectors) { -+ ide_atapi_cmd_error(s, ILLEGAL_REQUEST, ASC_LOGICAL_BLOCK_OOR); -+ return; -+ } -+ - ide_atapi_cmd_read(s, lba, nb_sectors, 2048); - } - - static void cmd_read_cd(IDEState *s, uint8_t* buf) - { -- int nb_sectors, lba, transfer_request; -+ unsigned int nb_sectors, lba, transfer_request; - -- nb_sectors = (buf[6] << 16) | (buf[7] << 8) | buf[8]; -- lba = ldl_be_p(buf + 2); -+ /* Total logical sectors of ATAPI_SECTOR_SIZE(=2048) bytes */ -+ uint64_t total_sectors = s->nb_sectors >> 2; - -+ 
nb_sectors = (buf[6] << 16) | (buf[7] << 8) | buf[8]; - if (nb_sectors == 0) { - ide_atapi_cmd_ok(s); - return; - } - -+ lba = ldl_be_p(buf + 2); -+ if (lba >= total_sectors || lba + nb_sectors - 1 >= total_sectors) { -+ ide_atapi_cmd_error(s, ILLEGAL_REQUEST, ASC_LOGICAL_BLOCK_OOR); -+ return; -+ } -+ - transfer_request = buf[9] & 0xf8; - if (transfer_request == 0x00) { - /* nothing */ --- -2.27.0 - diff --git a/SOURCES/kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch b/SOURCES/kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch deleted file mode 100644 index db89a06..0000000 --- a/SOURCES/kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch +++ /dev/null @@ -1,58 +0,0 @@ -From d8f84a8086dbe339a9f97dbcd10abd6379525068 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Tue, 12 Jan 2021 14:36:37 -0500 -Subject: [PATCH 13/17] intel_iommu: Skip page walking on device iotlb - invalidations -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210112143638.374060-13-eperezma@redhat.com> -Patchwork-id: 100605 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 12/13] intel_iommu: Skip page walking on device iotlb invalidations -Bugzilla: 1843852 -RH-Acked-by: Xiao Wang -RH-Acked-by: Peter Xu -RH-Acked-by: Auger Eric - -Although they didn't reach the notifier because of the filtering in -memory_region_notify_iommu_one, the vt-d was still splitting huge -memory invalidations in chunks. Skipping it. - -This improves performance in case of netperf with vhost-net: -* TCP_STREAM: From 1923.6Mbit/s to 2175.13Mbit/s (13%) -* TCP_RR: From 8464.73 trans/s to 8932.703333 trans/s (5.5%) -* UDP_RR: From 8562.08 trans/s to 9005.62/s (5.1%) -* UDP_STREAM: No change observed (insignificant 0.1% improvement) - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20201116165506.31315-5-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit f7701e2c7983b680790af47117577b285b6a1aed) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - hw/i386/intel_iommu.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index 3640bc2ed15..2b270f06645 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -1421,6 +1421,10 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as) - VTDContextEntry ce; - IOMMUNotifier *n; - -+ if (!(vtd_as->iommu.iommu_notify_flags & IOMMU_NOTIFIER_IOTLB_EVENTS)) { -+ return 0; -+ } -+ - ret = vtd_dev_to_context_entry(vtd_as->iommu_state, - pci_bus_num(vtd_as->bus), - vtd_as->devfn, &ce); --- -2.27.0 - diff --git a/SOURCES/kvm-introduce-kvm_kernel_irqchip_-functions.patch b/SOURCES/kvm-introduce-kvm_kernel_irqchip_-functions.patch deleted file mode 100644 index b171749..0000000 --- a/SOURCES/kvm-introduce-kvm_kernel_irqchip_-functions.patch +++ /dev/null @@ -1,281 +0,0 @@ -From 3899672db472c1ca530badd49d17726a1057f8af Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 5 Jun 2020 07:41:10 -0400 -Subject: [PATCH 40/42] kvm: introduce kvm_kernel_irqchip_* functions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200605074111.2185-3-thuth@redhat.com> -Patchwork-id: 97369 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 2/3] kvm: introduce kvm_kernel_irqchip_* functions -Bugzilla: 1756946 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Paolo Bonzini - -The KVMState struct is opaque, so provide accessors for the fields -that will be moved from current_machine to the accelerator. For now -they just forward to the machine object, but this will change. 
- -Signed-off-by: Paolo Bonzini -(cherry picked from commit 4376c40dedb22530738eeb104a603e94ed03f719) - -Conflicts: - accel/kvm/kvm-all.c - (contextual conflict due to missing other commits in downstream) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. de Paula ---- - accel/kvm/kvm-all.c | 23 +++++++++++++++++++---- - hw/ppc/e500.c | 5 ++--- - hw/ppc/spapr_irq.c | 16 ++++------------ - include/sysemu/kvm.h | 7 +++++-- - target/arm/kvm.c | 8 ++++---- - target/i386/kvm.c | 4 ++-- - target/mips/kvm.c | 2 +- - target/ppc/kvm.c | 2 +- - target/s390x/kvm.c | 2 +- - 9 files changed, 39 insertions(+), 30 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 5007bdad96..b0250209f5 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -1772,7 +1772,7 @@ void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi) - g_hash_table_insert(s->gsimap, irq, GINT_TO_POINTER(gsi)); - } - --static void kvm_irqchip_create(MachineState *machine, KVMState *s) -+static void kvm_irqchip_create(KVMState *s) - { - int ret; - -@@ -1790,9 +1790,9 @@ static void kvm_irqchip_create(MachineState *machine, KVMState *s) - - /* First probe and see if there's a arch-specific hook to create the - * in-kernel irqchip for us */ -- ret = kvm_arch_irqchip_create(machine, s); -+ ret = kvm_arch_irqchip_create(s); - if (ret == 0) { -- if (machine_kernel_irqchip_split(machine)) { -+ if (kvm_kernel_irqchip_split()) { - perror("Split IRQ chip mode not supported."); - exit(1); - } else { -@@ -2076,7 +2076,7 @@ static int kvm_init(MachineState *ms) - } - - if (machine_kernel_irqchip_allowed(ms)) { -- kvm_irqchip_create(ms, s); -+ kvm_irqchip_create(s); - } - - if (kvm_eventfds_allowed) { -@@ -2966,6 +2966,21 @@ static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as, - return false; - } - -+bool kvm_kernel_irqchip_allowed(void) -+{ -+ return machine_kernel_irqchip_allowed(current_machine); -+} -+ -+bool kvm_kernel_irqchip_required(void) -+{ -+ return 
machine_kernel_irqchip_required(current_machine); -+} -+ -+bool kvm_kernel_irqchip_split(void) -+{ -+ return machine_kernel_irqchip_split(current_machine); -+} -+ - static void kvm_accel_class_init(ObjectClass *oc, void *data) - { - AccelClass *ac = ACCEL_CLASS(oc); -diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c -index 91cd4c26f9..12b6a5b2a8 100644 ---- a/hw/ppc/e500.c -+++ b/hw/ppc/e500.c -@@ -793,7 +793,6 @@ static DeviceState *ppce500_init_mpic(PPCE500MachineState *pms, - MemoryRegion *ccsr, - IrqLines *irqs) - { -- MachineState *machine = MACHINE(pms); - const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms); - DeviceState *dev = NULL; - SysBusDevice *s; -@@ -801,10 +800,10 @@ static DeviceState *ppce500_init_mpic(PPCE500MachineState *pms, - if (kvm_enabled()) { - Error *err = NULL; - -- if (machine_kernel_irqchip_allowed(machine)) { -+ if (kvm_kernel_irqchip_allowed()) { - dev = ppce500_init_mpic_kvm(pmc, irqs, &err); - } -- if (machine_kernel_irqchip_required(machine) && !dev) { -+ if (kvm_kernel_irqchip_required() && !dev) { - error_reportf_err(err, - "kernel_irqchip requested but unavailable: "); - exit(1); -diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c -index 9da423658a..f388d07bf9 100644 ---- a/hw/ppc/spapr_irq.c -+++ b/hw/ppc/spapr_irq.c -@@ -75,12 +75,11 @@ int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn, - uint32_t nr_servers, - Error **errp) - { -- MachineState *machine = MACHINE(qdev_get_machine()); - Error *local_err = NULL; - -- if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) { -+ if (kvm_enabled() && kvm_kernel_irqchip_allowed()) { - if (fn(intc, nr_servers, &local_err) < 0) { -- if (machine_kernel_irqchip_required(machine)) { -+ if (kvm_kernel_irqchip_required()) { - error_prepend(&local_err, - "kernel_irqchip requested but unavailable: "); - error_propagate(errp, local_err); -@@ -185,7 +184,7 @@ static int spapr_irq_check(SpaprMachineState *spapr, Error **errp) - */ - if (kvm_enabled() && - spapr->irq == 
&spapr_irq_dual && -- machine_kernel_irqchip_required(machine) && -+ kvm_kernel_irqchip_required() && - xics_kvm_has_broken_disconnect(spapr)) { - error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on"); - return -1; -@@ -288,20 +287,13 @@ uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr) - - void spapr_irq_init(SpaprMachineState *spapr, Error **errp) - { -- MachineState *machine = MACHINE(spapr); - SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); - -- if (machine_kernel_irqchip_split(machine)) { -+ if (kvm_enabled() && kvm_kernel_irqchip_split()) { - error_setg(errp, "kernel_irqchip split mode not supported on pseries"); - return; - } - -- if (!kvm_enabled() && machine_kernel_irqchip_required(machine)) { -- error_setg(errp, -- "kernel_irqchip requested but only available with KVM"); -- return; -- } -- - if (spapr_irq_check(spapr, errp) < 0) { - return; - } -diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h -index 9fe233b9bf..aaf2a502e8 100644 ---- a/include/sysemu/kvm.h -+++ b/include/sysemu/kvm.h -@@ -519,10 +519,13 @@ void kvm_pc_gsi_handler(void *opaque, int n, int level); - void kvm_pc_setup_irq_routing(bool pci_enabled); - void kvm_init_irq_routing(KVMState *s); - -+bool kvm_kernel_irqchip_allowed(void); -+bool kvm_kernel_irqchip_required(void); -+bool kvm_kernel_irqchip_split(void); -+ - /** - * kvm_arch_irqchip_create: - * @KVMState: The KVMState pointer -- * @MachineState: The MachineState pointer - * - * Allow architectures to create an in-kernel irq chip themselves. 
- * -@@ -530,7 +533,7 @@ void kvm_init_irq_routing(KVMState *s); - * 0: irq chip was not created - * > 0: irq chip was created - */ --int kvm_arch_irqchip_create(MachineState *ms, KVMState *s); -+int kvm_arch_irqchip_create(KVMState *s); - - /** - * kvm_set_one_reg - set a register value in KVM via KVM_SET_ONE_REG ioctl -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index 4be9497402..418bcedc3e 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -861,11 +861,11 @@ void kvm_arch_init_irq_routing(KVMState *s) - { - } - --int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) -+int kvm_arch_irqchip_create(KVMState *s) - { -- if (machine_kernel_irqchip_split(ms)) { -- perror("-machine kernel_irqchip=split is not supported on ARM."); -- exit(1); -+ if (kvm_kernel_irqchip_split()) { -+ perror("-machine kernel_irqchip=split is not supported on ARM."); -+ exit(1); - } - - /* If we can create the VGIC using the newer device control API, we -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index fcc8f7d1f3..f5c17e0028 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -4532,10 +4532,10 @@ void kvm_arch_init_irq_routing(KVMState *s) - } - } - --int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) -+int kvm_arch_irqchip_create(KVMState *s) - { - int ret; -- if (machine_kernel_irqchip_split(ms)) { -+ if (kvm_kernel_irqchip_split()) { - ret = kvm_vm_enable_cap(s, KVM_CAP_SPLIT_IRQCHIP, 0, 24); - if (ret) { - error_report("Could not enable split irqchip mode: %s", -diff --git a/target/mips/kvm.c b/target/mips/kvm.c -index 578bc14625..de3e26ef1f 100644 ---- a/target/mips/kvm.c -+++ b/target/mips/kvm.c -@@ -57,7 +57,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) - return 0; - } - --int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) -+int kvm_arch_irqchip_create(KVMState *s) - { - return 0; - } -diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index c77f9848ec..461dc6dae1 100644 ---- a/target/ppc/kvm.c -+++ b/target/ppc/kvm.c -@@ 
-152,7 +152,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) - return 0; - } - --int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) -+int kvm_arch_irqchip_create(KVMState *s) - { - return 0; - } -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 84d7cadd09..c589ef9034 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -386,7 +386,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) - return 0; - } - --int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) -+int kvm_arch_irqchip_create(KVMState *s) - { - return 0; - } --- -2.27.0 - diff --git a/SOURCES/kvm-iotests-026-Move-v3-exclusive-test-to-new-file.patch b/SOURCES/kvm-iotests-026-Move-v3-exclusive-test-to-new-file.patch deleted file mode 100644 index a50bff9..0000000 --- a/SOURCES/kvm-iotests-026-Move-v3-exclusive-test-to-new-file.patch +++ /dev/null @@ -1,241 +0,0 @@ -From a4a984e67e276e643b8a51f39ca426d0967754a0 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Mon, 13 Jul 2020 14:24:51 -0400 -Subject: [PATCH 4/4] iotests/026: Move v3-exclusive test to new file -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Max Reitz -Message-id: <20200713142451.289703-5-mreitz@redhat.com> -Patchwork-id: 97956 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 4/4] iotests/026: Move v3-exclusive test to new file -Bugzilla: 1807057 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Kevin Wolf - -data_file does not work with v2, and we probably want 026 to keep -working for v2 images. Thus, open a new file for v3-exclusive error -path test cases. 
- -Fixes: 81311255f217859413c94f2cd9cebf2684bbda94 - (“iotests/026: Test EIO on allocation in a data-file”) -Signed-off-by: Max Reitz -Message-Id: <20200311140707.1243218-1-mreitz@redhat.com> -Reviewed-by: John Snow -Tested-by: John Snow -Signed-off-by: Max Reitz -(cherry picked from commit c264e5d2f9f5d73977eac8e5d084f727b3d07ea9) - -Conflicts: - tests/qemu-iotests/group - - As per usual. - -Signed-off-by: Max Reitz -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/026 | 31 ----------- - tests/qemu-iotests/026.out | 6 -- - tests/qemu-iotests/026.out.nocache | 6 -- - tests/qemu-iotests/289 | 89 ++++++++++++++++++++++++++++++ - tests/qemu-iotests/289.out | 8 +++ - tests/qemu-iotests/group | 1 + - 6 files changed, 98 insertions(+), 43 deletions(-) - create mode 100755 tests/qemu-iotests/289 - create mode 100644 tests/qemu-iotests/289.out - -diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 -index c1c96a41d9..3afd708863 100755 ---- a/tests/qemu-iotests/026 -+++ b/tests/qemu-iotests/026 -@@ -237,37 +237,6 @@ $QEMU_IO -c "write 0 $CLUSTER_SIZE" "$BLKDBG_TEST_IMG" | _filter_qemu_io - - _check_test_img - --echo --echo === Avoid freeing external data clusters on failure === --echo -- --# Similar test as the last one, except we test what happens when there --# is an error when writing to an external data file instead of when --# writing to a preallocated zero cluster --_make_test_img -o "data_file=$TEST_IMG.data_file" $CLUSTER_SIZE -- --# Put blkdebug above the data-file, and a raw node on top of that so --# that blkdebug will see a write_aio event and emit an error --$QEMU_IO -c "write 0 $CLUSTER_SIZE" \ -- "json:{ -- 'driver': 'qcow2', -- 'file': { 'driver': 'file', 'filename': '$TEST_IMG' }, -- 'data-file': { -- 'driver': 'raw', -- 'file': { -- 'driver': 'blkdebug', -- 'config': '$TEST_DIR/blkdebug.conf', -- 'image': { -- 'driver': 'file', -- 'filename': '$TEST_IMG.data_file' -- } -- } -- } -- }" \ -- | _filter_qemu_io -- --_check_test_img -- - # 
success, all done - echo "*** done" - rm -f $seq.full -diff --git a/tests/qemu-iotests/026.out b/tests/qemu-iotests/026.out -index c1b3b58482..83989996ff 100644 ---- a/tests/qemu-iotests/026.out -+++ b/tests/qemu-iotests/026.out -@@ -653,10 +653,4 @@ wrote 1024/1024 bytes at offset 0 - 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - write failed: Input/output error - No errors were found on the image. -- --=== Avoid freeing external data clusters on failure === -- --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1024 data_file=TEST_DIR/t.IMGFMT.data_file --write failed: Input/output error --No errors were found on the image. - *** done -diff --git a/tests/qemu-iotests/026.out.nocache b/tests/qemu-iotests/026.out.nocache -index 8d5001648a..9359d26d7e 100644 ---- a/tests/qemu-iotests/026.out.nocache -+++ b/tests/qemu-iotests/026.out.nocache -@@ -661,10 +661,4 @@ wrote 1024/1024 bytes at offset 0 - 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - write failed: Input/output error - No errors were found on the image. -- --=== Avoid freeing external data clusters on failure === -- --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1024 data_file=TEST_DIR/t.IMGFMT.data_file --write failed: Input/output error --No errors were found on the image. - *** done -diff --git a/tests/qemu-iotests/289 b/tests/qemu-iotests/289 -new file mode 100755 -index 0000000000..1c11d4030e ---- /dev/null -+++ b/tests/qemu-iotests/289 -@@ -0,0 +1,89 @@ -+#!/usr/bin/env bash -+# -+# qcow2 v3-exclusive error path testing -+# (026 tests paths common to v2 and v3) -+# -+# Copyright (C) 2020 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. 
-+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+seq=$(basename $0) -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+ rm "$TEST_DIR/blkdebug.conf" -+ rm -f "$TEST_IMG.data_file" -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+. ./common.rc -+. ./common.filter -+. ./common.pattern -+ -+_supported_fmt qcow2 -+_supported_proto file -+# This is a v3-exclusive test; -+# As for data_file, error paths often very much depend on whether -+# there is an external data file or not; so we create one exactly when -+# we want to test it -+_unsupported_imgopts 'compat=0.10' data_file -+ -+echo -+echo === Avoid freeing external data clusters on failure === -+echo -+ -+cat > "$TEST_DIR/blkdebug.conf" < -Date: Mon, 13 Jul 2020 14:24:50 -0400 -Subject: [PATCH 3/4] iotests/026: Test EIO on allocation in a data-file - -RH-Author: Max Reitz -Message-id: <20200713142451.289703-4-mreitz@redhat.com> -Patchwork-id: 97955 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 3/4] iotests/026: Test EIO on allocation in a data-file -Bugzilla: 1807057 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Kevin Wolf - -Test what happens when writing data to an external data file, where the -write requires an L2 entry to be allocated, but the data write fails. - -Signed-off-by: Max Reitz -Message-Id: <20200225143130.111267-4-mreitz@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 81311255f217859413c94f2cd9cebf2684bbda94) -Signed-off-by: Max Reitz -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/026 | 32 ++++++++++++++++++++++++++++++ - tests/qemu-iotests/026.out | 6 ++++++ - tests/qemu-iotests/026.out.nocache | 6 ++++++ - 3 files changed, 44 insertions(+) - -diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 -index d89729697f..c1c96a41d9 100755 ---- a/tests/qemu-iotests/026 -+++ b/tests/qemu-iotests/026 -@@ -30,6 +30,7 @@ _cleanup() - { - _cleanup_test_img - rm "$TEST_DIR/blkdebug.conf" -+ rm -f "$TEST_IMG.data_file" - } - trap "_cleanup; exit \$status" 0 1 2 3 15 - -@@ -236,6 +237,37 @@ $QEMU_IO -c "write 0 $CLUSTER_SIZE" "$BLKDBG_TEST_IMG" | _filter_qemu_io - - _check_test_img - -+echo -+echo === Avoid freeing external data clusters on failure === -+echo -+ -+# Similar test as the last one, except we test what happens when there -+# is an error when writing to an external data file instead of when -+# writing to a preallocated zero cluster -+_make_test_img -o "data_file=$TEST_IMG.data_file" $CLUSTER_SIZE -+ -+# Put blkdebug above the data-file, and a raw node on top of that so -+# that blkdebug will see a write_aio event and emit an error -+$QEMU_IO -c "write 0 $CLUSTER_SIZE" \ -+ "json:{ -+ 'driver': 'qcow2', -+ 'file': { 'driver': 'file', 'filename': '$TEST_IMG' }, -+ 'data-file': { -+ 'driver': 'raw', -+ 'file': { -+ 'driver': 'blkdebug', -+ 'config': '$TEST_DIR/blkdebug.conf', -+ 'image': { -+ 'driver': 'file', -+ 'filename': '$TEST_IMG.data_file' -+ } -+ } -+ } -+ }" \ -+ | _filter_qemu_io -+ -+_check_test_img -+ - # success, all done - echo "*** done" - rm -f $seq.full -diff --git a/tests/qemu-iotests/026.out b/tests/qemu-iotests/026.out -index 83989996ff..c1b3b58482 100644 ---- a/tests/qemu-iotests/026.out -+++ b/tests/qemu-iotests/026.out -@@ -653,4 +653,10 @@ wrote 1024/1024 bytes at offset 0 - 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - write failed: Input/output error - No errors were found on the image. 
-+ -+=== Avoid freeing external data clusters on failure === -+ -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1024 data_file=TEST_DIR/t.IMGFMT.data_file -+write failed: Input/output error -+No errors were found on the image. - *** done -diff --git a/tests/qemu-iotests/026.out.nocache b/tests/qemu-iotests/026.out.nocache -index 9359d26d7e..8d5001648a 100644 ---- a/tests/qemu-iotests/026.out.nocache -+++ b/tests/qemu-iotests/026.out.nocache -@@ -661,4 +661,10 @@ wrote 1024/1024 bytes at offset 0 - 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - write failed: Input/output error - No errors were found on the image. -+ -+=== Avoid freeing external data clusters on failure === -+ -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1024 data_file=TEST_DIR/t.IMGFMT.data_file -+write failed: Input/output error -+No errors were found on the image. - *** done --- -2.27.0 - diff --git a/SOURCES/kvm-iotests-026-Test-EIO-on-preallocated-zero-cluster.patch b/SOURCES/kvm-iotests-026-Test-EIO-on-preallocated-zero-cluster.patch deleted file mode 100644 index 36d609c..0000000 --- a/SOURCES/kvm-iotests-026-Test-EIO-on-preallocated-zero-cluster.patch +++ /dev/null @@ -1,102 +0,0 @@ -From b1035096f2d46e2146704d1db9581c6d2131d1f4 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Mon, 13 Jul 2020 14:24:49 -0400 -Subject: [PATCH 2/4] iotests/026: Test EIO on preallocated zero cluster - -RH-Author: Max Reitz -Message-id: <20200713142451.289703-3-mreitz@redhat.com> -Patchwork-id: 97953 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 2/4] iotests/026: Test EIO on preallocated zero cluster -Bugzilla: 1807057 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Kevin Wolf - -Test what happens when writing data to a preallocated zero cluster, but -the data write fails. 
- -Signed-off-by: Max Reitz -Message-Id: <20200225143130.111267-3-mreitz@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 31ab00f3747c00fdbb9027cea644b40dd1405480) -Signed-off-by: Max Reitz -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/026 | 21 +++++++++++++++++++++ - tests/qemu-iotests/026.out | 10 ++++++++++ - tests/qemu-iotests/026.out.nocache | 10 ++++++++++ - 3 files changed, 41 insertions(+) - -diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 -index 3430029ed6..d89729697f 100755 ---- a/tests/qemu-iotests/026 -+++ b/tests/qemu-iotests/026 -@@ -215,6 +215,27 @@ _make_test_img 64M - $QEMU_IO -c "write 0 1M" -c "write 0 1M" "$BLKDBG_TEST_IMG" | _filter_qemu_io - _check_test_img - -+echo -+echo === Avoid freeing preallocated zero clusters on failure === -+echo -+ -+cat > "$TEST_DIR/blkdebug.conf" < -Date: Wed, 3 Jun 2020 16:03:17 +0100 -Subject: [PATCH 18/26] iotests/055: refactor compressed backup to vmdk - -RH-Author: Kevin Wolf -Message-id: <20200603160325.67506-4-kwolf@redhat.com> -Patchwork-id: 97104 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 03/11] iotests/055: refactor compressed backup to vmdk -Bugzilla: 1778593 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -From: Vladimir Sementsov-Ogievskiy - -Instead of looping in each test, let's better refactor vmdk target case -as a subclass. - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200430124713.3067-6-vsementsov@virtuozzo.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 8e8372944e5e097e98844b4db10f867689065e16) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/055 | 70 ++++++++++++++++++++++++---------------------- - tests/qemu-iotests/055.out | 4 +-- - 2 files changed, 39 insertions(+), 35 deletions(-) - -diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 -index eb50c9f..8666601 100755 ---- a/tests/qemu-iotests/055 -+++ b/tests/qemu-iotests/055 -@@ -450,10 +450,9 @@ class TestSingleTransaction(iotests.QMPTestCase): - self.assert_no_active_block_jobs() - - --class TestDriveCompression(iotests.QMPTestCase): -+class TestCompressedToQcow2(iotests.QMPTestCase): - image_len = 64 * 1024 * 1024 # MB -- fmt_supports_compression = [{'type': 'qcow2', 'args': ()}, -- {'type': 'vmdk', 'args': ('-o', 'subformat=streamOptimized')}] -+ target_fmt = {'type': 'qcow2', 'args': ()} - - def tearDown(self): - self.vm.shutdown() -@@ -463,19 +462,20 @@ class TestDriveCompression(iotests.QMPTestCase): - except OSError: - pass - -- def do_prepare_drives(self, fmt, args, attach_target): -+ def do_prepare_drives(self, attach_target): - self.vm = iotests.VM().add_drive('blkdebug::' + test_img) - -- qemu_img('create', '-f', fmt, blockdev_target_img, -- str(TestDriveCompression.image_len), *args) -+ qemu_img('create', '-f', self.target_fmt['type'], blockdev_target_img, -+ str(self.image_len), *self.target_fmt['args']) - if attach_target: - self.vm.add_drive(blockdev_target_img, -- img_format=fmt, interface="none") -+ img_format=self.target_fmt['type'], -+ interface="none") - - self.vm.launch() - -- def do_test_compress_complete(self, cmd, format, attach_target, **args): -- self.do_prepare_drives(format['type'], format['args'], attach_target) -+ def do_test_compress_complete(self, cmd, attach_target, **args): -+ self.do_prepare_drives(attach_target) - - self.assert_no_active_block_jobs() - -@@ -486,21 +486,21 @@ class TestDriveCompression(iotests.QMPTestCase): - - self.vm.shutdown() - self.assertTrue(iotests.compare_images(test_img, blockdev_target_img, -- iotests.imgfmt, format['type']), -+ iotests.imgfmt, 
-+ self.target_fmt['type']), - 'target image does not match source after backup') - - def test_complete_compress_drive_backup(self): -- for format in TestDriveCompression.fmt_supports_compression: -- self.do_test_compress_complete('drive-backup', format, False, -- target=blockdev_target_img, mode='existing') -+ self.do_test_compress_complete('drive-backup', False, -+ target=blockdev_target_img, -+ mode='existing') - - def test_complete_compress_blockdev_backup(self): -- for format in TestDriveCompression.fmt_supports_compression: -- self.do_test_compress_complete('blockdev-backup', format, True, -- target='drive1') -+ self.do_test_compress_complete('blockdev-backup', -+ True, target='drive1') - -- def do_test_compress_cancel(self, cmd, format, attach_target, **args): -- self.do_prepare_drives(format['type'], format['args'], attach_target) -+ def do_test_compress_cancel(self, cmd, attach_target, **args): -+ self.do_prepare_drives(attach_target) - - self.assert_no_active_block_jobs() - -@@ -514,17 +514,16 @@ class TestDriveCompression(iotests.QMPTestCase): - self.vm.shutdown() - - def test_compress_cancel_drive_backup(self): -- for format in TestDriveCompression.fmt_supports_compression: -- self.do_test_compress_cancel('drive-backup', format, False, -- target=blockdev_target_img, mode='existing') -+ self.do_test_compress_cancel('drive-backup', False, -+ target=blockdev_target_img, -+ mode='existing') - - def test_compress_cancel_blockdev_backup(self): -- for format in TestDriveCompression.fmt_supports_compression: -- self.do_test_compress_cancel('blockdev-backup', format, True, -- target='drive1') -+ self.do_test_compress_cancel('blockdev-backup', True, -+ target='drive1') - -- def do_test_compress_pause(self, cmd, format, attach_target, **args): -- self.do_prepare_drives(format['type'], format['args'], attach_target) -+ def do_test_compress_pause(self, cmd, attach_target, **args): -+ self.do_prepare_drives(attach_target) - - self.assert_no_active_block_jobs() - -@@ 
-550,18 +549,23 @@ class TestDriveCompression(iotests.QMPTestCase): - - self.vm.shutdown() - self.assertTrue(iotests.compare_images(test_img, blockdev_target_img, -- iotests.imgfmt, format['type']), -+ iotests.imgfmt, -+ self.target_fmt['type']), - 'target image does not match source after backup') - - def test_compress_pause_drive_backup(self): -- for format in TestDriveCompression.fmt_supports_compression: -- self.do_test_compress_pause('drive-backup', format, False, -- target=blockdev_target_img, mode='existing') -+ self.do_test_compress_pause('drive-backup', False, -+ target=blockdev_target_img, -+ mode='existing') - - def test_compress_pause_blockdev_backup(self): -- for format in TestDriveCompression.fmt_supports_compression: -- self.do_test_compress_pause('blockdev-backup', format, True, -- target='drive1') -+ self.do_test_compress_pause('blockdev-backup', True, -+ target='drive1') -+ -+ -+class TestCompressedToVmdk(TestCompressedToQcow2): -+ target_fmt = {'type': 'vmdk', 'args': ('-o', 'subformat=streamOptimized')} -+ - - if __name__ == '__main__': - iotests.main(supported_fmts=['raw', 'qcow2'], -diff --git a/tests/qemu-iotests/055.out b/tests/qemu-iotests/055.out -index 5ce2f9a..5c26d15 100644 ---- a/tests/qemu-iotests/055.out -+++ b/tests/qemu-iotests/055.out -@@ -1,5 +1,5 @@ --.............................. -+.................................... 
- ---------------------------------------------------------------------- --Ran 30 tests -+Ran 36 tests - - OK --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-055-skip-vmdk-target-tests-if-vmdk-is-not-wh.patch b/SOURCES/kvm-iotests-055-skip-vmdk-target-tests-if-vmdk-is-not-wh.patch deleted file mode 100644 index 260d511..0000000 --- a/SOURCES/kvm-iotests-055-skip-vmdk-target-tests-if-vmdk-is-not-wh.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 9a0ca4797cbd029dab9209d88f8c81b78ded8fd0 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 3 Jun 2020 16:03:18 +0100 -Subject: [PATCH 19/26] iotests/055: skip vmdk target tests if vmdk is not - whitelisted - -RH-Author: Kevin Wolf -Message-id: <20200603160325.67506-5-kwolf@redhat.com> -Patchwork-id: 97101 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 04/11] iotests/055: skip vmdk target tests if vmdk is not whitelisted -Bugzilla: 1778593 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -From: Vladimir Sementsov-Ogievskiy - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200430124713.3067-7-vsementsov@virtuozzo.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 761cd2e791eae38c3d08ea5f83309ce58bb85ff7) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/055 | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 -index 8666601..c9cdc06 100755 ---- a/tests/qemu-iotests/055 -+++ b/tests/qemu-iotests/055 -@@ -566,6 +566,10 @@ class TestCompressedToQcow2(iotests.QMPTestCase): - class TestCompressedToVmdk(TestCompressedToQcow2): - target_fmt = {'type': 'vmdk', 'args': ('-o', 'subformat=streamOptimized')} - -+ @iotests.skip_if_unsupported(['vmdk']) -+ def setUp(self): -+ pass -+ - - if __name__ == '__main__': - iotests.main(supported_fmts=['raw', 'qcow2'], --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-109-Don-t-mirror-with-mismatched-size.patch b/SOURCES/kvm-iotests-109-Don-t-mirror-with-mismatched-size.patch deleted file mode 100644 index c71bcba..0000000 --- a/SOURCES/kvm-iotests-109-Don-t-mirror-with-mismatched-size.patch +++ /dev/null @@ -1,387 +0,0 @@ -From 2202321b549dda551190d919a5a1cbee0fab8c90 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 3 Jun 2020 16:03:22 +0100 -Subject: [PATCH 23/26] iotests/109: Don't mirror with mismatched size - -RH-Author: Kevin Wolf -Message-id: <20200603160325.67506-9-kwolf@redhat.com> -Patchwork-id: 97105 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 08/11] iotests/109: Don't mirror with mismatched size -Bugzilla: 1778593 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -This patch makes the raw image the same size as the file in a different -format that is mirrored as raw to it to avoid errors when mirror starts -to enforce that source and target are the same size. - -We check only that the first 512 bytes are zeroed (instead of 64k) -because some image formats create image files that are smaller than 64k, -so trying to read 64k would result in I/O errors. Apart from this, 512 -is more appropriate anyway because the raw format driver protects -specifically the first 512 bytes. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20200511135825.219437-2-kwolf@redhat.com> -Reviewed-by: Max Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit ffa41a62d0b0e6d91f2071328befa046d56993e1) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/109 | 10 +++--- - tests/qemu-iotests/109.out | 74 +++++++++++++++++----------------------- - tests/qemu-iotests/common.filter | 5 +++ - 3 files changed, 41 insertions(+), 48 deletions(-) - -diff --git a/tests/qemu-iotests/109 b/tests/qemu-iotests/109 -index 9897ceb..190c35e 100755 ---- a/tests/qemu-iotests/109 -+++ b/tests/qemu-iotests/109 -@@ -76,14 +76,14 @@ for fmt in qcow qcow2 qed vdi vmdk vpc; do - echo "=== Writing a $fmt header into raw ===" - echo - -- _make_test_img 64M - TEST_IMG="$TEST_IMG.src" IMGFMT=$fmt _make_test_img 64M -+ _make_test_img $(du -b "$TEST_IMG.src" | cut -f1) | _filter_img_create_size - - # This first test should fail: The image format was probed, we may not - # write an image header at the start of the image - run_qemu "$TEST_IMG" "$TEST_IMG.src" "" "BLOCK_JOB_ERROR" | - _filter_block_job_len -- $QEMU_IO -c 'read -P 0 0 64k' "$TEST_IMG" | _filter_qemu_io -+ $QEMU_IO -c 'read -P 0 0 512' "$TEST_IMG" | _filter_qemu_io - - - # When raw was explicitly specified, the same must succeed -@@ -102,12 +102,12 @@ for sample_img in empty.bochs iotest-dirtylog-10G-4M.vhdx parallels-v1 \ - - # Can't use _use_sample_img because that isn't designed to be used multiple - # times and it overwrites $TEST_IMG (both breaks cleanup) -- _make_test_img 64M - bzcat "$SAMPLE_IMG_DIR/$sample_img.bz2" > "$TEST_IMG.src" -+ _make_test_img $(du -b "$TEST_IMG.src" | cut -f1) | _filter_img_create_size - - run_qemu "$TEST_IMG" "$TEST_IMG.src" "" "BLOCK_JOB_ERROR" | - _filter_block_job_offset | _filter_block_job_len -- $QEMU_IO -c 'read -P 0 0 64k' "$TEST_IMG" | _filter_qemu_io -+ $QEMU_IO -c 'read -P 0 0 512' "$TEST_IMG" | 
_filter_qemu_io - - run_qemu "$TEST_IMG" "$TEST_IMG.src" "'format': 'raw'," "BLOCK_JOB_READY" - $QEMU_IMG compare -f raw -F raw "$TEST_IMG" "$TEST_IMG.src" -@@ -118,8 +118,8 @@ echo "=== Write legitimate MBR into raw ===" - echo - - for sample_img in grub_mbr.raw; do -- _make_test_img 64M - bzcat "$SAMPLE_IMG_DIR/$sample_img.bz2" > "$TEST_IMG.src" -+ _make_test_img $(du -b "$TEST_IMG.src" | cut -f1) | _filter_img_create_size - - run_qemu "$TEST_IMG" "$TEST_IMG.src" "" "BLOCK_JOB_READY" - $QEMU_IMG compare -f raw -F raw "$TEST_IMG" "$TEST_IMG.src" -diff --git a/tests/qemu-iotests/109.out b/tests/qemu-iotests/109.out -index 884f65f..ad739df 100644 ---- a/tests/qemu-iotests/109.out -+++ b/tests/qemu-iotests/109.out -@@ -2,8 +2,8 @@ QA output created by 109 - - === Writing a qcow header into raw === - --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 - Formatting 'TEST_DIR/t.raw.src', fmt=IMGFMT size=67108864 -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -23,8 +23,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"quit"} - {"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} --read 65536/65536 bytes at offset 0 --64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -43,13 +43,12 @@ read 65536/65536 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": 
{"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} --Warning: Image size mismatch! - Images are identical. - - === Writing a qcow2 header into raw === - --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 - Formatting 'TEST_DIR/t.raw.src', fmt=IMGFMT size=67108864 -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -69,8 +68,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"quit"} - {"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} --read 65536/65536 bytes at offset 0 --64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -89,13 +88,12 @@ read 65536/65536 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 197120, "offset": 197120, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", 
"id": "src"}} --Warning: Image size mismatch! - Images are identical. - - === Writing a qed header into raw === - --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 - Formatting 'TEST_DIR/t.raw.src', fmt=IMGFMT size=67108864 -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -115,8 +113,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"quit"} - {"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} --read 65536/65536 bytes at offset 0 --64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -135,13 +133,12 @@ read 65536/65536 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} --Warning: Image size mismatch! - Images are identical. 
- - === Writing a vdi header into raw === - --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 - Formatting 'TEST_DIR/t.raw.src', fmt=IMGFMT size=67108864 -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -161,8 +158,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"quit"} - {"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} --read 65536/65536 bytes at offset 0 --64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -181,13 +178,12 @@ read 65536/65536 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} --Warning: Image size mismatch! - Images are identical. 
- - === Writing a vmdk header into raw === - --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 - Formatting 'TEST_DIR/t.raw.src', fmt=IMGFMT size=67108864 -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -207,8 +203,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"quit"} - {"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} --read 65536/65536 bytes at offset 0 --64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -227,13 +223,12 @@ read 65536/65536 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 65536, "offset": 65536, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} --Warning: Image size mismatch! - Images are identical. 
- - === Writing a vpc header into raw === - --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 - Formatting 'TEST_DIR/t.raw.src', fmt=IMGFMT size=67108864 -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -253,8 +248,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"quit"} - {"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} --read 65536/65536 bytes at offset 0 --64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -273,12 +268,11 @@ read 65536/65536 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} --Warning: Image size mismatch! - Images are identical. 
- - === Copying sample image empty.bochs into raw === - --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -298,8 +292,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"quit"} - {"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} --read 65536/65536 bytes at offset 0 --64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -318,12 +312,11 @@ read 65536/65536 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} --Warning: Image size mismatch! - Images are identical. 
- - === Copying sample image iotest-dirtylog-10G-4M.vhdx into raw === - --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -343,8 +336,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"quit"} - {"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} --read 65536/65536 bytes at offset 0 --64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -363,12 +356,11 @@ read 65536/65536 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 31457280, "offset": 31457280, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} --Warning: Image size mismatch! - Images are identical. 
- - === Copying sample image parallels-v1 into raw === - --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -388,8 +380,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"quit"} - {"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} --read 65536/65536 bytes at offset 0 --64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -408,12 +400,11 @@ read 65536/65536 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} --Warning: Image size mismatch! - Images are identical. 
- - === Copying sample image simple-pattern.cloop into raw === - --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -433,8 +424,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"execute":"quit"} - {"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} --read 65536/65536 bytes at offset 0 --64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'format': 'IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -453,12 +444,11 @@ read 65536/65536 bytes at offset 0 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 2048, "offset": 2048, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} --Warning: Image size mismatch! - Images are identical. 
- - === Write legitimate MBR into raw === - --Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE - { 'execute': 'qmp_capabilities' } - {"return": {}} - {'execute':'drive-mirror', 'arguments':{ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', 'mode': 'existing', 'sync': 'full'}} -@@ -480,7 +470,6 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} --Warning: Image size mismatch! - Images are identical. - { 'execute': 'qmp_capabilities' } - {"return": {}} -@@ -500,6 +489,5 @@ Images are identical. - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} --Warning: Image size mismatch! - Images are identical. 
- *** done -diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter -index 5367dee..c8e8663 100644 ---- a/tests/qemu-iotests/common.filter -+++ b/tests/qemu-iotests/common.filter -@@ -149,6 +149,11 @@ _filter_img_create() - -e "s# force_size=\\(on\\|off\\)##g" - } - -+_filter_img_create_size() -+{ -+ $SED -e "s# size=[0-9]\\+# size=SIZE#g" -+} -+ - _filter_img_info() - { - if [[ "$1" == "--format-specific" ]]; then --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-229-Use-blkdebug-to-inject-an-error.patch b/SOURCES/kvm-iotests-229-Use-blkdebug-to-inject-an-error.patch deleted file mode 100644 index ef8807c..0000000 --- a/SOURCES/kvm-iotests-229-Use-blkdebug-to-inject-an-error.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 104c8f6210bf573cf39c2a14cdb0b081baaaa3f0 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 3 Jun 2020 16:03:23 +0100 -Subject: [PATCH 24/26] iotests/229: Use blkdebug to inject an error - -RH-Author: Kevin Wolf -Message-id: <20200603160325.67506-10-kwolf@redhat.com> -Patchwork-id: 97108 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 09/11] iotests/229: Use blkdebug to inject an error -Bugzilla: 1778593 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -229 relies on the mirror running into an I/O error when the target is -smaller than the source. After changing mirror to catch this condition -while starting the job, this test case won't get a job that is paused -for an I/O error any more. Use blkdebug instead to inject an error. - -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Message-Id: <20200511135825.219437-3-kwolf@redhat.com> -Reviewed-by: Max Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit d89ac3cf305b28c024a76805a84d75c0ee1e786f) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/229 | 18 +++++++++++++----- - tests/qemu-iotests/229.out | 6 +++--- - 2 files changed, 16 insertions(+), 8 deletions(-) - -diff --git a/tests/qemu-iotests/229 b/tests/qemu-iotests/229 -index e18a464..511fbc0 100755 ---- a/tests/qemu-iotests/229 -+++ b/tests/qemu-iotests/229 -@@ -32,6 +32,7 @@ _cleanup() - _cleanup_qemu - _cleanup_test_img - rm -f "$TEST_IMG" "$DEST_IMG" -+ rm -f "$TEST_DIR/blkdebug.conf" - } - trap "_cleanup; exit \$status" 0 1 2 3 15 - -@@ -48,11 +49,10 @@ _supported_os Linux - - DEST_IMG="$TEST_DIR/d.$IMGFMT" - TEST_IMG="$TEST_DIR/b.$IMGFMT" -+BLKDEBUG_CONF="$TEST_DIR/blkdebug.conf" - - _make_test_img 2M -- --# destination for mirror will be too small, causing error --TEST_IMG=$DEST_IMG _make_test_img 1M -+TEST_IMG=$DEST_IMG _make_test_img 2M - - $QEMU_IO -c 'write 0 2M' "$TEST_IMG" | _filter_qemu_io - -@@ -66,11 +66,18 @@ echo - echo '=== Starting drive-mirror, causing error & stop ===' - echo - -+cat > "$BLKDEBUG_CONF" < +Date: Fri, 4 Feb 2022 12:10:12 +0100 +Subject: [PATCH 6/6] iotests/281: Let NBD connection yield in iothread + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [6/6] a23706f34022d301eb7ffc84fc0d0a77d72b9844 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Put an NBD block device into an I/O thread, and then read data from it, +hoping that the NBD connection will yield during that read. When it +does, the coroutine must be reentered in the block device's I/O thread, +which will only happen if the NBD block driver attaches the connection's +QIOChannel to the new AioContext. It did not do that after 4ddb5d2fde +("block/nbd: drop connection_co") and prior to "block/nbd: Move s->ioc +on AioContext change", which would cause an assertion failure. 
+ +To improve our chances of yielding, the NBD server is throttled to +reading 64 kB/s, and the NBD client reads 128 kB, so it should yield at +some point. + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 8cfbe929e8c26050f0a4580a1606a370a947d4ce) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/281 | 28 +++++++++++++++++++++++++--- + tests/qemu-iotests/281.out | 4 ++-- + 2 files changed, 27 insertions(+), 5 deletions(-) + +diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 +index 13c588be75..b2ead7f388 100755 +--- a/tests/qemu-iotests/281 ++++ b/tests/qemu-iotests/281 +@@ -253,8 +253,9 @@ class TestYieldingAndTimers(iotests.QMPTestCase): + self.create_nbd_export() + + # Simple VM with an NBD block device connected to the NBD export +- # provided by the QSD ++ # provided by the QSD, and an (initially unused) iothread + self.vm = iotests.VM() ++ self.vm.add_object('iothread,id=iothr') + self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' + + f'server.path={self.sock},export=exp,' + + 'reconnect-delay=1') +@@ -293,19 +294,40 @@ class TestYieldingAndTimers(iotests.QMPTestCase): + # thus not see the error, and so the test will pass.) + time.sleep(2) + ++ def test_yield_in_iothread(self): ++ # Move the NBD node to the I/O thread; the NBD block driver should ++ # attach the connection's QIOChannel to that thread's AioContext, too ++ result = self.vm.qmp('x-blockdev-set-iothread', ++ node_name='nbd', iothread='iothr') ++ self.assert_qmp(result, 'return', {}) ++ ++ # Do some I/O that will be throttled by the QSD, so that the network ++ # connection hopefully will yield here. When it is resumed, it must ++ # then be resumed in the I/O thread's AioContext. 
++ result = self.vm.qmp('human-monitor-command', ++ command_line='qemu-io nbd "read 0 128K"') ++ self.assert_qmp(result, 'return', '') ++ + def create_nbd_export(self): + assert self.qsd is None + +- # Simple NBD export of a null-co BDS ++ # Export a throttled null-co BDS: Reads are throttled (max 64 kB/s), ++ # writes are not. + self.qsd = QemuStorageDaemon( ++ '--object', ++ 'throttle-group,id=thrgr,x-bps-read=65536,x-bps-read-max=65536', ++ + '--blockdev', + 'null-co,node-name=null,read-zeroes=true', + ++ '--blockdev', ++ 'throttle,node-name=thr,file=null,throttle-group=thrgr', ++ + '--nbd-server', + f'addr.type=unix,addr.path={self.sock}', + + '--export', +- 'nbd,id=exp,node-name=null,name=exp,writable=true' ++ 'nbd,id=exp,node-name=thr,name=exp,writable=true' + ) + + def stop_nbd_export(self): +diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out +index 914e3737bd..3f8a935a08 100644 +--- a/tests/qemu-iotests/281.out ++++ b/tests/qemu-iotests/281.out +@@ -1,5 +1,5 @@ +-..... ++...... 
+ ---------------------------------------------------------------------- +-Ran 5 tests ++Ran 6 tests + + OK +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests-281-Test-lingering-timers.patch b/SOURCES/kvm-iotests-281-Test-lingering-timers.patch new file mode 100644 index 0000000..c31b413 --- /dev/null +++ b/SOURCES/kvm-iotests-281-Test-lingering-timers.patch @@ -0,0 +1,174 @@ +From b56684f6c1bef4fb5bf87ac5a1106d3830c05ad0 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:10 +0100 +Subject: [PATCH 4/6] iotests/281: Test lingering timers + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [4/6] aaad466941637a34224dc037bbea37d128b5676b +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Prior to "block/nbd: Delete reconnect delay timer when done" and +"block/nbd: Delete open timer when done", both of those timers would +remain scheduled even after successfully (re-)connecting to the server, +and they would not even be deleted when the BDS is deleted. + +This test constructs exactly this situation: +(1) Configure an @open-timeout, so the open timer is armed, and +(2) Configure a @reconnect-delay and trigger a reconnect situation + (which succeeds immediately), so the reconnect delay timer is armed. +Then we immediately delete the BDS, and sleep for longer than the +@open-timeout and @reconnect-delay. Prior to said patches, this caused +one (or both) of the timer CBs to access already-freed data. + +Accessing freed data may or may not crash, so this test can produce +false successes, but I do not know how to show the problem in a better +or more reliable way. If you run this test on "block/nbd: Assert there +are no timers when closed" and without the fix patches mentioned above, +you should reliably see an assertion failure. 
+(But all other tests that use the reconnect delay timer (264 and 277) +will fail in that configuration, too; as will nbd-reconnect-on-open, +which uses the open timer.) + +Remove this test from the quick group because of the two second sleep +this patch introduces. + +(I decided to put this test case into 281, because the main bug this +series addresses is in the interaction of the NBD block driver and I/O +threads, which is precisely the scope of 281. The test case for that +other bug will also be put into the test class added here. + +Also, excuse the test class's name, I couldn't come up with anything +better. The "yield" part will make sense two patches from now.) + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit eaf1e85d4ddefdbd197f393fa9c5acc7ba8133b0) + +Conflict: +- @open-timeout was introduced after the 6.2 release, and has not been + backported. Consequently, there is no open_timer, and we can (and + must) drop the respective parts of the test here. + +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/281 | 73 ++++++++++++++++++++++++++++++++++++-- + tests/qemu-iotests/281.out | 4 +-- + 2 files changed, 73 insertions(+), 4 deletions(-) + +diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 +index 956698083f..13c588be75 100755 +--- a/tests/qemu-iotests/281 ++++ b/tests/qemu-iotests/281 +@@ -1,5 +1,5 @@ + #!/usr/bin/env python3 +-# group: rw quick ++# group: rw + # + # Test cases for blockdev + IOThread interactions + # +@@ -20,8 +20,9 @@ + # + + import os ++import time + import iotests +-from iotests import qemu_img ++from iotests import qemu_img, QemuStorageDaemon + + image_len = 64 * 1024 * 1024 + +@@ -243,6 +244,74 @@ class TestBlockdevBackupAbort(iotests.QMPTestCase): + # Hangs on failure, we expect this error. 
+ self.assert_qmp(result, 'error/class', 'GenericError') + ++# Test for RHBZ#2033626 ++class TestYieldingAndTimers(iotests.QMPTestCase): ++ sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ qsd = None ++ ++ def setUp(self): ++ self.create_nbd_export() ++ ++ # Simple VM with an NBD block device connected to the NBD export ++ # provided by the QSD ++ self.vm = iotests.VM() ++ self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' + ++ f'server.path={self.sock},export=exp,' + ++ 'reconnect-delay=1') ++ ++ self.vm.launch() ++ ++ def tearDown(self): ++ self.stop_nbd_export() ++ self.vm.shutdown() ++ ++ def test_timers_with_blockdev_del(self): ++ # Stop and restart the NBD server, and do some I/O on the client to ++ # trigger a reconnect and start the reconnect delay timer ++ self.stop_nbd_export() ++ self.create_nbd_export() ++ ++ result = self.vm.qmp('human-monitor-command', ++ command_line='qemu-io nbd "write 0 512"') ++ self.assert_qmp(result, 'return', '') ++ ++ # Reconnect is done, so the reconnect delay timer should be gone. ++ # (But there used to be a bug where it remained active, for which this ++ # is a regression test.) ++ ++ # Delete the BDS to see whether the timer is gone. If it is not, ++ # it will remain active, fire later, and then access freed data. ++ # (Or, with "block/nbd: Assert there are no timers when closed" ++ # applied, the assertion added in that patch will fail.) ++ result = self.vm.qmp('blockdev-del', node_name='nbd') ++ self.assert_qmp(result, 'return', {}) ++ ++ # Give the timer some time to fire (it has a timeout of 1 s). ++ # (Sleeping in an iotest may ring some alarm bells, but note that if ++ # the timing is off here, the test will just always pass. If we kill ++ # the VM too early, then we just kill the timer before it can fire, ++ # thus not see the error, and so the test will pass.) 
++ time.sleep(2) ++ ++ def create_nbd_export(self): ++ assert self.qsd is None ++ ++ # Simple NBD export of a null-co BDS ++ self.qsd = QemuStorageDaemon( ++ '--blockdev', ++ 'null-co,node-name=null,read-zeroes=true', ++ ++ '--nbd-server', ++ f'addr.type=unix,addr.path={self.sock}', ++ ++ '--export', ++ 'nbd,id=exp,node-name=null,name=exp,writable=true' ++ ) ++ ++ def stop_nbd_export(self): ++ self.qsd.stop() ++ self.qsd = None ++ + if __name__ == '__main__': + iotests.main(supported_fmts=['qcow2'], + supported_protocols=['file']) +diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out +index 89968f35d7..914e3737bd 100644 +--- a/tests/qemu-iotests/281.out ++++ b/tests/qemu-iotests/281.out +@@ -1,5 +1,5 @@ +-.... ++..... + ---------------------------------------------------------------------- +-Ran 4 tests ++Ran 5 tests + + OK +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests-Add-iothread-cases-to-155.patch b/SOURCES/kvm-iotests-Add-iothread-cases-to-155.patch deleted file mode 100644 index 24ac90c..0000000 --- a/SOURCES/kvm-iotests-Add-iothread-cases-to-155.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 2366cd9066e79d6c93a3a28710aea987b2c8f454 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:38 +0000 -Subject: [PATCH 18/20] iotests: Add iothread cases to 155 - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-13-kwolf@redhat.com> -Patchwork-id: 94289 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 12/13] iotests: Add iothread cases to 155 -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -This patch adds test cases for attaching the backing chain to a mirror -job target right before finalising the job, where the image is in a -non-mainloop AioContext (i.e. the backing chain needs to be moved to the -AioContext of the mirror target). 
- -This requires switching the test case from virtio-blk to virtio-scsi -because virtio-blk only actually starts using the iothreads when the -guest driver initialises the device (which never happens in a test case -without a guest OS). virtio-scsi always keeps its block nodes in the -AioContext of the the requested iothread without guest interaction. - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-7-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit 6a5f6403a11307794ec79d277a065c137cfc12b2) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/155 | 32 +++++++++++++++++++++++--------- - tests/qemu-iotests/155.out | 4 ++-- - 2 files changed, 25 insertions(+), 11 deletions(-) - -diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 -index 3053e50..b552d1f 100755 ---- a/tests/qemu-iotests/155 -+++ b/tests/qemu-iotests/155 -@@ -49,11 +49,14 @@ target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) - # chain opened right away. If False, blockdev-add - # opens it without a backing file and job completion - # is supposed to open the backing chain. 
-+# use_iothread: If True, an iothread is configured for the virtio-blk device -+# that uses the image being mirrored - - class BaseClass(iotests.QMPTestCase): - target_blockdev_backing = None - target_real_backing = None - target_open_with_backing = True -+ use_iothread = False - - def setUp(self): - qemu_img('create', '-f', iotests.imgfmt, back0_img, '1440K') -@@ -69,7 +72,16 @@ class BaseClass(iotests.QMPTestCase): - 'file': {'driver': 'file', - 'filename': source_img}} - self.vm.add_blockdev(self.vm.qmp_to_opts(blockdev)) -- self.vm.add_device('virtio-blk,id=qdev0,drive=source') -+ -+ if self.use_iothread: -+ self.vm.add_object('iothread,id=iothread0') -+ iothread = ",iothread=iothread0" -+ else: -+ iothread = "" -+ -+ self.vm.add_device('virtio-scsi%s' % iothread) -+ self.vm.add_device('scsi-hd,id=qdev0,drive=source') -+ - self.vm.launch() - - self.assertIntactSourceBackingChain() -@@ -182,24 +194,21 @@ class MirrorBaseClass(BaseClass): - def testFull(self): - self.runMirror('full') - -- node = self.findBlockNode('target', -- '/machine/peripheral/qdev0/virtio-backend') -+ node = self.findBlockNode('target', 'qdev0') - self.assertCorrectBackingImage(node, None) - self.assertIntactSourceBackingChain() - - def testTop(self): - self.runMirror('top') - -- node = self.findBlockNode('target', -- '/machine/peripheral/qdev0/virtio-backend') -+ node = self.findBlockNode('target', 'qdev0') - self.assertCorrectBackingImage(node, back2_img) - self.assertIntactSourceBackingChain() - - def testNone(self): - self.runMirror('none') - -- node = self.findBlockNode('target', -- '/machine/peripheral/qdev0/virtio-backend') -+ node = self.findBlockNode('target', 'qdev0') - self.assertCorrectBackingImage(node, source_img) - self.assertIntactSourceBackingChain() - -@@ -252,6 +261,9 @@ class TestBlockdevMirrorReopen(MirrorBaseClass): - backing="backing") - self.assert_qmp(result, 'return', {}) - -+class TestBlockdevMirrorReopenIothread(TestBlockdevMirrorReopen): -+ use_iothread = True 
-+ - # Attach the backing chain only during completion, with blockdev-snapshot - class TestBlockdevMirrorSnapshot(MirrorBaseClass): - cmd = 'blockdev-mirror' -@@ -268,6 +280,9 @@ class TestBlockdevMirrorSnapshot(MirrorBaseClass): - overlay="target") - self.assert_qmp(result, 'return', {}) - -+class TestBlockdevMirrorSnapshotIothread(TestBlockdevMirrorSnapshot): -+ use_iothread = True -+ - class TestCommit(BaseClass): - existing = False - -@@ -283,8 +298,7 @@ class TestCommit(BaseClass): - - self.vm.event_wait('BLOCK_JOB_COMPLETED') - -- node = self.findBlockNode(None, -- '/machine/peripheral/qdev0/virtio-backend') -+ node = self.findBlockNode(None, 'qdev0') - self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename', - back1_img) - self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename', -diff --git a/tests/qemu-iotests/155.out b/tests/qemu-iotests/155.out -index 4fd1c2d..ed714d5 100644 ---- a/tests/qemu-iotests/155.out -+++ b/tests/qemu-iotests/155.out -@@ -1,5 +1,5 @@ --......................... -+............................... 
- ---------------------------------------------------------------------- --Ran 25 tests -+Ran 31 tests - - OK --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Add-more-skip_if_unsupported-statements-to-t.patch b/SOURCES/kvm-iotests-Add-more-skip_if_unsupported-statements-to-t.patch deleted file mode 100644 index 6bdf130..0000000 --- a/SOURCES/kvm-iotests-Add-more-skip_if_unsupported-statements-to-t.patch +++ /dev/null @@ -1,236 +0,0 @@ -From adda561394bb07c13ef3f2712b36704790530891 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 3 Jun 2020 16:03:15 +0100 -Subject: [PATCH 16/26] iotests: Add more "skip_if_unsupported" statements to - the python tests - -RH-Author: Kevin Wolf -Message-id: <20200603160325.67506-2-kwolf@redhat.com> -Patchwork-id: 97099 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 01/11] iotests: Add more "skip_if_unsupported" statements to the python tests -Bugzilla: 1778593 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -From: Thomas Huth - -The python code already contains a possibility to skip tests if the -corresponding driver is not available in the qemu binary - use it -in more spots to avoid that the tests are failing if the driver has -been disabled. - -While we're at it, we can now also remove some of the old checks that -were using iotests.supports_quorum() - and which were apparently not -working as expected since the tests aborted instead of being skipped -when "quorum" was missing in the QEMU binary. - -Signed-off-by: Thomas Huth -Signed-off-by: Kevin Wolf -(cherry picked from commit 9442bebe6e67a5d038bbf2572b79e7b59d202a23) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/030 | 4 +--- - tests/qemu-iotests/040 | 2 ++ - tests/qemu-iotests/041 | 39 +++------------------------------------ - tests/qemu-iotests/245 | 2 ++ - 4 files changed, 8 insertions(+), 39 deletions(-) - -diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 -index f3766f2..bddbb30 100755 ---- a/tests/qemu-iotests/030 -+++ b/tests/qemu-iotests/030 -@@ -530,6 +530,7 @@ class TestQuorum(iotests.QMPTestCase): - children = [] - backing = [] - -+ @iotests.skip_if_unsupported(['quorum']) - def setUp(self): - opts = ['driver=quorum', 'vote-threshold=2'] - -@@ -560,9 +561,6 @@ class TestQuorum(iotests.QMPTestCase): - os.remove(img) - - def test_stream_quorum(self): -- if not iotests.supports_quorum(): -- return -- - self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.children[0]), - qemu_io('-f', iotests.imgfmt, '-rU', '-c', 'map', self.backing[0]), - 'image file map matches backing file before streaming') -diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 -index 762ad1e..74f62c3 100755 ---- a/tests/qemu-iotests/040 -+++ b/tests/qemu-iotests/040 -@@ -106,6 +106,7 @@ class TestSingleDrive(ImageCommitTestCase): - self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xab 0 524288', backing_img).find("verification failed")) - self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed")) - -+ @iotests.skip_if_unsupported(['throttle']) - def test_commit_with_filter_and_quit(self): - result = self.vm.qmp('object-add', qom_type='throttle-group', id='tg') - self.assert_qmp(result, 'return', {}) -@@ -125,6 +126,7 @@ class TestSingleDrive(ImageCommitTestCase): - self.has_quit = True - - # Same as above, but this time we add the filter after starting the job -+ @iotests.skip_if_unsupported(['throttle']) - def test_commit_plus_filter_and_quit(self): - result = self.vm.qmp('object-add', qom_type='throttle-group', id='tg') - self.assert_qmp(result, 
'return', {}) -diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 -index 8568426..a543b15 100755 ---- a/tests/qemu-iotests/041 -+++ b/tests/qemu-iotests/041 -@@ -871,6 +871,7 @@ class TestRepairQuorum(iotests.QMPTestCase): - image_len = 1 * 1024 * 1024 # MB - IMAGES = [ quorum_img1, quorum_img2, quorum_img3 ] - -+ @iotests.skip_if_unsupported(['quorum']) - def setUp(self): - self.vm = iotests.VM() - -@@ -891,9 +892,8 @@ class TestRepairQuorum(iotests.QMPTestCase): - #assemble the quorum block device from the individual files - args = { "driver": "quorum", "node-name": "quorum0", - "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] } -- if iotests.supports_quorum(): -- result = self.vm.qmp("blockdev-add", **args) -- self.assert_qmp(result, 'return', {}) -+ result = self.vm.qmp("blockdev-add", **args) -+ self.assert_qmp(result, 'return', {}) - - - def tearDown(self): -@@ -906,9 +906,6 @@ class TestRepairQuorum(iotests.QMPTestCase): - pass - - def test_complete(self): -- if not iotests.supports_quorum(): -- return -- - self.assert_no_active_block_jobs() - - result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', -@@ -925,9 +922,6 @@ class TestRepairQuorum(iotests.QMPTestCase): - 'target image does not match source after mirroring') - - def test_cancel(self): -- if not iotests.supports_quorum(): -- return -- - self.assert_no_active_block_jobs() - - result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', -@@ -942,9 +936,6 @@ class TestRepairQuorum(iotests.QMPTestCase): - self.vm.shutdown() - - def test_cancel_after_ready(self): -- if not iotests.supports_quorum(): -- return -- - self.assert_no_active_block_jobs() - - result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', -@@ -961,9 +952,6 @@ class TestRepairQuorum(iotests.QMPTestCase): - 'target image does not match source after mirroring') - - def test_pause(self): -- if not iotests.supports_quorum(): -- return -- - self.assert_no_active_block_jobs() - - 
result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', -@@ -989,9 +977,6 @@ class TestRepairQuorum(iotests.QMPTestCase): - 'target image does not match source after mirroring') - - def test_medium_not_found(self): -- if not iotests.supports_quorum(): -- return -- - if iotests.qemu_default_machine != 'pc': - return - -@@ -1003,9 +988,6 @@ class TestRepairQuorum(iotests.QMPTestCase): - self.assert_qmp(result, 'error/class', 'GenericError') - - def test_image_not_found(self): -- if not iotests.supports_quorum(): -- return -- - result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', - sync='full', node_name='repair0', replaces='img1', - mode='existing', target=quorum_repair_img, -@@ -1013,9 +995,6 @@ class TestRepairQuorum(iotests.QMPTestCase): - self.assert_qmp(result, 'error/class', 'GenericError') - - def test_device_not_found(self): -- if not iotests.supports_quorum(): -- return -- - result = self.vm.qmp('drive-mirror', job_id='job0', - device='nonexistent', sync='full', - node_name='repair0', -@@ -1024,9 +1003,6 @@ class TestRepairQuorum(iotests.QMPTestCase): - self.assert_qmp(result, 'error/class', 'GenericError') - - def test_wrong_sync_mode(self): -- if not iotests.supports_quorum(): -- return -- - result = self.vm.qmp('drive-mirror', device='quorum0', job_id='job0', - node_name='repair0', - replaces='img1', -@@ -1034,27 +1010,18 @@ class TestRepairQuorum(iotests.QMPTestCase): - self.assert_qmp(result, 'error/class', 'GenericError') - - def test_no_node_name(self): -- if not iotests.supports_quorum(): -- return -- - result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', - sync='full', replaces='img1', - target=quorum_repair_img, format=iotests.imgfmt) - self.assert_qmp(result, 'error/class', 'GenericError') - - def test_nonexistent_replaces(self): -- if not iotests.supports_quorum(): -- return -- - result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', - sync='full', node_name='repair0', 
replaces='img77', - target=quorum_repair_img, format=iotests.imgfmt) - self.assert_qmp(result, 'error/class', 'GenericError') - - def test_after_a_quorum_snapshot(self): -- if not iotests.supports_quorum(): -- return -- - result = self.vm.qmp('blockdev-snapshot-sync', node_name='img1', - snapshot_file=quorum_snapshot_file, - snapshot_node_name="snap1"); -diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 -index 919131d..ed972f9 100644 ---- a/tests/qemu-iotests/245 -+++ b/tests/qemu-iotests/245 -@@ -478,6 +478,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): - # This test verifies that we can't change the children of a block - # device during a reopen operation in a way that would create - # cycles in the node graph -+ @iotests.skip_if_unsupported(['blkverify']) - def test_graph_cycles(self): - opts = [] - -@@ -534,6 +535,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): - self.assert_qmp(result, 'return', {}) - - # Misc reopen tests with different block drivers -+ @iotests.skip_if_unsupported(['quorum', 'throttle']) - def test_misc_drivers(self): - #################### - ###### quorum ###### --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Add-qemu_io_log.patch b/SOURCES/kvm-iotests-Add-qemu_io_log.patch deleted file mode 100644 index a65bc5a..0000000 --- a/SOURCES/kvm-iotests-Add-qemu_io_log.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 2be333e847c01397bb6a92b2e4c60e904957675d Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 8 Jun 2020 15:01:37 +0100 -Subject: [PATCH 09/17] iotests: Add qemu_io_log() - -RH-Author: Kevin Wolf -Message-id: <20200608150140.38218-9-kwolf@redhat.com> -Patchwork-id: 97451 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 08/11] iotests: Add qemu_io_log() -Bugzilla: 1780574 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz - -Add a function that runs qemu-io and logs the output with the -appropriate filters applied. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Alberto Garcia -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit a96f0350e3d95c98f2bff1863d14493af5c1d360) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/iotests.py | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index be20d56..7a9c779 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -162,6 +162,11 @@ def qemu_io(*args): - sys.stderr.write('qemu-io received signal %i: %s\n' % (-exitcode, ' '.join(args))) - return subp.communicate()[0] - -+def qemu_io_log(*args): -+ result = qemu_io(*args) -+ log(result, filters=[filter_testfiles, filter_qemu_io]) -+ return result -+ - def qemu_io_silent(*args): - '''Run qemu-io and return the exit code, suppressing stdout''' - args = qemu_io_args + list(args) --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Add-test-291-to-for-qemu-img-bitmap-coverage.patch b/SOURCES/kvm-iotests-Add-test-291-to-for-qemu-img-bitmap-coverage.patch deleted file mode 100644 index 6144043..0000000 --- a/SOURCES/kvm-iotests-Add-test-291-to-for-qemu-img-bitmap-coverage.patch +++ /dev/null @@ -1,253 +0,0 @@ -From eccae2f252513d2965ef919022c3ed068da275bd Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 Jun 2020 02:34:20 +0100 -Subject: [PATCH 15/26] iotests: Add test 291 to for qemu-img bitmap coverage - -RH-Author: Eric Blake -Message-id: <20200602023420.2133649-13-eblake@redhat.com> -Patchwork-id: 97079 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 12/12] iotests: Add test 291 to for qemu-img bitmap coverage -Bugzilla: 1779893 1779904 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Add a new test covering the 'qemu-img bitmap' subcommand, as well as -'qemu-img convert --bitmaps', both added in recent patches. 
- -Signed-off-by: Eric Blake -Reviewed-by: Max Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200521192137.1120211-6-eblake@redhat.com> -(cherry picked from commit cf2d1203dcfc2bf964453d83a2302231ce77f2dc) - -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - tests/qemu-iotests/group - context: other tests not backported - tests/qemu-iotests/291.out - zstd compression not backported -Signed-off-by: Eric Blake - -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/291 | 112 +++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/291.out | 78 +++++++++++++++++++++++++++++++ - tests/qemu-iotests/group | 1 + - 3 files changed, 191 insertions(+) - create mode 100755 tests/qemu-iotests/291 - create mode 100644 tests/qemu-iotests/291.out - -diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/291 -new file mode 100755 -index 0000000..3ca83b9 ---- /dev/null -+++ b/tests/qemu-iotests/291 -@@ -0,0 +1,112 @@ -+#!/usr/bin/env bash -+# -+# Test qemu-img bitmap handling -+# -+# Copyright (C) 2018-2020 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+seq="$(basename $0)" -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+ nbd_server_stop -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+. ./common.rc -+. ./common.filter -+. 
./common.nbd -+ -+_supported_fmt qcow2 -+_supported_proto file -+_supported_os Linux -+_require_command QEMU_NBD -+ -+echo -+echo "=== Initial image setup ===" -+echo -+ -+# Create backing image with one bitmap -+TEST_IMG="$TEST_IMG.base" _make_test_img 10M -+$QEMU_IMG bitmap --add -f $IMGFMT "$TEST_IMG.base" b0 -+$QEMU_IO -c 'w 3M 1M' -f $IMGFMT "$TEST_IMG.base" | _filter_qemu_io -+ -+# Create initial image and populate two bitmaps: one active, one inactive. -+ORIG_IMG=$TEST_IMG -+TEST_IMG=$TEST_IMG.orig -+_make_test_img -b "$ORIG_IMG.base" -F $IMGFMT 10M -+$QEMU_IO -c 'w 0 1M' -f $IMGFMT "$TEST_IMG" | _filter_qemu_io -+$QEMU_IMG bitmap --add -g 512k -f $IMGFMT "$TEST_IMG" b1 -+$QEMU_IMG bitmap --add --disable -f $IMGFMT "$TEST_IMG" b2 -+$QEMU_IO -c 'w 3M 1M' -f $IMGFMT "$TEST_IMG" | _filter_qemu_io -+$QEMU_IMG bitmap --clear -f $IMGFMT "$TEST_IMG" b1 -+$QEMU_IO -c 'w 1M 1M' -f $IMGFMT "$TEST_IMG" | _filter_qemu_io -+$QEMU_IMG bitmap --disable -f $IMGFMT "$TEST_IMG" b1 -+$QEMU_IMG bitmap --enable -f $IMGFMT "$TEST_IMG" b2 -+$QEMU_IO -c 'w 2M 1M' -f $IMGFMT "$TEST_IMG" | _filter_qemu_io -+ -+echo -+echo "=== Bitmap preservation not possible to non-qcow2 ===" -+echo -+ -+TEST_IMG=$ORIG_IMG -+$QEMU_IMG convert --bitmaps -O raw "$TEST_IMG.orig" "$TEST_IMG" && -+ echo "unexpected success" -+ -+echo -+echo "=== Convert with bitmap preservation ===" -+echo -+ -+# Only bitmaps from the active layer are copied -+$QEMU_IMG convert --bitmaps -O qcow2 "$TEST_IMG.orig" "$TEST_IMG" -+$QEMU_IMG info "$TEST_IMG" | _filter_img_info --format-specific -+# But we can also merge in bitmaps from other layers. 
This test is a bit -+# contrived to cover more code paths, in reality, you could merge directly -+# into b0 without going through tmp -+$QEMU_IMG bitmap --add --disable -f $IMGFMT "$TEST_IMG" b0 -+$QEMU_IMG bitmap --add --merge b0 -b "$TEST_IMG.base" -F $IMGFMT \ -+ -f $IMGFMT "$TEST_IMG" tmp -+$QEMU_IMG bitmap --merge tmp -f $IMGFMT "$TEST_IMG" b0 -+$QEMU_IMG bitmap --remove --image-opts \ -+ driver=$IMGFMT,file.driver=file,file.filename="$TEST_IMG" tmp -+$QEMU_IMG info "$TEST_IMG" | _filter_img_info --format-specific -+ -+echo -+echo "=== Check bitmap contents ===" -+echo -+ -+# x-dirty-bitmap is a hack for reading bitmaps; it abuses block status to -+# report "data":false for portions of the bitmap which are set -+IMG="driver=nbd,server.type=unix,server.path=$nbd_unix_socket" -+nbd_server_start_unix_socket -r -f qcow2 -B b0 "$TEST_IMG" -+$QEMU_IMG map --output=json --image-opts \ -+ "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b0" | _filter_qemu_img_map -+nbd_server_start_unix_socket -r -f qcow2 -B b1 "$TEST_IMG" -+$QEMU_IMG map --output=json --image-opts \ -+ "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b1" | _filter_qemu_img_map -+nbd_server_start_unix_socket -r -f qcow2 -B b2 "$TEST_IMG" -+$QEMU_IMG map --output=json --image-opts \ -+ "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b2" | _filter_qemu_img_map -+ -+# success, all done -+echo '*** done' -+rm -f $seq.full -+status=0 -diff --git a/tests/qemu-iotests/291.out b/tests/qemu-iotests/291.out -new file mode 100644 -index 0000000..14e5cfc ---- /dev/null -+++ b/tests/qemu-iotests/291.out -@@ -0,0 +1,78 @@ -+QA output created by 291 -+ -+=== Initial image setup === -+ -+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=10485760 -+wrote 1048576/1048576 bytes at offset 3145728 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=10485760 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT -+wrote 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX 
YYY/sec and XXX ops/sec) -+wrote 1048576/1048576 bytes at offset 3145728 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 1048576/1048576 bytes at offset 1048576 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 1048576/1048576 bytes at offset 2097152 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+=== Bitmap preservation not possible to non-qcow2 === -+ -+qemu-img: Format driver 'raw' does not support bitmaps -+ -+=== Convert with bitmap preservation === -+ -+image: TEST_DIR/t.IMGFMT -+file format: IMGFMT -+virtual size: 10 MiB (10485760 bytes) -+disk size: 4.39 MiB -+Format specific information: -+ compat: 1.1 -+ lazy refcounts: false -+ bitmaps: -+ [0]: -+ flags: -+ name: b1 -+ granularity: 524288 -+ [1]: -+ flags: -+ [0]: auto -+ name: b2 -+ granularity: 65536 -+ refcount bits: 16 -+ corrupt: false -+image: TEST_DIR/t.IMGFMT -+file format: IMGFMT -+virtual size: 10 MiB (10485760 bytes) -+disk size: 4.48 MiB -+Format specific information: -+ compat: 1.1 -+ lazy refcounts: false -+ bitmaps: -+ [0]: -+ flags: -+ name: b1 -+ granularity: 524288 -+ [1]: -+ flags: -+ [0]: auto -+ name: b2 -+ granularity: 65536 -+ [2]: -+ flags: -+ name: b0 -+ granularity: 65536 -+ refcount bits: 16 -+ corrupt: false -+ -+=== Check bitmap contents === -+ -+[{ "start": 0, "length": 3145728, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 3145728, "length": 1048576, "depth": 0, "zero": false, "data": false}, -+{ "start": 4194304, "length": 6291456, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] -+[{ "start": 0, "length": 1048576, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 1048576, "length": 1048576, "depth": 0, "zero": false, "data": false}, -+{ "start": 2097152, "length": 8388608, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] -+[{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 2097152, "length": 1048576, 
"depth": 0, "zero": false, "data": false}, -+{ "start": 3145728, "length": 7340032, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] -+*** done -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 9c565cf..033b54d 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -290,3 +290,4 @@ - 280 rw migration quick - 281 rw quick - 284 rw -+291 rw quick --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Add-test-for-image-creation-fallback.patch b/SOURCES/kvm-iotests-Add-test-for-image-creation-fallback.patch deleted file mode 100644 index a8ea8f7..0000000 --- a/SOURCES/kvm-iotests-Add-test-for-image-creation-fallback.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 55f3a02574da226299d99bd74d12dd91b0f228dc Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:46 +0000 -Subject: [PATCH 05/20] iotests: Add test for image creation fallback - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-6-mlevitsk@redhat.com> -Patchwork-id: 94228 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/6] iotests: Add test for image creation fallback -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-6-mreitz@redhat.com> -Reviewed-by: Eric Blake -Reviewed-by: Maxim Levitsky -[mreitz: Added a note that NBD does not support resizing, which is why - the second case is expected to fail] -Signed-off-by: Max Reitz -(cherry picked from commit 4dddeac115c5a2c5f74731fda0afd031a0b45490) -Signed-off-by: Maxim Levitsky - -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/259 | 62 ++++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/259.out | 14 +++++++++++ - tests/qemu-iotests/group | 1 + - 3 files changed, 77 insertions(+) - create mode 100755 tests/qemu-iotests/259 - create mode 100644 tests/qemu-iotests/259.out - -diff --git a/tests/qemu-iotests/259 b/tests/qemu-iotests/259 -new file mode 100755 -index 0000000..62e29af ---- /dev/null -+++ b/tests/qemu-iotests/259 -@@ -0,0 +1,62 @@ -+#!/usr/bin/env bash -+# -+# Test generic image creation fallback (by using NBD) -+# -+# Copyright (C) 2019 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+# creator -+owner=mreitz@redhat.com -+ -+seq=$(basename $0) -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+. ./common.rc -+. 
./common.filter -+ -+_supported_fmt raw -+_supported_proto nbd -+_supported_os Linux -+ -+ -+_make_test_img 64M -+ -+echo -+echo '--- Testing creation ---' -+ -+$QEMU_IMG create -f qcow2 "$TEST_IMG" 64M | _filter_img_create -+$QEMU_IMG info "$TEST_IMG" | _filter_img_info -+ -+echo -+echo '--- Testing creation for which the node would need to grow ---' -+ -+# NBD does not support resizing, so this will fail -+$QEMU_IMG create -f qcow2 -o preallocation=metadata "$TEST_IMG" 64M 2>&1 \ -+ | _filter_img_create -+ -+# success, all done -+echo "*** done" -+rm -f $seq.full -+status=0 -diff --git a/tests/qemu-iotests/259.out b/tests/qemu-iotests/259.out -new file mode 100644 -index 0000000..ffed19c ---- /dev/null -+++ b/tests/qemu-iotests/259.out -@@ -0,0 +1,14 @@ -+QA output created by 259 -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 -+ -+--- Testing creation --- -+Formatting 'TEST_DIR/t.IMGFMT', fmt=qcow2 size=67108864 -+image: TEST_DIR/t.IMGFMT -+file format: qcow2 -+virtual size: 64 MiB (67108864 bytes) -+disk size: unavailable -+ -+--- Testing creation for which the node would need to grow --- -+qemu-img: TEST_DIR/t.IMGFMT: Could not resize image: Image format driver does not support resize -+Formatting 'TEST_DIR/t.IMGFMT', fmt=qcow2 size=67108864 preallocation=metadata -+*** done -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index c0e8197..e47cbfc 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -273,6 +273,7 @@ - 256 rw quick - 257 rw - 258 rw quick -+259 rw auto quick - 260 rw quick - 261 rw - 262 rw quick migration --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Backup-with-different-source-target-size.patch b/SOURCES/kvm-iotests-Backup-with-different-source-target-size.patch deleted file mode 100644 index 4008413..0000000 --- a/SOURCES/kvm-iotests-Backup-with-different-source-target-size.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 456c5e79c32e3f2f9319a7d1452fe523aded7835 Mon Sep 17 00:00:00 2001 -From: 
Kevin Wolf -Date: Wed, 3 Jun 2020 16:03:21 +0100 -Subject: [PATCH 22/26] iotests: Backup with different source/target size - -RH-Author: Kevin Wolf -Message-id: <20200603160325.67506-8-kwolf@redhat.com> -Patchwork-id: 97106 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 07/11] iotests: Backup with different source/target size -Bugzilla: 1778593 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -This tests that the backup job catches situations where the target node -has a different size than the source node. It must also forbid resize -operations when the job is already running. - -Signed-off-by: Kevin Wolf -Message-Id: <20200430142755.315494-5-kwolf@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 0a82a9273062d05764e3df3637b3aa95ad8291c6) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/055 | 42 ++++++++++++++++++++++++++++++++++++++++-- - tests/qemu-iotests/055.out | 4 ++-- - 2 files changed, 42 insertions(+), 4 deletions(-) - -diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 -index c9cdc06..1c70389 100755 ---- a/tests/qemu-iotests/055 -+++ b/tests/qemu-iotests/055 -@@ -48,8 +48,10 @@ class TestSingleDrive(iotests.QMPTestCase): - def setUp(self): - qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - -- self.vm = iotests.VM().add_drive('blkdebug::' + test_img) -- self.vm.add_drive(blockdev_target_img, interface="none") -+ self.vm = iotests.VM() -+ self.vm.add_drive('blkdebug::' + test_img, 'node-name=source') -+ self.vm.add_drive(blockdev_target_img, 'node-name=target', -+ interface="none") - if iotests.qemu_default_machine == 'pc': - self.vm.add_drive(None, 'media=cdrom', 'ide') - self.vm.launch() -@@ -112,6 +114,42 @@ class TestSingleDrive(iotests.QMPTestCase): - def test_pause_blockdev_backup(self): - self.do_test_pause('blockdev-backup', 'drive1', blockdev_target_img) - -+ def 
do_test_resize_blockdev_backup(self, device, node): -+ def pre_finalize(): -+ result = self.vm.qmp('block_resize', device=device, size=65536) -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ -+ result = self.vm.qmp('block_resize', node_name=node, size=65536) -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ -+ result = self.vm.qmp('blockdev-backup', job_id='job0', device='drive0', -+ target='drive1', sync='full', auto_finalize=False, -+ auto_dismiss=False) -+ self.assert_qmp(result, 'return', {}) -+ -+ self.vm.run_job('job0', auto_finalize=False, pre_finalize=pre_finalize, -+ use_log=False) -+ -+ def test_source_resize_blockdev_backup(self): -+ self.do_test_resize_blockdev_backup('drive0', 'source') -+ -+ def test_target_resize_blockdev_backup(self): -+ self.do_test_resize_blockdev_backup('drive1', 'target') -+ -+ def do_test_target_size(self, size): -+ result = self.vm.qmp('block_resize', device='drive1', size=size) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.vm.qmp('blockdev-backup', job_id='job0', device='drive0', -+ target='drive1', sync='full') -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ -+ def test_small_target(self): -+ self.do_test_target_size(image_len // 2) -+ -+ def test_large_target(self): -+ self.do_test_target_size(image_len * 2) -+ - def test_medium_not_found(self): - if iotests.qemu_default_machine != 'pc': - return -diff --git a/tests/qemu-iotests/055.out b/tests/qemu-iotests/055.out -index 5c26d15..0a5e958 100644 ---- a/tests/qemu-iotests/055.out -+++ b/tests/qemu-iotests/055.out -@@ -1,5 +1,5 @@ --.................................... -+........................................ 
- ---------------------------------------------------------------------- --Ran 36 tests -+Ran 40 tests - - OK --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Create-VM.blockdev_create.patch b/SOURCES/kvm-iotests-Create-VM.blockdev_create.patch deleted file mode 100644 index 805b31a..0000000 --- a/SOURCES/kvm-iotests-Create-VM.blockdev_create.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 05fedde1374abb180cd2b51457385d8128aa7fe4 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:00 +0000 -Subject: [PATCH 03/18] iotests: Create VM.blockdev_create() - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-3-kwolf@redhat.com> -Patchwork-id: 93748 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/6] iotests: Create VM.blockdev_create() -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -We have several almost identical copies of a blockdev_create() function -in different test cases. Time to create one unified function in -iotests.py. - -To keep the diff managable, this patch only creates the function and -follow-up patches will convert the individual test cases. - -Signed-off-by: Kevin Wolf -(cherry picked from commit e9dbd1cae86f7cb6f8e470e1485aeb0c6e23ae64) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/iotests.py | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 3cff671..5741efb 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -638,6 +638,22 @@ class VM(qtest.QEMUQtestMachine): - elif status == 'null': - return error - -+ # Returns None on success, and an error string on failure -+ def blockdev_create(self, options, job_id='job0', filters=None): -+ if filters is None: -+ filters = [filter_qmp_testfiles] -+ result = self.qmp_log('blockdev-create', filters=filters, -+ job_id=job_id, options=options) -+ -+ if 'return' in result: -+ assert result['return'] == {} -+ job_result = self.run_job(job_id) -+ else: -+ job_result = result['error'] -+ -+ log("") -+ return job_result -+ - def enable_migration_events(self, name): - log('Enabling migration QMP events on %s...' % name) - log(self.qmp('migrate-set-capabilities', capabilities=[ --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Filter-testfiles-out-in-filter_img_info.patch b/SOURCES/kvm-iotests-Filter-testfiles-out-in-filter_img_info.patch deleted file mode 100644 index 60c08ec..0000000 --- a/SOURCES/kvm-iotests-Filter-testfiles-out-in-filter_img_info.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 8dc8a17d4e98aae41db01cbc073e69de44291b63 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 8 Jun 2020 15:01:38 +0100 -Subject: [PATCH 10/17] iotests: Filter testfiles out in filter_img_info() - -RH-Author: Kevin Wolf -Message-id: <20200608150140.38218-10-kwolf@redhat.com> -Patchwork-id: 97455 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 09/11] iotests: Filter testfiles out in filter_img_info() -Bugzilla: 1780574 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz - -We want to keep TEST_IMG for the full path of the main test image, but -filter_testfiles() must be called for other test images before replacing -other things like the image 
format because the test directory path could -contain the format as a substring. - -Insert a filter_testfiles() call between both. - -Signed-off-by: Kevin Wolf -Reviewed-by: Max Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200424125448.63318-9-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit fd586ce8bee50d98773436214dc9e644ddda54aa) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/iotests.py | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 7a9c779..cd5df36 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -335,8 +335,9 @@ def filter_img_info(output, filename): - for line in output.split('\n'): - if 'disk size' in line or 'actual-size' in line: - continue -- line = line.replace(filename, 'TEST_IMG') \ -- .replace(imgfmt, 'IMGFMT') -+ line = line.replace(filename, 'TEST_IMG') -+ line = filter_testfiles(line) -+ line = line.replace(imgfmt, 'IMGFMT') - line = re.sub('iters: [0-9]+', 'iters: XXX', line) - line = re.sub('uuid: [-a-f0-9]+', 'uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', line) - line = re.sub('cid: [0-9]+', 'cid: XXXXXXXXXX', line) --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Fix-run_job-with-use_log-False.patch b/SOURCES/kvm-iotests-Fix-run_job-with-use_log-False.patch deleted file mode 100644 index b105fc2..0000000 --- a/SOURCES/kvm-iotests-Fix-run_job-with-use_log-False.patch +++ /dev/null @@ -1,47 +0,0 @@ -From bb7b968a02c97564596b73d8d080cd745d96ed6b Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:35 +0000 -Subject: [PATCH 15/20] iotests: Fix run_job() with use_log=False - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-10-kwolf@redhat.com> -Patchwork-id: 94284 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 09/13] iotests: Fix run_job() with use_log=False -Bugzilla: 1790482 1805143 -RH-Acked-by: John 
Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -The 'job-complete' QMP command should be run with qmp() rather than -qmp_log() if use_log=False is passed. - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-4-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit b31b532122ec6f68d17168449c034d2197bf96ec) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/iotests.py | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 0c55f7b..46f880c 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -618,7 +618,10 @@ class VM(qtest.QEMUQtestMachine): - if use_log: - log('Job failed: %s' % (j['error'])) - elif status == 'ready': -- self.qmp_log('job-complete', id=job) -+ if use_log: -+ self.qmp_log('job-complete', id=job) -+ else: -+ self.qmp('job-complete', id=job) - elif status == 'pending' and not auto_finalize: - if pre_finalize: - pre_finalize() --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Fix-test-178.patch b/SOURCES/kvm-iotests-Fix-test-178.patch deleted file mode 100644 index 5e54daa..0000000 --- a/SOURCES/kvm-iotests-Fix-test-178.patch +++ /dev/null @@ -1,59 +0,0 @@ -From a04d324e41a40a6893bc94109994afc017f17192 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 Jun 2020 02:34:16 +0100 -Subject: [PATCH 11/26] iotests: Fix test 178 - -RH-Author: Eric Blake -Message-id: <20200602023420.2133649-9-eblake@redhat.com> -Patchwork-id: 97075 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 08/12] iotests: Fix test 178 -Bugzilla: 1779893 1779904 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -A recent change to qemu-img changed expected error message output, but -178 takes long enough to execute that it does not get run by 'make -check' or './check -g quick'. 
- -Fixes: 43d589b074 -Signed-off-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200521192137.1120211-2-eblake@redhat.com> -(cherry picked from commit ca01b7a641527052e3e8961845b40b81706ce5f9) -Signed-off-by: Eric Blake -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/178.out.qcow2 | 2 +- - tests/qemu-iotests/178.out.raw | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2 -index 9e7d8c4..345eab3 100644 ---- a/tests/qemu-iotests/178.out.qcow2 -+++ b/tests/qemu-iotests/178.out.qcow2 -@@ -13,7 +13,7 @@ qemu-img: Invalid option list: , - qemu-img: Invalid parameter 'snapshot.foo' - qemu-img: Failed in parsing snapshot param 'snapshot.foo' - qemu-img: --output must be used with human or json as argument. --qemu-img: Image size must be less than 8 EiB! -+qemu-img: Invalid image size specified. Must be between 0 and 9223372036854775807. - qemu-img: Unknown file format 'foo' - - == Size calculation for a new file (human) == -diff --git a/tests/qemu-iotests/178.out.raw b/tests/qemu-iotests/178.out.raw -index 6478365..15da915 100644 ---- a/tests/qemu-iotests/178.out.raw -+++ b/tests/qemu-iotests/178.out.raw -@@ -13,7 +13,7 @@ qemu-img: Invalid option list: , - qemu-img: Invalid parameter 'snapshot.foo' - qemu-img: Failed in parsing snapshot param 'snapshot.foo' - qemu-img: --output must be used with human or json as argument. --qemu-img: Image size must be less than 8 EiB! -+qemu-img: Invalid image size specified. Must be between 0 and 9223372036854775807. 
- qemu-img: Unknown file format 'foo' - - == Size calculation for a new file (human) == --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Let-_make_test_img-parse-its-parameters.patch b/SOURCES/kvm-iotests-Let-_make_test_img-parse-its-parameters.patch deleted file mode 100644 index d24f5e7..0000000 --- a/SOURCES/kvm-iotests-Let-_make_test_img-parse-its-parameters.patch +++ /dev/null @@ -1,91 +0,0 @@ -From 3c96dbd74fb67e2ae1a116b2771290b192041707 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 Jun 2020 02:34:10 +0100 -Subject: [PATCH 05/26] iotests: Let _make_test_img parse its parameters - -RH-Author: Eric Blake -Message-id: <20200602023420.2133649-3-eblake@redhat.com> -Patchwork-id: 97070 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 02/12] iotests: Let _make_test_img parse its parameters -Bugzilla: 1779893 1779904 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -From: Max Reitz - -This will allow us to add more options than just -b. - -Signed-off-by: Max Reitz -Reviewed-by: Maxim Levitsky -Message-id: 20191107163708.833192-9-mreitz@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit eea871d047701b563cfd66c1566b9ff6d163882b) -Signed-off-by: Eric Blake -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/common.rc | 28 ++++++++++++++++++++-------- - 1 file changed, 20 insertions(+), 8 deletions(-) - -diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc -index 0cc8acc..99fef4d 100644 ---- a/tests/qemu-iotests/common.rc -+++ b/tests/qemu-iotests/common.rc -@@ -302,12 +302,12 @@ _make_test_img() - # extra qemu-img options can be added by tests - # at least one argument (the image size) needs to be added - local extra_img_options="" -- local image_size=$* - local optstr="" - local img_name="" - local use_backing=0 - local backing_file="" - local object_options="" -+ local misc_params=() - - if [ -n "$TEST_IMG_FILE" ]; then - img_name=$TEST_IMG_FILE -@@ -323,11 +323,23 @@ _make_test_img() - optstr=$(_optstr_add "$optstr" "key-secret=keysec0") - fi - -- if [ "$1" = "-b" ]; then -- use_backing=1 -- backing_file=$2 -- image_size=$3 -- fi -+ for param; do -+ if [ "$use_backing" = "1" -a -z "$backing_file" ]; then -+ backing_file=$param -+ continue -+ fi -+ -+ case "$param" in -+ -b) -+ use_backing=1 -+ ;; -+ -+ *) -+ misc_params=("${misc_params[@]}" "$param") -+ ;; -+ esac -+ done -+ - if [ \( "$IMGFMT" = "qcow2" -o "$IMGFMT" = "qed" \) -a -n "$CLUSTER_SIZE" ]; then - optstr=$(_optstr_add "$optstr" "cluster_size=$CLUSTER_SIZE") - fi -@@ -343,9 +355,9 @@ _make_test_img() - # XXX(hch): have global image options? 
- ( - if [ $use_backing = 1 ]; then -- $QEMU_IMG create $object_options -f $IMGFMT $extra_img_options -b "$backing_file" "$img_name" $image_size 2>&1 -+ $QEMU_IMG create $object_options -f $IMGFMT $extra_img_options -b "$backing_file" "$img_name" "${misc_params[@]}" 2>&1 - else -- $QEMU_IMG create $object_options -f $IMGFMT $extra_img_options "$img_name" $image_size 2>&1 -+ $QEMU_IMG create $object_options -f $IMGFMT $extra_img_options "$img_name" "${misc_params[@]}" 2>&1 - fi - ) | _filter_img_create - --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Mirror-with-different-source-target-size.patch b/SOURCES/kvm-iotests-Mirror-with-different-source-target-size.patch deleted file mode 100644 index 7757632..0000000 --- a/SOURCES/kvm-iotests-Mirror-with-different-source-target-size.patch +++ /dev/null @@ -1,110 +0,0 @@ -From aff543186ff316d66b2c7acb434c6c17bdb8da78 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 3 Jun 2020 16:03:25 +0100 -Subject: [PATCH 26/26] iotests: Mirror with different source/target size - -RH-Author: Kevin Wolf -Message-id: <20200603160325.67506-12-kwolf@redhat.com> -Patchwork-id: 97109 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 11/11] iotests: Mirror with different source/target size -Bugzilla: 1778593 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -This tests that the mirror job catches situations where the target node -has a different size than the source node. It must also forbid resize -operations when the job is already running. - -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Message-Id: <20200511135825.219437-5-kwolf@redhat.com> -Reviewed-by: Max Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 16cea4ee1c8e5a69a058e76f426b2e17974d8d7d) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/041 | 45 +++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/041.out | 4 ++-- - 2 files changed, 47 insertions(+), 2 deletions(-) - -diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 -index a543b15..20fb68a 100755 ---- a/tests/qemu-iotests/041 -+++ b/tests/qemu-iotests/041 -@@ -240,6 +240,49 @@ class TestSingleBlockdev(TestSingleDrive): - target=self.qmp_target) - self.assert_qmp(result, 'error/class', 'GenericError') - -+ def do_test_resize(self, device, node): -+ def pre_finalize(): -+ if device: -+ result = self.vm.qmp('block_resize', device=device, size=65536) -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ -+ result = self.vm.qmp('block_resize', node_name=node, size=65536) -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ -+ result = self.vm.qmp(self.qmp_cmd, job_id='job0', device='drive0', -+ sync='full', target=self.qmp_target, -+ auto_finalize=False, auto_dismiss=False) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.vm.run_job('job0', auto_finalize=False, -+ pre_finalize=pre_finalize, use_log=False) -+ self.assertEqual(result, None) -+ -+ def test_source_resize(self): -+ self.do_test_resize('drive0', 'top') -+ -+ def test_target_resize(self): -+ self.do_test_resize(None, self.qmp_target) -+ -+ def do_test_target_size(self, size): -+ result = self.vm.qmp('block_resize', node_name=self.qmp_target, -+ size=size) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.vm.qmp(self.qmp_cmd, job_id='job0', -+ device='drive0', sync='full', auto_dismiss=False, -+ target=self.qmp_target) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.vm.run_job('job0', use_log=False) -+ self.assertEqual(result, 'Source and target image have different sizes') -+ -+ def test_small_target(self): -+ self.do_test_target_size(self.image_len // 2) -+ -+ def test_large_target(self): -+ self.do_test_target_size(self.image_len * 2) -+ - test_large_cluster = None - 
test_image_not_found = None - test_small_buffer2 = None -@@ -251,6 +294,8 @@ class TestSingleDriveZeroLength(TestSingleDrive): - - class TestSingleBlockdevZeroLength(TestSingleBlockdev): - image_len = 0 -+ test_small_target = None -+ test_large_target = None - - class TestSingleDriveUnalignedLength(TestSingleDrive): - image_len = 1025 * 1024 -diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out -index 2c448b4..3ea6aa4 100644 ---- a/tests/qemu-iotests/041.out -+++ b/tests/qemu-iotests/041.out -@@ -1,5 +1,5 @@ --.......................................................................................... -+.................................................................................................... - ---------------------------------------------------------------------- --Ran 90 tests -+Ran 100 tests - - OK --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch b/SOURCES/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch deleted file mode 100644 index 17e4a41..0000000 --- a/SOURCES/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch +++ /dev/null @@ -1,122 +0,0 @@ -From 7e23b64dc20b64ca6fa887cd06cc5e52374f6268 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:30 +0000 -Subject: [PATCH 10/20] iotests: Refactor blockdev-reopen test for iothreads - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-5-kwolf@redhat.com> -Patchwork-id: 94281 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 04/13] iotests: Refactor blockdev-reopen test for iothreads -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -We'll want to test more than one successful case in the future, so -prepare the test for that by a refactoring that runs each scenario in a -separate VM. 
- -test_iothreads_switch_{backing,overlay} currently produce errors, but -these are cases that should actually work, by switching either the -backing file node or the overlay node to the AioContext of the other -node. - -Signed-off-by: Kevin Wolf -Tested-by: Peter Krempa -Message-Id: <20200306141413.30705-2-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 97518e11c3d902a32386d33797044f6b79bccc6f) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/245 | 47 ++++++++++++++++++++++++++++++++++++---------- - tests/qemu-iotests/245.out | 4 ++-- - 2 files changed, 39 insertions(+), 12 deletions(-) - -diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 -index e66a23c..f69c2fa 100644 ---- a/tests/qemu-iotests/245 -+++ b/tests/qemu-iotests/245 -@@ -968,8 +968,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): - self.assertEqual(self.get_node('hd1'), None) - self.assert_qmp(self.get_node('hd2'), 'ro', True) - -- # We don't allow setting a backing file that uses a different AioContext -- def test_iothreads(self): -+ def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None): - opts = hd_opts(0) - result = self.vm.qmp('blockdev-add', conv_keys = False, **opts) - self.assert_qmp(result, 'return', {}) -@@ -984,20 +983,48 @@ class TestBlockdevReopen(iotests.QMPTestCase): - result = self.vm.qmp('object-add', qom_type='iothread', id='iothread1') - self.assert_qmp(result, 'return', {}) - -- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd0', iothread='iothread0') -+ result = self.vm.qmp('device_add', driver='virtio-scsi', id='scsi0', -+ iothread=iothread_a) - self.assert_qmp(result, 'return', {}) - -- self.reopen(opts, {'backing': 'hd2'}, "Cannot use a new backing file with a different AioContext") -- -- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd2', iothread='iothread1') -+ result = self.vm.qmp('device_add', driver='virtio-scsi', id='scsi1', -+ iothread=iothread_b) - 
self.assert_qmp(result, 'return', {}) - -- self.reopen(opts, {'backing': 'hd2'}, "Cannot use a new backing file with a different AioContext") -+ if iothread_a: -+ result = self.vm.qmp('device_add', driver='scsi-hd', drive='hd0', -+ share_rw=True, bus="scsi0.0") -+ self.assert_qmp(result, 'return', {}) - -- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd2', iothread='iothread0') -- self.assert_qmp(result, 'return', {}) -+ if iothread_b: -+ result = self.vm.qmp('device_add', driver='scsi-hd', drive='hd2', -+ share_rw=True, bus="scsi1.0") -+ self.assert_qmp(result, 'return', {}) - -- self.reopen(opts, {'backing': 'hd2'}) -+ # Attaching the backing file may or may not work -+ self.reopen(opts, {'backing': 'hd2'}, errmsg) -+ -+ # But removing the backing file should always work -+ self.reopen(opts, {'backing': None}) -+ -+ self.vm.shutdown() -+ -+ # We don't allow setting a backing file that uses a different AioContext if -+ # neither of them can switch to the other AioContext -+ def test_iothreads_error(self): -+ self.run_test_iothreads('iothread0', 'iothread1', -+ "Cannot use a new backing file with a different AioContext") -+ -+ def test_iothreads_compatible_users(self): -+ self.run_test_iothreads('iothread0', 'iothread0') -+ -+ def test_iothreads_switch_backing(self): -+ self.run_test_iothreads('iothread0', None, -+ "Cannot use a new backing file with a different AioContext") -+ -+ def test_iothreads_switch_overlay(self): -+ self.run_test_iothreads(None, 'iothread0', -+ "Cannot use a new backing file with a different AioContext") - - if __name__ == '__main__': - iotests.main(supported_fmts=["qcow2"], -diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out -index a19de52..682b933 100644 ---- a/tests/qemu-iotests/245.out -+++ b/tests/qemu-iotests/245.out -@@ -1,6 +1,6 @@ --.................. -+..................... 
- ---------------------------------------------------------------------- --Ran 18 tests -+Ran 21 tests - - OK - {"execute": "job-finalize", "arguments": {"id": "commit0"}} --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Support-job-complete-in-run_job.patch b/SOURCES/kvm-iotests-Support-job-complete-in-run_job.patch deleted file mode 100644 index 08971a0..0000000 --- a/SOURCES/kvm-iotests-Support-job-complete-in-run_job.patch +++ /dev/null @@ -1,46 +0,0 @@ -From a3778aef0be61dead835af39073a62bbf72c8e20 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:23:59 +0000 -Subject: [PATCH 02/18] iotests: Support job-complete in run_job() - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-2-kwolf@redhat.com> -Patchwork-id: 93746 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/6] iotests: Support job-complete in run_job() -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Automatically complete jobs that have a 'ready' state and need an -explicit job-complete. Without this, run_job() would hang for such -jobs. - -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Alberto Garcia -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 4688c4e32ec76004676470f11734478799673d6d) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/iotests.py | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index df07089..3cff671 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -617,6 +617,8 @@ class VM(qtest.QEMUQtestMachine): - error = j['error'] - if use_log: - log('Job failed: %s' % (j['error'])) -+ elif status == 'ready': -+ self.qmp_log('job-complete', id=job) - elif status == 'pending' and not auto_finalize: - if pre_finalize: - pre_finalize() --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Test-committing-to-short-backing-file.patch b/SOURCES/kvm-iotests-Test-committing-to-short-backing-file.patch deleted file mode 100644 index fbbaac6..0000000 --- a/SOURCES/kvm-iotests-Test-committing-to-short-backing-file.patch +++ /dev/null @@ -1,480 +0,0 @@ -From e2a1b3fd32be8bb730656a6f22eb4f543b120c9d Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 8 Jun 2020 15:01:39 +0100 -Subject: [PATCH 11/17] iotests: Test committing to short backing file - -RH-Author: Kevin Wolf -Message-id: <20200608150140.38218-11-kwolf@redhat.com> -Patchwork-id: 97453 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 10/11] iotests: Test committing to short backing file -Bugzilla: 1780574 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz - -Signed-off-by: Kevin Wolf -Message-Id: <20200424125448.63318-10-kwolf@redhat.com> -Reviewed-by: Max Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit bf03dede475e29a16f9188ea85a4d77cd3dcf2b7) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/274 | 155 ++++++++++++++++++++++++++ - tests/qemu-iotests/274.out | 268 +++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/group | 1 + - 3 files changed, 424 insertions(+) - create mode 100755 tests/qemu-iotests/274 - create mode 100644 tests/qemu-iotests/274.out - -diff --git a/tests/qemu-iotests/274 b/tests/qemu-iotests/274 -new file mode 100755 -index 0000000..e951f72 ---- /dev/null -+++ b/tests/qemu-iotests/274 -@@ -0,0 +1,155 @@ -+#!/usr/bin/env python3 -+# -+# Copyright (C) 2019 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . 
-+# -+# Creator/Owner: Kevin Wolf -+# -+# Some tests for short backing files and short overlays -+ -+import iotests -+ -+iotests.verify_image_format(supported_fmts=['qcow2']) -+iotests.verify_platform(['linux']) -+ -+size_short = 1 * 1024 * 1024 -+size_long = 2 * 1024 * 1024 -+size_diff = size_long - size_short -+ -+def create_chain() -> None: -+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base, -+ str(size_long)) -+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, mid, -+ str(size_short)) -+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', mid, top, -+ str(size_long)) -+ -+ iotests.qemu_io_log('-c', 'write -P 1 0 %d' % size_long, base) -+ -+def create_vm() -> iotests.VM: -+ vm = iotests.VM() -+ vm.add_blockdev('file,filename=%s,node-name=base-file' % base) -+ vm.add_blockdev('%s,file=base-file,node-name=base' % iotests.imgfmt) -+ vm.add_blockdev('file,filename=%s,node-name=mid-file' % mid) -+ vm.add_blockdev('%s,file=mid-file,node-name=mid,backing=base' -+ % iotests.imgfmt) -+ vm.add_drive(top, 'backing=mid,node-name=top') -+ return vm -+ -+with iotests.FilePath('base') as base, \ -+ iotests.FilePath('mid') as mid, \ -+ iotests.FilePath('top') as top: -+ -+ iotests.log('== Commit tests ==') -+ -+ create_chain() -+ -+ iotests.log('=== Check visible data ===') -+ -+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, top) -+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), top) -+ -+ iotests.log('=== Checking allocation status ===') -+ -+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, -+ '-c', 'alloc %d %d' % (size_short, size_diff), -+ base) -+ -+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, -+ '-c', 'alloc %d %d' % (size_short, size_diff), -+ mid) -+ -+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short, -+ '-c', 'alloc %d %d' % (size_short, size_diff), -+ top) -+ -+ iotests.log('=== Checking map ===') -+ -+ iotests.qemu_img_log('map', '--output=json', base) -+ iotests.qemu_img_log('map', 
'--output=human', base) -+ iotests.qemu_img_log('map', '--output=json', mid) -+ iotests.qemu_img_log('map', '--output=human', mid) -+ iotests.qemu_img_log('map', '--output=json', top) -+ iotests.qemu_img_log('map', '--output=human', top) -+ -+ iotests.log('=== Testing qemu-img commit (top -> mid) ===') -+ -+ iotests.qemu_img_log('commit', top) -+ iotests.img_info_log(mid) -+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) -+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) -+ -+ iotests.log('=== Testing HMP commit (top -> mid) ===') -+ -+ create_chain() -+ with create_vm() as vm: -+ vm.launch() -+ vm.qmp_log('human-monitor-command', command_line='commit drive0') -+ -+ iotests.img_info_log(mid) -+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) -+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) -+ -+ iotests.log('=== Testing QMP active commit (top -> mid) ===') -+ -+ create_chain() -+ with create_vm() as vm: -+ vm.launch() -+ vm.qmp_log('block-commit', device='top', base_node='mid', -+ job_id='job0', auto_dismiss=False) -+ vm.run_job('job0', wait=5) -+ -+ iotests.img_info_log(mid) -+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid) -+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid) -+ -+ -+ iotests.log('== Resize tests ==') -+ -+ # Use different sizes for different allocation modes: -+ # -+ # We want to have at least one test where 32 bit truncation in the size of -+ # the overlapping area becomes visible. This is covered by the -+ # prealloc='off' case (1G to 6G is an overlap of 5G). -+ # -+ # However, we can only do this for modes that don't preallocate data -+ # because otherwise we might run out of space on the test host. -+ # -+ # We also want to test some unaligned combinations. 
-+ for (prealloc, base_size, top_size_old, top_size_new, off) in [ -+ ('off', '6G', '1G', '8G', '5G'), -+ ('metadata', '32G', '30G', '33G', '31G'), -+ ('falloc', '10M', '5M', '15M', '9M'), -+ ('full', '16M', '8M', '12M', '11M'), -+ ('off', '384k', '253k', '512k', '253k'), -+ ('off', '400k', '256k', '512k', '336k'), -+ ('off', '512k', '256k', '500k', '436k')]: -+ -+ iotests.log('=== preallocation=%s ===' % prealloc) -+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base, base_size) -+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, top, -+ top_size_old) -+ iotests.qemu_io_log('-c', 'write -P 1 %s 64k' % off, base) -+ -+ # After this, top_size_old to base_size should be allocated/zeroed. -+ # -+ # In theory, leaving base_size to top_size_new unallocated would be -+ # correct, but in practice, if we zero out anything, we zero out -+ # everything up to top_size_new. -+ iotests.qemu_img_log('resize', '-f', iotests.imgfmt, -+ '--preallocation', prealloc, top, top_size_new) -+ iotests.qemu_io_log('-c', 'read -P 0 %s 64k' % off, top) -+ iotests.qemu_io_log('-c', 'map', top) -+ iotests.qemu_img_log('map', '--output=json', top) -diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out -new file mode 100644 -index 0000000..1a796fd ---- /dev/null -+++ b/tests/qemu-iotests/274.out -@@ -0,0 +1,268 @@ -+== Commit tests == -+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+wrote 2097152/2097152 bytes at offset 0 -+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+=== Check visible data === -+read 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ 
-+read 1048576/1048576 bytes at offset 1048576 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+=== Checking allocation status === -+1048576/1048576 bytes allocated at offset 0 bytes -+1048576/1048576 bytes allocated at offset 1 MiB -+ -+0/1048576 bytes allocated at offset 0 bytes -+0/0 bytes allocated at offset 1 MiB -+ -+0/1048576 bytes allocated at offset 0 bytes -+0/1048576 bytes allocated at offset 1 MiB -+ -+=== Checking map === -+[{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": 327680}] -+ -+Offset Length Mapped to File -+0 0x200000 0x50000 TEST_DIR/PID-base -+ -+[{ "start": 0, "length": 1048576, "depth": 1, "zero": false, "data": true, "offset": 327680}] -+ -+Offset Length Mapped to File -+0 0x100000 0x50000 TEST_DIR/PID-base -+ -+[{ "start": 0, "length": 1048576, "depth": 2, "zero": false, "data": true, "offset": 327680}, -+{ "start": 1048576, "length": 1048576, "depth": 0, "zero": true, "data": false}] -+ -+Offset Length Mapped to File -+0 0x100000 0x50000 TEST_DIR/PID-base -+ -+=== Testing qemu-img commit (top -> mid) === -+Image committed. 
-+ -+image: TEST_IMG -+file format: IMGFMT -+virtual size: 2 MiB (2097152 bytes) -+cluster_size: 65536 -+backing file: TEST_DIR/PID-base -+Format specific information: -+ compat: 1.1 -+ lazy refcounts: false -+ refcount bits: 16 -+ corrupt: false -+ -+read 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+read 1048576/1048576 bytes at offset 1048576 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+=== Testing HMP commit (top -> mid) === -+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+wrote 2097152/2097152 bytes at offset 0 -+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+{"execute": "human-monitor-command", "arguments": {"command-line": "commit drive0"}} -+{"return": ""} -+image: TEST_IMG -+file format: IMGFMT -+virtual size: 2 MiB (2097152 bytes) -+cluster_size: 65536 -+backing file: TEST_DIR/PID-base -+Format specific information: -+ compat: 1.1 -+ lazy refcounts: false -+ refcount bits: 16 -+ corrupt: false -+ -+read 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+read 1048576/1048576 bytes at offset 1048576 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+=== Testing QMP active commit (top -> mid) === -+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off 
refcount_bits=16 -+ -+wrote 2097152/2097152 bytes at offset 0 -+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+{"execute": "block-commit", "arguments": {"auto-dismiss": false, "base-node": "mid", "device": "top", "job-id": "job0"}} -+{"return": {}} -+{"execute": "job-complete", "arguments": {"id": "job0"}} -+{"return": {}} -+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "job-dismiss", "arguments": {"id": "job0"}} -+{"return": {}} -+image: TEST_IMG -+file format: IMGFMT -+virtual size: 2 MiB (2097152 bytes) -+cluster_size: 65536 -+backing file: TEST_DIR/PID-base -+Format specific information: -+ compat: 1.1 -+ lazy refcounts: false -+ refcount bits: 16 -+ corrupt: false -+ -+read 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+read 1048576/1048576 bytes at offset 1048576 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== Resize tests == -+=== preallocation=off === -+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=6442450944 cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=1073741824 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+wrote 65536/65536 bytes at offset 5368709120 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+Image resized. 
-+ -+read 65536/65536 bytes at offset 5368709120 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+1 GiB (0x40000000) bytes not allocated at offset 0 bytes (0x0) -+7 GiB (0x1c0000000) bytes allocated at offset 1 GiB (0x40000000) -+ -+[{ "start": 0, "length": 1073741824, "depth": 1, "zero": true, "data": false}, -+{ "start": 1073741824, "length": 7516192768, "depth": 0, "zero": true, "data": false}] -+ -+=== preallocation=metadata === -+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=34359738368 cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=32212254720 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+wrote 65536/65536 bytes at offset 33285996544 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+Image resized. -+ -+read 65536/65536 bytes at offset 33285996544 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+30 GiB (0x780000000) bytes not allocated at offset 0 bytes (0x0) -+3 GiB (0xc0000000) bytes allocated at offset 30 GiB (0x780000000) -+ -+[{ "start": 0, "length": 32212254720, "depth": 1, "zero": true, "data": false}, -+{ "start": 32212254720, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 327680}, -+{ "start": 32749125632, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 537264128}, -+{ "start": 33285996544, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1074200576}, -+{ "start": 33822867456, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1611137024}, -+{ "start": 34359738368, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2148139008}, -+{ "start": 34896609280, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2685075456}] -+ -+=== preallocation=falloc === -+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=10485760 cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+Formatting 
'TEST_DIR/PID-top', fmt=qcow2 size=5242880 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+wrote 65536/65536 bytes at offset 9437184 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+Image resized. -+ -+read 65536/65536 bytes at offset 9437184 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+5 MiB (0x500000) bytes not allocated at offset 0 bytes (0x0) -+10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000) -+ -+[{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false}, -+{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}] -+ -+=== preallocation=full === -+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=8388608 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+wrote 65536/65536 bytes at offset 11534336 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+Image resized. -+ -+read 65536/65536 bytes at offset 11534336 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+8 MiB (0x800000) bytes not allocated at offset 0 bytes (0x0) -+4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000) -+ -+[{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false}, -+{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}] -+ -+=== preallocation=off === -+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=259072 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+wrote 65536/65536 bytes at offset 259072 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+Image resized. 
-+ -+read 65536/65536 bytes at offset 259072 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+192 KiB (0x30000) bytes not allocated at offset 0 bytes (0x0) -+320 KiB (0x50000) bytes allocated at offset 192 KiB (0x30000) -+ -+[{ "start": 0, "length": 196608, "depth": 1, "zero": true, "data": false}, -+{ "start": 196608, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": 327680}, -+{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}] -+ -+=== preallocation=off === -+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=409600 cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+wrote 65536/65536 bytes at offset 344064 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+Image resized. -+ -+read 65536/65536 bytes at offset 344064 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0) -+256 KiB (0x40000) bytes allocated at offset 256 KiB (0x40000) -+ -+[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false}, -+{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}] -+ -+=== preallocation=off === -+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=524288 cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+wrote 65536/65536 bytes at offset 446464 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+Image resized. 
-+ -+read 65536/65536 bytes at offset 446464 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0) -+244 KiB (0x3d000) bytes allocated at offset 256 KiB (0x40000) -+ -+[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false}, -+{ "start": 262144, "length": 249856, "depth": 0, "zero": true, "data": false}] -+ -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 033b54d..cddae00 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -286,6 +286,7 @@ - 270 rw backing quick - 272 rw - 273 backing quick -+274 rw backing - 277 rw quick - 280 rw migration quick - 281 rw quick --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Test-external-snapshot-with-VM-state.patch b/SOURCES/kvm-iotests-Test-external-snapshot-with-VM-state.patch deleted file mode 100644 index 6fcb2f6..0000000 --- a/SOURCES/kvm-iotests-Test-external-snapshot-with-VM-state.patch +++ /dev/null @@ -1,189 +0,0 @@ -From 38b0cff9703fc740c30f5874973ac1be88f94d9f Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:03 +0000 -Subject: [PATCH 06/18] iotests: Test external snapshot with VM state - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-6-kwolf@redhat.com> -Patchwork-id: 93752 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/6] iotests: Test external snapshot with VM state -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -This tests creating an external snapshot with VM state (which results in -an active overlay over an inactive backing file, which is also the root -node of an inactive BlockBackend), re-activating the images and -performing some operations to test that the re-activation worked as -intended. - -Signed-off-by: Kevin Wolf -(cherry picked from commit f62f08ab7a9d902da70078992248ec5c98f652ad) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/280 | 83 ++++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/280.out | 50 ++++++++++++++++++++++++++++ - tests/qemu-iotests/group | 1 + - 3 files changed, 134 insertions(+) - create mode 100755 tests/qemu-iotests/280 - create mode 100644 tests/qemu-iotests/280.out - -diff --git a/tests/qemu-iotests/280 b/tests/qemu-iotests/280 -new file mode 100755 -index 0000000..0b1fa8e ---- /dev/null -+++ b/tests/qemu-iotests/280 -@@ -0,0 +1,83 @@ -+#!/usr/bin/env python -+# -+# Copyright (C) 2019 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+# Creator/Owner: Kevin Wolf -+# -+# Test migration to file for taking an external snapshot with VM state. 
-+ -+import iotests -+import os -+ -+iotests.verify_image_format(supported_fmts=['qcow2']) -+iotests.verify_protocol(supported=['file']) -+iotests.verify_platform(['linux']) -+ -+with iotests.FilePath('base') as base_path , \ -+ iotests.FilePath('top') as top_path, \ -+ iotests.VM() as vm: -+ -+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base_path, '64M') -+ -+ iotests.log('=== Launch VM ===') -+ vm.add_object('iothread,id=iothread0') -+ vm.add_blockdev('file,filename=%s,node-name=base-file' % (base_path)) -+ vm.add_blockdev('%s,file=base-file,node-name=base-fmt' % (iotests.imgfmt)) -+ vm.add_device('virtio-blk,drive=base-fmt,iothread=iothread0,id=vda') -+ vm.launch() -+ -+ vm.enable_migration_events('VM') -+ -+ iotests.log('\n=== Migrate to file ===') -+ vm.qmp_log('migrate', uri='exec:cat > /dev/null') -+ -+ with iotests.Timeout(3, 'Migration does not complete'): -+ vm.wait_migration() -+ -+ iotests.log('\nVM is now stopped:') -+ iotests.log(vm.qmp('query-migrate')['return']['status']) -+ vm.qmp_log('query-status') -+ -+ iotests.log('\n=== Create a snapshot of the disk image ===') -+ vm.blockdev_create({ -+ 'driver': 'file', -+ 'filename': top_path, -+ 'size': 0, -+ }) -+ vm.qmp_log('blockdev-add', node_name='top-file', -+ driver='file', filename=top_path, -+ filters=[iotests.filter_qmp_testfiles]) -+ -+ vm.blockdev_create({ -+ 'driver': iotests.imgfmt, -+ 'file': 'top-file', -+ 'size': 1024 * 1024, -+ }) -+ vm.qmp_log('blockdev-add', node_name='top-fmt', -+ driver=iotests.imgfmt, file='top-file') -+ -+ vm.qmp_log('blockdev-snapshot', node='base-fmt', overlay='top-fmt') -+ -+ iotests.log('\n=== Resume the VM and simulate a write request ===') -+ vm.qmp_log('cont') -+ iotests.log(vm.hmp_qemu_io('-d vda/virtio-backend', 'write 4k 4k')) -+ -+ iotests.log('\n=== Commit it to the backing file ===') -+ result = vm.qmp_log('block-commit', job_id='job0', auto_dismiss=False, -+ device='top-fmt', top_node='top-fmt', -+ filters=[iotests.filter_qmp_testfiles]) -+ 
if 'return' in result: -+ vm.run_job('job0') -diff --git a/tests/qemu-iotests/280.out b/tests/qemu-iotests/280.out -new file mode 100644 -index 0000000..5d382fa ---- /dev/null -+++ b/tests/qemu-iotests/280.out -@@ -0,0 +1,50 @@ -+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=67108864 cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+=== Launch VM === -+Enabling migration QMP events on VM... -+{"return": {}} -+ -+=== Migrate to file === -+{"execute": "migrate", "arguments": {"uri": "exec:cat > /dev/null"}} -+{"return": {}} -+{"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+ -+VM is now stopped: -+completed -+{"execute": "query-status", "arguments": {}} -+{"return": {"running": false, "singlestep": false, "status": "postmigrate"}} -+ -+=== Create a snapshot of the disk image === -+{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "file", "filename": "TEST_DIR/PID-top", "size": 0}}} -+{"return": {}} -+{"execute": "job-dismiss", "arguments": {"id": "job0"}} -+{"return": {}} -+ -+{"execute": "blockdev-add", "arguments": {"driver": "file", "filename": "TEST_DIR/PID-top", "node-name": "top-file"}} -+{"return": {}} -+{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "qcow2", "file": "top-file", "size": 1048576}}} -+{"return": {}} -+{"execute": "job-dismiss", "arguments": {"id": "job0"}} -+{"return": {}} -+ -+{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": "top-file", "node-name": "top-fmt"}} -+{"return": {}} -+{"execute": "blockdev-snapshot", "arguments": {"node": "base-fmt", "overlay": "top-fmt"}} -+{"return": {}} -+ -+=== Resume the VM and simulate a write request === -+{"execute": 
"cont", "arguments": {}} -+{"return": {}} -+{"return": ""} -+ -+=== Commit it to the backing file === -+{"execute": "block-commit", "arguments": {"auto-dismiss": false, "device": "top-fmt", "job-id": "job0", "top-node": "top-fmt"}} -+{"return": {}} -+{"execute": "job-complete", "arguments": {"id": "job0"}} -+{"return": {}} -+{"data": {"device": "job0", "len": 65536, "offset": 65536, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"device": "job0", "len": 65536, "offset": 65536, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "job-dismiss", "arguments": {"id": "job0"}} -+{"return": {}} -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 06cc734..01301cd 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -286,3 +286,4 @@ - 272 rw - 273 backing quick - 277 rw quick -+280 rw migration quick --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch b/SOURCES/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch deleted file mode 100644 index b09439b..0000000 --- a/SOURCES/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch +++ /dev/null @@ -1,322 +0,0 @@ -From 6b9a6ba9ed753ad7aa714b35de938ebeeb4fa6cb Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 10:27:49 +0000 -Subject: [PATCH 16/18] iotests: Test handling of AioContexts with some - blockdev actions - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-10-slp@redhat.com> -Patchwork-id: 93762 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 9/9] iotests: Test handling of AioContexts with some blockdev actions -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Includes the 
following tests: - - - Adding a dirty bitmap. - * RHBZ: 1782175 - - - Starting a drive-mirror to an NBD-backed target. - * RHBZ: 1746217, 1773517 - - - Aborting an external snapshot transaction. - * RHBZ: 1779036 - - - Aborting a blockdev backup transaction. - * RHBZ: 1782111 - -For each one of them, a VM with a number of disks running in an -IOThread AioContext is used. - -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 9b8c59e7610b9c5315ef093d801843dbe8debfac) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/281 | 247 +++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/281.out | 5 + - tests/qemu-iotests/group | 1 + - 3 files changed, 253 insertions(+) - create mode 100755 tests/qemu-iotests/281 - create mode 100644 tests/qemu-iotests/281.out - -diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 -new file mode 100755 -index 0000000..269d583 ---- /dev/null -+++ b/tests/qemu-iotests/281 -@@ -0,0 +1,247 @@ -+#!/usr/bin/env python -+# -+# Test cases for blockdev + IOThread interactions -+# -+# Copyright (C) 2019 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . 
-+# -+ -+import os -+import iotests -+from iotests import qemu_img -+ -+image_len = 64 * 1024 * 1024 -+ -+# Test for RHBZ#1782175 -+class TestDirtyBitmapIOThread(iotests.QMPTestCase): -+ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') -+ images = { 'drive0': drive0_img } -+ -+ def setUp(self): -+ for name in self.images: -+ qemu_img('create', '-f', iotests.imgfmt, -+ self.images[name], str(image_len)) -+ -+ self.vm = iotests.VM() -+ self.vm.add_object('iothread,id=iothread0') -+ -+ for name in self.images: -+ self.vm.add_blockdev('driver=file,filename=%s,node-name=file_%s' -+ % (self.images[name], name)) -+ self.vm.add_blockdev('driver=qcow2,file=file_%s,node-name=%s' -+ % (name, name)) -+ -+ self.vm.launch() -+ self.vm.qmp('x-blockdev-set-iothread', -+ node_name='drive0', iothread='iothread0', -+ force=True) -+ -+ def tearDown(self): -+ self.vm.shutdown() -+ for name in self.images: -+ os.remove(self.images[name]) -+ -+ def test_add_dirty_bitmap(self): -+ result = self.vm.qmp( -+ 'block-dirty-bitmap-add', -+ node='drive0', -+ name='bitmap1', -+ persistent=True, -+ ) -+ -+ self.assert_qmp(result, 'return', {}) -+ -+ -+# Test for RHBZ#1746217 & RHBZ#1773517 -+class TestNBDMirrorIOThread(iotests.QMPTestCase): -+ nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') -+ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') -+ mirror_img = os.path.join(iotests.test_dir, 'mirror.img') -+ images = { 'drive0': drive0_img, 'mirror': mirror_img } -+ -+ def setUp(self): -+ for name in self.images: -+ qemu_img('create', '-f', iotests.imgfmt, -+ self.images[name], str(image_len)) -+ -+ self.vm_src = iotests.VM(path_suffix='src') -+ self.vm_src.add_object('iothread,id=iothread0') -+ self.vm_src.add_blockdev('driver=file,filename=%s,node-name=file0' -+ % (self.drive0_img)) -+ self.vm_src.add_blockdev('driver=qcow2,file=file0,node-name=drive0') -+ self.vm_src.launch() -+ self.vm_src.qmp('x-blockdev-set-iothread', -+ node_name='drive0', iothread='iothread0', -+ 
force=True) -+ -+ self.vm_tgt = iotests.VM(path_suffix='tgt') -+ self.vm_tgt.add_object('iothread,id=iothread0') -+ self.vm_tgt.add_blockdev('driver=file,filename=%s,node-name=file0' -+ % (self.mirror_img)) -+ self.vm_tgt.add_blockdev('driver=qcow2,file=file0,node-name=drive0') -+ self.vm_tgt.launch() -+ self.vm_tgt.qmp('x-blockdev-set-iothread', -+ node_name='drive0', iothread='iothread0', -+ force=True) -+ -+ def tearDown(self): -+ self.vm_src.shutdown() -+ self.vm_tgt.shutdown() -+ for name in self.images: -+ os.remove(self.images[name]) -+ -+ def test_nbd_mirror(self): -+ result = self.vm_tgt.qmp( -+ 'nbd-server-start', -+ addr={ -+ 'type': 'unix', -+ 'data': { 'path': self.nbd_sock } -+ } -+ ) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.vm_tgt.qmp( -+ 'nbd-server-add', -+ device='drive0', -+ writable=True -+ ) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.vm_src.qmp( -+ 'drive-mirror', -+ device='drive0', -+ target='nbd+unix:///drive0?socket=' + self.nbd_sock, -+ sync='full', -+ mode='existing', -+ speed=64*1024*1024, -+ job_id='j1' -+ ) -+ self.assert_qmp(result, 'return', {}) -+ -+ self.vm_src.event_wait(name="BLOCK_JOB_READY") -+ -+ -+# Test for RHBZ#1779036 -+class TestExternalSnapshotAbort(iotests.QMPTestCase): -+ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') -+ snapshot_img = os.path.join(iotests.test_dir, 'snapshot.img') -+ images = { 'drive0': drive0_img, 'snapshot': snapshot_img } -+ -+ def setUp(self): -+ for name in self.images: -+ qemu_img('create', '-f', iotests.imgfmt, -+ self.images[name], str(image_len)) -+ -+ self.vm = iotests.VM() -+ self.vm.add_object('iothread,id=iothread0') -+ self.vm.add_blockdev('driver=file,filename=%s,node-name=file0' -+ % (self.drive0_img)) -+ self.vm.add_blockdev('driver=qcow2,file=file0,node-name=drive0') -+ self.vm.launch() -+ self.vm.qmp('x-blockdev-set-iothread', -+ node_name='drive0', iothread='iothread0', -+ force=True) -+ -+ def tearDown(self): -+ self.vm.shutdown() 
-+ for name in self.images: -+ os.remove(self.images[name]) -+ -+ def test_external_snapshot_abort(self): -+ # Use a two actions transaction with a bogus values on the second -+ # one to trigger an abort of the transaction. -+ result = self.vm.qmp('transaction', actions=[ -+ { -+ 'type': 'blockdev-snapshot-sync', -+ 'data': { 'node-name': 'drive0', -+ 'snapshot-file': self.snapshot_img, -+ 'snapshot-node-name': 'snap1', -+ 'mode': 'absolute-paths', -+ 'format': 'qcow2' } -+ }, -+ { -+ 'type': 'blockdev-snapshot-sync', -+ 'data': { 'node-name': 'drive0', -+ 'snapshot-file': '/fakesnapshot', -+ 'snapshot-node-name': 'snap2', -+ 'mode': 'absolute-paths', -+ 'format': 'qcow2' } -+ }, -+ ]) -+ -+ # Crashes on failure, we expect this error. -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ -+ -+# Test for RHBZ#1782111 -+class TestBlockdevBackupAbort(iotests.QMPTestCase): -+ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') -+ drive1_img = os.path.join(iotests.test_dir, 'drive1.img') -+ snap0_img = os.path.join(iotests.test_dir, 'snap0.img') -+ snap1_img = os.path.join(iotests.test_dir, 'snap1.img') -+ images = { 'drive0': drive0_img, -+ 'drive1': drive1_img, -+ 'snap0': snap0_img, -+ 'snap1': snap1_img } -+ -+ def setUp(self): -+ for name in self.images: -+ qemu_img('create', '-f', iotests.imgfmt, -+ self.images[name], str(image_len)) -+ -+ self.vm = iotests.VM() -+ self.vm.add_object('iothread,id=iothread0') -+ self.vm.add_device('virtio-scsi,iothread=iothread0') -+ -+ for name in self.images: -+ self.vm.add_blockdev('driver=file,filename=%s,node-name=file_%s' -+ % (self.images[name], name)) -+ self.vm.add_blockdev('driver=qcow2,file=file_%s,node-name=%s' -+ % (name, name)) -+ -+ self.vm.add_device('scsi-hd,drive=drive0') -+ self.vm.add_device('scsi-hd,drive=drive1') -+ self.vm.launch() -+ -+ def tearDown(self): -+ self.vm.shutdown() -+ for name in self.images: -+ os.remove(self.images[name]) -+ -+ def test_blockdev_backup_abort(self): -+ # Use a two 
actions transaction with a bogus values on the second -+ # one to trigger an abort of the transaction. -+ result = self.vm.qmp('transaction', actions=[ -+ { -+ 'type': 'blockdev-backup', -+ 'data': { 'device': 'drive0', -+ 'target': 'snap0', -+ 'sync': 'full', -+ 'job-id': 'j1' } -+ }, -+ { -+ 'type': 'blockdev-backup', -+ 'data': { 'device': 'drive1', -+ 'target': 'snap1', -+ 'sync': 'full' } -+ }, -+ ]) -+ -+ # Hangs on failure, we expect this error. -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ -+if __name__ == '__main__': -+ iotests.main(supported_fmts=['qcow2'], -+ supported_protocols=['file']) -diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out -new file mode 100644 -index 0000000..89968f3 ---- /dev/null -+++ b/tests/qemu-iotests/281.out -@@ -0,0 +1,5 @@ -+.... -+---------------------------------------------------------------------- -+Ran 4 tests -+ -+OK -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 01301cd..c0e8197 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -287,3 +287,4 @@ - 273 backing quick - 277 rw quick - 280 rw migration quick -+281 rw quick --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch b/SOURCES/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch deleted file mode 100644 index 58ef198..0000000 --- a/SOURCES/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 239f7bdeef48a3c0b07098617371b9955dc55348 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:36 +0000 -Subject: [PATCH 16/20] iotests: Test mirror with temporarily disabled target - backing file - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-11-kwolf@redhat.com> -Patchwork-id: 94288 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 10/13] iotests: Test mirror with temporarily disabled target backing file -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: 
Daniel P. Berrange -RH-Acked-by: Peter Krempa - -The newly tested scenario is a common live storage migration scenario: -The target node is opened without a backing file so that the active -layer is mirrored while its backing chain can be copied in the -background. - -The backing chain should be attached to the mirror target node when -finalising the job, just before switching the users of the source node -to the new copy (at which point the mirror job still has a reference to -the node). drive-mirror did this automatically, but with blockdev-mirror -this is the job of the QMP client. - -This patch adds test cases for two ways to achieve the desired result, -using either x-blockdev-reopen or blockdev-snapshot. - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-5-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit 8bdee9f10eac2aefdcc5095feef756354c87bdec) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/155 | 56 +++++++++++++++++++++++++++++++++++++++++----- - tests/qemu-iotests/155.out | 4 ++-- - 2 files changed, 53 insertions(+), 7 deletions(-) - -diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 -index d7ef257..3053e50 100755 ---- a/tests/qemu-iotests/155 -+++ b/tests/qemu-iotests/155 -@@ -45,10 +45,15 @@ target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) - # image during runtime, only makes sense if - # target_blockdev_backing is not None - # (None: same as target_backing) -+# target_open_with_backing: If True, the target image is added with its backing -+# chain opened right away. If False, blockdev-add -+# opens it without a backing file and job completion -+# is supposed to open the backing chain. 
- - class BaseClass(iotests.QMPTestCase): - target_blockdev_backing = None - target_real_backing = None -+ target_open_with_backing = True - - def setUp(self): - qemu_img('create', '-f', iotests.imgfmt, back0_img, '1440K') -@@ -80,9 +85,13 @@ class BaseClass(iotests.QMPTestCase): - options = { 'node-name': 'target', - 'driver': iotests.imgfmt, - 'file': { 'driver': 'file', -+ 'node-name': 'target-file', - 'filename': target_img } } -- if self.target_blockdev_backing: -- options['backing'] = self.target_blockdev_backing -+ -+ if not self.target_open_with_backing: -+ options['backing'] = None -+ elif self.target_blockdev_backing: -+ options['backing'] = self.target_blockdev_backing - - result = self.vm.qmp('blockdev-add', **options) - self.assert_qmp(result, 'return', {}) -@@ -147,10 +156,14 @@ class BaseClass(iotests.QMPTestCase): - # cmd: Mirroring command to execute, either drive-mirror or blockdev-mirror - - class MirrorBaseClass(BaseClass): -+ def openBacking(self): -+ pass -+ - def runMirror(self, sync): - if self.cmd == 'blockdev-mirror': - result = self.vm.qmp(self.cmd, job_id='mirror-job', device='source', -- sync=sync, target='target') -+ sync=sync, target='target', -+ auto_finalize=False) - else: - if self.existing: - mode = 'existing' -@@ -159,11 +172,12 @@ class MirrorBaseClass(BaseClass): - result = self.vm.qmp(self.cmd, job_id='mirror-job', device='source', - sync=sync, target=target_img, - format=iotests.imgfmt, mode=mode, -- node_name='target') -+ node_name='target', auto_finalize=False) - - self.assert_qmp(result, 'return', {}) - -- self.complete_and_wait('mirror-job') -+ self.vm.run_job('mirror-job', use_log=False, auto_finalize=False, -+ pre_finalize=self.openBacking, auto_dismiss=True) - - def testFull(self): - self.runMirror('full') -@@ -221,6 +235,38 @@ class TestBlockdevMirrorForcedBacking(MirrorBaseClass): - target_blockdev_backing = { 'driver': 'null-co' } - target_real_backing = 'null-co://' - -+# Attach the backing chain only during 
completion, with blockdev-reopen -+class TestBlockdevMirrorReopen(MirrorBaseClass): -+ cmd = 'blockdev-mirror' -+ existing = True -+ target_backing = 'null-co://' -+ target_open_with_backing = False -+ -+ def openBacking(self): -+ if not self.target_open_with_backing: -+ result = self.vm.qmp('blockdev-add', node_name="backing", -+ driver="null-co") -+ self.assert_qmp(result, 'return', {}) -+ result = self.vm.qmp('x-blockdev-reopen', node_name="target", -+ driver=iotests.imgfmt, file="target-file", -+ backing="backing") -+ self.assert_qmp(result, 'return', {}) -+ -+# Attach the backing chain only during completion, with blockdev-snapshot -+class TestBlockdevMirrorSnapshot(MirrorBaseClass): -+ cmd = 'blockdev-mirror' -+ existing = True -+ target_backing = 'null-co://' -+ target_open_with_backing = False -+ -+ def openBacking(self): -+ if not self.target_open_with_backing: -+ result = self.vm.qmp('blockdev-add', node_name="backing", -+ driver="null-co") -+ self.assert_qmp(result, 'return', {}) -+ result = self.vm.qmp('blockdev-snapshot', node="backing", -+ overlay="target") -+ self.assert_qmp(result, 'return', {}) - - class TestCommit(BaseClass): - existing = False -diff --git a/tests/qemu-iotests/155.out b/tests/qemu-iotests/155.out -index 4176bb9..4fd1c2d 100644 ---- a/tests/qemu-iotests/155.out -+++ b/tests/qemu-iotests/155.out -@@ -1,5 +1,5 @@ --................... -+......................... 
- ---------------------------------------------------------------------- --Ran 19 tests -+Ran 25 tests - - OK --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-Use-complete_and_wait-in-155.patch b/SOURCES/kvm-iotests-Use-complete_and_wait-in-155.patch deleted file mode 100644 index 38b41be..0000000 --- a/SOURCES/kvm-iotests-Use-complete_and_wait-in-155.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 872fbd32d06bda4aba3a7e67a95f76f62e475dbe Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:27 +0000 -Subject: [PATCH 07/20] iotests: Use complete_and_wait() in 155 - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-2-kwolf@redhat.com> -Patchwork-id: 94279 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 01/13] iotests: Use complete_and_wait() in 155 -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -From: Max Reitz - -This way, we get to see errors during the completion phase. - -Signed-off-by: Max Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200218103454.296704-14-mreitz@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 6644d0e6192b36cdf2902c9774e1afb8ab2e7223) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/155 | 7 +------ - 1 file changed, 1 insertion(+), 6 deletions(-) - -diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 -index e194859..d7ef257 100755 ---- a/tests/qemu-iotests/155 -+++ b/tests/qemu-iotests/155 -@@ -163,12 +163,7 @@ class MirrorBaseClass(BaseClass): - - self.assert_qmp(result, 'return', {}) - -- self.vm.event_wait('BLOCK_JOB_READY') -- -- result = self.vm.qmp('block-job-complete', device='mirror-job') -- self.assert_qmp(result, 'return', {}) -- -- self.vm.event_wait('BLOCK_JOB_COMPLETED') -+ self.complete_and_wait('mirror-job') - - def testFull(self): - self.runMirror('full') --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-block-status-cache-New-test.patch b/SOURCES/kvm-iotests-block-status-cache-New-test.patch new file mode 100644 index 0000000..25f057c --- /dev/null +++ b/SOURCES/kvm-iotests-block-status-cache-New-test.patch @@ -0,0 +1,197 @@ +From 0ba4c0836f702bb3abbd173c7ee486a8247331ae Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 18 Jan 2022 18:00:00 +0100 +Subject: [PATCH 7/7] iotests/block-status-cache: New test + +RH-Author: Hanna Reitz +RH-MergeRequest: 112: block/io: Update BSC only if want_zero is true +RH-Commit: [2/2] ba86b4db32c33e17a85f476d445ef0523cf8f60e +RH-Bugzilla: 2041480 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +Add a new test to verify that want_zero=false block-status calls do not +pollute the block-status cache for want_zero=true calls. + +We check want_zero=true calls and their results using `qemu-img map` +(over NBD), and want_zero=false calls also using `qemu-img map` over +NBD, but using the qemu:allocation-depth context. + +(This test case cannot be integrated into nbd-qemu-allocation, because +that is a qcow2 test, and this is a raw test.) 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220118170000.49423-3-hreitz@redhat.com> +Reviewed-by: Nir Soffer +Reviewed-by: Eric Blake +Tested-by: Eric Blake +Signed-off-by: Eric Blake +(cherry picked from commit 6384dd534d742123d26c008d9794b20bc41359d5) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/tests/block-status-cache | 139 ++++++++++++++++++ + .../qemu-iotests/tests/block-status-cache.out | 5 + + 2 files changed, 144 insertions(+) + create mode 100755 tests/qemu-iotests/tests/block-status-cache + create mode 100644 tests/qemu-iotests/tests/block-status-cache.out + +diff --git a/tests/qemu-iotests/tests/block-status-cache b/tests/qemu-iotests/tests/block-status-cache +new file mode 100755 +index 0000000000..6fa10bb8f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/block-status-cache +@@ -0,0 +1,139 @@ ++#!/usr/bin/env python3 ++# group: rw quick ++# ++# Test cases for the block-status cache. ++# ++# Copyright (C) 2022 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++# ++ ++import os ++import signal ++import iotests ++from iotests import qemu_img_create, qemu_img_pipe, qemu_nbd ++ ++ ++image_size = 1 * 1024 * 1024 ++test_img = os.path.join(iotests.test_dir, 'test.img') ++ ++nbd_pidfile = os.path.join(iotests.test_dir, 'nbd.pid') ++nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ ++ ++class TestBscWithNbd(iotests.QMPTestCase): ++ def setUp(self) -> None: ++ """Just create an empty image with a read-only NBD server on it""" ++ assert qemu_img_create('-f', iotests.imgfmt, test_img, ++ str(image_size)) == 0 ++ ++ # Pass --allocation-depth to enable the qemu:allocation-depth context, ++ # which we are going to query to provoke a block-status inquiry with ++ # want_zero=false. ++ assert qemu_nbd(f'--socket={nbd_sock}', ++ f'--format={iotests.imgfmt}', ++ '--persistent', ++ '--allocation-depth', ++ '--read-only', ++ f'--pid-file={nbd_pidfile}', ++ test_img) \ ++ == 0 ++ ++ def tearDown(self) -> None: ++ with open(nbd_pidfile, encoding='utf-8') as f: ++ pid = int(f.read()) ++ os.kill(pid, signal.SIGTERM) ++ os.remove(nbd_pidfile) ++ os.remove(test_img) ++ ++ def test_with_zero_bug(self) -> None: ++ """ ++ Verify that the block-status cache is not corrupted by a ++ want_zero=false call. ++ We can provoke a want_zero=false call with `qemu-img map` over NBD with ++ x-dirty-bitmap=qemu:allocation-depth, so we first run a normal `map` ++ (which results in want_zero=true), then using said ++ qemu:allocation-depth context, and finally another normal `map` to ++ verify that the cache has not been corrupted. ++ """ ++ ++ nbd_img_opts = f'driver=nbd,server.type=unix,server.path={nbd_sock}' ++ nbd_img_opts_alloc_depth = nbd_img_opts + \ ++ ',x-dirty-bitmap=qemu:allocation-depth' ++ ++ # Normal map, results in want_zero=true. ++ # This will probably detect an allocated data sector first (qemu likes ++ # to allocate the first sector to facilitate alignment probing), and ++ # then the rest to be zero. 
The BSC will thus contain (if anything) ++ # one range covering the first sector. ++ map_pre = qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts) ++ ++ # qemu:allocation-depth maps for want_zero=false. ++ # want_zero=false should (with the file driver, which the server is ++ # using) report everything as data. While this is sufficient for ++ # want_zero=false, this is nothing that should end up in the ++ # block-status cache. ++ # Due to a bug, this information did end up in the cache, though, and ++ # this would lead to wrong information being returned on subsequent ++ # want_zero=true calls. ++ # ++ # We need to run this map twice: On the first call, we probably still ++ # have the first sector in the cache, and so this will be served from ++ # the cache; and only the subsequent range will be queried from the ++ # block driver. This subsequent range will then be entered into the ++ # cache. ++ # If we did a want_zero=true call at this point, we would thus get ++ # correct information: The first sector is not covered by the cache, so ++ # we would get fresh block-status information from the driver, which ++ # would return a data range, and this would then go into the cache, ++ # evicting the wrong range from the want_zero=false call before. ++ # ++ # Therefore, we need a second want_zero=false map to reproduce: ++ # Since the first sector is not in the cache, the query for its status ++ # will go to the driver, which will return a result that reports the ++ # whole image to be a single data area. This result will then go into ++ # the cache, and so the cache will then report the whole image to ++ # contain data. ++ # ++ # Note that once the cache reports the whole image to contain data, any ++ # subsequent map operation will be served from the cache, and so we can ++ # never loop too many times here. 
++ for _ in range(2): ++ # (Ignore the result, this is just to contaminate the cache) ++ qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts_alloc_depth) ++ ++ # Now let's see whether the cache reports everything as data, or ++ # whether we get correct information (i.e. the same as we got on our ++ # first attempt). ++ map_post = qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts) ++ ++ if map_pre != map_post: ++ print('ERROR: Map information differs before and after querying ' + ++ 'qemu:allocation-depth') ++ print('Before:') ++ print(map_pre) ++ print('After:') ++ print(map_post) ++ ++ self.fail("Map information differs") ++ ++ ++if __name__ == '__main__': ++ # The block-status cache only works on the protocol layer, so to test it, ++ # we can only use the raw format ++ iotests.main(supported_fmts=['raw'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/tests/block-status-cache.out b/tests/qemu-iotests/tests/block-status-cache.out +new file mode 100644 +index 0000000000..ae1213e6f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/block-status-cache.out +@@ -0,0 +1,5 @@ ++. 
++---------------------------------------------------------------------- ++Ran 1 tests ++ ++OK +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests-don-t-use-format-for-drive_add.patch b/SOURCES/kvm-iotests-don-t-use-format-for-drive_add.patch deleted file mode 100644 index f95e17a..0000000 --- a/SOURCES/kvm-iotests-don-t-use-format-for-drive_add.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 127360c2fa0fefa18ff828bfec3985e04791d665 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 3 Jun 2020 16:03:16 +0100 -Subject: [PATCH 17/26] iotests: don't use 'format' for drive_add -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -Message-id: <20200603160325.67506-3-kwolf@redhat.com> -Patchwork-id: 97102 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 02/11] iotests: don't use 'format' for drive_add -Bugzilla: 1778593 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -From: John Snow - -It shadows (with a different type) the built-in format. -Use something else. - -Signed-off-by: John Snow -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Max Reitz -Message-Id: <20200331000014.11581-3-jsnow@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Max Reitz -(cherry picked from commit 1d3d4b630c6ea8b19420c097f0c448b6ded95072) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/055 | 3 ++- - tests/qemu-iotests/iotests.py | 6 +++--- - 2 files changed, 5 insertions(+), 4 deletions(-) - -diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 -index c732a11..eb50c9f 100755 ---- a/tests/qemu-iotests/055 -+++ b/tests/qemu-iotests/055 -@@ -469,7 +469,8 @@ class TestDriveCompression(iotests.QMPTestCase): - qemu_img('create', '-f', fmt, blockdev_target_img, - str(TestDriveCompression.image_len), *args) - if attach_target: -- self.vm.add_drive(blockdev_target_img, format=fmt, interface="none") -+ self.vm.add_drive(blockdev_target_img, -+ img_format=fmt, interface="none") - - self.vm.launch() - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 46f880c..be20d56 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -481,20 +481,20 @@ class VM(qtest.QEMUQtestMachine): - self._args.append(opts) - return self - -- def add_drive(self, path, opts='', interface='virtio', format=imgfmt): -+ def add_drive(self, path, opts='', interface='virtio', img_format=imgfmt): - '''Add a virtio-blk drive to the VM''' - options = ['if=%s' % interface, - 'id=drive%d' % self._num_drives] - - if path is not None: - options.append('file=%s' % path) -- options.append('format=%s' % format) -+ options.append('format=%s' % img_format) - options.append('cache=%s' % cachemode) - - if opts: - options.append(opts) - -- if format == 'luks' and 'key-secret' not in opts: -+ if img_format == 'luks' and 'key-secret' not in opts: - # default luks support - if luks_default_secret_object not in self._args: - self.add_object(luks_default_secret_object) --- -1.8.3.1 - diff --git a/SOURCES/kvm-iotests-stream-error-on-reset-New-test.patch b/SOURCES/kvm-iotests-stream-error-on-reset-New-test.patch new file mode 100644 index 0000000..0214854 --- /dev/null +++ b/SOURCES/kvm-iotests-stream-error-on-reset-New-test.patch @@ -0,0 +1,198 @@ +From ffdec41922a34b6fe4e7e11f259553d65b41563e Mon Sep 17 
00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 11 Jan 2022 15:36:13 +0000 +Subject: [PATCH 4/6] iotests/stream-error-on-reset: New test + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 109: block-backend: prevent dangling BDS pointers across aio_poll() +RH-Commit: [2/2] 0ecb7010d9c121398e7ee22ee47dd85d89bcd941 +RH-Bugzilla: 2021778 2036178 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Kevin Wolf + +Author: Hanna Reitz + +Test the following scenario: +- Simple stream block in two-layer backing chain (base and top) +- The job is drained via blk_drain(), then an error occurs while the job + settles the ongoing request +- And so the job completes while in blk_drain() + +This was reported as a segfault, but is fixed by "block-backend: prevent +dangling BDS pointers across aio_poll()". + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178 +Signed-off-by: Hanna Reitz +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220111153613.25453-3-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 2ca1d5d6b91f8a52a5c651f660b2f58c94bf97ba) +Signed-off-by: Stefan Hajnoczi +--- + .../qemu-iotests/tests/stream-error-on-reset | 140 ++++++++++++++++++ + .../tests/stream-error-on-reset.out | 5 + + 2 files changed, 145 insertions(+) + create mode 100755 tests/qemu-iotests/tests/stream-error-on-reset + create mode 100644 tests/qemu-iotests/tests/stream-error-on-reset.out + +diff --git a/tests/qemu-iotests/tests/stream-error-on-reset b/tests/qemu-iotests/tests/stream-error-on-reset +new file mode 100755 +index 0000000000..7eaedb24d7 +--- /dev/null ++++ b/tests/qemu-iotests/tests/stream-error-on-reset +@@ -0,0 +1,140 @@ ++#!/usr/bin/env python3 ++# group: rw quick ++# ++# Test what happens when a stream job completes in a blk_drain(). ++# ++# Copyright (C) 2022 Red Hat, Inc. 
++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++import os ++import iotests ++from iotests import imgfmt, qemu_img_create, qemu_io_silent, QMPTestCase ++ ++ ++image_size = 1 * 1024 * 1024 ++data_size = 64 * 1024 ++base = os.path.join(iotests.test_dir, 'base.img') ++top = os.path.join(iotests.test_dir, 'top.img') ++ ++ ++# We want to test completing a stream job in a blk_drain(). ++# ++# The blk_drain() we are going to use is a virtio-scsi device resetting, ++# which we can trigger by resetting the system. ++# ++# In order to have the block job complete on drain, we (1) throttle its ++# base image so we can start the drain after it has begun, but before it ++# completes, and (2) make it encounter an I/O error on the ensuing write. ++# (If it completes regularly, the completion happens after the drain for ++# some reason.) 
++ ++class TestStreamErrorOnReset(QMPTestCase): ++ def setUp(self) -> None: ++ """ ++ Create two images: ++ - base image {base} with {data_size} bytes allocated ++ - top image {top} without any data allocated ++ ++ And the following VM configuration: ++ - base image throttled to {data_size} ++ - top image with a blkdebug configuration so the first write access ++ to it will result in an error ++ - top image is attached to a virtio-scsi device ++ """ ++ assert qemu_img_create('-f', imgfmt, base, str(image_size)) == 0 ++ assert qemu_io_silent('-c', f'write 0 {data_size}', base) == 0 ++ assert qemu_img_create('-f', imgfmt, top, str(image_size)) == 0 ++ ++ self.vm = iotests.VM() ++ self.vm.add_args('-accel', 'tcg') # Make throttling work properly ++ self.vm.add_object(self.vm.qmp_to_opts({ ++ 'qom-type': 'throttle-group', ++ 'id': 'thrgr', ++ 'x-bps-total': str(data_size) ++ })) ++ self.vm.add_blockdev(self.vm.qmp_to_opts({ ++ 'driver': imgfmt, ++ 'node-name': 'base', ++ 'file': { ++ 'driver': 'throttle', ++ 'throttle-group': 'thrgr', ++ 'file': { ++ 'driver': 'file', ++ 'filename': base ++ } ++ } ++ })) ++ self.vm.add_blockdev(self.vm.qmp_to_opts({ ++ 'driver': imgfmt, ++ 'node-name': 'top', ++ 'file': { ++ 'driver': 'blkdebug', ++ 'node-name': 'top-blkdebug', ++ 'inject-error': [{ ++ 'event': 'pwritev', ++ 'immediately': 'true', ++ 'once': 'true' ++ }], ++ 'image': { ++ 'driver': 'file', ++ 'filename': top ++ } ++ }, ++ 'backing': 'base' ++ })) ++ self.vm.add_device(self.vm.qmp_to_opts({ ++ 'driver': 'virtio-scsi', ++ 'id': 'vscsi' ++ })) ++ self.vm.add_device(self.vm.qmp_to_opts({ ++ 'driver': 'scsi-hd', ++ 'bus': 'vscsi.0', ++ 'drive': 'top' ++ })) ++ self.vm.launch() ++ ++ def tearDown(self) -> None: ++ self.vm.shutdown() ++ os.remove(top) ++ os.remove(base) ++ ++ def test_stream_error_on_reset(self) -> None: ++ # Launch a stream job, which will take at least a second to ++ # complete, because the base image is throttled (so we can ++ # get in between it having 
started and it having completed) ++ res = self.vm.qmp('block-stream', job_id='stream', device='top') ++ self.assert_qmp(res, 'return', {}) ++ ++ while True: ++ ev = self.vm.event_wait('JOB_STATUS_CHANGE') ++ if ev['data']['status'] == 'running': ++ # Once the stream job is running, reset the system, which ++ # forces the virtio-scsi device to be reset, thus draining ++ # the stream job, and making it complete. Completing ++ # inside of that drain should not result in a segfault. ++ res = self.vm.qmp('system_reset') ++ self.assert_qmp(res, 'return', {}) ++ elif ev['data']['status'] == 'null': ++ # The test is done once the job is gone ++ break ++ ++ ++if __name__ == '__main__': ++ # Passes with any format with backing file support, but qed and ++ # qcow1 do not seem to exercise the used-to-be problematic code ++ # path, so there is no point in having them in this list ++ iotests.main(supported_fmts=['qcow2', 'vmdk'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/tests/stream-error-on-reset.out b/tests/qemu-iotests/tests/stream-error-on-reset.out +new file mode 100644 +index 0000000000..ae1213e6f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/stream-error-on-reset.out +@@ -0,0 +1,5 @@ ++. 
++---------------------------------------------------------------------- ++Ran 1 tests ++ ++OK +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests.py-Add-QemuStorageDaemon-class.patch b/SOURCES/kvm-iotests.py-Add-QemuStorageDaemon-class.patch new file mode 100644 index 0000000..539897f --- /dev/null +++ b/SOURCES/kvm-iotests.py-Add-QemuStorageDaemon-class.patch @@ -0,0 +1,92 @@ +From 34ffcd1a463bd3c1d36ed2f33dd6335b35b38460 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:09 +0100 +Subject: [PATCH 3/6] iotests.py: Add QemuStorageDaemon class + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [3/6] 754fe76bc5e8be57f4b78f176531014c4a12b044 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +This is a rather simple class that allows creating a QSD instance +running in the background and stopping it when no longer needed. + +The __del__ handler is a safety net for when something goes so wrong in +a test that e.g. the tearDown() method is not called (e.g. setUp() +launches the QSD, but then launching a VM fails). We do not want the +QSD to continue running after the test has failed, so __del__() will +take care to kill it. 
+ +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 091dc7b2b5553a529bff9a7bf9ad3bc85bc5bdcd) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/iotests.py | 40 +++++++++++++++++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 83bfedb902..a51b5ce8cd 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -72,6 +72,8 @@ + qemu_prog = os.environ.get('QEMU_PROG', 'qemu') + qemu_opts = os.environ.get('QEMU_OPTIONS', '').strip().split(' ') + ++qsd_prog = os.environ.get('QSD_PROG', 'qemu-storage-daemon') ++ + gdb_qemu_env = os.environ.get('GDB_OPTIONS') + qemu_gdb = [] + if gdb_qemu_env: +@@ -312,6 +314,44 @@ def cmd(self, cmd): + return self._read_output() + + ++class QemuStorageDaemon: ++ def __init__(self, *args: str, instance_id: str = 'a'): ++ assert '--pidfile' not in args ++ self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid') ++ all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile] ++ ++ # Cannot use with here, we want the subprocess to stay around ++ # pylint: disable=consider-using-with ++ self._p = subprocess.Popen(all_args) ++ while not os.path.exists(self.pidfile): ++ if self._p.poll() is not None: ++ cmd = ' '.join(all_args) ++ raise RuntimeError( ++ 'qemu-storage-daemon terminated with exit code ' + ++ f'{self._p.returncode}: {cmd}') ++ ++ time.sleep(0.01) ++ ++ with open(self.pidfile, encoding='utf-8') as f: ++ self._pid = int(f.read().strip()) ++ ++ assert self._pid == self._p.pid ++ ++ def stop(self, kill_signal=15): ++ self._p.send_signal(kill_signal) ++ self._p.wait() ++ self._p = None ++ ++ try: ++ os.remove(self.pidfile) ++ except OSError: ++ pass ++ ++ def __del__(self): ++ if self._p is not None: ++ self.stop(kill_signal=9) ++ ++ + def qemu_nbd(*args): + '''Run qemu-nbd in daemon mode and return the parent's exit 
code''' + return subprocess.call(qemu_nbd_args + ['--fork'] + list(args)) +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests.py-Let-wait_migration-wait-even-more.patch b/SOURCES/kvm-iotests.py-Let-wait_migration-wait-even-more.patch deleted file mode 100644 index cda8037..0000000 --- a/SOURCES/kvm-iotests.py-Let-wait_migration-wait-even-more.patch +++ /dev/null @@ -1,123 +0,0 @@ -From d6df1426ae65b3a0d50bdbb1f8a7246386dd6ebf Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:04 +0000 -Subject: [PATCH 07/18] iotests.py: Let wait_migration wait even more - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-7-kwolf@redhat.com> -Patchwork-id: 93751 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/6] iotests.py: Let wait_migration wait even more -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -From: Max Reitz - -The "migration completed" event may be sent (on the source, to be -specific) before the migration is actually completed, so the VM runstate -will still be "finish-migrate" instead of "postmigrate". So ask the -users of VM.wait_migration() to specify the final runstate they desire -and then poll the VM until it has reached that state. (This should be -over very quickly, so busy polling is fine.) - -Without this patch, I see intermittent failures in the new iotest 280 -under high system load. I have not yet seen such failures with other -iotests that use VM.wait_migration() and query-status afterwards, but -maybe they just occur even more rarely, or it is because they also wait -on the destination VM to be running. - -Signed-off-by: Max Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 8da7969bd7014f6de037d8ae132b40721944b186) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/234 | 8 ++++---- - tests/qemu-iotests/262 | 4 ++-- - tests/qemu-iotests/280 | 2 +- - tests/qemu-iotests/iotests.py | 6 +++++- - 4 files changed, 12 insertions(+), 8 deletions(-) - -diff --git a/tests/qemu-iotests/234 b/tests/qemu-iotests/234 -index 34c818c..59a7f94 100755 ---- a/tests/qemu-iotests/234 -+++ b/tests/qemu-iotests/234 -@@ -69,9 +69,9 @@ with iotests.FilePath('img') as img_path, \ - iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo_a))) - with iotests.Timeout(3, 'Migration does not complete'): - # Wait for the source first (which includes setup=setup) -- vm_a.wait_migration() -+ vm_a.wait_migration('postmigrate') - # Wait for the destination second (which does not) -- vm_b.wait_migration() -+ vm_b.wait_migration('running') - - iotests.log(vm_a.qmp('query-migrate')['return']['status']) - iotests.log(vm_b.qmp('query-migrate')['return']['status']) -@@ -98,9 +98,9 @@ with iotests.FilePath('img') as img_path, \ - iotests.log(vm_b.qmp('migrate', uri='exec:cat >%s' % (fifo_b))) - with iotests.Timeout(3, 'Migration does not complete'): - # Wait for the source first (which includes setup=setup) -- vm_b.wait_migration() -+ vm_b.wait_migration('postmigrate') - # Wait for the destination second (which does not) -- vm_a.wait_migration() -+ vm_a.wait_migration('running') - - iotests.log(vm_a.qmp('query-migrate')['return']['status']) - iotests.log(vm_b.qmp('query-migrate')['return']['status']) -diff --git a/tests/qemu-iotests/262 b/tests/qemu-iotests/262 -index 0963daa..bbcb526 100755 ---- a/tests/qemu-iotests/262 -+++ b/tests/qemu-iotests/262 -@@ -71,9 +71,9 @@ with iotests.FilePath('img') as img_path, \ - iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo))) - with iotests.Timeout(3, 'Migration does not complete'): - # Wait for the source first (which includes setup=setup) -- vm_a.wait_migration() -+ vm_a.wait_migration('postmigrate') - # Wait for the destination second (which does not) -- vm_b.wait_migration() -+ 
vm_b.wait_migration('running') - - iotests.log(vm_a.qmp('query-migrate')['return']['status']) - iotests.log(vm_b.qmp('query-migrate')['return']['status']) -diff --git a/tests/qemu-iotests/280 b/tests/qemu-iotests/280 -index 0b1fa8e..85e9114 100755 ---- a/tests/qemu-iotests/280 -+++ b/tests/qemu-iotests/280 -@@ -45,7 +45,7 @@ with iotests.FilePath('base') as base_path , \ - vm.qmp_log('migrate', uri='exec:cat > /dev/null') - - with iotests.Timeout(3, 'Migration does not complete'): -- vm.wait_migration() -+ vm.wait_migration('postmigrate') - - iotests.log('\nVM is now stopped:') - iotests.log(vm.qmp('query-migrate')['return']['status']) -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 5741efb..0c55f7b 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -663,12 +663,16 @@ class VM(qtest.QEMUQtestMachine): - } - ])) - -- def wait_migration(self): -+ def wait_migration(self, expect_runstate): - while True: - event = self.event_wait('MIGRATION') - log(event, filters=[filter_qmp_event]) - if event['data']['status'] == 'completed': - break -+ # The event may occur in finish-migrate, so wait for the expected -+ # post-migration runstate -+ while self.qmp('query-status')['return']['status'] != expect_runstate: -+ pass - - def node_info(self, node_name): - nodes = self.qmp('query-named-block-nodes') --- -1.8.3.1 - diff --git a/SOURCES/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch b/SOURCES/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch deleted file mode 100644 index 2ee9dcd..0000000 --- a/SOURCES/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 1c508d56d154caf5fbf53e7dabafd707236cb16b Mon Sep 17 00:00:00 2001 -From: jmaloy -Date: Wed, 29 Jan 2020 13:45:18 +0000 -Subject: [PATCH 06/15] iscsi: Cap block count from GET LBA STATUS - (CVE-2020-1711) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: jmaloy -Message-id: <20200129134518.1293-2-jmaloy@redhat.com> -Patchwork-id: 93571 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] iscsi: Cap block count from GET LBA STATUS (CVE-2020-1711) -Bugzilla: 1794503 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Kevin Wolf -RH-Acked-by: Philippe Mathieu-Daudé - -From: Felipe Franciosi - -When querying an iSCSI server for the provisioning status of blocks (via -GET LBA STATUS), Qemu only validates that the response descriptor zero's -LBA matches the one requested. Given the SCSI spec allows servers to -respond with the status of blocks beyond the end of the LUN, Qemu may -have its heap corrupted by clearing/setting too many bits at the end of -its allocmap for the LUN. - -A malicious guest in control of the iSCSI server could carefully program -Qemu's heap (by selectively setting the bitmap) and then smash it. - -This limits the number of bits that iscsi_co_block_status() will try to -update in the allocmap so it can't overflow the bitmap. - -Fixes: CVE-2020-1711 -Cc: qemu-stable@nongnu.org -Signed-off-by: Felipe Franciosi -Signed-off-by: Peter Turschmid -Signed-off-by: Raphael Norwitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 693fd2acdf14dd86c0bf852610f1c2cca80a74dc) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - block/iscsi.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/block/iscsi.c b/block/iscsi.c -index 2aea7e3..cbd5729 100644 ---- a/block/iscsi.c -+++ b/block/iscsi.c -@@ -701,7 +701,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, - struct scsi_get_lba_status *lbas = NULL; - struct scsi_lba_status_descriptor *lbasd = NULL; - struct IscsiTask iTask; -- uint64_t lba; -+ uint64_t lba, max_bytes; - int ret; - - iscsi_co_init_iscsitask(iscsilun, &iTask); -@@ -721,6 +721,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, - } - - lba = offset / iscsilun->block_size; -+ max_bytes = (iscsilun->num_blocks - lba) * iscsilun->block_size; - - qemu_mutex_lock(&iscsilun->mutex); - retry: -@@ -764,7 +765,7 @@ retry: - goto out_unlock; - } - -- *pnum = (int64_t) lbasd->num_blocks * iscsilun->block_size; -+ *pnum = MIN((int64_t) lbasd->num_blocks * iscsilun->block_size, max_bytes); - - if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED || - lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-iscsi-Drop-iscsi_co_create_opts.patch b/SOURCES/kvm-iscsi-Drop-iscsi_co_create_opts.patch deleted file mode 100644 index a6d0baf..0000000 --- a/SOURCES/kvm-iscsi-Drop-iscsi_co_create_opts.patch +++ /dev/null @@ -1,113 +0,0 @@ -From 58b7d33e1bc17b89103ceaa39f5722a69b35d810 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:45 +0000 -Subject: [PATCH 04/20] iscsi: Drop iscsi_co_create_opts() - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-5-mlevitsk@redhat.com> -Patchwork-id: 94226 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/6] iscsi: Drop iscsi_co_create_opts() -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -The generic fallback implementation effectively does the same. 
- -Reviewed-by: Maxim Levitsky -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-5-mreitz@redhat.com> -Signed-off-by: Max Reitz -(cherry picked from commit 80f0900905b555f00d644894c786b6d66ac2e00e) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. de Paula ---- - block/iscsi.c | 56 -------------------------------------------------------- - 1 file changed, 56 deletions(-) - -diff --git a/block/iscsi.c b/block/iscsi.c -index cbd5729..b45da65 100644 ---- a/block/iscsi.c -+++ b/block/iscsi.c -@@ -2164,58 +2164,6 @@ static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, - return 0; - } - --static int coroutine_fn iscsi_co_create_opts(const char *filename, QemuOpts *opts, -- Error **errp) --{ -- int ret = 0; -- int64_t total_size = 0; -- BlockDriverState *bs; -- IscsiLun *iscsilun = NULL; -- QDict *bs_options; -- Error *local_err = NULL; -- -- bs = bdrv_new(); -- -- /* Read out options */ -- total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), -- BDRV_SECTOR_SIZE); -- bs->opaque = g_new0(struct IscsiLun, 1); -- iscsilun = bs->opaque; -- -- bs_options = qdict_new(); -- iscsi_parse_filename(filename, bs_options, &local_err); -- if (local_err) { -- error_propagate(errp, local_err); -- ret = -EINVAL; -- } else { -- ret = iscsi_open(bs, bs_options, 0, NULL); -- } -- qobject_unref(bs_options); -- -- if (ret != 0) { -- goto out; -- } -- iscsi_detach_aio_context(bs); -- if (iscsilun->type != TYPE_DISK) { -- ret = -ENODEV; -- goto out; -- } -- if (bs->total_sectors < total_size) { -- ret = -ENOSPC; -- goto out; -- } -- -- ret = 0; --out: -- if (iscsilun->iscsi != NULL) { -- iscsi_destroy_context(iscsilun->iscsi); -- } -- g_free(bs->opaque); -- bs->opaque = NULL; -- bdrv_unref(bs); -- return ret; --} -- - static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) - { - IscsiLun *iscsilun = bs->opaque; -@@ -2486,8 +2434,6 @@ static BlockDriver bdrv_iscsi = { - .bdrv_parse_filename = iscsi_parse_filename, 
- .bdrv_file_open = iscsi_open, - .bdrv_close = iscsi_close, -- .bdrv_co_create_opts = iscsi_co_create_opts, -- .create_opts = &iscsi_create_opts, - .bdrv_reopen_prepare = iscsi_reopen_prepare, - .bdrv_reopen_commit = iscsi_reopen_commit, - .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, -@@ -2525,8 +2471,6 @@ static BlockDriver bdrv_iser = { - .bdrv_parse_filename = iscsi_parse_filename, - .bdrv_file_open = iscsi_open, - .bdrv_close = iscsi_close, -- .bdrv_co_create_opts = iscsi_co_create_opts, -- .create_opts = &iscsi_create_opts, - .bdrv_reopen_prepare = iscsi_reopen_prepare, - .bdrv_reopen_commit = iscsi_reopen_commit, - .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, --- -1.8.3.1 - diff --git a/SOURCES/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch b/SOURCES/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch deleted file mode 100644 index e38428b..0000000 --- a/SOURCES/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch +++ /dev/null @@ -1,213 +0,0 @@ -From 3f16b8a33bd7503cbe857fbeb45fff7301b6bb5f Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:12 +0100 -Subject: [PATCH 1/6] job: take each job's lock individually in job_txn_apply - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-2-kwolf@redhat.com> -Patchwork-id: 94597 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] job: take each job's lock individually in job_txn_apply -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -From: Stefan Reiter - -All callers of job_txn_apply hold a single job's lock, but different -jobs within a transaction can have different contexts, thus we need to -lock each one individually before applying the callback function. - -Similar to job_completed_txn_abort this also requires releasing the -caller's context before and reacquiring it after to avoid recursive -locks which might break AIO_WAIT_WHILE in the callback. 
This is safe, since -existing code would already have to take this into account, lest -job_completed_txn_abort might have broken. - -This also brings to light a different issue: When a callback function in -job_txn_apply moves it's job to a different AIO context, callers will -try to release the wrong lock (now that we re-acquire the lock -correctly, previously it would just continue with the old lock, leaving -the job unlocked for the rest of the return path). Fix this by not caching -the job's context. - -This is only necessary for qmp_block_job_finalize, qmp_job_finalize and -job_exit, since everyone else calls through job_exit. - -One test needed adapting, since it calls job_finalize directly, so it -manually needs to acquire the correct context. - -Signed-off-by: Stefan Reiter -Message-Id: <20200407115651.69472-2-s.reiter@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit b660a84bbb0eb1a76b505648d31d5e82594fb75e) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 9 +++++++++ - job-qmp.c | 9 +++++++++ - job.c | 50 ++++++++++++++++++++++++++++++++++++++++---------- - tests/test-blockjob.c | 2 ++ - 4 files changed, 60 insertions(+), 10 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index c8d4b51..86eb115 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -4215,7 +4215,16 @@ void qmp_block_job_finalize(const char *id, Error **errp) - } - - trace_qmp_block_job_finalize(job); -+ job_ref(&job->job); - job_finalize(&job->job, errp); -+ -+ /* -+ * Job's context might have changed via job_finalize (and job_txn_apply -+ * automatically acquires the new one), so make sure we release the correct -+ * one. 
-+ */ -+ aio_context = blk_get_aio_context(job->blk); -+ job_unref(&job->job); - aio_context_release(aio_context); - } - -diff --git a/job-qmp.c b/job-qmp.c -index fbfed25..a201220 100644 ---- a/job-qmp.c -+++ b/job-qmp.c -@@ -114,7 +114,16 @@ void qmp_job_finalize(const char *id, Error **errp) - } - - trace_qmp_job_finalize(job); -+ job_ref(job); - job_finalize(job, errp); -+ -+ /* -+ * Job's context might have changed via job_finalize (and job_txn_apply -+ * automatically acquires the new one), so make sure we release the correct -+ * one. -+ */ -+ aio_context = job->aio_context; -+ job_unref(job); - aio_context_release(aio_context); - } - -diff --git a/job.c b/job.c -index 04409b4..48fc4ad 100644 ---- a/job.c -+++ b/job.c -@@ -136,17 +136,38 @@ static void job_txn_del_job(Job *job) - } - } - --static int job_txn_apply(JobTxn *txn, int fn(Job *)) -+static int job_txn_apply(Job *job, int fn(Job *)) - { -- Job *job, *next; -+ AioContext *inner_ctx; -+ Job *other_job, *next; -+ JobTxn *txn = job->txn; - int rc = 0; - -- QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) { -- rc = fn(job); -+ /* -+ * Similar to job_completed_txn_abort, we take each job's lock before -+ * applying fn, but since we assume that outer_ctx is held by the caller, -+ * we need to release it here to avoid holding the lock twice - which would -+ * break AIO_WAIT_WHILE from within fn. -+ */ -+ job_ref(job); -+ aio_context_release(job->aio_context); -+ -+ QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { -+ inner_ctx = other_job->aio_context; -+ aio_context_acquire(inner_ctx); -+ rc = fn(other_job); -+ aio_context_release(inner_ctx); - if (rc) { - break; - } - } -+ -+ /* -+ * Note that job->aio_context might have been changed by calling fn, so we -+ * can't use a local variable to cache it. 
-+ */ -+ aio_context_acquire(job->aio_context); -+ job_unref(job); - return rc; - } - -@@ -774,11 +795,11 @@ static void job_do_finalize(Job *job) - assert(job && job->txn); - - /* prepare the transaction to complete */ -- rc = job_txn_apply(job->txn, job_prepare); -+ rc = job_txn_apply(job, job_prepare); - if (rc) { - job_completed_txn_abort(job); - } else { -- job_txn_apply(job->txn, job_finalize_single); -+ job_txn_apply(job, job_finalize_single); - } - } - -@@ -824,10 +845,10 @@ static void job_completed_txn_success(Job *job) - assert(other_job->ret == 0); - } - -- job_txn_apply(txn, job_transition_to_pending); -+ job_txn_apply(job, job_transition_to_pending); - - /* If no jobs need manual finalization, automatically do so */ -- if (job_txn_apply(txn, job_needs_finalize) == 0) { -+ if (job_txn_apply(job, job_needs_finalize) == 0) { - job_do_finalize(job); - } - } -@@ -849,9 +870,10 @@ static void job_completed(Job *job) - static void job_exit(void *opaque) - { - Job *job = (Job *)opaque; -- AioContext *ctx = job->aio_context; -+ AioContext *ctx; - -- aio_context_acquire(ctx); -+ job_ref(job); -+ aio_context_acquire(job->aio_context); - - /* This is a lie, we're not quiescent, but still doing the completion - * callbacks. However, completion callbacks tend to involve operations that -@@ -862,6 +884,14 @@ static void job_exit(void *opaque) - - job_completed(job); - -+ /* -+ * Note that calling job_completed can move the job to a different -+ * aio_context, so we cannot cache from above. job_txn_apply takes care of -+ * acquiring the new lock, and we ref/unref to avoid job_completed freeing -+ * the job underneath us. 
-+ */ -+ ctx = job->aio_context; -+ job_unref(job); - aio_context_release(ctx); - } - -diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c -index 7844c9f..6d857fd 100644 ---- a/tests/test-blockjob.c -+++ b/tests/test-blockjob.c -@@ -368,7 +368,9 @@ static void test_cancel_concluded(void) - aio_poll(qemu_get_aio_context(), true); - assert(job->status == JOB_STATUS_PENDING); - -+ aio_context_acquire(job->aio_context); - job_finalize(job, &error_abort); -+ aio_context_release(job->aio_context); - assert(job->status == JOB_STATUS_CONCLUDED); - - cancel_common(s); --- -1.8.3.1 - diff --git a/SOURCES/kvm-lan9118-switch-to-use-qemu_receive_packet-for-loopba.patch b/SOURCES/kvm-lan9118-switch-to-use-qemu_receive_packet-for-loopba.patch deleted file mode 100644 index 902af6c..0000000 --- a/SOURCES/kvm-lan9118-switch-to-use-qemu_receive_packet-for-loopba.patch +++ /dev/null @@ -1,53 +0,0 @@ -From e2cafb929acb74377754cb688419575b139b922a Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 29 Jun 2021 03:42:47 -0400 -Subject: [PATCH 9/9] lan9118: switch to use qemu_receive_packet() for loopback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210629034247.3286477-10-jmaloy@redhat.com> -Patchwork-id: 101790 -O-Subject: [RHEL-8.4.0.z qemu-kvm PATCH v2 9/9] lan9118: switch to use qemu_receive_packet() for loopback -Bugzilla: 1932917 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Alexander Bulekov - -This patch switches to use qemu_receive_packet() which can detect -reentrancy and return early. - -This is intended to address CVE-2021-3416. - -Cc: Prasad J Pandit -Cc: qemu-stable@nongnu.org -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Jason Wang - -(cherry picked from commit 37cee01784ff0df13e5209517e1b3594a5e792d1) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/lan9118.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c -index ed551f2178..7bb4633f0f 100644 ---- a/hw/net/lan9118.c -+++ b/hw/net/lan9118.c -@@ -667,7 +667,7 @@ static void do_tx_packet(lan9118_state *s) - /* FIXME: Honor TX disable, and allow queueing of packets. */ - if (s->phy_control & 0x4000) { - /* This assumes the receive routine doesn't touch the VLANClient. */ -- lan9118_receive(qemu_get_queue(s->nic), s->txp->data, s->txp->len); -+ qemu_receive_packet(qemu_get_queue(s->nic), s->txp->data, s->txp->len); - } else { - qemu_send_packet(qemu_get_queue(s->nic), s->txp->data, s->txp->len); - } --- -2.27.0 - diff --git a/SOURCES/kvm-libqos-pci-pc-use-32-bit-write-for-EJ-register.patch b/SOURCES/kvm-libqos-pci-pc-use-32-bit-write-for-EJ-register.patch deleted file mode 100644 index 71a2eac..0000000 --- a/SOURCES/kvm-libqos-pci-pc-use-32-bit-write-for-EJ-register.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 2687e0348e3e4d377b4f5356e46948dc2b371b6d Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 21 Apr 2021 22:30:02 -0400 -Subject: [PATCH 3/7] libqos: pci-pc: use 32-bit write for EJ register -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210421223006.19650-3-jmaloy@redhat.com> -Patchwork-id: 101484 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH v2 2/6] libqos: pci-pc: use 32-bit write for EJ register -Bugzilla: 1842478 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laszlo Ersek - -From: Paolo Bonzini - -The memory region ops have min_access_size == 4 so obey it. - -Tested-by: Thomas Huth -Signed-off-by: Paolo Bonzini - -(cherry picked from commit 4b7c06837ae0b1ff56473202a42e7e386f53d6db) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/libqos/pci-pc.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tests/libqos/pci-pc.c b/tests/libqos/pci-pc.c -index 0bc591d1da..3bb2eb3ba8 100644 ---- a/tests/libqos/pci-pc.c -+++ b/tests/libqos/pci-pc.c -@@ -186,7 +186,7 @@ void qpci_unplug_acpi_device_test(QTestState *qts, const char *id, uint8_t slot) - g_assert(!qdict_haskey(response, "error")); - qobject_unref(response); - -- qtest_outb(qts, ACPI_PCIHP_ADDR + PCI_EJ_BASE, 1 << slot); -+ qtest_outl(qts, ACPI_PCIHP_ADDR + PCI_EJ_BASE, 1 << slot); - - qtest_qmp_eventwait(qts, "DEVICE_DELETED"); - } --- -2.27.0 - diff --git a/SOURCES/kvm-libqos-usb-hcd-ehci-use-32-bit-write-for-config-regi.patch b/SOURCES/kvm-libqos-usb-hcd-ehci-use-32-bit-write-for-config-regi.patch deleted file mode 100644 index 424a60c..0000000 --- a/SOURCES/kvm-libqos-usb-hcd-ehci-use-32-bit-write-for-config-regi.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 6320b4e76965b1cf64da4307f4d313fe6b2aa971 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 21 Apr 2021 22:30:01 -0400 -Subject: [PATCH 2/7] libqos: usb-hcd-ehci: use 32-bit write for config - register -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210421223006.19650-2-jmaloy@redhat.com> -Patchwork-id: 101478 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH v2 1/6] libqos: usb-hcd-ehci: use 32-bit write for config register -Bugzilla: 1842478 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laszlo Ersek - -From: Paolo Bonzini - -The memory region ops have min_access_size == 4 so obey it. - -Tested-by: Thomas Huth -Signed-off-by: Paolo Bonzini - -(cherry picked from commit 89ed83d8b23c11d250c290593cad3ca839d5b053) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/usb-hcd-ehci-test.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tests/usb-hcd-ehci-test.c b/tests/usb-hcd-ehci-test.c -index 5251d539e9..c51e8bb223 100644 ---- a/tests/usb-hcd-ehci-test.c -+++ b/tests/usb-hcd-ehci-test.c -@@ -96,7 +96,7 @@ static void pci_ehci_port_1(void) - static void pci_ehci_config(void) - { - /* hands over all ports from companion uhci to ehci */ -- qpci_io_writew(ehci1.dev, ehci1.bar, 0x60, 1); -+ qpci_io_writel(ehci1.dev, ehci1.bar, 0x60, 1); - } - - static void pci_uhci_port_2(void) --- -2.27.0 - diff --git a/SOURCES/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch b/SOURCES/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch deleted file mode 100644 index e362efe..0000000 --- a/SOURCES/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch +++ /dev/null @@ -1,117 +0,0 @@ -From ee360b70f179cf540faebe7e55b34e323e2bb179 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:09 +0100 -Subject: [PATCH 098/116] libvhost-user: Fix some memtable remap cases -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-95-dgilbert@redhat.com> -Patchwork-id: 93548 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 094/112] libvhost-user: Fix some memtable remap cases -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -If a new setmemtable command comes in once the vhost threads are -running, it will remap the guests address space and the threads -will now be looking in the wrong place. - -Fortunately we're running this command under lock, so we can -update the queue mappings so that threads will look in the new-right -place. - -Note: This doesn't fix things that the threads might be doing -without a lock (e.g. a readv/writev!) That's for another time. 
- -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 49e9ec749d4db62ae51f76354143cee183912a1d) -Signed-off-by: Miroslav Rezanina ---- - contrib/libvhost-user/libvhost-user.c | 33 +++++++++++++++++++++++++-------- - contrib/libvhost-user/libvhost-user.h | 3 +++ - 2 files changed, 28 insertions(+), 8 deletions(-) - -diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c -index 63e4106..b89bf18 100644 ---- a/contrib/libvhost-user/libvhost-user.c -+++ b/contrib/libvhost-user/libvhost-user.c -@@ -565,6 +565,21 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg) - } - - static bool -+map_ring(VuDev *dev, VuVirtq *vq) -+{ -+ vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr); -+ vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr); -+ vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr); -+ -+ DPRINT("Setting virtq addresses:\n"); -+ DPRINT(" vring_desc at %p\n", vq->vring.desc); -+ DPRINT(" vring_used at %p\n", vq->vring.used); -+ DPRINT(" vring_avail at %p\n", vq->vring.avail); -+ -+ return !(vq->vring.desc && vq->vring.used && vq->vring.avail); -+} -+ -+static bool - vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) - { - int i; -@@ -767,6 +782,14 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) - close(vmsg->fds[i]); - } - -+ for (i = 0; i < dev->max_queues; i++) { -+ if (dev->vq[i].vring.desc) { -+ if (map_ring(dev, &dev->vq[i])) { -+ vu_panic(dev, "remaping queue %d during setmemtable", i); -+ } -+ } -+ } -+ - return false; - } - -@@ -853,18 +876,12 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg) - DPRINT(" avail_user_addr: 0x%016" PRIx64 "\n", vra->avail_user_addr); - DPRINT(" log_guest_addr: 0x%016" PRIx64 "\n", vra->log_guest_addr); - -+ vq->vra = *vra; - vq->vring.flags = vra->flags; -- vq->vring.desc = qva_to_va(dev, vra->desc_user_addr); -- vq->vring.used = qva_to_va(dev, vra->used_user_addr); -- vq->vring.avail = qva_to_va(dev, vra->avail_user_addr); - 
vq->vring.log_guest_addr = vra->log_guest_addr; - -- DPRINT("Setting virtq addresses:\n"); -- DPRINT(" vring_desc at %p\n", vq->vring.desc); -- DPRINT(" vring_used at %p\n", vq->vring.used); -- DPRINT(" vring_avail at %p\n", vq->vring.avail); - -- if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) { -+ if (map_ring(dev, vq)) { - vu_panic(dev, "Invalid vring_addr message"); - return false; - } -diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h -index 1844b6f..5cb7708 100644 ---- a/contrib/libvhost-user/libvhost-user.h -+++ b/contrib/libvhost-user/libvhost-user.h -@@ -327,6 +327,9 @@ typedef struct VuVirtq { - int err_fd; - unsigned int enable; - bool started; -+ -+ /* Guest addresses of our ring */ -+ struct vhost_vring_addr vra; - } VuVirtq; - - enum VuWatchCondtion { --- -1.8.3.1 - diff --git a/SOURCES/kvm-libvhost-user-handle-endianness-as-mandated-by-the-s.patch b/SOURCES/kvm-libvhost-user-handle-endianness-as-mandated-by-the-s.patch deleted file mode 100644 index 0e55df4..0000000 --- a/SOURCES/kvm-libvhost-user-handle-endianness-as-mandated-by-the-s.patch +++ /dev/null @@ -1,290 +0,0 @@ -From cadb72854b44f53c07ea60d7a6149ccac5928a82 Mon Sep 17 00:00:00 2001 -From: Claudio Imbrenda -Date: Tue, 27 Oct 2020 12:02:15 -0400 -Subject: [PATCH 02/18] libvhost-user: handle endianness as mandated by the - spec - -RH-Author: Claudio Imbrenda -Message-id: <20201027120217.2997314-2-cimbrend@redhat.com> -Patchwork-id: 98723 -O-Subject: [RHEL8.4 qemu-kvm PATCH 1/3] libvhost-user: handle endianness as mandated by the spec -Bugzilla: 1857733 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. 
David Alan Gilbert -RH-Acked-by: Cornelia Huck - -From: Marc Hartmayer - -Since virtio existed even before it got standardized, the virtio -standard defines the following types of virtio devices: - - + legacy device (pre-virtio 1.0) - + non-legacy or VIRTIO 1.0 device - + transitional device (which can act both as legacy and non-legacy) - -Virtio 1.0 defines the fields of the virtqueues as little endian, -while legacy uses guest's native endian [1]. Currently libvhost-user -does not handle virtio endianness at all, i.e. it works only if the -native endianness matches with whatever is actually needed. That means -things break spectacularly on big-endian targets. Let us handle virtio -endianness for non-legacy as required by the virtio specification [1] -and fence legacy virtio, as there is no safe way to figure out the -needed endianness conversions for all cases. The fencing of legacy -virtio devices is done in `vu_set_features_exec`. - -[1] https://docs.oasis-open.org/virtio/virtio/v1.1/cs01/virtio-v1.1-cs01.html#x1-210003 - -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Marc Hartmayer -Message-id: 20200901150019.29229-3-mhartmay@linux.ibm.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 2ffc54708087c6e524297957be2fc5d543abb767) -Signed-off-by: Danilo C. L. 
de Paula ---- - contrib/libvhost-user/libvhost-user.c | 77 +++++++++++++++------------ - 1 file changed, 43 insertions(+), 34 deletions(-) - -diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c -index b89bf185013..b8350b067e3 100644 ---- a/contrib/libvhost-user/libvhost-user.c -+++ b/contrib/libvhost-user/libvhost-user.c -@@ -42,6 +42,7 @@ - - #include "qemu/atomic.h" - #include "qemu/osdep.h" -+#include "qemu/bswap.h" - #include "qemu/memfd.h" - - #include "libvhost-user.h" -@@ -522,6 +523,14 @@ vu_set_features_exec(VuDev *dev, VhostUserMsg *vmsg) - DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); - - dev->features = vmsg->payload.u64; -+ if (!vu_has_feature(dev, VIRTIO_F_VERSION_1)) { -+ /* -+ * We only support devices conforming to VIRTIO 1.0 or -+ * later -+ */ -+ vu_panic(dev, "virtio legacy devices aren't supported by libvhost-user"); -+ return false; -+ } - - if (!(dev->features & VHOST_USER_F_PROTOCOL_FEATURES)) { - vu_set_enable_all_rings(dev, true); -@@ -886,7 +895,7 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg) - return false; - } - -- vq->used_idx = vq->vring.used->idx; -+ vq->used_idx = lduw_le_p(&vq->vring.used->idx); - - if (vq->last_avail_idx != vq->used_idx) { - bool resume = dev->iface->queue_is_processed_in_order && -@@ -998,7 +1007,7 @@ vu_check_queue_inflights(VuDev *dev, VuVirtq *vq) - return 0; - } - -- vq->used_idx = vq->vring.used->idx; -+ vq->used_idx = lduw_le_p(&vq->vring.used->idx); - vq->resubmit_num = 0; - vq->resubmit_list = NULL; - vq->counter = 0; -@@ -1737,13 +1746,13 @@ vu_queue_started(const VuDev *dev, const VuVirtq *vq) - static inline uint16_t - vring_avail_flags(VuVirtq *vq) - { -- return vq->vring.avail->flags; -+ return lduw_le_p(&vq->vring.avail->flags); - } - - static inline uint16_t - vring_avail_idx(VuVirtq *vq) - { -- vq->shadow_avail_idx = vq->vring.avail->idx; -+ vq->shadow_avail_idx = lduw_le_p(&vq->vring.avail->idx); - - return vq->shadow_avail_idx; - } -@@ 
-1751,7 +1760,7 @@ vring_avail_idx(VuVirtq *vq) - static inline uint16_t - vring_avail_ring(VuVirtq *vq, int i) - { -- return vq->vring.avail->ring[i]; -+ return lduw_le_p(&vq->vring.avail->ring[i]); - } - - static inline uint16_t -@@ -1839,12 +1848,12 @@ virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc, - int i, unsigned int max, unsigned int *next) - { - /* If this descriptor says it doesn't chain, we're done. */ -- if (!(desc[i].flags & VRING_DESC_F_NEXT)) { -+ if (!(lduw_le_p(&desc[i].flags) & VRING_DESC_F_NEXT)) { - return VIRTQUEUE_READ_DESC_DONE; - } - - /* Check they're not leading us off end of descriptors. */ -- *next = desc[i].next; -+ *next = lduw_le_p(&desc[i].next); - /* Make sure compiler knows to grab that: we don't want it changing! */ - smp_wmb(); - -@@ -1887,8 +1896,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes, - } - desc = vq->vring.desc; - -- if (desc[i].flags & VRING_DESC_F_INDIRECT) { -- if (desc[i].len % sizeof(struct vring_desc)) { -+ if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) { -+ if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) { - vu_panic(dev, "Invalid size for indirect buffer table"); - goto err; - } -@@ -1901,8 +1910,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes, - - /* loop over the indirect descriptor table */ - indirect = 1; -- desc_addr = desc[i].addr; -- desc_len = desc[i].len; -+ desc_addr = ldq_le_p(&desc[i].addr); -+ desc_len = ldl_le_p(&desc[i].len); - max = desc_len / sizeof(struct vring_desc); - read_len = desc_len; - desc = vu_gpa_to_va(dev, &read_len, desc_addr); -@@ -1929,10 +1938,10 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes, - goto err; - } - -- if (desc[i].flags & VRING_DESC_F_WRITE) { -- in_total += desc[i].len; -+ if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) { -+ in_total += ldl_le_p(&desc[i].len); - } else { -- out_total += desc[i].len; -+ out_total += ldl_le_p(&desc[i].len); - } - 
if (in_total >= max_in_bytes && out_total >= max_out_bytes) { - goto done; -@@ -2047,7 +2056,7 @@ vring_used_flags_set_bit(VuVirtq *vq, int mask) - - flags = (uint16_t *)((char*)vq->vring.used + - offsetof(struct vring_used, flags)); -- *flags |= mask; -+ stw_le_p(flags, lduw_le_p(flags) | mask); - } - - static inline void -@@ -2057,7 +2066,7 @@ vring_used_flags_unset_bit(VuVirtq *vq, int mask) - - flags = (uint16_t *)((char*)vq->vring.used + - offsetof(struct vring_used, flags)); -- *flags &= ~mask; -+ stw_le_p(flags, lduw_le_p(flags) & ~mask); - } - - static inline void -@@ -2067,7 +2076,7 @@ vring_set_avail_event(VuVirtq *vq, uint16_t val) - return; - } - -- *((uint16_t *) &vq->vring.used->ring[vq->vring.num]) = val; -+ stw_le_p(&vq->vring.used->ring[vq->vring.num], val); - } - - void -@@ -2156,14 +2165,14 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz) - struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE]; - int rc; - -- if (desc[i].flags & VRING_DESC_F_INDIRECT) { -- if (desc[i].len % sizeof(struct vring_desc)) { -+ if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) { -+ if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) { - vu_panic(dev, "Invalid size for indirect buffer table"); - } - - /* loop over the indirect descriptor table */ -- desc_addr = desc[i].addr; -- desc_len = desc[i].len; -+ desc_addr = ldq_le_p(&desc[i].addr); -+ desc_len = ldl_le_p(&desc[i].len); - max = desc_len / sizeof(struct vring_desc); - read_len = desc_len; - desc = vu_gpa_to_va(dev, &read_len, desc_addr); -@@ -2185,10 +2194,10 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz) - - /* Collect all the descriptors */ - do { -- if (desc[i].flags & VRING_DESC_F_WRITE) { -+ if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) { - virtqueue_map_desc(dev, &in_num, iov + out_num, - VIRTQUEUE_MAX_SIZE - out_num, true, -- desc[i].addr, desc[i].len); -+ ldq_le_p(&desc[i].addr), ldl_le_p(&desc[i].len)); - } else { - if (in_num) { - 
vu_panic(dev, "Incorrect order for descriptors"); -@@ -2196,7 +2205,7 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz) - } - virtqueue_map_desc(dev, &out_num, iov, - VIRTQUEUE_MAX_SIZE, false, -- desc[i].addr, desc[i].len); -+ ldq_le_p(&desc[i].addr), ldl_le_p(&desc[i].len)); - } - - /* If we've got too many, that implies a descriptor loop. */ -@@ -2392,14 +2401,14 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq, - max = vq->vring.num; - i = elem->index; - -- if (desc[i].flags & VRING_DESC_F_INDIRECT) { -- if (desc[i].len % sizeof(struct vring_desc)) { -+ if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) { -+ if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) { - vu_panic(dev, "Invalid size for indirect buffer table"); - } - - /* loop over the indirect descriptor table */ -- desc_addr = desc[i].addr; -- desc_len = desc[i].len; -+ desc_addr = ldq_le_p(&desc[i].addr); -+ desc_len = ldl_le_p(&desc[i].len); - max = desc_len / sizeof(struct vring_desc); - read_len = desc_len; - desc = vu_gpa_to_va(dev, &read_len, desc_addr); -@@ -2425,9 +2434,9 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq, - return; - } - -- if (desc[i].flags & VRING_DESC_F_WRITE) { -- min = MIN(desc[i].len, len); -- vu_log_write(dev, desc[i].addr, min); -+ if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) { -+ min = MIN(ldl_le_p(&desc[i].len), len); -+ vu_log_write(dev, ldq_le_p(&desc[i].addr), min); - len -= min; - } - -@@ -2452,15 +2461,15 @@ vu_queue_fill(VuDev *dev, VuVirtq *vq, - - idx = (idx + vq->used_idx) % vq->vring.num; - -- uelem.id = elem->index; -- uelem.len = len; -+ stl_le_p(&uelem.id, elem->index); -+ stl_le_p(&uelem.len, len); - vring_used_write(dev, vq, &uelem, idx); - } - - static inline - void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val) - { -- vq->vring.used->idx = val; -+ stw_le_p(&vq->vring.used->idx, val); - vu_log_write(dev, - vq->vring.log_guest_addr + offsetof(struct vring_used, idx), - sizeof(vq->vring.used->idx)); --- -2.27.0 
- diff --git a/SOURCES/kvm-linux-headers-Add-VFIO_CCW_REQ_IRQ_INDEX.patch b/SOURCES/kvm-linux-headers-Add-VFIO_CCW_REQ_IRQ_INDEX.patch deleted file mode 100644 index d9c81cf..0000000 --- a/SOURCES/kvm-linux-headers-Add-VFIO_CCW_REQ_IRQ_INDEX.patch +++ /dev/null @@ -1,43 +0,0 @@ -From f844ca939adb619cce8426e104b0039a7eba70a6 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 11 May 2021 11:24:04 -0400 -Subject: [PATCH 1/5] linux-headers: Add VFIO_CCW_REQ_IRQ_INDEX - -RH-Author: Thomas Huth -Message-id: <20210511112405.297037-2-thuth@redhat.com> -Patchwork-id: 101537 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/2] linux-headers: Add VFIO_CCW_REQ_IRQ_INDEX -Bugzilla: 1940450 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1940450 -Upstream-status: N/A - -This is based on upstream commit b3c818a47f ("Update linux headers to -5.11-rc2"), but has been reduced to the single hunk that is required -for the next patch (there were too many unrelated conflicts in the other -files for doing full backport of the original upstream commit). - -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. 
de Paula ---- - linux-headers/linux/vfio.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h -index f660bd7bac..9c8810bef4 100644 ---- a/linux-headers/linux/vfio.h -+++ b/linux-headers/linux/vfio.h -@@ -580,6 +580,7 @@ enum { - enum { - VFIO_CCW_IO_IRQ_INDEX, - VFIO_CCW_CRW_IRQ_INDEX, -+ VFIO_CCW_REQ_IRQ_INDEX, - VFIO_CCW_NUM_IRQS - }; - --- -2.27.0 - diff --git a/SOURCES/kvm-linux-headers-Partial-update-against-Linux-5.9-rc4.patch b/SOURCES/kvm-linux-headers-Partial-update-against-Linux-5.9-rc4.patch deleted file mode 100644 index 1217a6c..0000000 --- a/SOURCES/kvm-linux-headers-Partial-update-against-Linux-5.9-rc4.patch +++ /dev/null @@ -1,83 +0,0 @@ -From d9a63d12b5804eb172a040a16d7e725853c41a8c Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 11 Nov 2020 12:03:12 -0500 -Subject: [PATCH 12/18] linux-headers: Partial update against Linux 5.9-rc4 - -RH-Author: Thomas Huth -Message-id: <20201111120316.707489-9-thuth@redhat.com> -Patchwork-id: 99505 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 08/12] linux-headers: Partial update against Linux 5.9-rc4 -Bugzilla: 1798506 -RH-Acked-by: Jens Freimann -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -Upstream-status: N/A - -This is based on upstream commit e6546342a830e520d14ef03aa95677611de0d90c -but only the two files have been included (there were too many conflicts -in the other unrelated files, so they have been dropped from this patch). - -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. 
de Paula ---- - linux-headers/asm-s390/kvm.h | 7 +++++-- - linux-headers/linux/kvm.h | 6 ++++++ - 2 files changed, 11 insertions(+), 2 deletions(-) - -diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h -index 0138ccb0d89..f053b8304a8 100644 ---- a/linux-headers/asm-s390/kvm.h -+++ b/linux-headers/asm-s390/kvm.h -@@ -231,11 +231,13 @@ struct kvm_guest_debug_arch { - #define KVM_SYNC_GSCB (1UL << 9) - #define KVM_SYNC_BPBC (1UL << 10) - #define KVM_SYNC_ETOKEN (1UL << 11) -+#define KVM_SYNC_DIAG318 (1UL << 12) - - #define KVM_SYNC_S390_VALID_FIELDS \ - (KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \ - KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \ -- KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN) -+ KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \ -+ KVM_SYNC_DIAG318) - - /* length and alignment of the sdnx as a power of two */ - #define SDNXC 8 -@@ -264,7 +266,8 @@ struct kvm_sync_regs { - __u8 reserved2 : 7; - __u8 padding1[51]; /* riccb needs to be 64byte aligned */ - __u8 riccb[64]; /* runtime instrumentation controls block */ -- __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ -+ __u64 diag318; /* diagnose 0x318 info */ -+ __u8 padding2[184]; /* sdnx needs to be 256byte aligned */ - union { - __u8 sdnx[SDNXL]; /* state description annex */ - struct { -diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h -index 578cd97c0d9..6bba4ec136b 100644 ---- a/linux-headers/linux/kvm.h -+++ b/linux-headers/linux/kvm.h -@@ -276,6 +276,7 @@ struct kvm_run { - /* KVM_EXIT_FAIL_ENTRY */ - struct { - __u64 hardware_entry_failure_reason; -+ __u32 cpu; - } fail_entry; - /* KVM_EXIT_EXCEPTION */ - struct { -@@ -1011,6 +1012,11 @@ struct kvm_ppc_resize_hpt { - #define KVM_CAP_S390_VCPU_RESETS 179 - #define KVM_CAP_S390_PROTECTED 180 - #define KVM_CAP_PPC_SECURE_GUEST 181 -+#define KVM_CAP_HALT_POLL 182 -+#define KVM_CAP_ASYNC_PF_INT 183 -+#define 
KVM_CAP_LAST_CPU 184 -+#define KVM_CAP_SMALLER_MAXPHYADDR 185 -+#define KVM_CAP_S390_DIAG318 186 - - #ifdef KVM_CAP_IRQ_ROUTING - --- -2.27.0 - diff --git a/SOURCES/kvm-linux-headers-add-vfio-DMA-available-capability.patch b/SOURCES/kvm-linux-headers-add-vfio-DMA-available-capability.patch deleted file mode 100644 index f62026d..0000000 --- a/SOURCES/kvm-linux-headers-add-vfio-DMA-available-capability.patch +++ /dev/null @@ -1,54 +0,0 @@ -From b50c47e1a9fbe8876e231afbb5ed85945c8038da Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 19 Jan 2021 12:50:40 -0500 -Subject: [PATCH 1/7] linux-headers: add vfio DMA available capability - -RH-Author: Cornelia Huck -Message-id: <20210119125046.472811-2-cohuck@redhat.com> -Patchwork-id: 100674 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/7] linux-headers: add vfio DMA available capability -Bugzilla: 1905391 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Auger Eric -RH-Acked-by: Thomas Huth - -UPSTREAM: RHEL only - -This is the part of 53ba2eee52bf ("linux-headers: update against -5.10-rc1") required for DMA limiting. - -Signed-off-by: Cornelia Huck -Signed-off-by: Danilo C. L. de Paula ---- - linux-headers/linux/vfio.h | 15 +++++++++++++++ - 1 file changed, 15 insertions(+) - -diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h -index 9e227348b30..f660bd7bace 100644 ---- a/linux-headers/linux/vfio.h -+++ b/linux-headers/linux/vfio.h -@@ -751,6 +751,21 @@ struct vfio_iommu_type1_info_cap_iova_range { - struct vfio_iova_range iova_ranges[]; - }; - -+/* -+ * The DMA available capability allows to report the current number of -+ * simultaneously outstanding DMA mappings that are allowed. -+ * -+ * The structure below defines version 1 of this capability. -+ * -+ * avail: specifies the current number of outstanding DMA mappings allowed. 
-+ */ -+#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3 -+ -+struct vfio_iommu_type1_info_dma_avail { -+ struct vfio_info_cap_header header; -+ __u32 avail; -+}; -+ - #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) - - /** --- -2.27.0 - diff --git a/SOURCES/kvm-linux-headers-support-vfio-ccw-features.patch b/SOURCES/kvm-linux-headers-support-vfio-ccw-features.patch deleted file mode 100644 index 4eb95bf..0000000 --- a/SOURCES/kvm-linux-headers-support-vfio-ccw-features.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 1da0eecb9f2086c880fdaf1260ae775bbfbf5f02 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 23 Jun 2020 09:25:37 -0400 -Subject: [PATCH 03/12] linux-headers: support vfio-ccw features - -RH-Author: Cornelia Huck -Message-id: <20200623092543.358315-4-cohuck@redhat.com> -Patchwork-id: 97696 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 3/9] linux-headers: support vfio-ccw features -Bugzilla: 1660916 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth - -Partial update to support CRW and SCHIB regions. - -Upstream: n/a - -Signed-off-by: Cornelia Huck -Signed-off-by: Danilo C. L. 
de Paula ---- - linux-headers/linux/vfio.h | 3 +++ - linux-headers/linux/vfio_ccw.h | 19 +++++++++++++++++++ - 2 files changed, 22 insertions(+) - -diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h -index fb10370d29..9e227348b3 100644 ---- a/linux-headers/linux/vfio.h -+++ b/linux-headers/linux/vfio.h -@@ -378,6 +378,8 @@ struct vfio_region_gfx_edid { - - /* sub-types for VFIO_REGION_TYPE_CCW */ - #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) -+#define VFIO_REGION_SUBTYPE_CCW_SCHIB (2) -+#define VFIO_REGION_SUBTYPE_CCW_CRW (3) - - /* - * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped -@@ -577,6 +579,7 @@ enum { - - enum { - VFIO_CCW_IO_IRQ_INDEX, -+ VFIO_CCW_CRW_IRQ_INDEX, - VFIO_CCW_NUM_IRQS - }; - -diff --git a/linux-headers/linux/vfio_ccw.h b/linux-headers/linux/vfio_ccw.h -index fcc3e69ef5..6375d6ff25 100644 ---- a/linux-headers/linux/vfio_ccw.h -+++ b/linux-headers/linux/vfio_ccw.h -@@ -34,4 +34,23 @@ struct ccw_cmd_region { - __u32 ret_code; - } __attribute__((packed)); - -+/* -+ * Used for processing commands that read the subchannel-information block -+ * Reading this region triggers a stsch() to hardware -+ * Note: this is controlled by a capability -+ */ -+struct ccw_schib_region { -+#define SCHIB_AREA_SIZE 52 -+ __u8 schib_area[SCHIB_AREA_SIZE]; -+} __attribute__((packed)); -+ -+/* -+ * Used for returning a Channel Report Word to userspace. 
-+ * Note: this is controlled by a capability -+ */ -+struct ccw_crw_region { -+ __u32 crw; -+ __u32 pad; -+} __attribute__((packed)); -+ - #endif --- -2.27.0 - diff --git a/SOURCES/kvm-linux-headers-update-kvm.h.patch b/SOURCES/kvm-linux-headers-update-kvm.h.patch deleted file mode 100644 index 1834e33..0000000 --- a/SOURCES/kvm-linux-headers-update-kvm.h.patch +++ /dev/null @@ -1,119 +0,0 @@ -From 9d1b94d3739567245578f30866facc13edb3be92 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:44 -0400 -Subject: [PATCH 02/42] linux-headers: update kvm.h - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-3-thuth@redhat.com> -Patchwork-id: 97020 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 02/38] linux-headers: update kvm.h -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -Upstream-status: n/a - -Update kvm.h for the upcoming new s390x reset and protected virtualization -ioctls. This patch is based on commit ddda37483dd17c9936fdde9ebf8f6ca2692b3842 -and commit dc6f8d458a4ccc360723993f31d310d06469f55f, but I dropped all -(unrequired) changes to the other linux-header files. - -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. 
de Paula ---- - linux-headers/linux/kvm.h | 55 +++++++++++++++++++++++++++++++++++++-- - 1 file changed, 53 insertions(+), 2 deletions(-) - -diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h -index 3d9b18f7f8..578cd97c0d 100644 ---- a/linux-headers/linux/kvm.h -+++ b/linux-headers/linux/kvm.h -@@ -468,12 +468,17 @@ struct kvm_s390_mem_op { - __u32 size; /* amount of bytes */ - __u32 op; /* type of operation */ - __u64 buf; /* buffer in userspace */ -- __u8 ar; /* the access register number */ -- __u8 reserved[31]; /* should be set to 0 */ -+ union { -+ __u8 ar; /* the access register number */ -+ __u32 sida_offset; /* offset into the sida */ -+ __u8 reserved[32]; /* should be set to 0 */ -+ }; - }; - /* types for kvm_s390_mem_op->op */ - #define KVM_S390_MEMOP_LOGICAL_READ 0 - #define KVM_S390_MEMOP_LOGICAL_WRITE 1 -+#define KVM_S390_MEMOP_SIDA_READ 2 -+#define KVM_S390_MEMOP_SIDA_WRITE 3 - /* flags for kvm_s390_mem_op->flags */ - #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) - #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) -@@ -1000,6 +1005,12 @@ struct kvm_ppc_resize_hpt { - #define KVM_CAP_PMU_EVENT_FILTER 173 - #define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174 - #define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175 -+#define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176 -+#define KVM_CAP_ARM_NISV_TO_USER 177 -+#define KVM_CAP_ARM_INJECT_EXT_DABT 178 -+#define KVM_CAP_S390_VCPU_RESETS 179 -+#define KVM_CAP_S390_PROTECTED 180 -+#define KVM_CAP_PPC_SECURE_GUEST 181 - - #ifdef KVM_CAP_IRQ_ROUTING - -@@ -1461,6 +1472,43 @@ struct kvm_enc_region { - /* Available with KVM_CAP_ARM_SVE */ - #define KVM_ARM_VCPU_FINALIZE _IOW(KVMIO, 0xc2, int) - -+/* Available with KVM_CAP_S390_VCPU_RESETS */ -+#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) -+#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) -+ -+struct kvm_s390_pv_sec_parm { -+ __u64 origin; -+ __u64 length; -+}; -+ -+struct kvm_s390_pv_unp { -+ __u64 addr; -+ __u64 size; -+ __u64 tweak; -+}; -+ -+enum pv_cmd_id { -+ 
KVM_PV_ENABLE, -+ KVM_PV_DISABLE, -+ KVM_PV_SET_SEC_PARMS, -+ KVM_PV_UNPACK, -+ KVM_PV_VERIFY, -+ KVM_PV_PREP_RESET, -+ KVM_PV_UNSHARE_ALL, -+}; -+ -+struct kvm_pv_cmd { -+ __u32 cmd; /* Command to be executed */ -+ __u16 rc; /* Ultravisor return code */ -+ __u16 rrc; /* Ultravisor return reason code */ -+ __u64 data; /* Data or address */ -+ __u32 flags; /* flags for future extensions. Must be 0 for now */ -+ __u32 reserved[3]; -+}; -+ -+/* Available with KVM_CAP_S390_PROTECTED */ -+#define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) -+ - /* Secure Encrypted Virtualization command */ - enum sev_cmd_id { - /* Guest initialization commands */ -@@ -1611,4 +1659,7 @@ struct kvm_hyperv_eventfd { - #define KVM_HYPERV_CONN_ID_MASK 0x00ffffff - #define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) - -+#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) -+#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) -+ - #endif /* __LINUX_KVM_H */ --- -2.27.0 - diff --git a/SOURCES/kvm-memory-Add-IOMMUTLBEvent.patch b/SOURCES/kvm-memory-Add-IOMMUTLBEvent.patch deleted file mode 100644 index 5d73c97..0000000 --- a/SOURCES/kvm-memory-Add-IOMMUTLBEvent.patch +++ /dev/null @@ -1,590 +0,0 @@ -From 43a460bde62359c3fa2b1fc6c90d9e13ee7b9a6c Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Tue, 12 Jan 2021 14:36:35 -0500 -Subject: [PATCH 11/17] memory: Add IOMMUTLBEvent -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210112143638.374060-11-eperezma@redhat.com> -Patchwork-id: 100603 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 10/13] memory: Add IOMMUTLBEvent -Bugzilla: 1843852 -RH-Acked-by: Xiao Wang -RH-Acked-by: Peter Xu -RH-Acked-by: Auger Eric - -This way we can tell between regular IOMMUTLBEntry (entry of IOMMU -hardware) and notifications. - -In the notifications, we set explicitly if it is a MAPs or an UNMAP, -instead of trusting in entry permissions to differentiate them. 
- -Signed-off-by: Eugenio Pérez -Reviewed-by: Peter Xu -Reviewed-by: Juan Quintela -Acked-by: Jason Wang -Message-Id: <20201116165506.31315-3-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Matthew Rosato -Acked-by: David Gibson -(cherry picked from commit 5039caf3c449c49e625d34e134463260cf8e00e0) - -Conflicts: - hw/s390x/s390-pci-inst.c: Context because of the lack of commit - ("37fa32de707 s390x/pci: Honor DMA limits set by vfio"). - hw/virtio/virtio-iommu.c: It does not exist in rhel. - -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/smmu-common.c | 13 +++--- - hw/arm/smmuv3.c | 13 +++--- - hw/i386/intel_iommu.c | 88 ++++++++++++++++++++++------------------ - hw/misc/tz-mpc.c | 32 ++++++++------- - hw/ppc/spapr_iommu.c | 15 +++---- - hw/s390x/s390-pci-inst.c | 27 +++++++----- - include/exec/memory.h | 27 ++++++------ - memory.c | 20 ++++----- - 8 files changed, 127 insertions(+), 108 deletions(-) - -diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c -index dfabe381182..a519c97614a 100644 ---- a/hw/arm/smmu-common.c -+++ b/hw/arm/smmu-common.c -@@ -464,14 +464,15 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) - /* Unmap the whole notifier's range */ - static void smmu_unmap_notifier_range(IOMMUNotifier *n) - { -- IOMMUTLBEntry entry; -+ IOMMUTLBEvent event; - -- entry.target_as = &address_space_memory; -- entry.iova = n->start; -- entry.perm = IOMMU_NONE; -- entry.addr_mask = n->end - n->start; -+ event.type = IOMMU_NOTIFIER_UNMAP; -+ event.entry.target_as = &address_space_memory; -+ event.entry.iova = n->start; -+ event.entry.perm = IOMMU_NONE; -+ event.entry.addr_mask = n->end - n->start; - -- memory_region_notify_iommu_one(n, &entry); -+ memory_region_notify_iommu_one(n, &event); - } - - /* Unmap all notifiers attached to @mr */ -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index ef8a877c5d8..10b8393beeb 100644 ---- a/hw/arm/smmuv3.c -+++ 
b/hw/arm/smmuv3.c -@@ -783,7 +783,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, - uint8_t tg, uint64_t num_pages) - { - SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu); -- IOMMUTLBEntry entry; -+ IOMMUTLBEvent event; - uint8_t granule = tg; - - if (!tg) { -@@ -806,12 +806,13 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, - granule = tt->granule_sz; - } - -- entry.target_as = &address_space_memory; -- entry.iova = iova; -- entry.addr_mask = num_pages * (1 << granule) - 1; -- entry.perm = IOMMU_NONE; -+ event.type = IOMMU_NOTIFIER_UNMAP; -+ event.entry.target_as = &address_space_memory; -+ event.entry.iova = iova; -+ event.entry.addr_mask = num_pages * (1 << granule) - 1; -+ event.entry.perm = IOMMU_NONE; - -- memory_region_notify_iommu_one(n, &entry); -+ memory_region_notify_iommu_one(n, &event); - } - - /* invalidate an asid/iova range tuple in all mr's */ -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index 463f107ad12..9fedbac82de 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -1016,7 +1016,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, - } - } - --typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private); -+typedef int (*vtd_page_walk_hook)(IOMMUTLBEvent *event, void *private); - - /** - * Constant information used during page walking -@@ -1037,11 +1037,12 @@ typedef struct { - uint16_t domain_id; - } vtd_page_walk_info; - --static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) -+static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) - { - VTDAddressSpace *as = info->as; - vtd_page_walk_hook hook_fn = info->hook_fn; - void *private = info->private; -+ IOMMUTLBEntry *entry = &event->entry; - DMAMap target = { - .iova = entry->iova, - .size = entry->addr_mask, -@@ -1050,7 +1051,7 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) - }; - DMAMap *mapped = iova_tree_find(as->iova_tree, 
&target); - -- if (entry->perm == IOMMU_NONE && !info->notify_unmap) { -+ if (event->type == IOMMU_NOTIFIER_UNMAP && !info->notify_unmap) { - trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask); - return 0; - } -@@ -1058,7 +1059,7 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) - assert(hook_fn); - - /* Update local IOVA mapped ranges */ -- if (entry->perm) { -+ if (event->type == IOMMU_NOTIFIER_MAP) { - if (mapped) { - /* If it's exactly the same translation, skip */ - if (!memcmp(mapped, &target, sizeof(target))) { -@@ -1084,19 +1085,21 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) - int ret; - - /* Emulate an UNMAP */ -+ event->type = IOMMU_NOTIFIER_UNMAP; - entry->perm = IOMMU_NONE; - trace_vtd_page_walk_one(info->domain_id, - entry->iova, - entry->translated_addr, - entry->addr_mask, - entry->perm); -- ret = hook_fn(entry, private); -+ ret = hook_fn(event, private); - if (ret) { - return ret; - } - /* Drop any existing mapping */ - iova_tree_remove(as->iova_tree, &target); -- /* Recover the correct permission */ -+ /* Recover the correct type */ -+ event->type = IOMMU_NOTIFIER_MAP; - entry->perm = cache_perm; - } - } -@@ -1113,7 +1116,7 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) - trace_vtd_page_walk_one(info->domain_id, entry->iova, - entry->translated_addr, entry->addr_mask, - entry->perm); -- return hook_fn(entry, private); -+ return hook_fn(event, private); - } - - /** -@@ -1134,7 +1137,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, - uint32_t offset; - uint64_t slpte; - uint64_t subpage_size, subpage_mask; -- IOMMUTLBEntry entry; -+ IOMMUTLBEvent event; - uint64_t iova = start; - uint64_t iova_next; - int ret = 0; -@@ -1188,13 +1191,15 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, - * - * In either case, we send an IOTLB notification down. 
- */ -- entry.target_as = &address_space_memory; -- entry.iova = iova & subpage_mask; -- entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); -- entry.addr_mask = ~subpage_mask; -+ event.entry.target_as = &address_space_memory; -+ event.entry.iova = iova & subpage_mask; -+ event.entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); -+ event.entry.addr_mask = ~subpage_mask; - /* NOTE: this is only meaningful if entry_valid == true */ -- entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw); -- ret = vtd_page_walk_one(&entry, info); -+ event.entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw); -+ event.type = event.entry.perm ? IOMMU_NOTIFIER_MAP : -+ IOMMU_NOTIFIER_UNMAP; -+ ret = vtd_page_walk_one(&event, info); - } - - if (ret < 0) { -@@ -1373,10 +1378,10 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, - return 0; - } - --static int vtd_sync_shadow_page_hook(IOMMUTLBEntry *entry, -+static int vtd_sync_shadow_page_hook(IOMMUTLBEvent *event, - void *private) - { -- memory_region_notify_iommu((IOMMUMemoryRegion *)private, 0, *entry); -+ memory_region_notify_iommu(private, 0, *event); - return 0; - } - -@@ -1936,14 +1941,17 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, - * page tables. We just deliver the PSI down to - * invalidate caches. 
- */ -- IOMMUTLBEntry entry = { -- .target_as = &address_space_memory, -- .iova = addr, -- .translated_addr = 0, -- .addr_mask = size - 1, -- .perm = IOMMU_NONE, -+ IOMMUTLBEvent event = { -+ .type = IOMMU_NOTIFIER_UNMAP, -+ .entry = { -+ .target_as = &address_space_memory, -+ .iova = addr, -+ .translated_addr = 0, -+ .addr_mask = size - 1, -+ .perm = IOMMU_NONE, -+ }, - }; -- memory_region_notify_iommu(&vtd_as->iommu, 0, entry); -+ memory_region_notify_iommu(&vtd_as->iommu, 0, event); - } - } - } -@@ -2355,7 +2363,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, - VTDInvDesc *inv_desc) - { - VTDAddressSpace *vtd_dev_as; -- IOMMUTLBEntry entry; -+ IOMMUTLBEvent event; - struct VTDBus *vtd_bus; - hwaddr addr; - uint64_t sz; -@@ -2403,12 +2411,13 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, - sz = VTD_PAGE_SIZE; - } - -- entry.target_as = &vtd_dev_as->as; -- entry.addr_mask = sz - 1; -- entry.iova = addr; -- entry.perm = IOMMU_NONE; -- entry.translated_addr = 0; -- memory_region_notify_iommu(&vtd_dev_as->iommu, 0, entry); -+ event.type = IOMMU_NOTIFIER_UNMAP; -+ event.entry.target_as = &vtd_dev_as->as; -+ event.entry.addr_mask = sz - 1; -+ event.entry.iova = addr; -+ event.entry.perm = IOMMU_NONE; -+ event.entry.translated_addr = 0; -+ memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event); - - done: - return true; -@@ -3419,19 +3428,20 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) - size = remain = end - start + 1; - - while (remain >= VTD_PAGE_SIZE) { -- IOMMUTLBEntry entry; -+ IOMMUTLBEvent event; - uint64_t mask = get_naturally_aligned_size(start, remain, s->aw_bits); - - assert(mask); - -- entry.iova = start; -- entry.addr_mask = mask - 1; -- entry.target_as = &address_space_memory; -- entry.perm = IOMMU_NONE; -+ event.type = IOMMU_NOTIFIER_UNMAP; -+ event.entry.iova = start; -+ event.entry.addr_mask = mask - 1; -+ event.entry.target_as = &address_space_memory; -+ event.entry.perm = 
IOMMU_NONE; - /* This field is meaningless for unmap */ -- entry.translated_addr = 0; -+ event.entry.translated_addr = 0; - -- memory_region_notify_iommu_one(n, &entry); -+ memory_region_notify_iommu_one(n, &event); - - start += mask; - remain -= mask; -@@ -3467,9 +3477,9 @@ static void vtd_address_space_refresh_all(IntelIOMMUState *s) - vtd_switch_address_space_all(s); - } - --static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private) -+static int vtd_replay_hook(IOMMUTLBEvent *event, void *private) - { -- memory_region_notify_iommu_one((IOMMUNotifier *)private, entry); -+ memory_region_notify_iommu_one(private, event); - return 0; - } - -diff --git a/hw/misc/tz-mpc.c b/hw/misc/tz-mpc.c -index 49dd6050bd3..e2fbd1065d8 100644 ---- a/hw/misc/tz-mpc.c -+++ b/hw/misc/tz-mpc.c -@@ -82,8 +82,10 @@ static void tz_mpc_iommu_notify(TZMPC *s, uint32_t lutidx, - /* Called when the LUT word at lutidx has changed from oldlut to newlut; - * must call the IOMMU notifiers for the changed blocks. - */ -- IOMMUTLBEntry entry = { -- .addr_mask = s->blocksize - 1, -+ IOMMUTLBEvent event = { -+ .entry = { -+ .addr_mask = s->blocksize - 1, -+ } - }; - hwaddr addr = lutidx * s->blocksize * 32; - int i; -@@ -100,26 +102,28 @@ static void tz_mpc_iommu_notify(TZMPC *s, uint32_t lutidx, - block_is_ns = newlut & (1 << i); - - trace_tz_mpc_iommu_notify(addr); -- entry.iova = addr; -- entry.translated_addr = addr; -+ event.entry.iova = addr; -+ event.entry.translated_addr = addr; - -- entry.perm = IOMMU_NONE; -- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, entry); -- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, entry); -+ event.type = IOMMU_NOTIFIER_UNMAP; -+ event.entry.perm = IOMMU_NONE; -+ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, event); -+ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, event); - -- entry.perm = IOMMU_RW; -+ event.type = IOMMU_NOTIFIER_MAP; -+ event.entry.perm = IOMMU_RW; - if (block_is_ns) { -- entry.target_as = 
&s->blocked_io_as; -+ event.entry.target_as = &s->blocked_io_as; - } else { -- entry.target_as = &s->downstream_as; -+ event.entry.target_as = &s->downstream_as; - } -- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, entry); -+ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, event); - if (block_is_ns) { -- entry.target_as = &s->downstream_as; -+ event.entry.target_as = &s->downstream_as; - } else { -- entry.target_as = &s->blocked_io_as; -+ event.entry.target_as = &s->blocked_io_as; - } -- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, entry); -+ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, event); - } - } - -diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c -index 3d3bcc86496..9d3ec7e2c07 100644 ---- a/hw/ppc/spapr_iommu.c -+++ b/hw/ppc/spapr_iommu.c -@@ -445,7 +445,7 @@ static void spapr_tce_reset(DeviceState *dev) - static target_ulong put_tce_emu(SpaprTceTable *tcet, target_ulong ioba, - target_ulong tce) - { -- IOMMUTLBEntry entry; -+ IOMMUTLBEvent event; - hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); - unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift; - -@@ -457,12 +457,13 @@ static target_ulong put_tce_emu(SpaprTceTable *tcet, target_ulong ioba, - - tcet->table[index] = tce; - -- entry.target_as = &address_space_memory, -- entry.iova = (ioba - tcet->bus_offset) & page_mask; -- entry.translated_addr = tce & page_mask; -- entry.addr_mask = ~page_mask; -- entry.perm = spapr_tce_iommu_access_flags(tce); -- memory_region_notify_iommu(&tcet->iommu, 0, entry); -+ event.entry.target_as = &address_space_memory, -+ event.entry.iova = (ioba - tcet->bus_offset) & page_mask; -+ event.entry.translated_addr = tce & page_mask; -+ event.entry.addr_mask = ~page_mask; -+ event.entry.perm = spapr_tce_iommu_access_flags(tce); -+ event.type = event.entry.perm ? 
IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP; -+ memory_region_notify_iommu(&tcet->iommu, 0, event); - - return H_SUCCESS; - } -diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c -index 92c7e45df5f..27b189e6d75 100644 ---- a/hw/s390x/s390-pci-inst.c -+++ b/hw/s390x/s390-pci-inst.c -@@ -575,15 +575,18 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) - static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) - { - S390IOTLBEntry *cache = g_hash_table_lookup(iommu->iotlb, &entry->iova); -- IOMMUTLBEntry notify = { -- .target_as = &address_space_memory, -- .iova = entry->iova, -- .translated_addr = entry->translated_addr, -- .perm = entry->perm, -- .addr_mask = ~PAGE_MASK, -+ IOMMUTLBEvent event = { -+ .type = entry->perm ? IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP, -+ .entry = { -+ .target_as = &address_space_memory, -+ .iova = entry->iova, -+ .translated_addr = entry->translated_addr, -+ .perm = entry->perm, -+ .addr_mask = ~PAGE_MASK, -+ }, - }; - -- if (entry->perm == IOMMU_NONE) { -+ if (event.type == IOMMU_NOTIFIER_UNMAP) { - if (!cache) { - return; - } -@@ -595,9 +598,11 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) - return; - } - -- notify.perm = IOMMU_NONE; -- memory_region_notify_iommu(&iommu->iommu_mr, 0, notify); -- notify.perm = entry->perm; -+ event.type = IOMMU_NOTIFIER_UNMAP; -+ event.entry.perm = IOMMU_NONE; -+ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); -+ event.type = IOMMU_NOTIFIER_MAP; -+ event.entry.perm = entry->perm; - } - - cache = g_new(S390IOTLBEntry, 1); -@@ -608,7 +613,7 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) - g_hash_table_replace(iommu->iotlb, &cache->iova, cache); - } - -- memory_region_notify_iommu(&iommu->iommu_mr, 0, notify); -+ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); - } - - int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) -diff --git 
a/include/exec/memory.h b/include/exec/memory.h -index b6466ab6d57..80e36077cdb 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -106,6 +106,11 @@ struct IOMMUNotifier { - }; - typedef struct IOMMUNotifier IOMMUNotifier; - -+typedef struct IOMMUTLBEvent { -+ IOMMUNotifierFlag type; -+ IOMMUTLBEntry entry; -+} IOMMUTLBEvent; -+ - /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ - #define RAM_PREALLOC (1 << 0) - -@@ -1047,24 +1052,18 @@ uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *iommu_mr); - /** - * memory_region_notify_iommu: notify a change in an IOMMU translation entry. - * -- * The notification type will be decided by entry.perm bits: -- * -- * - For UNMAP (cache invalidation) notifies: set entry.perm to IOMMU_NONE. -- * - For MAP (newly added entry) notifies: set entry.perm to the -- * permission of the page (which is definitely !IOMMU_NONE). -- * - * Note: for any IOMMU implementation, an in-place mapping change - * should be notified with an UNMAP followed by a MAP. - * - * @iommu_mr: the memory region that was changed - * @iommu_idx: the IOMMU index for the translation table which has changed -- * @entry: the new entry in the IOMMU translation table. The entry -- * replaces all old entries for the same virtual I/O address range. -- * Deleted entries have .@perm == 0. -+ * @event: TLB event with the new entry in the IOMMU translation table. -+ * The entry replaces all old entries for the same virtual I/O address -+ * range. - */ - void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - int iommu_idx, -- IOMMUTLBEntry entry); -+ IOMMUTLBEvent event); - - /** - * memory_region_notify_iommu_one: notify a change in an IOMMU translation -@@ -1074,12 +1073,12 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - * notifies a specific notifier, not all of them. - * - * @notifier: the notifier to be notified -- * @entry: the new entry in the IOMMU translation table. 
The entry -- * replaces all old entries for the same virtual I/O address range. -- * Deleted entries have .@perm == 0. -+ * @event: TLB event with the new entry in the IOMMU translation table. -+ * The entry replaces all old entries for the same virtual I/O address -+ * range. - */ - void memory_region_notify_iommu_one(IOMMUNotifier *notifier, -- IOMMUTLBEntry *entry); -+ IOMMUTLBEvent *event); - - /** - * memory_region_register_iommu_notifier: register a notifier for changes to -diff --git a/memory.c b/memory.c -index 43bd3359bf8..3bd99b8ac4a 100644 ---- a/memory.c -+++ b/memory.c -@@ -1912,11 +1912,15 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, - } - - void memory_region_notify_iommu_one(IOMMUNotifier *notifier, -- IOMMUTLBEntry *entry) -+ IOMMUTLBEvent *event) - { -- IOMMUNotifierFlag request_flags; -+ IOMMUTLBEntry *entry = &event->entry; - hwaddr entry_end = entry->iova + entry->addr_mask; - -+ if (event->type == IOMMU_NOTIFIER_UNMAP) { -+ assert(entry->perm == IOMMU_NONE); -+ } -+ - /* - * Skip the notification if the notification does not overlap - * with registered range. 
-@@ -1927,20 +1931,14 @@ void memory_region_notify_iommu_one(IOMMUNotifier *notifier, - - assert(entry->iova >= notifier->start && entry_end <= notifier->end); - -- if (entry->perm & IOMMU_RW) { -- request_flags = IOMMU_NOTIFIER_MAP; -- } else { -- request_flags = IOMMU_NOTIFIER_UNMAP; -- } -- -- if (notifier->notifier_flags & request_flags) { -+ if (event->type & notifier->notifier_flags) { - notifier->notify(notifier, entry); - } - } - - void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - int iommu_idx, -- IOMMUTLBEntry entry) -+ IOMMUTLBEvent event) - { - IOMMUNotifier *iommu_notifier; - -@@ -1948,7 +1946,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - - IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) { - if (iommu_notifier->iommu_idx == iommu_idx) { -- memory_region_notify_iommu_one(iommu_notifier, &entry); -+ memory_region_notify_iommu_one(iommu_notifier, &event); - } - } - } --- -2.27.0 - diff --git a/SOURCES/kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch b/SOURCES/kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch deleted file mode 100644 index 89eb9c9..0000000 --- a/SOURCES/kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch +++ /dev/null @@ -1,89 +0,0 @@ -From f0fa537af2e1e5f827eeb74dc5b3e12776917a67 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Tue, 12 Jan 2021 14:36:36 -0500 -Subject: [PATCH 12/17] memory: Add IOMMU_NOTIFIER_DEVIOTLB_UNMAP - IOMMUTLBNotificationType -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210112143638.374060-12-eperezma@redhat.com> -Patchwork-id: 100604 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 11/13] memory: Add IOMMU_NOTIFIER_DEVIOTLB_UNMAP IOMMUTLBNotificationType -Bugzilla: 1843852 -RH-Acked-by: Xiao Wang -RH-Acked-by: Peter Xu -RH-Acked-by: Auger Eric - -This allows us to differentiate between regular IOMMU map/unmap events -and DEVIOTLB unmap. 
Doing so, notifiers that only need device IOTLB -invalidations will not receive regular IOMMU unmappings. - -Adapt intel and vhost to use it. - -Signed-off-by: Eugenio Pérez -Reviewed-by: Peter Xu -Reviewed-by: Juan Quintela -Acked-by: Jason Wang -Message-Id: <20201116165506.31315-4-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit b68ba1ca57677acf870d5ab10579e6105c1f5338) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - hw/i386/intel_iommu.c | 2 +- - hw/virtio/vhost.c | 2 +- - include/exec/memory.h | 7 ++++++- - 3 files changed, 8 insertions(+), 3 deletions(-) - -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index 9fedbac82de..3640bc2ed15 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -2411,7 +2411,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, - sz = VTD_PAGE_SIZE; - } - -- event.type = IOMMU_NOTIFIER_UNMAP; -+ event.type = IOMMU_NOTIFIER_DEVIOTLB_UNMAP; - event.entry.target_as = &vtd_dev_as->as; - event.entry.addr_mask = sz - 1; - event.entry.iova = addr; -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 9182a00495e..78a5df3b379 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -704,7 +704,7 @@ static void vhost_iommu_region_add(MemoryListener *listener, - iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr, - MEMTXATTRS_UNSPECIFIED); - iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify, -- IOMMU_NOTIFIER_UNMAP, -+ IOMMU_NOTIFIER_DEVIOTLB_UNMAP, - section->offset_within_region, - int128_get64(end), - iommu_idx); -diff --git a/include/exec/memory.h b/include/exec/memory.h -index 80e36077cdb..403dc0c0572 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -87,9 +87,14 @@ typedef enum { - IOMMU_NOTIFIER_UNMAP = 0x1, - /* Notify entry changes (newly created entries) */ - IOMMU_NOTIFIER_MAP = 0x2, -+ /* Notify changes on device IOTLB entries */ -+ 
IOMMU_NOTIFIER_DEVIOTLB_UNMAP = 0x04, - } IOMMUNotifierFlag; - --#define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP) -+#define IOMMU_NOTIFIER_IOTLB_EVENTS (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP) -+#define IOMMU_NOTIFIER_DEVIOTLB_EVENTS IOMMU_NOTIFIER_DEVIOTLB_UNMAP -+#define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_IOTLB_EVENTS | \ -+ IOMMU_NOTIFIER_DEVIOTLB_EVENTS) - - struct IOMMUNotifier; - typedef void (*IOMMUNotify)(struct IOMMUNotifier *notifier, --- -2.27.0 - diff --git a/SOURCES/kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch b/SOURCES/kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch deleted file mode 100644 index 8921c14..0000000 --- a/SOURCES/kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch +++ /dev/null @@ -1,146 +0,0 @@ -From e876535fd5ed10abf0dbeb55ec7098664412068e Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Tue, 12 Jan 2021 14:36:34 -0500 -Subject: [PATCH 10/17] memory: Rename memory_region_notify_one to - memory_region_notify_iommu_one -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210112143638.374060-10-eperezma@redhat.com> -Patchwork-id: 100602 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 09/13] memory: Rename memory_region_notify_one to memory_region_notify_iommu_one -Bugzilla: 1843852 -RH-Acked-by: Xiao Wang -RH-Acked-by: Peter Xu -RH-Acked-by: Auger Eric - -Previous name didn't reflect the iommu operation. - -Signed-off-by: Eugenio Pérez -Reviewed-by: Peter Xu -Reviewed-by: David Gibson -Reviewed-by: Juan Quintela -Reviewed-by: Eric Auger -Acked-by: Jason Wang -Message-Id: <20201116165506.31315-2-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 3b5ebf8532afdc1518bd8b0961ed802bc3f5f07c) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/arm/smmu-common.c | 2 +- - hw/arm/smmuv3.c | 2 +- - hw/i386/intel_iommu.c | 4 ++-- - include/exec/memory.h | 6 +++--- - memory.c | 6 +++--- - 5 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c -index 9780404f002..dfabe381182 100644 ---- a/hw/arm/smmu-common.c -+++ b/hw/arm/smmu-common.c -@@ -471,7 +471,7 @@ static void smmu_unmap_notifier_range(IOMMUNotifier *n) - entry.perm = IOMMU_NONE; - entry.addr_mask = n->end - n->start; - -- memory_region_notify_one(n, &entry); -+ memory_region_notify_iommu_one(n, &entry); - } - - /* Unmap all notifiers attached to @mr */ -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index a418fab2aa6..ef8a877c5d8 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -811,7 +811,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, - entry.addr_mask = num_pages * (1 << granule) - 1; - entry.perm = IOMMU_NONE; - -- memory_region_notify_one(n, &entry); -+ memory_region_notify_iommu_one(n, &entry); - } - - /* invalidate an asid/iova range tuple in all mr's */ -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index 43c94b993b4..463f107ad12 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -3431,7 +3431,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) - /* This field is meaningless for unmap */ - entry.translated_addr = 0; - -- memory_region_notify_one(n, &entry); -+ memory_region_notify_iommu_one(n, &entry); - - start += mask; - remain -= mask; -@@ -3469,7 +3469,7 @@ static void vtd_address_space_refresh_all(IntelIOMMUState *s) - - static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private) - { -- memory_region_notify_one((IOMMUNotifier *)private, entry); -+ memory_region_notify_iommu_one((IOMMUNotifier *)private, entry); - return 0; - } - -diff --git a/include/exec/memory.h b/include/exec/memory.h -index e499dc215b3..b6466ab6d57 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h 
-@@ -226,7 +226,7 @@ enum IOMMUMemoryRegionAttr { - * The IOMMU implementation must use the IOMMU notifier infrastructure - * to report whenever mappings are changed, by calling - * memory_region_notify_iommu() (or, if necessary, by calling -- * memory_region_notify_one() for each registered notifier). -+ * memory_region_notify_iommu_one() for each registered notifier). - * - * Conceptually an IOMMU provides a mapping from input address - * to an output TLB entry. If the IOMMU is aware of memory transaction -@@ -1067,7 +1067,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - IOMMUTLBEntry entry); - - /** -- * memory_region_notify_one: notify a change in an IOMMU translation -+ * memory_region_notify_iommu_one: notify a change in an IOMMU translation - * entry to a single notifier - * - * This works just like memory_region_notify_iommu(), but it only -@@ -1078,7 +1078,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - * replaces all old entries for the same virtual I/O address range. - * Deleted entries have .@perm == 0. 
- */ --void memory_region_notify_one(IOMMUNotifier *notifier, -+void memory_region_notify_iommu_one(IOMMUNotifier *notifier, - IOMMUTLBEntry *entry); - - /** -diff --git a/memory.c b/memory.c -index 06484c2bff2..43bd3359bf8 100644 ---- a/memory.c -+++ b/memory.c -@@ -1911,8 +1911,8 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, - memory_region_update_iommu_notify_flags(iommu_mr, NULL); - } - --void memory_region_notify_one(IOMMUNotifier *notifier, -- IOMMUTLBEntry *entry) -+void memory_region_notify_iommu_one(IOMMUNotifier *notifier, -+ IOMMUTLBEntry *entry) - { - IOMMUNotifierFlag request_flags; - hwaddr entry_end = entry->iova + entry->addr_mask; -@@ -1948,7 +1948,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - - IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) { - if (iommu_notifier->iommu_idx == iommu_idx) { -- memory_region_notify_one(iommu_notifier, &entry); -+ memory_region_notify_iommu_one(iommu_notifier, &entry); - } - } - } --- -2.27.0 - diff --git a/SOURCES/kvm-memory-Revert-memory-accept-mismatching-sizes-in-mem.patch b/SOURCES/kvm-memory-Revert-memory-accept-mismatching-sizes-in-mem.patch deleted file mode 100644 index f81c86f..0000000 --- a/SOURCES/kvm-memory-Revert-memory-accept-mismatching-sizes-in-mem.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 13f4ebe4708f4f4dc20d710e475a42d520459860 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 21 Apr 2021 22:30:03 -0400 -Subject: [PATCH 4/7] memory: Revert "memory: accept mismatching sizes in - memory_region_access_valid" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210421223006.19650-4-jmaloy@redhat.com> -Patchwork-id: 101480 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH v2 3/6] memory: Revert "memory: accept mismatching sizes in memory_region_access_valid" -Bugzilla: 1842478 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laszlo Ersek - -From: 
"Michael S. Tsirkin" - -Memory API documentation documents valid .min_access_size and .max_access_size -fields and explains that any access outside these boundaries is blocked. - -This is what devices seem to assume. - -However this is not what the implementation does: it simply -ignores the boundaries unless there's an "accepts" callback. - -Naturally, this breaks a bunch of devices. - -Revert to the documented behaviour. - -Devices that want to allow any access can just drop the valid field, -or add the impl field to have accesses converted to appropriate -length. - -Cc: qemu-stable@nongnu.org -Reviewed-by: Richard Henderson -Fixes: CVE-2020-13754 -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1842363 -Fixes: a014ed07bd5a ("memory: accept mismatching sizes in memory_region_access_valid") -Signed-off-by: Michael S. Tsirkin -Message-Id: <20200610134731.1514409-1-mst@redhat.com> -Signed-off-by: Paolo Bonzini - -(cherry picked from commit 5d971f9e672507210e77d020d89e0e89165c8fc9) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - memory.c | 29 +++++++++-------------------- - 1 file changed, 9 insertions(+), 20 deletions(-) - -diff --git a/memory.c b/memory.c -index 5a4a80842d..0cfcb72a5a 100644 ---- a/memory.c -+++ b/memory.c -@@ -1351,35 +1351,24 @@ bool memory_region_access_valid(MemoryRegion *mr, - bool is_write, - MemTxAttrs attrs) - { -- int access_size_min, access_size_max; -- int access_size, i; -- -- if (!mr->ops->valid.unaligned && (addr & (size - 1))) { -+ if (mr->ops->valid.accepts -+ && !mr->ops->valid.accepts(mr->opaque, addr, size, is_write, attrs)) { - return false; - } - -- if (!mr->ops->valid.accepts) { -- return true; -- } -- -- access_size_min = mr->ops->valid.min_access_size; -- if (!mr->ops->valid.min_access_size) { -- access_size_min = 1; -+ if (!mr->ops->valid.unaligned && (addr & (size - 1))) { -+ return false; - } - -- access_size_max = mr->ops->valid.max_access_size; -+ /* Treat zero as compatibility all valid */ - if (!mr->ops->valid.max_access_size) { -- access_size_max = 4; -+ return true; - } - -- access_size = MAX(MIN(size, access_size_max), access_size_min); -- for (i = 0; i < size; i += access_size) { -- if (!mr->ops->valid.accepts(mr->opaque, addr + i, access_size, -- is_write, attrs)) { -- return false; -- } -+ if (size > mr->ops->valid.max_access_size -+ || size < mr->ops->valid.min_access_size) { -+ return false; - } -- - return true; - } - --- -2.27.0 - diff --git a/SOURCES/kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch b/SOURCES/kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch deleted file mode 100644 index de56901..0000000 --- a/SOURCES/kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 8c5154729effda3f762bfb8224f9c61dab8b2986 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Tue, 12 Jan 2021 14:36:38 -0500 -Subject: [PATCH 14/17] memory: Skip bad range assertion if notifier is - DEVIOTLB_UNMAP type -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210112143638.374060-14-eperezma@redhat.com> -Patchwork-id: 100606 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 13/13] memory: Skip bad range assertion if notifier is DEVIOTLB_UNMAP type -Bugzilla: 1843852 -RH-Acked-by: Xiao Wang -RH-Acked-by: Peter Xu -RH-Acked-by: Auger Eric - -Device IOTLB invalidations can unmap arbitrary ranges, eiter outside of -the memory region or even [0, ~0ULL] for all the space. The assertion -could be hit by a guest, and rhel7 guest effectively hit it. - -Signed-off-by: Eugenio Pérez -Reviewed-by: Peter Xu -Reviewed-by: Juan Quintela -Acked-by: Jason Wang -Message-Id: <20201116165506.31315-6-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 1804857f19f612f6907832e35599cdb51d4ec764) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - memory.c | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/memory.c b/memory.c -index 3bd99b8ac4a..5a4a80842d7 100644 ---- a/memory.c -+++ b/memory.c -@@ -1916,6 +1916,7 @@ void memory_region_notify_iommu_one(IOMMUNotifier *notifier, - { - IOMMUTLBEntry *entry = &event->entry; - hwaddr entry_end = entry->iova + entry->addr_mask; -+ IOMMUTLBEntry tmp = *entry; - - if (event->type == IOMMU_NOTIFIER_UNMAP) { - assert(entry->perm == IOMMU_NONE); -@@ -1929,10 +1930,16 @@ void memory_region_notify_iommu_one(IOMMUNotifier *notifier, - return; - } - -- assert(entry->iova >= notifier->start && entry_end <= notifier->end); -+ if (notifier->notifier_flags & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) { -+ /* Crop (iova, addr_mask) to range */ -+ tmp.iova = MAX(tmp.iova, notifier->start); -+ tmp.addr_mask = MIN(entry_end, notifier->end) - tmp.iova; -+ } else { -+ assert(entry->iova >= notifier->start && entry_end <= notifier->end); -+ } - - if (event->type & notifier->notifier_flags) { -- notifier->notify(notifier, entry); -+ 
notifier->notify(notifier, &tmp); - } - } - --- -2.27.0 - diff --git a/SOURCES/kvm-memory-clamp-cached-translation-in-case-it-points-to.patch b/SOURCES/kvm-memory-clamp-cached-translation-in-case-it-points-to.patch deleted file mode 100644 index 8b8f67a..0000000 --- a/SOURCES/kvm-memory-clamp-cached-translation-in-case-it-points-to.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 354946f1e5fee0a69282bdf284c969b03a78a53e Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 13 Jan 2021 00:42:23 -0500 -Subject: [PATCH 15/17] memory: clamp cached translation in case it points to - an MMIO region - -RH-Author: Jon Maloy -Message-id: <20210113004223.871394-2-jmaloy@redhat.com> -Patchwork-id: 100618 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] memory: clamp cached translation in case it points to an MMIO region -Bugzilla: 1904393 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Thomas Huth - -From: Paolo Bonzini - -In using the address_space_translate_internal API, address_space_cache_init -forgot one piece of advice that can be found in the code for -address_space_translate_internal: - - /* MMIO registers can be expected to perform full-width accesses based only - * on their address, without considering adjacent registers that could - * decode to completely different MemoryRegions. When such registers - * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO - * regions overlap wildly. For this reason we cannot clamp the accesses - * here. - * - * If the length is small (as is the case for address_space_ldl/stl), - * everything works fine. If the incoming length is large, however, - * the caller really has to do the clamping through memory_access_size. - */ - -address_space_cache_init is exactly one such case where "the incoming length -is large", therefore we need to clamp the resulting length---not to -memory_access_size though, since we are not doing an access yet, but to -the size of the resulting section. 
This ensures that subsequent accesses -to the cached MemoryRegionSection will be in range. - -With this patch, the enclosed testcase notices that the used ring does -not fit into the MSI-X table and prints a "qemu-system-x86_64: Cannot map used" -error. - -Signed-off-by: Paolo Bonzini - -(cherry picked from 4bfb024bc76973d40a359476dc0291f46e435442) -- Manually applied to file exec.c, where the code to correct - is located in this version. -- Skipped the fuzzing test part, which is hard to apply on this code. -Signed-off-by: Jon Maloy - -Signed-off-by: Danilo C. L. de Paula ---- - exec.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/exec.c b/exec.c -index ffdb5185353..09ed0cfc756 100644 ---- a/exec.c -+++ b/exec.c -@@ -3620,6 +3620,7 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, - AddressSpaceDispatch *d; - hwaddr l; - MemoryRegion *mr; -+ Int128 diff; - - assert(len > 0); - -@@ -3628,6 +3629,15 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, - d = flatview_to_dispatch(cache->fv); - cache->mrs = *address_space_translate_internal(d, addr, &cache->xlat, &l, true); - -+ /* -+ * cache->xlat is now relative to cache->mrs.mr, not to the section itself. -+ * Take that into account to compute how many bytes are there between -+ * cache->xlat and the end of the section. 
-+ */ -+ diff = int128_sub(cache->mrs.size, -+ int128_make64(cache->xlat - cache->mrs.offset_within_region)); -+ l = int128_get64(int128_min(diff, int128_make64(l))); -+ - mr = cache->mrs.mr; - memory_region_ref(mr); - if (memory_access_is_direct(mr, is_write)) { --- -2.27.0 - diff --git a/SOURCES/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch b/SOURCES/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch deleted file mode 100644 index 3477af5..0000000 --- a/SOURCES/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch +++ /dev/null @@ -1,179 +0,0 @@ -From 38a032829b6b8d523b4cee05f732031e66fc2e41 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Fri, 31 Jan 2020 17:12:56 +0000 -Subject: [PATCH 14/15] migration: Change SaveStateEntry.instance_id into - uint32_t - -RH-Author: Peter Xu -Message-id: <20200131171257.1066593-3-peterx@redhat.com> -Patchwork-id: 93629 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/3] migration: Change SaveStateEntry.instance_id into uint32_t -Bugzilla: 1529231 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert - -It was always used as 32bit, so define it as used to be clear. -Instead of using -1 as the auto-gen magic value, we switch to -UINT32_MAX. We also make sure that we don't auto-gen this value to -avoid overflowed instance IDs without being noticed. - -Suggested-by: Juan Quintela -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 93062e23619e057743757ee53bf7f8e07f7a3710) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - include/migration/vmstate.h - migration/savevm.c - stubs/vmstate.c - Due to missing 3cad405bab ("vmstate: replace DeviceState with - VMStateIf", 2020-01-06) - -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/intc/apic_common.c | 2 +- - include/migration/register.h | 2 +- - include/migration/vmstate.h | 2 +- - migration/savevm.c | 18 ++++++++++-------- - stubs/vmstate.c | 2 +- - 5 files changed, 14 insertions(+), 12 deletions(-) - -diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c -index f2c3a7f..54b8731 100644 ---- a/hw/intc/apic_common.c -+++ b/hw/intc/apic_common.c -@@ -268,7 +268,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) - APICCommonState *s = APIC_COMMON(dev); - APICCommonClass *info; - static DeviceState *vapic; -- int instance_id = s->id; -+ uint32_t instance_id = s->id; - - info = APIC_COMMON_GET_CLASS(s); - info->realize(dev, errp); -diff --git a/include/migration/register.h b/include/migration/register.h -index a13359a..f3ba10b 100644 ---- a/include/migration/register.h -+++ b/include/migration/register.h -@@ -69,7 +69,7 @@ typedef struct SaveVMHandlers { - } SaveVMHandlers; - - int register_savevm_live(const char *idstr, -- int instance_id, -+ uint32_t instance_id, - int version_id, - const SaveVMHandlers *ops, - void *opaque); -diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h -index 883f1cf..296609c 100644 ---- a/include/migration/vmstate.h -+++ b/include/migration/vmstate.h -@@ -1158,7 +1158,7 @@ bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); - #define VMSTATE_INSTANCE_ID_ANY -1 - - /* Returns: 0 on success, -1 on failure */ --int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, -+int vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, - const VMStateDescription *vmsd, - void *base, int alias_id, - int required_for_version, -diff --git a/migration/savevm.c b/migration/savevm.c -index e2e8e0a..a80bb52 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -233,7 +233,7 @@ typedef struct CompatEntry { - typedef struct SaveStateEntry { - QTAILQ_ENTRY(SaveStateEntry) entry; - char idstr[256]; -- int instance_id; -+ 
uint32_t instance_id; - int alias_id; - int version_id; - /* version id read from the stream */ -@@ -665,10 +665,10 @@ void dump_vmstate_json_to_file(FILE *out_file) - fclose(out_file); - } - --static int calculate_new_instance_id(const char *idstr) -+static uint32_t calculate_new_instance_id(const char *idstr) - { - SaveStateEntry *se; -- int instance_id = 0; -+ uint32_t instance_id = 0; - - QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { - if (strcmp(idstr, se->idstr) == 0 -@@ -676,6 +676,8 @@ static int calculate_new_instance_id(const char *idstr) - instance_id = se->instance_id + 1; - } - } -+ /* Make sure we never loop over without being noticed */ -+ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); - return instance_id; - } - -@@ -730,7 +732,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) - Meanwhile pass -1 as instance_id if you do not already have a clearly - distinguishing id for all instances of your device class. */ - int register_savevm_live(const char *idstr, -- int instance_id, -+ uint32_t instance_id, - int version_id, - const SaveVMHandlers *ops, - void *opaque) -@@ -784,7 +786,7 @@ void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque) - } - } - --int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, -+int vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, - const VMStateDescription *vmsd, - void *opaque, int alias_id, - int required_for_version, -@@ -1600,7 +1602,7 @@ int qemu_save_device_state(QEMUFile *f) - return qemu_file_get_error(f); - } - --static SaveStateEntry *find_se(const char *idstr, int instance_id) -+static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id) - { - SaveStateEntry *se; - -@@ -2267,7 +2269,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) - /* Find savevm section */ - se = find_se(idstr, instance_id); - if (se == NULL) { -- error_report("Unknown savevm section or instance '%s' %d. 
" -+ error_report("Unknown savevm section or instance '%s' %"PRIu32". " - "Make sure that your current VM setup matches your " - "saved VM setup, including any hotplugged devices", - idstr, instance_id); -@@ -2291,7 +2293,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) - - ret = vmstate_load(f, se); - if (ret < 0) { -- error_report("error while loading state for instance 0x%x of" -+ error_report("error while loading state for instance 0x%"PRIx32" of" - " device '%s'", instance_id, idstr); - return ret; - } -diff --git a/stubs/vmstate.c b/stubs/vmstate.c -index e1e89b8..4ed5cc6 100644 ---- a/stubs/vmstate.c -+++ b/stubs/vmstate.c -@@ -4,7 +4,7 @@ - const VMStateDescription vmstate_dummy = {}; - - int vmstate_register_with_alias_id(DeviceState *dev, -- int instance_id, -+ uint32_t instance_id, - const VMStateDescription *vmsd, - void *base, int alias_id, - int required_for_version, --- -1.8.3.1 - diff --git a/SOURCES/kvm-migration-Create-migration_is_running.patch b/SOURCES/kvm-migration-Create-migration_is_running.patch deleted file mode 100644 index c9593de..0000000 --- a/SOURCES/kvm-migration-Create-migration_is_running.patch +++ /dev/null @@ -1,119 +0,0 @@ -From c9e3d13d70a24bf606ce351886b27bdca25ef4dc Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:41 +0000 -Subject: [PATCH 09/18] migration: Create migration_is_running() - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-9-quintela@redhat.com> -Patchwork-id: 94115 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 08/10] migration: Create migration_is_running() -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -This function returns true if we are in the middle of a migration. -It is like migration_is_setup_or_active() with CANCELLING and COLO. -Adapt all callers that are needed. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. 
David Alan Gilbert -(cherry picked from commit 392d87e21325fdb01210176faa07472b4985ccf0) -Signed-off-by: Danilo C. L. de Paula ---- - migration/migration.c | 29 ++++++++++++++++++++++++----- - migration/migration.h | 1 + - migration/savevm.c | 4 +--- - 3 files changed, 26 insertions(+), 8 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 30c53c6..eb50d77 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -831,6 +831,27 @@ bool migration_is_setup_or_active(int state) - } - } - -+bool migration_is_running(int state) -+{ -+ switch (state) { -+ case MIGRATION_STATUS_ACTIVE: -+ case MIGRATION_STATUS_POSTCOPY_ACTIVE: -+ case MIGRATION_STATUS_POSTCOPY_PAUSED: -+ case MIGRATION_STATUS_POSTCOPY_RECOVER: -+ case MIGRATION_STATUS_SETUP: -+ case MIGRATION_STATUS_PRE_SWITCHOVER: -+ case MIGRATION_STATUS_DEVICE: -+ case MIGRATION_STATUS_WAIT_UNPLUG: -+ case MIGRATION_STATUS_CANCELLING: -+ case MIGRATION_STATUS_COLO: -+ return true; -+ -+ default: -+ return false; -+ -+ } -+} -+ - static void populate_time_info(MigrationInfo *info, MigrationState *s) - { - info->has_status = true; -@@ -1090,7 +1111,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - MigrationCapabilityStatusList *cap; - bool cap_list[MIGRATION_CAPABILITY__MAX]; - -- if (migration_is_setup_or_active(s->state)) { -+ if (migration_is_running(s->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return; - } -@@ -1603,7 +1624,7 @@ static void migrate_fd_cancel(MigrationState *s) - - do { - old_state = s->state; -- if (!migration_is_setup_or_active(old_state)) { -+ if (!migration_is_running(old_state)) { - break; - } - /* If the migration is paused, kick it out of the pause */ -@@ -1900,9 +1921,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - return true; - } - -- if (migration_is_setup_or_active(s->state) || -- s->state == MIGRATION_STATUS_CANCELLING || -- s->state == MIGRATION_STATUS_COLO) { -+ if 
(migration_is_running(s->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return false; - } -diff --git a/migration/migration.h b/migration/migration.h -index 0b1b0d4..a2b2336 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -279,6 +279,7 @@ void migrate_fd_error(MigrationState *s, const Error *error); - void migrate_fd_connect(MigrationState *s, Error *error_in); - - bool migration_is_setup_or_active(int state); -+bool migration_is_running(int state); - - void migrate_init(MigrationState *s); - bool migration_is_blocked(Error **errp); -diff --git a/migration/savevm.c b/migration/savevm.c -index a80bb52..144ecf0 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1506,9 +1506,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - MigrationState *ms = migrate_get_current(); - MigrationStatus status; - -- if (migration_is_setup_or_active(ms->state) || -- ms->state == MIGRATION_STATUS_CANCELLING || -- ms->state == MIGRATION_STATUS_COLO) { -+ if (migration_is_running(ms->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return -EINVAL; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch b/SOURCES/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch deleted file mode 100644 index c2ead53..0000000 --- a/SOURCES/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch +++ /dev/null @@ -1,257 +0,0 @@ -From 2659af9267586fb626f543773bf3f844727e473b Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Fri, 31 Jan 2020 17:12:55 +0000 -Subject: [PATCH 13/15] migration: Define VMSTATE_INSTANCE_ID_ANY - -RH-Author: Peter Xu -Message-id: <20200131171257.1066593-2-peterx@redhat.com> -Patchwork-id: 93630 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/3] migration: Define VMSTATE_INSTANCE_ID_ANY -Bugzilla: 1529231 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. 
David Alan Gilbert - -Define the new macro VMSTATE_INSTANCE_ID_ANY for callers who wants to -auto-generate the vmstate instance ID. Previously it was hard coded -as -1 instead of this macro. It helps to change this default value in -the follow up patches. No functional change. - -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 1df2c9a26fcb2fa32d099f8e9adcdae4207872e3) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - backends/dbus-vmstate.c - File deleted - hw/core/qdev.c - hw/misc/max111x.c - hw/net/eepro100.c - Due to missing commit 3cad405bab ("vmstate: replace - DeviceState with VMStateIf", 2020-01-06) - -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/stellaris.c | 2 +- - hw/core/qdev.c | 3 ++- - hw/display/ads7846.c | 2 +- - hw/i2c/core.c | 2 +- - hw/input/stellaris_input.c | 3 ++- - hw/intc/apic_common.c | 2 +- - hw/misc/max111x.c | 2 +- - hw/net/eepro100.c | 2 +- - hw/pci/pci.c | 2 +- - hw/ppc/spapr.c | 2 +- - hw/timer/arm_timer.c | 2 +- - hw/tpm/tpm_emulator.c | 3 ++- - include/migration/vmstate.h | 2 ++ - migration/savevm.c | 8 ++++---- - 14 files changed, 21 insertions(+), 16 deletions(-) - -diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c -index b198066..bb025e0 100644 ---- a/hw/arm/stellaris.c -+++ b/hw/arm/stellaris.c -@@ -708,7 +708,7 @@ static int stellaris_sys_init(uint32_t base, qemu_irq irq, - memory_region_init_io(&s->iomem, NULL, &ssys_ops, s, "ssys", 0x00001000); - memory_region_add_subregion(get_system_memory(), base, &s->iomem); - ssys_reset(s); -- vmstate_register(NULL, -1, &vmstate_stellaris_sys, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_stellaris_sys, s); - return 0; - } - -diff --git a/hw/core/qdev.c b/hw/core/qdev.c -index cf1ba28..40f6b2b 100644 ---- a/hw/core/qdev.c -+++ b/hw/core/qdev.c -@@ -890,7 +890,8 @@ static void device_set_realized(Object *obj, bool value, Error **errp) - dev->canonical_path = 
object_get_canonical_path(OBJECT(dev)); - - if (qdev_get_vmsd(dev)) { -- if (vmstate_register_with_alias_id(dev, -1, qdev_get_vmsd(dev), dev, -+ if (vmstate_register_with_alias_id(dev, VMSTATE_INSTANCE_ID_ANY, -+ qdev_get_vmsd(dev), dev, - dev->instance_id_alias, - dev->alias_required_for_version, - &local_err) < 0) { -diff --git a/hw/display/ads7846.c b/hw/display/ads7846.c -index c12272a..9228b40 100644 ---- a/hw/display/ads7846.c -+++ b/hw/display/ads7846.c -@@ -154,7 +154,7 @@ static void ads7846_realize(SSISlave *d, Error **errp) - - ads7846_int_update(s); - -- vmstate_register(NULL, -1, &vmstate_ads7846, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_ads7846, s); - } - - static void ads7846_class_init(ObjectClass *klass, void *data) -diff --git a/hw/i2c/core.c b/hw/i2c/core.c -index 92cd489..d770035 100644 ---- a/hw/i2c/core.c -+++ b/hw/i2c/core.c -@@ -61,7 +61,7 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name) - - bus = I2C_BUS(qbus_create(TYPE_I2C_BUS, parent, name)); - QLIST_INIT(&bus->current_devs); -- vmstate_register(NULL, -1, &vmstate_i2c_bus, bus); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus); - return bus; - } - -diff --git a/hw/input/stellaris_input.c b/hw/input/stellaris_input.c -index 59892b0..e6ee5e1 100644 ---- a/hw/input/stellaris_input.c -+++ b/hw/input/stellaris_input.c -@@ -88,5 +88,6 @@ void stellaris_gamepad_init(int n, qemu_irq *irq, const int *keycode) - } - s->num_buttons = n; - qemu_add_kbd_event_handler(stellaris_gamepad_put_key, s); -- vmstate_register(NULL, -1, &vmstate_stellaris_gamepad, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, -+ &vmstate_stellaris_gamepad, s); - } -diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c -index 375cb6a..f2c3a7f 100644 ---- a/hw/intc/apic_common.c -+++ b/hw/intc/apic_common.c -@@ -284,7 +284,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) - } - - if (s->legacy_instance_id) { -- instance_id = -1; -+ 
instance_id = VMSTATE_INSTANCE_ID_ANY; - } - vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common, - s, -1, 0, NULL); -diff --git a/hw/misc/max111x.c b/hw/misc/max111x.c -index a713149..81ee73e 100644 ---- a/hw/misc/max111x.c -+++ b/hw/misc/max111x.c -@@ -146,7 +146,7 @@ static int max111x_init(SSISlave *d, int inputs) - s->input[7] = 0x80; - s->com = 0; - -- vmstate_register(dev, -1, &vmstate_max111x, s); -+ vmstate_register(dev, VMSTATE_INSTANCE_ID_ANY, &vmstate_max111x, s); - return 0; - } - -diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c -index cc2dd8b..39920c6 100644 ---- a/hw/net/eepro100.c -+++ b/hw/net/eepro100.c -@@ -1874,7 +1874,7 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp) - - s->vmstate = g_memdup(&vmstate_eepro100, sizeof(vmstate_eepro100)); - s->vmstate->name = qemu_get_queue(s->nic)->model; -- vmstate_register(&pci_dev->qdev, -1, s->vmstate, s); -+ vmstate_register(&pci_dev->qdev, VMSTATE_INSTANCE_ID_ANY, s->vmstate, s); - } - - static void eepro100_instance_init(Object *obj) -diff --git a/hw/pci/pci.c b/hw/pci/pci.c -index cbc7a32..fed019d 100644 ---- a/hw/pci/pci.c -+++ b/hw/pci/pci.c -@@ -124,7 +124,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp) - bus->machine_done.notify = pcibus_machine_done; - qemu_add_machine_init_done_notifier(&bus->machine_done); - -- vmstate_register(NULL, -1, &vmstate_pcibus, bus); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus); - } - - static void pcie_bus_realize(BusState *qbus, Error **errp) -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 8749c72..c12862d 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -3028,7 +3028,7 @@ static void spapr_machine_init(MachineState *machine) - * interface, this is a legacy from the sPAPREnvironment structure - * which predated MachineState but had a similar function */ - vmstate_register(NULL, 0, &vmstate_spapr, spapr); -- register_savevm_live("spapr/htab", -1, 1, -+ 
register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, - &savevm_htab_handlers, spapr); - - qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine), -diff --git a/hw/timer/arm_timer.c b/hw/timer/arm_timer.c -index af524fa..beaa285 100644 ---- a/hw/timer/arm_timer.c -+++ b/hw/timer/arm_timer.c -@@ -180,7 +180,7 @@ static arm_timer_state *arm_timer_init(uint32_t freq) - s->control = TIMER_CTRL_IE; - - s->timer = ptimer_init(arm_timer_tick, s, PTIMER_POLICY_DEFAULT); -- vmstate_register(NULL, -1, &vmstate_arm_timer, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_arm_timer, s); - return s; - } - -diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c -index 22f9113..da7b490 100644 ---- a/hw/tpm/tpm_emulator.c -+++ b/hw/tpm/tpm_emulator.c -@@ -914,7 +914,8 @@ static void tpm_emulator_inst_init(Object *obj) - tpm_emu->cur_locty_number = ~0; - qemu_mutex_init(&tpm_emu->mutex); - -- vmstate_register(NULL, -1, &vmstate_tpm_emulator, obj); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, -+ &vmstate_tpm_emulator, obj); - } - - /* -diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h -index ac4f46a..883f1cf 100644 ---- a/include/migration/vmstate.h -+++ b/include/migration/vmstate.h -@@ -1155,6 +1155,8 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, - - bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); - -+#define VMSTATE_INSTANCE_ID_ANY -1 -+ - /* Returns: 0 on success, -1 on failure */ - int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, - const VMStateDescription *vmsd, -diff --git a/migration/savevm.c b/migration/savevm.c -index a71b930..e2e8e0a 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -750,7 +750,7 @@ int register_savevm_live(const char *idstr, - - pstrcat(se->idstr, sizeof(se->idstr), idstr); - -- if (instance_id == -1) { -+ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { - se->instance_id = calculate_new_instance_id(se->idstr); - 
} else { - se->instance_id = instance_id; -@@ -817,14 +817,14 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, - - se->compat = g_new0(CompatEntry, 1); - pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); -- se->compat->instance_id = instance_id == -1 ? -+ se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ? - calculate_compat_instance_id(vmsd->name) : instance_id; -- instance_id = -1; -+ instance_id = VMSTATE_INSTANCE_ID_ANY; - } - } - pstrcat(se->idstr, sizeof(se->idstr), vmsd->name); - -- if (instance_id == -1) { -+ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { - se->instance_id = calculate_new_instance_id(se->idstr); - } else { - se->instance_id = instance_id; --- -1.8.3.1 - diff --git a/SOURCES/kvm-migration-Don-t-send-data-if-we-have-stopped.patch b/SOURCES/kvm-migration-Don-t-send-data-if-we-have-stopped.patch deleted file mode 100644 index 9a36714..0000000 --- a/SOURCES/kvm-migration-Don-t-send-data-if-we-have-stopped.patch +++ /dev/null @@ -1,42 +0,0 @@ -From ab07e0b41c50a85940d798a9a65a58698fd2edfb Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:40 +0000 -Subject: [PATCH 08/18] migration: Don't send data if we have stopped - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-8-quintela@redhat.com> -Patchwork-id: 94114 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 07/10] migration: Don't send data if we have stopped -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -If we do a cancel, we got out without one error, but we can't do the -rest of the output as in a normal situation. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -(cherry picked from commit b69a0227a803256ad270283872d40ff768f4d56d) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index a0257ee..902c56c 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3511,7 +3511,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_ROUND); - - out: -- if (ret >= 0) { -+ if (ret >= 0 -+ && migration_is_setup_or_active(migrate_get_current()->state)) { - multifd_send_sync_main(rs); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); --- -1.8.3.1 - diff --git a/SOURCES/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch b/SOURCES/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch deleted file mode 100644 index 01cb0f1..0000000 --- a/SOURCES/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 71b05ab5782aa1e38c016be6264a14f5650d2a87 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:35 +0000 -Subject: [PATCH 03/18] migration: Make sure that we don't call write() in case - of error - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-3-quintela@redhat.com> -Patchwork-id: 94113 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 02/10] migration: Make sure that we don't call write() in case of error -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -If we are exiting due to an error/finish/.... Just don't try to even -touch the channel with one IO operation. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Juan Quintela -(cherry picked from commit 4d65a6216bfc44891ac298b74a6921d479805131) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 25 +++++++++++++++++++++++++ - 1 file changed, 25 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 65580e3..8c783b3 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -899,6 +899,12 @@ struct { - uint64_t packet_num; - /* send channels ready */ - QemuSemaphore channels_ready; -+ /* -+ * Have we already run terminate threads. There is a race when it -+ * happens that we got one error while we are exiting. -+ * We will use atomic operations. Only valid values are 0 and 1. -+ */ -+ int exiting; - } *multifd_send_state; - - /* -@@ -927,6 +933,10 @@ static int multifd_send_pages(RAMState *rs) - MultiFDPages_t *pages = multifd_send_state->pages; - uint64_t transferred; - -+ if (atomic_read(&multifd_send_state->exiting)) { -+ return -1; -+ } -+ - qemu_sem_wait(&multifd_send_state->channels_ready); - for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { - p = &multifd_send_state->params[i]; -@@ -1008,6 +1018,16 @@ static void multifd_send_terminate_threads(Error *err) - } - } - -+ /* -+ * We don't want to exit each threads twice. Depending on where -+ * we get the error, or if there are two independent errors in two -+ * threads at the same time, we can end calling this function -+ * twice. 
-+ */ -+ if (atomic_xchg(&multifd_send_state->exiting, 1)) { -+ return; -+ } -+ - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; - -@@ -1117,6 +1137,10 @@ static void *multifd_send_thread(void *opaque) - - while (true) { - qemu_sem_wait(&p->sem); -+ -+ if (atomic_read(&multifd_send_state->exiting)) { -+ break; -+ } - qemu_mutex_lock(&p->mutex); - - if (p->pending_job) { -@@ -1225,6 +1249,7 @@ int multifd_save_setup(void) - multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); - multifd_send_state->pages = multifd_pages_init(page_count); - qemu_sem_init(&multifd_send_state->channels_ready, 0); -+ atomic_set(&multifd_send_state->exiting, 0); - - for (i = 0; i < thread_count; i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; --- -1.8.3.1 - diff --git a/SOURCES/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch b/SOURCES/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch deleted file mode 100644 index 4a7fb28..0000000 --- a/SOURCES/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 3c4f6f0c2bf5562f2aa26f964848ae53e6ac4790 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:43 +0000 -Subject: [PATCH 11/18] migration: Maybe VM is paused when migration is - cancelled - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-11-quintela@redhat.com> -Patchwork-id: 94120 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 10/10] migration: Maybe VM is paused when migration is cancelled -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Zhimin Feng - -If the migration is cancelled when it is in the completion phase, -the migration state is set to MIGRATION_STATUS_CANCELLING. -The VM maybe wait for the 'pause_sem' semaphore in migration_maybe_pause -function, so that VM always is paused. 
- -Reported-by: Euler Robot -Signed-off-by: Zhimin Feng -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 8958338b10abcb346b54a8038a491fda2db1c853) -Signed-off-by: Danilo C. L. de Paula ---- - migration/migration.c | 24 ++++++++++++++++-------- - 1 file changed, 16 insertions(+), 8 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index eb50d77..ed18c59 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2786,14 +2786,22 @@ static int migration_maybe_pause(MigrationState *s, - /* This block intentionally left blank */ - } - -- qemu_mutex_unlock_iothread(); -- migrate_set_state(&s->state, *current_active_state, -- MIGRATION_STATUS_PRE_SWITCHOVER); -- qemu_sem_wait(&s->pause_sem); -- migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, -- new_state); -- *current_active_state = new_state; -- qemu_mutex_lock_iothread(); -+ /* -+ * If the migration is cancelled when it is in the completion phase, -+ * the migration state is set to MIGRATION_STATUS_CANCELLING. -+ * So we don't need to wait a semaphore, otherwise we would always -+ * wait for the 'pause_sem' semaphore. -+ */ -+ if (s->state != MIGRATION_STATUS_CANCELLING) { -+ qemu_mutex_unlock_iothread(); -+ migrate_set_state(&s->state, *current_active_state, -+ MIGRATION_STATUS_PRE_SWITCHOVER); -+ qemu_sem_wait(&s->pause_sem); -+ migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, -+ new_state); -+ *current_active_state = new_state; -+ qemu_mutex_lock_iothread(); -+ } - - return s->state == new_state ? 
0 : -EINVAL; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-migration-Rate-limit-inside-host-pages.patch b/SOURCES/kvm-migration-Rate-limit-inside-host-pages.patch deleted file mode 100644 index 2d3d519..0000000 --- a/SOURCES/kvm-migration-Rate-limit-inside-host-pages.patch +++ /dev/null @@ -1,172 +0,0 @@ -From 8e8f421cce99543081f225acf46541312cfbc371 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Tue, 17 Mar 2020 17:05:18 +0000 -Subject: [PATCH 1/2] migration: Rate limit inside host pages - -RH-Author: Laurent Vivier -Message-id: <20200317170518.9303-1-lvivier@redhat.com> -Patchwork-id: 94374 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] migration: Rate limit inside host pages -Bugzilla: 1814336 -RH-Acked-by: Peter Xu -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert - -From: "Dr. David Alan Gilbert" - -When using hugepages, rate limiting is necessary within each huge -page, since a 1G huge page can take a significant time to send, so -you end up with bursty behaviour. - -Fixes: 4c011c37ecb3 ("postcopy: Send whole huge pages") -Reported-by: Lin Ma -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Juan Quintela -Reviewed-by: Peter Xu -Signed-off-by: Juan Quintela -(cherry picked from commit 97e1e06780e70f6e98a0d2df881e0c0927d3aeb6) -Signed-off-by: Laurent Vivier - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1814336 -BRANCH: rhel-av-8.2.0 -UPSTREAM: Merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27283241 -TESTED: Tested that the migration abort doesn't trigger an error message in - the kernel logs on P9 - -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/migration.c | 57 ++++++++++++++++++++++++++++---------------------- - migration/migration.h | 1 + - migration/ram.c | 2 ++ - migration/trace-events | 4 ++-- - 4 files changed, 37 insertions(+), 27 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index ed18c59..e31d0f5 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3253,6 +3253,37 @@ void migration_consume_urgent_request(void) - qemu_sem_wait(&migrate_get_current()->rate_limit_sem); - } - -+/* Returns true if the rate limiting was broken by an urgent request */ -+bool migration_rate_limit(void) -+{ -+ int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -+ MigrationState *s = migrate_get_current(); -+ -+ bool urgent = false; -+ migration_update_counters(s, now); -+ if (qemu_file_rate_limit(s->to_dst_file)) { -+ /* -+ * Wait for a delay to do rate limiting OR -+ * something urgent to post the semaphore. -+ */ -+ int ms = s->iteration_start_time + BUFFER_DELAY - now; -+ trace_migration_rate_limit_pre(ms); -+ if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { -+ /* -+ * We were woken by one or more urgent things but -+ * the timedwait will have consumed one of them. -+ * The service routine for the urgent wake will dec -+ * the semaphore itself for each item it consumes, -+ * so add this one we just eat back. -+ */ -+ qemu_sem_post(&s->rate_limit_sem); -+ urgent = true; -+ } -+ trace_migration_rate_limit_post(urgent); -+ } -+ return urgent; -+} -+ - /* - * Master migration thread on the source VM. - * It drives the migration and pumps the data down the outgoing channel. 
-@@ -3319,8 +3350,6 @@ static void *migration_thread(void *opaque) - trace_migration_thread_setup_complete(); - - while (migration_is_active(s)) { -- int64_t current_time; -- - if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { - MigIterateState iter_state = migration_iteration_run(s); - if (iter_state == MIG_ITERATE_SKIP) { -@@ -3347,29 +3376,7 @@ static void *migration_thread(void *opaque) - update_iteration_initial_status(s); - } - -- current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -- -- migration_update_counters(s, current_time); -- -- urgent = false; -- if (qemu_file_rate_limit(s->to_dst_file)) { -- /* Wait for a delay to do rate limiting OR -- * something urgent to post the semaphore. -- */ -- int ms = s->iteration_start_time + BUFFER_DELAY - current_time; -- trace_migration_thread_ratelimit_pre(ms); -- if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { -- /* We were worken by one or more urgent things but -- * the timedwait will have consumed one of them. -- * The service routine for the urgent wake will dec -- * the semaphore itself for each item it consumes, -- * so add this one we just eat back. 
-- */ -- qemu_sem_post(&s->rate_limit_sem); -- urgent = true; -- } -- trace_migration_thread_ratelimit_post(urgent); -- } -+ urgent = migration_rate_limit(); - } - - trace_migration_thread_after_loop(); -diff --git a/migration/migration.h b/migration/migration.h -index a2b2336..a15e8d8 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -347,5 +347,6 @@ extern bool migrate_pre_2_2; - - void migration_make_urgent_request(void); - void migration_consume_urgent_request(void); -+bool migration_rate_limit(void); - - #endif -diff --git a/migration/ram.c b/migration/ram.c -index 3891eff..5344c7d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2661,6 +2661,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, - - pages += tmppages; - pss->page++; -+ /* Allow rate limiting to happen in the middle of huge pages */ -+ migration_rate_limit(); - } while ((pss->page & (pagesize_bits - 1)) && - offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); - -diff --git a/migration/trace-events b/migration/trace-events -index 6dee7b5..2f9129e 100644 ---- a/migration/trace-events -+++ b/migration/trace-events -@@ -138,12 +138,12 @@ migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi6 - migration_completion_file_err(void) "" - migration_completion_postcopy_end(void) "" - migration_completion_postcopy_end_after_complete(void) "" -+migration_rate_limit_pre(int ms) "%d ms" -+migration_rate_limit_post(int urgent) "urgent: %d" - migration_return_path_end_before(void) "" - migration_return_path_end_after(int rp_error) "%d" - migration_thread_after_loop(void) "" - migration_thread_file_err(void) "" --migration_thread_ratelimit_pre(int ms) "%d ms" --migration_thread_ratelimit_post(int urgent) "urgent: %d" - migration_thread_setup_complete(void) "" - open_return_path_on_source(void) "" - open_return_path_on_source_continue(void) "" --- -1.8.3.1 - diff --git 
a/SOURCES/kvm-migration-multifd-clean-pages-after-filling-packet.patch b/SOURCES/kvm-migration-multifd-clean-pages-after-filling-packet.patch deleted file mode 100644 index 5fa7fde..0000000 --- a/SOURCES/kvm-migration-multifd-clean-pages-after-filling-packet.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 32ee75b7f4a31d6080e5659e2a0285a046ef1036 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:34 +0000 -Subject: [PATCH 02/18] migration/multifd: clean pages after filling packet - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-2-quintela@redhat.com> -Patchwork-id: 94112 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 01/10] migration/multifd: clean pages after filling packet -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Wei Yang - -This is a preparation for the next patch: - - not use multifd during postcopy. - -Without enabling postcopy, everything looks good. While after enabling -postcopy, migration may fail even not use multifd during postcopy. The -reason is the pages is not properly cleared and *old* target page will -continue to be transferred. - -After clean pages, migration succeeds. - -Signed-off-by: Wei Yang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit eab54aa78ffd9fb7895b20fc2761ee998479489b) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 5078f94..65580e3 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -944,10 +944,10 @@ static int multifd_send_pages(RAMState *rs) - } - qemu_mutex_unlock(&p->mutex); - } -- p->pages->used = 0; -+ assert(!p->pages->used); -+ assert(!p->pages->block); - - p->packet_num = multifd_send_state->packet_num++; -- p->pages->block = NULL; - multifd_send_state->pages = p->pages; - p->pages = pages; - transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; -@@ -1129,6 +1129,8 @@ static void *multifd_send_thread(void *opaque) - p->flags = 0; - p->num_packets++; - p->num_pages += used; -+ p->pages->used = 0; -+ p->pages->block = NULL; - qemu_mutex_unlock(&p->mutex); - - trace_multifd_send(p->id, packet_num, used, flags, --- -1.8.3.1 - diff --git a/SOURCES/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch b/SOURCES/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch deleted file mode 100644 index 0c5fe80..0000000 --- a/SOURCES/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 2c14a6831954a59256cc8d1980da0ad705a3a3fa Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:37 +0000 -Subject: [PATCH 05/18] migration/multifd: fix destroyed mutex access in - terminating multifd threads - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-5-quintela@redhat.com> -Patchwork-id: 94119 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 04/10] migration/multifd: fix destroyed mutex access in terminating multifd threads -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Jiahui Cen - -One multifd will lock all the other multifds' IOChannel mutex to inform them -to quit by setting p->quit or shutting down p->c. 
In this senario, if some -multifds had already been terminated and multifd_load_cleanup/multifd_save_cleanup -had destroyed their mutex, it could cause destroyed mutex access when trying -lock their mutex. - -Here is the coredump stack: - #0 0x00007f81a2794437 in raise () from /usr/lib64/libc.so.6 - #1 0x00007f81a2795b28 in abort () from /usr/lib64/libc.so.6 - #2 0x00007f81a278d1b6 in __assert_fail_base () from /usr/lib64/libc.so.6 - #3 0x00007f81a278d262 in __assert_fail () from /usr/lib64/libc.so.6 - #4 0x000055eb1bfadbd3 in qemu_mutex_lock_impl (mutex=0x55eb1e2d1988, file=, line=) at util/qemu-thread-posix.c:64 - #5 0x000055eb1bb4564a in multifd_send_terminate_threads (err=) at migration/ram.c:1015 - #6 0x000055eb1bb4bb7f in multifd_send_thread (opaque=0x55eb1e2d19f8) at migration/ram.c:1171 - #7 0x000055eb1bfad628 in qemu_thread_start (args=0x55eb1e170450) at util/qemu-thread-posix.c:502 - #8 0x00007f81a2b36df5 in start_thread () from /usr/lib64/libpthread.so.0 - #9 0x00007f81a286048d in clone () from /usr/lib64/libc.so.6 - -To fix it up, let's destroy the mutex after all the other multifd threads had -been terminated. - -Signed-off-by: Jiahui Cen -Signed-off-by: Ying Fang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 9560a48ecc0c20d87bc458a6db77fba651605819) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 860f781..6c55c5d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1052,6 +1052,10 @@ void multifd_save_cleanup(void) - if (p->running) { - qemu_thread_join(&p->thread); - } -+ } -+ for (i = 0; i < migrate_multifd_channels(); i++) { -+ MultiFDSendParams *p = &multifd_send_state->params[i]; -+ - socket_send_channel_destroy(p->c); - p->c = NULL; - qemu_mutex_destroy(&p->mutex); -@@ -1335,6 +1339,10 @@ int multifd_load_cleanup(Error **errp) - qemu_sem_post(&p->sem_sync); - qemu_thread_join(&p->thread); - } -+ } -+ for (i = 0; i < migrate_multifd_channels(); i++) { -+ MultiFDRecvParams *p = &multifd_recv_state->params[i]; -+ - object_unref(OBJECT(p->c)); - p->c = NULL; - qemu_mutex_destroy(&p->mutex); --- -1.8.3.1 - diff --git a/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch b/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch deleted file mode 100644 index 9e9683c..0000000 --- a/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 517a99c5fba163bf684978fe3d9476b619481391 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:42 +0000 -Subject: [PATCH 10/18] migration/multifd: fix nullptr access in - multifd_send_terminate_threads - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-10-quintela@redhat.com> -Patchwork-id: 94117 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 09/10] migration/multifd: fix nullptr access in multifd_send_terminate_threads -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Zhimin Feng - -If the multifd_send_threads is not created when migration is failed, -multifd_save_cleanup would be called twice. 
In this senario, the -multifd_send_state is accessed after it has been released, the result -is that the source VM is crashing down. - -Here is the coredump stack: - Program received signal SIGSEGV, Segmentation fault. - 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 - 1012 MultiFDSendParams *p = &multifd_send_state->params[i]; - #0 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 - #1 0x00005629333ab8a9 in multifd_save_cleanup () at migration/ram.c:1028 - #2 0x00005629333abaea in multifd_new_send_channel_async (task=0x562935450e70, opaque=) at migration/ram.c:1202 - #3 0x000056293373a562 in qio_task_complete (task=task@entry=0x562935450e70) at io/task.c:196 - #4 0x000056293373a6e0 in qio_task_thread_result (opaque=0x562935450e70) at io/task.c:111 - #5 0x00007f475d4d75a7 in g_idle_dispatch () from /usr/lib64/libglib-2.0.so.0 - #6 0x00007f475d4da9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 - #7 0x0000562933785b33 in glib_pollfds_poll () at util/main-loop.c:219 - #8 os_host_main_loop_wait (timeout=) at util/main-loop.c:242 - #9 main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:518 - #10 0x00005629334c5acf in main_loop () at vl.c:1810 - #11 0x000056293334d7bb in main (argc=, argv=, envp=) at vl.c:4471 - -If the multifd_send_threads is not created when migration is failed. -In this senario, we don't call multifd_save_cleanup in multifd_new_send_channel_async. - -Signed-off-by: Zhimin Feng -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 9c4d333c092e9c26d38f740ff3616deb42f21681) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 902c56c..3891eff 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1229,7 +1229,15 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) - trace_multifd_new_send_channel_async(p->id); - if (qio_task_propagate_error(task, &local_err)) { - migrate_set_error(migrate_get_current(), local_err); -- multifd_save_cleanup(); -+ /* Error happen, we need to tell who pay attention to me */ -+ qemu_sem_post(&multifd_send_state->channels_ready); -+ qemu_sem_post(&p->sem_sync); -+ /* -+ * Although multifd_send_thread is not created, but main migration -+ * thread neet to judge whether it is running, so we need to mark -+ * its status. -+ */ -+ p->quit = true; - } else { - p->c = QIO_CHANNEL(sioc); - qio_channel_set_delay(p->c, false); --- -1.8.3.1 - diff --git a/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch b/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch deleted file mode 100644 index e780698..0000000 --- a/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 7f664fe26ff67f8131faa7a81a388b8a5b51403f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:36 +0000 -Subject: [PATCH 04/18] migration/multifd: fix nullptr access in terminating - multifd threads - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-4-quintela@redhat.com> -Patchwork-id: 94110 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 03/10] migration/multifd: fix nullptr access in terminating multifd threads -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Jiahui Cen - -One multifd channel will shutdown all the other multifd's IOChannel when it -fails to receive an IOChannel. 
In this senario, if some multifds had not -received its IOChannel yet, it would try to shutdown its IOChannel which could -cause nullptr access at qio_channel_shutdown. - -Here is the coredump stack: - #0 object_get_class (obj=obj@entry=0x0) at qom/object.c:908 - #1 0x00005563fdbb8f4a in qio_channel_shutdown (ioc=0x0, how=QIO_CHANNEL_SHUTDOWN_BOTH, errp=0x0) at io/channel.c:355 - #2 0x00005563fd7b4c5f in multifd_recv_terminate_threads (err=) at migration/ram.c:1280 - #3 0x00005563fd7bc019 in multifd_recv_new_channel (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce00) at migration/ram.c:1478 - #4 0x00005563fda82177 in migration_ioc_process_incoming (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce30) at migration/migration.c:605 - #5 0x00005563fda8567d in migration_channel_process_incoming (ioc=0x556400255610) at migration/channel.c:44 - #6 0x00005563fda83ee0 in socket_accept_incoming_migration (listener=0x5563fff6b920, cioc=0x556400255610, opaque=) at migration/socket.c:166 - #7 0x00005563fdbc25cd in qio_net_listener_channel_func (ioc=, condition=, opaque=) at io/net-listener.c:54 - #8 0x00007f895b6fe9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 - #9 0x00005563fdc18136 in glib_pollfds_poll () at util/main-loop.c:218 - #10 0x00005563fdc181b5 in os_host_main_loop_wait (timeout=1000000000) at util/main-loop.c:241 - #11 0x00005563fdc183a2 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:517 - #12 0x00005563fd8edb37 in main_loop () at vl.c:1791 - #13 0x00005563fd74fd45 in main (argc=, argv=, envp=) at vl.c:4473 - -To fix it up, let's check p->c before calling qio_channel_shutdown. - -Signed-off-by: Jiahui Cen -Signed-off-by: Ying Fang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit f76e32eb05041ab001184ab16afb56524adccd0c) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 8c783b3..860f781 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1307,7 +1307,9 @@ static void multifd_recv_terminate_threads(Error *err) - - normal quit, i.e. everything went fine, just finished - - error quit: We close the channels so the channel threads - finish the qio_channel_read_all_eof() */ -- qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); -+ if (p->c) { -+ qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); -+ } - qemu_mutex_unlock(&p->mutex); - } - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch b/SOURCES/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch deleted file mode 100644 index c20cb6c..0000000 --- a/SOURCES/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 261ee33e0e6711fadd3049e4640bb731ee3d44ff Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 24 Feb 2020 16:57:10 +0000 -Subject: [PATCH 9/9] mirror: Don't let an operation wait for itself - -RH-Author: Kevin Wolf -Message-id: <20200224165710.4830-3-kwolf@redhat.com> -Patchwork-id: 94045 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] mirror: Don't let an operation wait for itself -Bugzilla: 1794692 -RH-Acked-by: John Snow -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -mirror_wait_for_free_in_flight_slot() just picks a random operation to -wait for. However, when mirror_co_read() waits for free slots, its -MirrorOp is already in s->ops_in_flight, so if not enough slots are -immediately available, an operation can end up waiting for itself to -complete, which results in a hang. - -Fix this by passing the current MirrorOp and skipping this operation -when picking an operation to wait for. 
- -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -(cherry picked from commit 7e6c4ff792734e196c8ca82564c56b5e7c6288ca) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/mirror.c | 21 ++++++++++++--------- - 1 file changed, 12 insertions(+), 9 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 8959e42..cacbc70 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -283,11 +283,14 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, - } - - static inline void coroutine_fn --mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) -+mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) - { - MirrorOp *op; - - QTAILQ_FOREACH(op, &s->ops_in_flight, next) { -+ if (self == op) { -+ continue; -+ } - /* Do not wait on pseudo ops, because it may in turn wait on - * some other operation to start, which may in fact be the - * caller of this function. Since there is only one pseudo op -@@ -302,10 +305,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) - } - - static inline void coroutine_fn --mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) -+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self) - { - /* Only non-active operations use up in-flight slots */ -- mirror_wait_for_any_operation(s, false); -+ mirror_wait_for_any_operation(s, self, false); - } - - /* Perform a mirror copy operation. 
-@@ -348,7 +351,7 @@ static void coroutine_fn mirror_co_read(void *opaque) - - while (s->buf_free_count < nb_chunks) { - trace_mirror_yield_in_flight(s, op->offset, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, op); - } - - /* Now make a QEMUIOVector taking enough granularity-sized chunks -@@ -555,7 +558,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) - - while (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield_in_flight(s, offset, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, pseudo_op); - } - - if (s->ret < 0) { -@@ -609,7 +612,7 @@ static void mirror_free_init(MirrorBlockJob *s) - static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) - { - while (s->in_flight > 0) { -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, NULL); - } - } - -@@ -794,7 +797,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) - if (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, - s->in_flight); -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, NULL); - continue; - } - -@@ -947,7 +950,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - /* Do not start passive operations while there are active - * writes in progress */ - while (s->in_active_write_counter) { -- mirror_wait_for_any_operation(s, true); -+ mirror_wait_for_any_operation(s, NULL, true); - } - - if (s->ret < 0) { -@@ -973,7 +976,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || - (cnt == 0 && s->in_flight > 0)) { - trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, NULL); - continue; - } else if (cnt != 0) { - delay_ns = mirror_iteration(s); --- -1.8.3.1 - diff --git 
a/SOURCES/kvm-mirror-Make-sure-that-source-and-target-size-match.patch b/SOURCES/kvm-mirror-Make-sure-that-source-and-target-size-match.patch deleted file mode 100644 index 09d1152..0000000 --- a/SOURCES/kvm-mirror-Make-sure-that-source-and-target-size-match.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 98bf67db979927a5c7bbdc4a17c35d60b5f38e71 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 3 Jun 2020 16:03:24 +0100 -Subject: [PATCH 25/26] mirror: Make sure that source and target size match - -RH-Author: Kevin Wolf -Message-id: <20200603160325.67506-11-kwolf@redhat.com> -Patchwork-id: 97110 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 10/11] mirror: Make sure that source and target size match -Bugzilla: 1778593 -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -If the target is shorter than the source, mirror would copy data until -it reaches the end of the target and then fail with an I/O error when -trying to write past the end. - -If the target is longer than the source, the mirror job would complete -successfully, but the target wouldn't actually be an accurate copy of -the source image (it would contain some additional garbage at the end). - -Fix this by checking that both images have the same size when the job -starts. - -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Message-Id: <20200511135825.219437-4-kwolf@redhat.com> -Reviewed-by: Max Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit e83dd6808c6e0975970f37b49b27cc37bb54eea8) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/mirror.c | 21 ++++++++++++--------- - 1 file changed, 12 insertions(+), 9 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 5e5a521..0d32fca 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -859,6 +859,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - BlockDriverState *target_bs = blk_bs(s->target); - bool need_drain = true; - int64_t length; -+ int64_t target_length; - BlockDriverInfo bdi; - char backing_filename[2]; /* we only need 2 characters because we are only - checking for a NULL string */ -@@ -874,24 +875,26 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - goto immediate_exit; - } - -+ target_length = blk_getlength(s->target); -+ if (target_length < 0) { -+ ret = target_length; -+ goto immediate_exit; -+ } -+ - /* Active commit must resize the base image if its size differs from the - * active layer. */ - if (s->base == blk_bs(s->target)) { -- int64_t base_length; -- -- base_length = blk_getlength(s->target); -- if (base_length < 0) { -- ret = base_length; -- goto immediate_exit; -- } -- -- if (s->bdev_length > base_length) { -+ if (s->bdev_length > target_length) { - ret = blk_truncate(s->target, s->bdev_length, false, - PREALLOC_MODE_OFF, NULL); - if (ret < 0) { - goto immediate_exit; - } - } -+ } else if (s->bdev_length != target_length) { -+ error_setg(errp, "Source and target image have different sizes"); -+ ret = -EINVAL; -+ goto immediate_exit; - } - - if (s->bdev_length == 0) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch b/SOURCES/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch deleted file mode 100644 index 67f3e54..0000000 --- a/SOURCES/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 27fe3b8d42a2c99de01ce20e4b0727079c12da65 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 24 Feb 2020 16:57:09 +0000 -Subject: [PATCH 8/9] mirror: Store MirrorOp.co for debuggability - 
-RH-Author: Kevin Wolf -Message-id: <20200224165710.4830-2-kwolf@redhat.com> -Patchwork-id: 94044 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] mirror: Store MirrorOp.co for debuggability -Bugzilla: 1794692 -RH-Acked-by: John Snow -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -If a coroutine is launched, but the coroutine pointer isn't stored -anywhere, debugging any problems inside the coroutine is quite hard. -Let's store the coroutine pointer of a mirror operation in MirrorOp to -have it available in the debugger. - -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -(cherry picked from commit eed325b92c3e68417121ea23f96e33af6a4654ed) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/mirror.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/block/mirror.c b/block/mirror.c -index f0f2d9d..8959e42 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -103,6 +103,7 @@ struct MirrorOp { - bool is_pseudo_op; - bool is_active_write; - CoQueue waiting_requests; -+ Coroutine *co; - - QTAILQ_ENTRY(MirrorOp) next; - }; -@@ -429,6 +430,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, - default: - abort(); - } -+ op->co = co; - - QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); - qemu_coroutine_enter(co); --- -1.8.3.1 - diff --git a/SOURCES/kvm-mirror-Wait-only-for-in-flight-operations.patch b/SOURCES/kvm-mirror-Wait-only-for-in-flight-operations.patch deleted file mode 100644 index a06d30e..0000000 --- a/SOURCES/kvm-mirror-Wait-only-for-in-flight-operations.patch +++ /dev/null @@ -1,95 +0,0 @@ -From bddf389330e11fb0ce17413c1bfa2264a281ded2 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 30 Mar 2020 11:19:24 +0100 -Subject: [PATCH 4/4] mirror: Wait only for in-flight operations - -RH-Author: Kevin Wolf -Message-id: <20200330111924.22938-3-kwolf@redhat.com> -Patchwork-id: 94463 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] mirror: Wait only for in-flight operations -Bugzilla: 1794692 -RH-Acked-by: 
Maxim Levitsky -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -mirror_wait_for_free_in_flight_slot() just picks a random operation to -wait for. However, a MirrorOp is already in s->ops_in_flight when -mirror_co_read() waits for free slots, so if not enough slots are -immediately available, an operation can end up waiting for itself, or -two or more operations can wait for each other to complete, which -results in a hang. - -Fix this by adding a flag to MirrorOp that tells us if the request is -already in flight (and therefore occupies slots that it will later -free), and picking only such operations for waiting. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 -Signed-off-by: Kevin Wolf -Message-Id: <20200326153628.4869-3-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit ce8cabbd17cf738ddfc68384440c38e5dd2fdf97) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/mirror.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 8959e42..5e5a521 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -102,6 +102,7 @@ struct MirrorOp { - - bool is_pseudo_op; - bool is_active_write; -+ bool is_in_flight; - CoQueue waiting_requests; - Coroutine *co; - -@@ -293,7 +294,9 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) - * caller of this function. Since there is only one pseudo op - * at any given time, we will always find some real operation - * to wait on. */ -- if (!op->is_pseudo_op && op->is_active_write == active) { -+ if (!op->is_pseudo_op && op->is_in_flight && -+ op->is_active_write == active) -+ { - qemu_co_queue_wait(&op->waiting_requests, NULL); - return; - } -@@ -367,6 +370,7 @@ static void coroutine_fn mirror_co_read(void *opaque) - /* Copy the dirty cluster. 
*/ - s->in_flight++; - s->bytes_in_flight += op->bytes; -+ op->is_in_flight = true; - trace_mirror_one_iteration(s, op->offset, op->bytes); - - ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes, -@@ -382,6 +386,7 @@ static void coroutine_fn mirror_co_zero(void *opaque) - op->s->in_flight++; - op->s->bytes_in_flight += op->bytes; - *op->bytes_handled = op->bytes; -+ op->is_in_flight = true; - - ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, - op->s->unmap ? BDRV_REQ_MAY_UNMAP : 0); -@@ -396,6 +401,7 @@ static void coroutine_fn mirror_co_discard(void *opaque) - op->s->in_flight++; - op->s->bytes_in_flight += op->bytes; - *op->bytes_handled = op->bytes; -+ op->is_in_flight = true; - - ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes); - mirror_write_complete(op, ret); -@@ -1306,6 +1312,7 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, - .offset = offset, - .bytes = bytes, - .is_active_write = true, -+ .is_in_flight = true, - }; - qemu_co_queue_init(&op->waiting_requests); - QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); --- -1.8.3.1 - diff --git a/SOURCES/kvm-misc-Replace-zero-length-arrays-with-flexible-array-.patch b/SOURCES/kvm-misc-Replace-zero-length-arrays-with-flexible-array-.patch deleted file mode 100644 index eb4e9af..0000000 --- a/SOURCES/kvm-misc-Replace-zero-length-arrays-with-flexible-array-.patch +++ /dev/null @@ -1,255 +0,0 @@ -From 67878e1306f9ea6ccd30437327147c46de196a36 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 11 Nov 2020 12:03:13 -0500 -Subject: [PATCH 13/18] misc: Replace zero-length arrays with flexible array - member (manual) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20201111120316.707489-10-thuth@redhat.com> -Patchwork-id: 99506 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 09/12] misc: Replace zero-length arrays with flexible array member (manual) -Bugzilla: 1798506 
-RH-Acked-by: Jens Freimann -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Philippe Mathieu-Daudé - -Description copied from Linux kernel commit from Gustavo A. R. Silva -(see [3]): - ---v-- description start --v-- - - The current codebase makes use of the zero-length array language - extension to the C90 standard, but the preferred mechanism to - declare variable-length types such as these ones is a flexible - array member [1], introduced in C99: - - struct foo { - int stuff; - struct boo array[]; - }; - - By making use of the mechanism above, we will get a compiler - warning in case the flexible array does not occur last in the - structure, which will help us prevent some kind of undefined - behavior bugs from being unadvertenly introduced [2] to the - Linux codebase from now on. - ---^-- description end --^-- - -Do the similar housekeeping in the QEMU codebase (which uses -C99 since commit 7be41675f7cb). - -All these instances of code were found with the help of the -following command (then manual analysis, without modifying -structures only having a single flexible array member, such -QEDTable in block/qed.h): - - git grep -F '[0];' - -[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html -[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=76497732932f -[3] https://git.kernel.org/pub/scm/linux/kernel/git/gustavoars/linux.git/commit/?id=17642a2fbd2c1 - -Inspired-by: Gustavo A. R. Silva -Reviewed-by: David Hildenbrand -Signed-off-by: Philippe Mathieu-Daudé -Signed-off-by: Paolo Bonzini -(cherry picked from commit 880a7817c1a82a93d3f83dfb25dce1f0db629c66) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. 
de Paula ---- - block/vmdk.c | 2 +- - docs/interop/vhost-user.rst | 4 ++-- - hw/char/sclpconsole-lm.c | 2 +- - hw/char/sclpconsole.c | 2 +- - hw/s390x/virtio-ccw.c | 2 +- - include/hw/acpi/acpi-defs.h | 4 ++-- - include/hw/boards.h | 2 +- - include/hw/s390x/event-facility.h | 2 +- - include/hw/s390x/sclp.h | 8 ++++---- - target/s390x/ioinst.c | 2 +- - 10 files changed, 15 insertions(+), 15 deletions(-) - -diff --git a/block/vmdk.c b/block/vmdk.c -index 1bd39917290..8ec18f35a53 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -187,7 +187,7 @@ typedef struct VmdkMetaData { - typedef struct VmdkGrainMarker { - uint64_t lba; - uint32_t size; -- uint8_t data[0]; -+ uint8_t data[]; - } QEMU_PACKED VmdkGrainMarker; - - enum { -diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst -index 7827b710aa0..71b20ce83dd 100644 ---- a/docs/interop/vhost-user.rst -+++ b/docs/interop/vhost-user.rst -@@ -563,7 +563,7 @@ For split virtqueue, queue region can be implemented as: - uint16_t used_idx; - - /* Used to track the state of each descriptor in descriptor table */ -- DescStateSplit desc[0]; -+ DescStateSplit desc[]; - } QueueRegionSplit; - - To track inflight I/O, the queue region should be processed as follows: -@@ -685,7 +685,7 @@ For packed virtqueue, queue region can be implemented as: - uint8_t padding[7]; - - /* Used to track the state of each descriptor fetched from descriptor ring */ -- DescStatePacked desc[0]; -+ DescStatePacked desc[]; - } QueueRegionPacked; - - To track inflight I/O, the queue region should be processed as follows: -diff --git a/hw/char/sclpconsole-lm.c b/hw/char/sclpconsole-lm.c -index 392606259d5..a9a6f2b204c 100644 ---- a/hw/char/sclpconsole-lm.c -+++ b/hw/char/sclpconsole-lm.c -@@ -31,7 +31,7 @@ - typedef struct OprtnsCommand { - EventBufferHeader header; - MDMSU message_unit; -- char data[0]; -+ char data[]; - } QEMU_PACKED OprtnsCommand; - - /* max size for line-mode data in 4K SCCB page */ -diff --git a/hw/char/sclpconsole.c 
b/hw/char/sclpconsole.c -index da126f0133f..55697130a0a 100644 ---- a/hw/char/sclpconsole.c -+++ b/hw/char/sclpconsole.c -@@ -25,7 +25,7 @@ - - typedef struct ASCIIConsoleData { - EventBufferHeader ebh; -- char data[0]; -+ char data[]; - } QEMU_PACKED ASCIIConsoleData; - - /* max size for ASCII data in 4K SCCB page */ -diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c -index 6580ce5907d..aa2c75a49c6 100644 ---- a/hw/s390x/virtio-ccw.c -+++ b/hw/s390x/virtio-ccw.c -@@ -193,7 +193,7 @@ typedef struct VirtioThinintInfo { - typedef struct VirtioRevInfo { - uint16_t revision; - uint16_t length; -- uint8_t data[0]; -+ uint8_t data[]; - } QEMU_PACKED VirtioRevInfo; - - /* Specify where the virtqueues for the subchannel are in guest memory. */ -diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h -index 57a3f58b0c9..b80188b430f 100644 ---- a/include/hw/acpi/acpi-defs.h -+++ b/include/hw/acpi/acpi-defs.h -@@ -152,7 +152,7 @@ typedef struct AcpiSerialPortConsoleRedirection - */ - struct AcpiRsdtDescriptorRev1 { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ -- uint32_t table_offset_entry[0]; /* Array of pointers to other */ -+ uint32_t table_offset_entry[]; /* Array of pointers to other */ - /* ACPI tables */ - } QEMU_PACKED; - typedef struct AcpiRsdtDescriptorRev1 AcpiRsdtDescriptorRev1; -@@ -162,7 +162,7 @@ typedef struct AcpiRsdtDescriptorRev1 AcpiRsdtDescriptorRev1; - */ - struct AcpiXsdtDescriptorRev2 { - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ -- uint64_t table_offset_entry[0]; /* Array of pointers to other */ -+ uint64_t table_offset_entry[]; /* Array of pointers to other */ - /* ACPI tables */ - } QEMU_PACKED; - typedef struct AcpiXsdtDescriptorRev2 AcpiXsdtDescriptorRev2; -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 2920bdef5b4..a5e92f6c373 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -101,7 +101,7 @@ typedef struct CPUArchId { - */ - typedef struct { - int len; -- CPUArchId 
cpus[0]; -+ CPUArchId cpus[]; - } CPUArchIdList; - - /** -diff --git a/include/hw/s390x/event-facility.h b/include/hw/s390x/event-facility.h -index bdc32a3c091..700a610f33c 100644 ---- a/include/hw/s390x/event-facility.h -+++ b/include/hw/s390x/event-facility.h -@@ -122,7 +122,7 @@ typedef struct MDBO { - - typedef struct MDB { - MdbHeader header; -- MDBO mdbo[0]; -+ MDBO mdbo[]; - } QEMU_PACKED MDB; - - typedef struct SclpMsg { -diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h -index df2fa4169b0..62e2aa1d9f1 100644 ---- a/include/hw/s390x/sclp.h -+++ b/include/hw/s390x/sclp.h -@@ -133,7 +133,7 @@ typedef struct ReadInfo { - uint16_t highest_cpu; - uint8_t _reserved5[124 - 122]; /* 122-123 */ - uint32_t hmfai; -- struct CPUEntry entries[0]; -+ struct CPUEntry entries[]; - } QEMU_PACKED ReadInfo; - - typedef struct ReadCpuInfo { -@@ -143,7 +143,7 @@ typedef struct ReadCpuInfo { - uint16_t nr_standby; /* 12-13 */ - uint16_t offset_standby; /* 14-15 */ - uint8_t reserved0[24-16]; /* 16-23 */ -- struct CPUEntry entries[0]; -+ struct CPUEntry entries[]; - } QEMU_PACKED ReadCpuInfo; - - typedef struct ReadStorageElementInfo { -@@ -152,7 +152,7 @@ typedef struct ReadStorageElementInfo { - uint16_t assigned; - uint16_t standby; - uint8_t _reserved0[16 - 14]; /* 14-15 */ -- uint32_t entries[0]; -+ uint32_t entries[]; - } QEMU_PACKED ReadStorageElementInfo; - - typedef struct AttachStorageElement { -@@ -160,7 +160,7 @@ typedef struct AttachStorageElement { - uint8_t _reserved0[10 - 8]; /* 8-9 */ - uint16_t assigned; - uint8_t _reserved1[16 - 12]; /* 12-15 */ -- uint32_t entries[0]; -+ uint32_t entries[]; - } QEMU_PACKED AttachStorageElement; - - typedef struct AssignStorage { -diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c -index b6be300cc48..a412926d278 100644 ---- a/target/s390x/ioinst.c -+++ b/target/s390x/ioinst.c -@@ -387,7 +387,7 @@ typedef struct ChscResp { - uint16_t len; - uint16_t code; - uint32_t param; -- char data[0]; -+ char data[]; - 
} QEMU_PACKED ChscResp; - - #define CHSC_MIN_RESP_LEN 0x0008 --- -2.27.0 - diff --git a/SOURCES/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch b/SOURCES/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch deleted file mode 100644 index bca0b4c..0000000 --- a/SOURCES/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 78c7fb5afcb298631df47f6b71cf764f921c15f4 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:38 +0000 -Subject: [PATCH 06/18] multifd: Make sure that we don't do any IO after an - error - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-6-quintela@redhat.com> -Patchwork-id: 94118 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 05/10] multifd: Make sure that we don't do any IO after an error -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -(cherry picked from commit 3d4095b222d97393b1c2c6e514951ec7798f1c43) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 22 +++++++++++++--------- - 1 file changed, 13 insertions(+), 9 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 6c55c5d..a0257ee 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3440,7 +3440,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - { - RAMState **temp = opaque; - RAMState *rs = *temp; -- int ret; -+ int ret = 0; - int i; - int64_t t0; - int done = 0; -@@ -3511,12 +3511,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_ROUND); - - out: -- multifd_send_sync_main(rs); -- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -- qemu_fflush(f); -- ram_counters.transferred += 8; -+ if (ret >= 0) { -+ multifd_send_sync_main(rs); -+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -+ qemu_fflush(f); -+ ram_counters.transferred += 8; - -- ret = qemu_file_get_error(f); -+ ret = qemu_file_get_error(f); -+ } - if (ret < 0) { - return ret; - } -@@ -3568,9 +3570,11 @@ static int ram_save_complete(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_FINISH); - } - -- multifd_send_sync_main(rs); -- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -- qemu_fflush(f); -+ if (ret >= 0) { -+ multifd_send_sync_main(rs); -+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -+ qemu_fflush(f); -+ } - - return ret; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-nbd-server-Avoid-long-error-message-assertions-CVE-2.patch b/SOURCES/kvm-nbd-server-Avoid-long-error-message-assertions-CVE-2.patch deleted file mode 100644 index 94d2c98..0000000 --- a/SOURCES/kvm-nbd-server-Avoid-long-error-message-assertions-CVE-2.patch +++ /dev/null @@ -1,161 +0,0 @@ -From f49ff2ed5675f1d0cddc404842e9d6e4e572d5a7 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Wed, 10 Jun 2020 18:32:01 -0400 -Subject: [PATCH 1/2] nbd/server: Avoid long error message assertions - CVE-2020-10761 - -RH-Author: Eric Blake -Message-id: <20200610183202.3780750-2-eblake@redhat.com> -Patchwork-id: 97494 -O-Subject: [RHEL-AV-8.2.1 
qemu-kvm PATCH 1/2] nbd/server: Avoid long error message assertions CVE-2020-10761 -Bugzilla: 1845384 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Ever since commit 36683283 (v2.8), the server code asserts that error -strings sent to the client are well-formed per the protocol by not -exceeding the maximum string length of 4096. At the time the server -first started sending error messages, the assertion could not be -triggered, because messages were completely under our control. -However, over the years, we have added latent scenarios where a client -could trigger the server to attempt an error message that would -include the client's information if it passed other checks first: - -- requesting NBD_OPT_INFO/GO on an export name that is not present - (commit 0cfae925 in v2.12 echoes the name) - -- requesting NBD_OPT_LIST/SET_META_CONTEXT on an export name that is - not present (commit e7b1948d in v2.12 echoes the name) - -At the time, those were still safe because we flagged names larger -than 256 bytes with a different message; but that changed in commit -93676c88 (v4.2) when we raised the name limit to 4096 to match the NBD -string limit. (That commit also failed to change the magic number -4096 in nbd_negotiate_send_rep_err to the just-introduced named -constant.) So with that commit, long client names appended to server -text can now trigger the assertion, and thus be used as a denial of -service attack against a server. As a mitigating factor, if the -server requires TLS, the client cannot trigger the problematic paths -unless it first supplies TLS credentials, and such trusted clients are -less likely to try to intentionally crash the server. - -We may later want to further sanitize the user-supplied strings we -place into our error messages, such as scrubbing out control -characters, but that is less important to the CVE fix, so it can be a -later patch to the new nbd_sanitize_name. 
- -Consideration was given to changing the assertion in -nbd_negotiate_send_rep_verr to instead merely log a server error and -truncate the message, to avoid leaving a latent path that could -trigger a future CVE DoS on any new error message. However, this -merely complicates the code for something that is already (correctly) -flagging coding errors, and now that we are aware of the long message -pitfall, we are less likely to introduce such errors in the future, -which would make such error handling dead code. - -Reported-by: Xueqiang Wei -CC: qemu-stable@nongnu.org -Fixes: https://bugzilla.redhat.com/1843684 CVE-2020-10761 -Fixes: 93676c88d7 -Signed-off-by: Eric Blake -Message-Id: <20200610163741.3745251-2-eblake@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 5c4fe018c025740fef4a0a4421e8162db0c3eefd) -Signed-off-by: Eric Blake -Signed-off-by: Eduardo Lima (Etrunko) ---- - nbd/server.c | 23 ++++++++++++++++++++--- - tests/qemu-iotests/143 | 4 ++++ - tests/qemu-iotests/143.out | 2 ++ - 3 files changed, 26 insertions(+), 3 deletions(-) - -diff --git a/nbd/server.c b/nbd/server.c -index 24ebc1a805..d5b9df092c 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -217,7 +217,7 @@ nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type, - - msg = g_strdup_vprintf(fmt, va); - len = strlen(msg); -- assert(len < 4096); -+ assert(len < NBD_MAX_STRING_SIZE); - trace_nbd_negotiate_send_rep_err(msg); - ret = nbd_negotiate_send_rep_len(client, type, len, errp); - if (ret < 0) { -@@ -231,6 +231,19 @@ nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type, - return 0; - } - -+/* -+ * Return a malloc'd copy of @name suitable for use in an error reply. -+ */ -+static char * -+nbd_sanitize_name(const char *name) -+{ -+ if (strnlen(name, 80) < 80) { -+ return g_strdup(name); -+ } -+ /* XXX Should we also try to sanitize any control characters? */ -+ return g_strdup_printf("%.80s...", name); -+} -+ - /* Send an error reply. 
- * Return -errno on error, 0 on success. */ - static int GCC_FMT_ATTR(4, 5) -@@ -595,9 +608,11 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) - - exp = nbd_export_find(name); - if (!exp) { -+ g_autofree char *sane_name = nbd_sanitize_name(name); -+ - return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN, - errp, "export '%s' not present", -- name); -+ sane_name); - } - - /* Don't bother sending NBD_INFO_NAME unless client requested it */ -@@ -995,8 +1010,10 @@ static int nbd_negotiate_meta_queries(NBDClient *client, - - meta->exp = nbd_export_find(export_name); - if (meta->exp == NULL) { -+ g_autofree char *sane_name = nbd_sanitize_name(export_name); -+ - return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp, -- "export '%s' not present", export_name); -+ "export '%s' not present", sane_name); - } - - ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), errp); -diff --git a/tests/qemu-iotests/143 b/tests/qemu-iotests/143 -index f649b36195..d2349903b1 100755 ---- a/tests/qemu-iotests/143 -+++ b/tests/qemu-iotests/143 -@@ -58,6 +58,10 @@ _send_qemu_cmd $QEMU_HANDLE \ - $QEMU_IO_PROG -f raw -c quit \ - "nbd+unix:///no_such_export?socket=$SOCK_DIR/nbd" 2>&1 \ - | _filter_qemu_io | _filter_nbd -+# Likewise, with longest possible name permitted in NBD protocol -+$QEMU_IO_PROG -f raw -c quit \ -+ "nbd+unix:///$(printf %4096d 1 | tr ' ' a)?socket=$SOCK_DIR/nbd" 2>&1 \ -+ | _filter_qemu_io | _filter_nbd | sed 's/aaaa*aa/aa--aa/' - - _send_qemu_cmd $QEMU_HANDLE \ - "{ 'execute': 'quit' }" \ -diff --git a/tests/qemu-iotests/143.out b/tests/qemu-iotests/143.out -index 1f4001c601..fc9c0a761f 100644 ---- a/tests/qemu-iotests/143.out -+++ b/tests/qemu-iotests/143.out -@@ -5,6 +5,8 @@ QA output created by 143 - {"return": {}} - qemu-io: can't open device nbd+unix:///no_such_export?socket=SOCK_DIR/nbd: Requested export not available - server reported: export 'no_such_export' not present -+qemu-io: can't open device 
nbd+unix:///aa--aa1?socket=SOCK_DIR/nbd: Requested export not available -+server reported: export 'aa--aa...' not present - { 'execute': 'quit' } - {"return": {}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} --- -2.27.0 - diff --git a/SOURCES/kvm-net-check-if-the-file-descriptor-is-valid-before-usi.patch b/SOURCES/kvm-net-check-if-the-file-descriptor-is-valid-before-usi.patch deleted file mode 100644 index 654a64f..0000000 --- a/SOURCES/kvm-net-check-if-the-file-descriptor-is-valid-before-usi.patch +++ /dev/null @@ -1,301 +0,0 @@ -From 512c7e92808dff66779f7421f1c17a081f18d7e6 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 29 Jul 2021 04:56:46 -0400 -Subject: [PATCH 13/14] net: check if the file descriptor is valid before using - it -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -Message-id: <20210726102337.6359-2-lvivier@redhat.com> -Patchwork-id: 101924 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/2] net: check if the file descriptor is valid before using it -Bugzilla: 1982134 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Philippe Mathieu-Daudé - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1982134 -BRANCH: rhel-8.5.0 -UPSTREAM: Merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=38380653 - -qemu_set_nonblock() checks that the file descriptor can be used and, if -not, crashes QEMU. An assert() is used for that. The use of assert() is -used to detect programming error and the coredump will allow to debug -the problem. - -But in the case of the tap device, this assert() can be triggered by -a misconfiguration by the user. At startup, it's not a real problem, but it -can also happen during the hot-plug of a new device, and here it's a -problem because we can crash a perfectly healthy system. 
- -For instance: - # ip link add link virbr0 name macvtap0 type macvtap mode bridge - # ip link set macvtap0 up - # TAP=/dev/tap$(ip -o link show macvtap0 | cut -d: -f1) - # qemu-system-x86_64 -machine q35 -device pcie-root-port,id=pcie-root-port-0 -monitor stdio 9<> $TAP - (qemu) netdev_add type=tap,id=hostnet0,vhost=on,fd=9 - (qemu) device_add driver=virtio-net-pci,netdev=hostnet0,id=net0,bus=pcie-root-port-0 - (qemu) device_del net0 - (qemu) netdev_del hostnet0 - (qemu) netdev_add type=tap,id=hostnet1,vhost=on,fd=9 - qemu-system-x86_64: .../util/oslib-posix.c:247: qemu_set_nonblock: Assertion `f != -1' failed. - Aborted (core dumped) - -To avoid that, add a function, qemu_try_set_nonblock(), that allows to report the -problem without crashing. - -In the same way, we also update the function for vhostfd in net_init_tap_one() and -for fd in net_init_socket() (both descriptors are provided by the user and can -be wrong). - -Signed-off-by: Laurent Vivier -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Jason Wang -(cherry picked from commit 894022e616016fe81745753f14adfbd680a1c7ee) -Signed-off-by: Laurent Vivier -Signed-off-by: Miroslav Rezanina ---- - include/qemu/sockets.h | 1 + - net/socket.c | 9 +++++-- - net/tap.c | 25 +++++++++++++++--- - util/oslib-posix.c | 26 +++++++++++++------ - util/oslib-win32.c | 57 ++++++++++++++++++++++++------------------ - 5 files changed, 79 insertions(+), 39 deletions(-) - -diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h -index 57cd049d6e..7d1f813576 100644 ---- a/include/qemu/sockets.h -+++ b/include/qemu/sockets.h -@@ -18,6 +18,7 @@ int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen); - int socket_set_cork(int fd, int v); - int socket_set_nodelay(int fd); - void qemu_set_block(int fd); -+int qemu_try_set_nonblock(int fd); - void qemu_set_nonblock(int fd); - int socket_set_fast_reuse(int fd); - -diff --git a/net/socket.c b/net/socket.c -index c92354049b..2d21fddd9c 100644 ---- a/net/socket.c -+++ 
b/net/socket.c -@@ -725,13 +725,18 @@ int net_init_socket(const Netdev *netdev, const char *name, - } - - if (sock->has_fd) { -- int fd; -+ int fd, ret; - - fd = monitor_fd_param(cur_mon, sock->fd, errp); - if (fd == -1) { - return -1; - } -- qemu_set_nonblock(fd); -+ ret = qemu_try_set_nonblock(fd); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", -+ name, fd); -+ return -1; -+ } - if (!net_socket_fd_init(peer, "socket", name, fd, 1, sock->mcast, - errp)) { - return -1; -diff --git a/net/tap.c b/net/tap.c -index 6207f61f84..41a20102fd 100644 ---- a/net/tap.c -+++ b/net/tap.c -@@ -689,6 +689,8 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, - } - - if (vhostfdname) { -+ int ret; -+ - vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err); - if (vhostfd == -1) { - if (tap->has_vhostforce && tap->vhostforce) { -@@ -698,7 +700,12 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, - } - return; - } -- qemu_set_nonblock(vhostfd); -+ ret = qemu_try_set_nonblock(vhostfd); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", -+ name, fd); -+ return; -+ } - } else { - vhostfd = open("/dev/vhost-net", O_RDWR); - if (vhostfd < 0) { -@@ -766,6 +773,7 @@ int net_init_tap(const Netdev *netdev, const char *name, - Error *err = NULL; - const char *vhostfdname; - char ifname[128]; -+ int ret = 0; - - assert(netdev->type == NET_CLIENT_DRIVER_TAP); - tap = &netdev->u.tap; -@@ -795,7 +803,12 @@ int net_init_tap(const Netdev *netdev, const char *name, - return -1; - } - -- qemu_set_nonblock(fd); -+ ret = qemu_try_set_nonblock(fd); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", -+ name, fd); -+ return -1; -+ } - - vnet_hdr = tap_probe_vnet_hdr(fd); - -@@ -810,7 +823,6 @@ int net_init_tap(const Netdev *netdev, const char *name, - char **fds; - char **vhost_fds; - int nfds = 0, nvhosts = 0; -- int ret = 0; - - if 
(tap->has_ifname || tap->has_script || tap->has_downscript || - tap->has_vnet_hdr || tap->has_helper || tap->has_queues || -@@ -843,7 +855,12 @@ int net_init_tap(const Netdev *netdev, const char *name, - goto free_fail; - } - -- qemu_set_nonblock(fd); -+ ret = qemu_try_set_nonblock(fd); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", -+ name, fd); -+ goto free_fail; -+ } - - if (i == 0) { - vnet_hdr = tap_probe_vnet_hdr(fd); -diff --git a/util/oslib-posix.c b/util/oslib-posix.c -index 8f88e4dbe1..db70416dbb 100644 ---- a/util/oslib-posix.c -+++ b/util/oslib-posix.c -@@ -240,25 +240,35 @@ void qemu_set_block(int fd) - assert(f != -1); - } - --void qemu_set_nonblock(int fd) -+int qemu_try_set_nonblock(int fd) - { - int f; - f = fcntl(fd, F_GETFL); -- assert(f != -1); -- f = fcntl(fd, F_SETFL, f | O_NONBLOCK); --#ifdef __OpenBSD__ - if (f == -1) { -+ return -errno; -+ } -+ if (fcntl(fd, F_SETFL, f | O_NONBLOCK) == -1) { -+#ifdef __OpenBSD__ - /* - * Previous to OpenBSD 6.3, fcntl(F_SETFL) is not permitted on - * memory devices and sets errno to ENODEV. - * It's OK if we fail to set O_NONBLOCK on devices like /dev/null, - * because they will never block anyway. 
- */ -- assert(errno == ENODEV); -- } --#else -- assert(f != -1); -+ if (errno == ENODEV) { -+ return 0; -+ } - #endif -+ return -errno; -+ } -+ return 0; -+} -+ -+void qemu_set_nonblock(int fd) -+{ -+ int f; -+ f = qemu_try_set_nonblock(fd); -+ assert(f == 0); - } - - int socket_set_fast_reuse(int fd) -diff --git a/util/oslib-win32.c b/util/oslib-win32.c -index 3b49d27297..7eedbe5859 100644 ---- a/util/oslib-win32.c -+++ b/util/oslib-win32.c -@@ -132,31 +132,6 @@ struct tm *localtime_r(const time_t *timep, struct tm *result) - } - #endif /* CONFIG_LOCALTIME_R */ - --void qemu_set_block(int fd) --{ -- unsigned long opt = 0; -- WSAEventSelect(fd, NULL, 0); -- ioctlsocket(fd, FIONBIO, &opt); --} -- --void qemu_set_nonblock(int fd) --{ -- unsigned long opt = 1; -- ioctlsocket(fd, FIONBIO, &opt); -- qemu_fd_register(fd); --} -- --int socket_set_fast_reuse(int fd) --{ -- /* Enabling the reuse of an endpoint that was used by a socket still in -- * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows -- * fast reuse is the default and SO_REUSEADDR does strange things. So we -- * don't have to do anything here. More info can be found at: -- * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ -- return 0; --} -- -- - static int socket_error(void) - { - switch (WSAGetLastError()) { -@@ -233,6 +208,38 @@ static int socket_error(void) - } - } - -+void qemu_set_block(int fd) -+{ -+ unsigned long opt = 0; -+ WSAEventSelect(fd, NULL, 0); -+ ioctlsocket(fd, FIONBIO, &opt); -+} -+ -+int qemu_try_set_nonblock(int fd) -+{ -+ unsigned long opt = 1; -+ if (ioctlsocket(fd, FIONBIO, &opt) != NO_ERROR) { -+ return -socket_error(); -+ } -+ qemu_fd_register(fd); -+ return 0; -+} -+ -+void qemu_set_nonblock(int fd) -+{ -+ (void)qemu_try_set_nonblock(fd); -+} -+ -+int socket_set_fast_reuse(int fd) -+{ -+ /* Enabling the reuse of an endpoint that was used by a socket still in -+ * TIME_WAIT state is usually performed by setting SO_REUSEADDR. 
On Windows -+ * fast reuse is the default and SO_REUSEADDR does strange things. So we -+ * don't have to do anything here. More info can be found at: -+ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ -+ return 0; -+} -+ - int inet_aton(const char *cp, struct in_addr *ia) - { - uint32_t addr = inet_addr(cp); --- -2.27.0 - diff --git a/SOURCES/kvm-net-detect-errors-from-probing-vnet-hdr-flag-for-TAP.patch b/SOURCES/kvm-net-detect-errors-from-probing-vnet-hdr-flag-for-TAP.patch deleted file mode 100644 index 8718c71..0000000 --- a/SOURCES/kvm-net-detect-errors-from-probing-vnet-hdr-flag-for-TAP.patch +++ /dev/null @@ -1,221 +0,0 @@ -From 3475ea6598896edb689ca8ba6fb81781e2517b6f Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 29 Jul 2021 04:56:49 -0400 -Subject: [PATCH 14/14] net: detect errors from probing vnet hdr flag for TAP - devices -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -Message-id: <20210726102337.6359-3-lvivier@redhat.com> -Patchwork-id: 101923 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 2/2] net: detect errors from probing vnet hdr flag for TAP devices -Bugzilla: 1982134 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Philippe Mathieu-Daudé - -From: "Daniel P. Berrange" - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1982134 -BRANCH: rhel-8.5.0 -UPSTREAM: Merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=38380653 - -When QEMU sets up a tap based network device backend, it mostly ignores errors -reported from various ioctl() calls it makes, assuming the TAP file descriptor -is valid. This assumption can easily be violated when the user is passing in a -pre-opened file descriptor. 
At best, the ioctls may fail with a -EBADF, but if -the user passes in a bogus FD number that happens to clash with a FD number that -QEMU has opened internally for another reason, a wide variety of errnos may -result, as the TUNGETIFF ioctl number may map to a completely different command -on a different type of file. - -By ignoring all these errors, QEMU sets up a zombie network backend that will -never pass any data. Even worse, when QEMU shuts down, or that network backend -is hot-removed, it will close this bogus file descriptor, which could belong to -another QEMU device backend. - -There's no obvious guaranteed reliable way to detect that a FD genuinely is a -TAP device, as opposed to a UNIX socket, or pipe, or something else. Checking -the errno from probing vnet hdr flag though, does catch the big common cases. -ie calling TUNGETIFF will return EBADF for an invalid FD, and ENOTTY when FD is -a UNIX socket, or pipe which catches accidental collisions with FDs used for -stdio, or monitor socket. - -Previously the example below where bogus fd 9 collides with the FD used for the -chardev saw: - -$ ./x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hostnet0,fd=9 \ - -chardev socket,id=charchannel0,path=/tmp/qga,server,nowait \ - -monitor stdio -vnc :0 -qemu-system-x86_64: -netdev tap,id=hostnet0,fd=9: TUNGETIFF ioctl() failed: Inappropriate ioctl for device -TUNSETOFFLOAD ioctl() failed: Bad address -QEMU 2.9.1 monitor - type 'help' for more information -(qemu) Warning: netdev hostnet0 has no peer - -which gives a running QEMU with a zombie network backend. 
- -With this change applied we get an error message and QEMU immediately exits -before carrying on and making a bigger disaster: - -$ ./x86_64-softmmu/qemu-system-x86_64 -netdev tap,id=hostnet0,fd=9 \ - -chardev socket,id=charchannel0,path=/tmp/qga,server,nowait \ - -monitor stdio -vnc :0 -qemu-system-x86_64: -netdev tap,id=hostnet0,vhost=on,fd=9: Unable to query TUNGETIFF on FD 9: Inappropriate ioctl for device - -Reported-by: Dr. David Alan Gilbert -Signed-off-by: Daniel P. Berrange -Tested-by: Dr. David Alan Gilbert -Message-id: 20171027085548.3472-1-berrange@redhat.com -[lv: to simplify, don't check on EINVAL with TUNGETIFF as it exists since v2.6.27] -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit e7b347d0bf640adb1c998d317eaf44d2d7cbd973) -Signed-off-by: Laurent Vivier -Signed-off-by: Miroslav Rezanina ---- - net/tap-bsd.c | 2 +- - net/tap-linux.c | 8 +++++--- - net/tap-solaris.c | 2 +- - net/tap-stub.c | 2 +- - net/tap.c | 25 ++++++++++++++++++++----- - net/tap_int.h | 2 +- - 6 files changed, 29 insertions(+), 12 deletions(-) - -diff --git a/net/tap-bsd.c b/net/tap-bsd.c -index a5c3707f80..77aaf674b1 100644 ---- a/net/tap-bsd.c -+++ b/net/tap-bsd.c -@@ -211,7 +211,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) - { - } - --int tap_probe_vnet_hdr(int fd) -+int tap_probe_vnet_hdr(int fd, Error **errp) - { - return 0; - } -diff --git a/net/tap-linux.c b/net/tap-linux.c -index e0dd442ee3..b0635e9e32 100644 ---- a/net/tap-linux.c -+++ b/net/tap-linux.c -@@ -147,13 +147,15 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) - } - } - --int tap_probe_vnet_hdr(int fd) -+int tap_probe_vnet_hdr(int fd, Error **errp) - { - struct ifreq ifr; - - if (ioctl(fd, TUNGETIFF, &ifr) != 0) { -- error_report("TUNGETIFF ioctl() failed: %s", strerror(errno)); -- return 0; -+ /* TUNGETIFF is available since kernel v2.6.27 */ -+ error_setg_errno(errp, errno, -+ "Unable to query TUNGETIFF on FD 
%d", fd); -+ return -1; - } - - return ifr.ifr_flags & IFF_VNET_HDR; -diff --git a/net/tap-solaris.c b/net/tap-solaris.c -index 4725d2314e..ae2ba68284 100644 ---- a/net/tap-solaris.c -+++ b/net/tap-solaris.c -@@ -206,7 +206,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) - { - } - --int tap_probe_vnet_hdr(int fd) -+int tap_probe_vnet_hdr(int fd, Error **errp) - { - return 0; - } -diff --git a/net/tap-stub.c b/net/tap-stub.c -index a9ab8f8293..de525a2e69 100644 ---- a/net/tap-stub.c -+++ b/net/tap-stub.c -@@ -37,7 +37,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) - { - } - --int tap_probe_vnet_hdr(int fd) -+int tap_probe_vnet_hdr(int fd, Error **errp) - { - return 0; - } -diff --git a/net/tap.c b/net/tap.c -index 41a20102fd..b37ccae00c 100644 ---- a/net/tap.c -+++ b/net/tap.c -@@ -597,7 +597,11 @@ int net_init_bridge(const Netdev *netdev, const char *name, - } - - qemu_set_nonblock(fd); -- vnet_hdr = tap_probe_vnet_hdr(fd); -+ vnet_hdr = tap_probe_vnet_hdr(fd, errp); -+ if (vnet_hdr < 0) { -+ close(fd); -+ return -1; -+ } - s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr); - - snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper, -@@ -810,7 +814,11 @@ int net_init_tap(const Netdev *netdev, const char *name, - return -1; - } - -- vnet_hdr = tap_probe_vnet_hdr(fd); -+ vnet_hdr = tap_probe_vnet_hdr(fd, errp); -+ if (vnet_hdr < 0) { -+ close(fd); -+ return -1; -+ } - - net_init_tap_one(tap, peer, "tap", name, NULL, - script, downscript, -@@ -863,8 +871,11 @@ int net_init_tap(const Netdev *netdev, const char *name, - } - - if (i == 0) { -- vnet_hdr = tap_probe_vnet_hdr(fd); -- } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) { -+ vnet_hdr = tap_probe_vnet_hdr(fd, errp); -+ if (vnet_hdr < 0) { -+ goto free_fail; -+ } -+ } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) { - error_setg(errp, - "vnet_hdr not consistent across given tap fds"); - ret = -1; -@@ -909,7 +920,11 @@ free_fail: - 
} - - qemu_set_nonblock(fd); -- vnet_hdr = tap_probe_vnet_hdr(fd); -+ vnet_hdr = tap_probe_vnet_hdr(fd, errp); -+ if (vnet_hdr < 0) { -+ close(fd); -+ return -1; -+ } - - net_init_tap_one(tap, peer, "bridge", name, ifname, - script, downscript, vhostfdname, -diff --git a/net/tap_int.h b/net/tap_int.h -index e3194b23f4..225a49ea48 100644 ---- a/net/tap_int.h -+++ b/net/tap_int.h -@@ -34,7 +34,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, - ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen); - - void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp); --int tap_probe_vnet_hdr(int fd); -+int tap_probe_vnet_hdr(int fd, Error **errp); - int tap_probe_vnet_hdr_len(int fd, int len); - int tap_probe_has_ufo(int fd); - void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo); --- -2.27.0 - diff --git a/SOURCES/kvm-net-forbid-the-reentrant-RX.patch b/SOURCES/kvm-net-forbid-the-reentrant-RX.patch deleted file mode 100644 index aaf57ed..0000000 --- a/SOURCES/kvm-net-forbid-the-reentrant-RX.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 1e01e2f96fd5e903394eab59365d5363394c8b18 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 13 Apr 2021 18:59:12 -0400 -Subject: [PATCH 3/5] net: forbid the reentrant RX - -RH-Author: Jon Maloy -Message-id: <20210413185912.3811035-2-jmaloy@redhat.com> -Patchwork-id: 101467 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/1] net: forbid the reentrant RX -Bugzilla: 1859175 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth -RH-Acked-by: Xiao Wang - -From: Jason Wang - -The memory API allows DMA into NIC's MMIO area. This means the NIC's -RX routine must be reentrant. Instead of auditing all the NIC, we can -simply detect the reentrancy and return early. The queue->delivering -is set and cleared by qemu_net_queue_deliver() for other queue helpers -to know whether the delivering in on going (NIC's receive is being -called). 
We can check it and return early in qemu_net_queue_flush() to -forbid reentrant RX. - -Signed-off-by: Jason Wang - -(cherry picked from commit 22dc8663d9fc7baa22100544c600b6285a63c7a3) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - net/queue.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/net/queue.c b/net/queue.c -index 61276ca4be..c679d79f4b 100644 ---- a/net/queue.c -+++ b/net/queue.c -@@ -250,6 +250,9 @@ void qemu_net_queue_purge(NetQueue *queue, NetClientState *from) - - bool qemu_net_queue_flush(NetQueue *queue) - { -+ if (queue->delivering) -+ return false; -+ - while (!QTAILQ_EMPTY(&queue->packets)) { - NetPacket *packet; - int ret; --- -2.27.0 - diff --git a/SOURCES/kvm-net-introduce-qemu_receive_packet.patch b/SOURCES/kvm-net-introduce-qemu_receive_packet.patch deleted file mode 100644 index 8de8cae..0000000 --- a/SOURCES/kvm-net-introduce-qemu_receive_packet.patch +++ /dev/null @@ -1,187 +0,0 @@ -From 89732bf03b26daaebbd3e6e031e79459ae3f77e1 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 29 Jun 2021 03:42:39 -0400 -Subject: [PATCH 1/9] net: introduce qemu_receive_packet() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210629034247.3286477-2-jmaloy@redhat.com> -Patchwork-id: 101785 -O-Subject: [RHEL-8.4.0.z qemu-kvm PATCH v2 1/9] net: introduce qemu_receive_packet() -Bugzilla: 1932917 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Jason Wang - -Some NIC supports loopback mode and this is done by calling -nc->info->receive() directly which in fact suppresses the effort of -reentrancy check that is done in qemu_net_queue_send(). - -Unfortunately we can't use qemu_net_queue_send() here since for -loopback there's no sender as peer, so this patch introduce a -qemu_receive_packet() which is used for implementing loopback mode -for a NIC with this check. 
- -NIC that supports loopback mode will be converted to this helper. - -This is intended to address CVE-2021-3416. - -Cc: Prasad J Pandit -Reviewed-by: Philippe Mathieu-Daudé -Cc: qemu-stable@nongnu.org -Signed-off-by: Jason Wang - -(cherry picked from commit 705df5466c98f3efdd2b68d3b31dad86858acad7) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - include/net/net.h | 5 +++++ - include/net/queue.h | 8 ++++++++ - net/net.c | 38 +++++++++++++++++++++++++++++++------- - net/queue.c | 22 ++++++++++++++++++++++ - 4 files changed, 66 insertions(+), 7 deletions(-) - -diff --git a/include/net/net.h b/include/net/net.h -index e175ba9677..1b32a8aaec 100644 ---- a/include/net/net.h -+++ b/include/net/net.h -@@ -142,12 +142,17 @@ void *qemu_get_nic_opaque(NetClientState *nc); - void qemu_del_net_client(NetClientState *nc); - typedef void (*qemu_nic_foreach)(NICState *nic, void *opaque); - void qemu_foreach_nic(qemu_nic_foreach func, void *opaque); -+int qemu_can_receive_packet(NetClientState *nc); - int qemu_can_send_packet(NetClientState *nc); - ssize_t qemu_sendv_packet(NetClientState *nc, const struct iovec *iov, - int iovcnt); - ssize_t qemu_sendv_packet_async(NetClientState *nc, const struct iovec *iov, - int iovcnt, NetPacketSent *sent_cb); - ssize_t qemu_send_packet(NetClientState *nc, const uint8_t *buf, int size); -+ssize_t qemu_receive_packet(NetClientState *nc, const uint8_t *buf, int size); -+ssize_t qemu_receive_packet_iov(NetClientState *nc, -+ const struct iovec *iov, -+ int iovcnt); - ssize_t qemu_send_packet_raw(NetClientState *nc, const uint8_t *buf, int size); - ssize_t qemu_send_packet_async(NetClientState *nc, const uint8_t *buf, - int size, NetPacketSent *sent_cb); -diff --git a/include/net/queue.h b/include/net/queue.h -index c0269bb1dc..9f2f289d77 100644 ---- a/include/net/queue.h -+++ b/include/net/queue.h -@@ -55,6 +55,14 @@ void qemu_net_queue_append_iov(NetQueue *queue, - - void qemu_del_net_queue(NetQueue *queue); - -+ssize_t 
qemu_net_queue_receive(NetQueue *queue, -+ const uint8_t *data, -+ size_t size); -+ -+ssize_t qemu_net_queue_receive_iov(NetQueue *queue, -+ const struct iovec *iov, -+ int iovcnt); -+ - ssize_t qemu_net_queue_send(NetQueue *queue, - NetClientState *sender, - unsigned flags, -diff --git a/net/net.c b/net/net.c -index 84aa6d8d00..d0b651ca95 100644 ---- a/net/net.c -+++ b/net/net.c -@@ -516,6 +516,17 @@ int qemu_set_vnet_be(NetClientState *nc, bool is_be) - #endif - } - -+int qemu_can_receive_packet(NetClientState *nc) -+{ -+ if (nc->receive_disabled) { -+ return 0; -+ } else if (nc->info->can_receive && -+ !nc->info->can_receive(nc)) { -+ return 0; -+ } -+ return 1; -+} -+ - int qemu_can_send_packet(NetClientState *sender) - { - int vm_running = runstate_is_running(); -@@ -528,13 +539,7 @@ int qemu_can_send_packet(NetClientState *sender) - return 1; - } - -- if (sender->peer->receive_disabled) { -- return 0; -- } else if (sender->peer->info->can_receive && -- !sender->peer->info->can_receive(sender->peer)) { -- return 0; -- } -- return 1; -+ return qemu_can_receive_packet(sender->peer); - } - - static ssize_t filter_receive_iov(NetClientState *nc, -@@ -667,6 +672,25 @@ ssize_t qemu_send_packet(NetClientState *nc, const uint8_t *buf, int size) - return qemu_send_packet_async(nc, buf, size, NULL); - } - -+ssize_t qemu_receive_packet(NetClientState *nc, const uint8_t *buf, int size) -+{ -+ if (!qemu_can_receive_packet(nc)) { -+ return 0; -+ } -+ -+ return qemu_net_queue_receive(nc->incoming_queue, buf, size); -+} -+ -+ssize_t qemu_receive_packet_iov(NetClientState *nc, const struct iovec *iov, -+ int iovcnt) -+{ -+ if (!qemu_can_receive_packet(nc)) { -+ return 0; -+ } -+ -+ return qemu_net_queue_receive_iov(nc->incoming_queue, iov, iovcnt); -+} -+ - ssize_t qemu_send_packet_raw(NetClientState *nc, const uint8_t *buf, int size) - { - return qemu_send_packet_async_with_flags(nc, QEMU_NET_PACKET_FLAG_RAW, -diff --git a/net/queue.c b/net/queue.c -index 
c679d79f4b..5f0f9ffcaf 100644 ---- a/net/queue.c -+++ b/net/queue.c -@@ -182,6 +182,28 @@ static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue, - return ret; - } - -+ssize_t qemu_net_queue_receive(NetQueue *queue, -+ const uint8_t *data, -+ size_t size) -+{ -+ if (queue->delivering) { -+ return 0; -+ } -+ -+ return qemu_net_queue_deliver(queue, NULL, 0, data, size); -+} -+ -+ssize_t qemu_net_queue_receive_iov(NetQueue *queue, -+ const struct iovec *iov, -+ int iovcnt) -+{ -+ if (queue->delivering) { -+ return 0; -+ } -+ -+ return qemu_net_queue_deliver_iov(queue, NULL, 0, iov, iovcnt); -+} -+ - ssize_t qemu_net_queue_send(NetQueue *queue, - NetClientState *sender, - unsigned flags, --- -2.27.0 - diff --git a/SOURCES/kvm-net-remove-an-assert-call-in-eth_get_gso_type.patch b/SOURCES/kvm-net-remove-an-assert-call-in-eth_get_gso_type.patch deleted file mode 100644 index b619e78..0000000 --- a/SOURCES/kvm-net-remove-an-assert-call-in-eth_get_gso_type.patch +++ /dev/null @@ -1,59 +0,0 @@ -From b7de63e72c479df42c324c058a487517210fa069 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 13 Apr 2021 19:21:50 -0400 -Subject: [PATCH 1/5] net: remove an assert call in eth_get_gso_type - -RH-Author: Jon Maloy -Message-id: <20210413192150.3817133-2-jmaloy@redhat.com> -Patchwork-id: 101469 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/1] net: remove an assert call in eth_get_gso_type -Bugzilla: 1892350 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Xiao Wang - -From: Prasad J Pandit - -eth_get_gso_type() routine returns segmentation offload type based on -L3 protocol type. It calls g_assert_not_reached if L3 protocol is -unknown, making the following return statement unreachable. Remove the -g_assert call, it maybe triggered by a guest user. - -Reported-by: Gaoning Pan -Signed-off-by: Prasad J Pandit -Signed-off-by: Jason Wang - -(cherry picked from commit 7564bf7701f00214cdc8a678a9f7df765244def1) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - net/eth.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/net/eth.c b/net/eth.c -index 0c1d413ee2..1e0821c5f8 100644 ---- a/net/eth.c -+++ b/net/eth.c -@@ -16,6 +16,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/log.h" - #include "net/eth.h" - #include "net/checksum.h" - #include "net/tap.h" -@@ -71,9 +72,8 @@ eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto) - return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state; - } - } -- -- /* Unsupported offload */ -- g_assert_not_reached(); -+ qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, " -+ "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto); - - return VIRTIO_NET_HDR_GSO_NONE | ecn_state; - } --- -2.27.0 - diff --git a/SOURCES/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch b/SOURCES/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch new file mode 100644 index 0000000..68f7647 --- /dev/null +++ b/SOURCES/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch @@ -0,0 +1,287 @@ +From 35bf6693fb5bba5a9d5fdf4a7fdac06ce574b83d Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:05 -0400 +Subject: [PATCH 1/7] numa: Enable numa for SGX EPC sections + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [1/5] c29297cbacc4cb65c9ac125db349a767aa2574af +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +The basic SGX did not enable numa for SGX EPC sections, which +results in all EPC sections located in numa node 0. This patch +enables SGX numa function in the guest and the EPC section can +work with RAM as one numa node. + +The Guest kernel related log: +[ 0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x180000000-0x183ffffff] +[ 0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x184000000-0x185bfffff] +The SRAT table can normally show SGX EPC sections memory info in different +numa nodes. + +The SGX EPC numa related command: + ...... 
+ -m 4G,maxmem=20G \ + -smp sockets=2,cores=2 \ + -cpu host,+sgx-provisionkey \ + -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \ + -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \ + -numa node,nodeid=0,cpus=0-1,memdev=node0 \ + -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \ + -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \ + -numa node,nodeid=1,cpus=2-3,memdev=node1 \ + -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 \ + ...... + +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-2-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 1105812382e1126d86dddc16b3700f8c79dc93d1) +Signed-off-by: Paul Lai +--- + hw/core/numa.c | 5 ++--- + hw/i386/acpi-build.c | 2 ++ + hw/i386/sgx-epc.c | 3 +++ + hw/i386/sgx-stub.c | 4 ++++ + hw/i386/sgx.c | 44 +++++++++++++++++++++++++++++++++++++++ + include/hw/i386/sgx-epc.h | 3 +++ + monitor/hmp-cmds.c | 1 + + qapi/machine.json | 10 ++++++++- + qemu-options.hx | 4 ++-- + 9 files changed, 70 insertions(+), 6 deletions(-) + +diff --git a/hw/core/numa.c b/hw/core/numa.c +index e6050b2273..1aa05dcf42 100644 +--- a/hw/core/numa.c ++++ b/hw/core/numa.c +@@ -784,9 +784,8 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) + break; + case MEMORY_DEVICE_INFO_KIND_SGX_EPC: + se = value->u.sgx_epc.data; +- /* TODO: once we support numa, assign to right node */ +- node_mem[0].node_mem += se->size; +- node_mem[0].node_plugged_mem += se->size; ++ node_mem[se->node].node_mem += se->size; ++ node_mem[se->node].node_plugged_mem = 0; + break; + default: + g_assert_not_reached(); +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index 447ea35275..a4478e77b7 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -2071,6 +2071,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) + nvdimm_build_srat(table_data); + 
} + ++ sgx_epc_build_srat(table_data); ++ + /* + * TODO: this part is not in ACPI spec and current linux kernel boots fine + * without these entries. But I recall there were issues the last time I +diff --git a/hw/i386/sgx-epc.c b/hw/i386/sgx-epc.c +index e508827e78..96b2940d75 100644 +--- a/hw/i386/sgx-epc.c ++++ b/hw/i386/sgx-epc.c +@@ -21,6 +21,7 @@ + + static Property sgx_epc_properties[] = { + DEFINE_PROP_UINT64(SGX_EPC_ADDR_PROP, SGXEPCDevice, addr, 0), ++ DEFINE_PROP_UINT32(SGX_EPC_NUMA_NODE_PROP, SGXEPCDevice, node, 0), + DEFINE_PROP_LINK(SGX_EPC_MEMDEV_PROP, SGXEPCDevice, hostmem, + TYPE_MEMORY_BACKEND_EPC, HostMemoryBackendEpc *), + DEFINE_PROP_END_OF_LIST(), +@@ -139,6 +140,8 @@ static void sgx_epc_md_fill_device_info(const MemoryDeviceState *md, + se->memaddr = epc->addr; + se->size = object_property_get_uint(OBJECT(epc), SGX_EPC_SIZE_PROP, + NULL); ++ se->node = object_property_get_uint(OBJECT(epc), SGX_EPC_NUMA_NODE_PROP, ++ NULL); + se->memdev = object_get_canonical_path(OBJECT(epc->hostmem)); + + info->u.sgx_epc.data = se; +diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c +index c9b379e665..26833eb233 100644 +--- a/hw/i386/sgx-stub.c ++++ b/hw/i386/sgx-stub.c +@@ -6,6 +6,10 @@ + #include "qapi/error.h" + #include "qapi/qapi-commands-misc-target.h" + ++void sgx_epc_build_srat(GArray *table_data) ++{ ++} ++ + SGXInfo *qmp_query_sgx(Error **errp) + { + error_setg(errp, "SGX support is not compiled in"); +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index 8fef3dd8fa..d04299904a 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -23,6 +23,7 @@ + #include "sysemu/hw_accel.h" + #include "sysemu/reset.h" + #include ++#include "hw/acpi/aml-build.h" + + #define SGX_MAX_EPC_SECTIONS 8 + #define SGX_CPUID_EPC_INVALID 0x0 +@@ -36,6 +37,46 @@ + + #define RETRY_NUM 2 + ++static int sgx_epc_device_list(Object *obj, void *opaque) ++{ ++ GSList **list = opaque; ++ ++ if (object_dynamic_cast(obj, TYPE_SGX_EPC)) { ++ *list = g_slist_append(*list, DEVICE(obj)); ++ 
} ++ ++ object_child_foreach(obj, sgx_epc_device_list, opaque); ++ return 0; ++} ++ ++static GSList *sgx_epc_get_device_list(void) ++{ ++ GSList *list = NULL; ++ ++ object_child_foreach(qdev_get_machine(), sgx_epc_device_list, &list); ++ return list; ++} ++ ++void sgx_epc_build_srat(GArray *table_data) ++{ ++ GSList *device_list = sgx_epc_get_device_list(); ++ ++ for (; device_list; device_list = device_list->next) { ++ DeviceState *dev = device_list->data; ++ Object *obj = OBJECT(dev); ++ uint64_t addr, size; ++ int node; ++ ++ node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, ++ &error_abort); ++ addr = object_property_get_uint(obj, SGX_EPC_ADDR_PROP, &error_abort); ++ size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, &error_abort); ++ ++ build_srat_memory(table_data, addr, size, node, MEM_AFFINITY_ENABLED); ++ } ++ g_slist_free(device_list); ++} ++ + static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + { + return (low & MAKE_64BIT_MASK(12, 20)) + +@@ -226,6 +267,9 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms) + /* set the memdev link with memory backend */ + object_property_parse(obj, SGX_EPC_MEMDEV_PROP, list->value->memdev, + &error_fatal); ++ /* set the numa node property for sgx epc object */ ++ object_property_set_uint(obj, SGX_EPC_NUMA_NODE_PROP, list->value->node, ++ &error_fatal); + object_property_set_bool(obj, "realized", true, &error_fatal); + object_unref(obj); + } +diff --git a/include/hw/i386/sgx-epc.h b/include/hw/i386/sgx-epc.h +index a6a65be854..581fac389a 100644 +--- a/include/hw/i386/sgx-epc.h ++++ b/include/hw/i386/sgx-epc.h +@@ -25,6 +25,7 @@ + #define SGX_EPC_ADDR_PROP "addr" + #define SGX_EPC_SIZE_PROP "size" + #define SGX_EPC_MEMDEV_PROP "memdev" ++#define SGX_EPC_NUMA_NODE_PROP "node" + + /** + * SGXEPCDevice: +@@ -38,6 +39,7 @@ typedef struct SGXEPCDevice { + + /* public */ + uint64_t addr; ++ uint32_t node; + HostMemoryBackendEpc *hostmem; + } SGXEPCDevice; + +@@ -56,6 +58,7 @@ typedef struct 
SGXEPCState { + } SGXEPCState; + + bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size); ++void sgx_epc_build_srat(GArray *table_data); + + static inline uint64_t sgx_epc_above_4g_end(SGXEPCState *sgx_epc) + { +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 9c91bf93e9..2669156b28 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1810,6 +1810,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) + se->id ? se->id : ""); + monitor_printf(mon, " memaddr: 0x%" PRIx64 "\n", se->memaddr); + monitor_printf(mon, " size: %" PRIu64 "\n", se->size); ++ monitor_printf(mon, " node: %" PRId64 "\n", se->node); + monitor_printf(mon, " memdev: %s\n", se->memdev); + break; + default: +diff --git a/qapi/machine.json b/qapi/machine.json +index 067e3f5378..16e771affc 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -1207,12 +1207,15 @@ + # + # @memdev: memory backend linked with device + # ++# @node: the numa node ++# + # Since: 6.2 + ## + { 'struct': 'SgxEPCDeviceInfo', + 'data': { '*id': 'str', + 'memaddr': 'size', + 'size': 'size', ++ 'node': 'int', + 'memdev': 'str' + } + } +@@ -1285,10 +1288,15 @@ + # + # @memdev: memory backend linked with device + # ++# @node: the numa node ++# + # Since: 6.2 + ## + { 'struct': 'SgxEPC', +- 'data': { 'memdev': 'str' } } ++ 'data': { 'memdev': 'str', ++ 'node': 'int' ++ } ++} + + ## + # @SgxEPCProperties: +diff --git a/qemu-options.hx b/qemu-options.hx +index 94c4a8dbaf..4b7798088b 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -127,11 +127,11 @@ SRST + ERST + + DEF("M", HAS_ARG, QEMU_OPTION_M, +- " sgx-epc.0.memdev=memid\n", ++ " sgx-epc.0.memdev=memid,sgx-epc.0.node=numaid\n", + QEMU_ARCH_ALL) + + SRST +-``sgx-epc.0.memdev=@var{memid}`` ++``sgx-epc.0.memdev=@var{memid},sgx-epc.0.node=@var{numaid}`` + Define an SGX EPC section. 
+ ERST + +-- +2.27.0 + diff --git a/SOURCES/kvm-numa-Extend-CLI-to-provide-initiator-information-for.patch b/SOURCES/kvm-numa-Extend-CLI-to-provide-initiator-information-for.patch deleted file mode 100644 index 6d9382c..0000000 --- a/SOURCES/kvm-numa-Extend-CLI-to-provide-initiator-information-for.patch +++ /dev/null @@ -1,318 +0,0 @@ -From 70f8bbb27f9f357ea83ff6639fc00aa60fc902b9 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 21 May 2020 23:56:47 +0100 -Subject: [PATCH 04/12] numa: Extend CLI to provide initiator information for - numa nodes - -RH-Author: plai@redhat.com -Message-id: <20200521235655.27141-4-plai@redhat.com> -Patchwork-id: 96736 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 03/11] numa: Extend CLI to provide initiator information for numa nodes -Bugzilla: 1600217 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -From: Tao Xu - -In ACPI 6.3 chapter 5.2.27 Heterogeneous Memory Attribute Table (HMAT), -The initiator represents processor which access to memory. And in 5.2.27.3 -Memory Proximity Domain Attributes Structure, the attached initiator is -defined as where the memory controller responsible for a memory proximity -domain. With attached initiator information, the topology of heterogeneous -memory can be described. Add new machine property 'hmat' to enable all -HMAT specific options. - -Extend CLI of "-numa node" option to indicate the initiator numa node-id. -In the linux kernel, the codes in drivers/acpi/hmat/hmat.c parse and report -the platform's HMAT tables. Before using initiator option, enable HMAT with --machine hmat=on. - -Acked-by: Markus Armbruster -Reviewed-by: Igor Mammedov -Reviewed-by: Jingqi Liu -Suggested-by: Dan Williams -Signed-off-by: Tao Xu -Message-Id: <20191213011929.2520-2-tao3.xu@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 244b3f4485a07c7ce4b7123d6ce9d8c6012756e8) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. de Paula ---- - hw/core/machine.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++ - hw/core/numa.c | 23 ++++++++++++++++++ - include/sysemu/numa.h | 5 ++++ - qapi/machine.json | 10 +++++++- - qemu-options.hx | 35 ++++++++++++++++++++++++---- - 5 files changed, 131 insertions(+), 6 deletions(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 19c78c6..cb21ae1 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -688,6 +688,20 @@ static void machine_set_nvdimm(Object *obj, bool value, Error **errp) - ms->nvdimms_state->is_enabled = value; - } - -+static bool machine_get_hmat(Object *obj, Error **errp) -+{ -+ MachineState *ms = MACHINE(obj); -+ -+ return ms->numa_state->hmat_enabled; -+} -+ -+static void machine_set_hmat(Object *obj, bool value, Error **errp) -+{ -+ MachineState *ms = MACHINE(obj); -+ -+ ms->numa_state->hmat_enabled = value; -+} -+ - static char *machine_get_nvdimm_persistence(Object *obj, Error **errp) - { - MachineState *ms = MACHINE(obj); -@@ -815,6 +829,7 @@ void machine_set_cpu_numa_node(MachineState *machine, - const CpuInstanceProperties *props, Error **errp) - { - MachineClass *mc = MACHINE_GET_CLASS(machine); -+ NodeInfo *numa_info = machine->numa_state->nodes; - bool match = false; - int i; - -@@ -884,6 +899,17 @@ void machine_set_cpu_numa_node(MachineState *machine, - match = true; - slot->props.node_id = props->node_id; - slot->props.has_node_id = props->has_node_id; -+ -+ if (machine->numa_state->hmat_enabled) { -+ if ((numa_info[props->node_id].initiator < MAX_NODES) && -+ (props->node_id != numa_info[props->node_id].initiator)) { -+ error_setg(errp, "The initiator of CPU NUMA node %" PRId64 -+ " should be itself", props->node_id); -+ return; -+ } -+ numa_info[props->node_id].has_cpu = true; -+ numa_info[props->node_id].initiator = props->node_id; -+ } - } - - if (!match) { -@@ 
-1130,6 +1156,13 @@ static void machine_initfn(Object *obj) - - if (mc->cpu_index_to_instance_props && mc->get_default_cpu_node_id) { - ms->numa_state = g_new0(NumaState, 1); -+ object_property_add_bool(obj, "hmat", -+ machine_get_hmat, machine_set_hmat, -+ &error_abort); -+ object_property_set_description(obj, "hmat", -+ "Set on/off to enable/disable " -+ "ACPI Heterogeneous Memory Attribute " -+ "Table (HMAT)", NULL); - } - - /* Register notifier when init is done for sysbus sanity checks */ -@@ -1218,6 +1251,32 @@ static char *cpu_slot_to_string(const CPUArchId *cpu) - return g_string_free(s, false); - } - -+static void numa_validate_initiator(NumaState *numa_state) -+{ -+ int i; -+ NodeInfo *numa_info = numa_state->nodes; -+ -+ for (i = 0; i < numa_state->num_nodes; i++) { -+ if (numa_info[i].initiator == MAX_NODES) { -+ error_report("The initiator of NUMA node %d is missing, use " -+ "'-numa node,initiator' option to declare it", i); -+ exit(1); -+ } -+ -+ if (!numa_info[numa_info[i].initiator].present) { -+ error_report("NUMA node %" PRIu16 " is missing, use " -+ "'-numa node' option to declare it first", -+ numa_info[i].initiator); -+ exit(1); -+ } -+ -+ if (!numa_info[numa_info[i].initiator].has_cpu) { -+ error_report("The initiator of NUMA node %d is invalid", i); -+ exit(1); -+ } -+ } -+} -+ - static void machine_numa_finish_cpu_init(MachineState *machine) - { - int i; -@@ -1258,6 +1317,11 @@ static void machine_numa_finish_cpu_init(MachineState *machine) - machine_set_cpu_numa_node(machine, &props, &error_fatal); - } - } -+ -+ if (machine->numa_state->hmat_enabled) { -+ numa_validate_initiator(machine->numa_state); -+ } -+ - if (s->len && !qtest_enabled()) { - warn_report("CPU(s) not present in any NUMA nodes: %s", - s->str); -diff --git a/hw/core/numa.c b/hw/core/numa.c -index 19f082d..a07eef9 100644 ---- a/hw/core/numa.c -+++ b/hw/core/numa.c -@@ -129,6 +129,29 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, - 
numa_info[nodenr].node_mem = object_property_get_uint(o, "size", NULL); - numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); - } -+ -+ /* -+ * If not set the initiator, set it to MAX_NODES. And if -+ * HMAT is enabled and this node has no cpus, QEMU will raise error. -+ */ -+ numa_info[nodenr].initiator = MAX_NODES; -+ if (node->has_initiator) { -+ if (!ms->numa_state->hmat_enabled) { -+ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " -+ "(HMAT) is disabled, enable it with -machine hmat=on " -+ "before using any of hmat specific options"); -+ return; -+ } -+ -+ if (node->initiator >= MAX_NODES) { -+ error_report("The initiator id %" PRIu16 " expects an integer " -+ "between 0 and %d", node->initiator, -+ MAX_NODES - 1); -+ return; -+ } -+ -+ numa_info[nodenr].initiator = node->initiator; -+ } - numa_info[nodenr].present = true; - max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1); - ms->numa_state->num_nodes++; -diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h -index ae9c41d..788cbec 100644 ---- a/include/sysemu/numa.h -+++ b/include/sysemu/numa.h -@@ -18,6 +18,8 @@ struct NodeInfo { - uint64_t node_mem; - struct HostMemoryBackend *node_memdev; - bool present; -+ bool has_cpu; -+ uint16_t initiator; - uint8_t distance[MAX_NODES]; - }; - -@@ -33,6 +35,9 @@ struct NumaState { - /* Allow setting NUMA distance for different NUMA nodes */ - bool have_numa_distance; - -+ /* Detect if HMAT support is enabled. */ -+ bool hmat_enabled; -+ - /* NUMA nodes information */ - NodeInfo nodes[MAX_NODES]; - }; -diff --git a/qapi/machine.json b/qapi/machine.json -index ca26779..27d0e37 100644 ---- a/qapi/machine.json -+++ b/qapi/machine.json -@@ -463,6 +463,13 @@ - # @memdev: memory backend object. If specified for one node, - # it must be specified for all nodes. - # -+# @initiator: defined in ACPI 6.3 Chapter 5.2.27.3 Table 5-145, -+# points to the nodeid which has the memory controller -+# responsible for this NUMA node. 
This field provides -+# additional information as to the initiator node that -+# is closest (as in directly attached) to this node, and -+# therefore has the best performance (since 5.0) -+# - # Since: 2.1 - ## - { 'struct': 'NumaNodeOptions', -@@ -470,7 +477,8 @@ - '*nodeid': 'uint16', - '*cpus': ['uint16'], - '*mem': 'size', -- '*memdev': 'str' }} -+ '*memdev': 'str', -+ '*initiator': 'uint16' }} - - ## - # @NumaDistOptions: -diff --git a/qemu-options.hx b/qemu-options.hx -index df1d27b..e2ce754 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -43,7 +43,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \ - " suppress-vmdesc=on|off disables self-describing migration (default=off)\n" - " nvdimm=on|off controls NVDIMM support (default=off)\n" - " enforce-config-section=on|off enforce configuration section migration (default=off)\n" -- " memory-encryption=@var{} memory encryption object to use (default=none)\n", -+ " memory-encryption=@var{} memory encryption object to use (default=none)\n" -+ " hmat=on|off controls ACPI HMAT support (default=off)\n", - QEMU_ARCH_ALL) - STEXI - @item -machine [type=]@var{name}[,prop=@var{value}[,...]] -@@ -103,6 +104,9 @@ NOTE: this parameter is deprecated. Please use @option{-global} - @option{migration.send-configuration}=@var{on|off} instead. - @item memory-encryption=@var{} - Memory encryption object to use. The default is none. -+@item hmat=on|off -+Enables or disables ACPI Heterogeneous Memory Attribute Table (HMAT) support. -+The default is off. 
- @end table - ETEXI - -@@ -161,14 +165,14 @@ If any on the three values is given, the total number of CPUs @var{n} can be omi - ETEXI - - DEF("numa", HAS_ARG, QEMU_OPTION_numa, -- "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n" -- "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n" -+ "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n" -+ "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n" - "-numa dist,src=source,dst=destination,val=distance\n" - "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n", - QEMU_ARCH_ALL) - STEXI --@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}] --@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}] -+@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}] -+@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}] - @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance} - @itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}] - @findex -numa -@@ -215,6 +219,27 @@ split equally between them. - @samp{mem} and @samp{memdev} are mutually exclusive. Furthermore, - if one node uses @samp{memdev}, all of them have to use it. - -+@samp{initiator} is an additional option that points to an @var{initiator} -+NUMA node that has best performance (the lowest latency or largest bandwidth) -+to this NUMA @var{node}. Note that this option can be set only when -+the machine property 'hmat' is set to 'on'. -+ -+Following example creates a machine with 2 NUMA nodes, node 0 has CPU. -+node 1 has only memory, and its initiator is node 0. Note that because -+node 0 has CPU, by default the initiator of node 0 is itself and must be -+itself. 
-+@example -+-machine hmat=on \ -+-m 2G,slots=2,maxmem=4G \ -+-object memory-backend-ram,size=1G,id=m0 \ -+-object memory-backend-ram,size=1G,id=m1 \ -+-numa node,nodeid=0,memdev=m0 \ -+-numa node,nodeid=1,memdev=m1,initiator=0 \ -+-smp 2,sockets=2,maxcpus=2 \ -+-numa cpu,node-id=0,socket-id=0 \ -+-numa cpu,node-id=0,socket-id=1 -+@end example -+ - @var{source} and @var{destination} are NUMA node IDs. - @var{distance} is the NUMA distance from @var{source} to @var{destination}. - The distance from a node to itself is always 10. If any pair of nodes is --- -1.8.3.1 - diff --git a/SOURCES/kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch b/SOURCES/kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch deleted file mode 100644 index 306abeb..0000000 --- a/SOURCES/kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch +++ /dev/null @@ -1,545 +0,0 @@ -From 32341d8cf680625def040b44d70b197f2399bbdb Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 21 May 2020 23:56:48 +0100 -Subject: [PATCH 05/12] numa: Extend CLI to provide memory latency and - bandwidth information - -RH-Author: plai@redhat.com -Message-id: <20200521235655.27141-5-plai@redhat.com> -Patchwork-id: 96731 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 04/11] numa: Extend CLI to provide memory latency and bandwidth information -Bugzilla: 1600217 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -From: Liu Jingqi - -Add -numa hmat-lb option to provide System Locality Latency and -Bandwidth Information. These memory attributes help to build -System Locality Latency and Bandwidth Information Structure(s) -in ACPI Heterogeneous Memory Attribute Table (HMAT). Before using -hmat-lb option, enable HMAT with -machine hmat=on. - -Acked-by: Markus Armbruster -Signed-off-by: Liu Jingqi -Signed-off-by: Tao Xu -Message-Id: <20191213011929.2520-3-tao3.xu@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Reviewed-by: Igor Mammedov -(cherry picked from commit 9b12dfa03a94d7f7a4b54eb67229a31e58193384) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. de Paula ---- - hw/core/numa.c | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++ - include/sysemu/numa.h | 53 ++++++++++++++ - qapi/machine.json | 93 +++++++++++++++++++++++- - qemu-options.hx | 47 +++++++++++- - 4 files changed, 384 insertions(+), 3 deletions(-) - -diff --git a/hw/core/numa.c b/hw/core/numa.c -index a07eef9..58fe713 100644 ---- a/hw/core/numa.c -+++ b/hw/core/numa.c -@@ -23,6 +23,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/units.h" - #include "sysemu/hostmem.h" - #include "sysemu/numa.h" - #include "sysemu/sysemu.h" -@@ -194,6 +195,186 @@ void parse_numa_distance(MachineState *ms, NumaDistOptions *dist, Error **errp) - ms->numa_state->have_numa_distance = true; - } - -+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node, -+ Error **errp) -+{ -+ int i, first_bit, last_bit; -+ uint64_t max_entry, temp_base, bitmap_copy; -+ NodeInfo *numa_info = numa_state->nodes; -+ HMAT_LB_Info *hmat_lb = -+ numa_state->hmat_lb[node->hierarchy][node->data_type]; -+ HMAT_LB_Data lb_data = {}; -+ HMAT_LB_Data *lb_temp; -+ -+ /* Error checking */ -+ if (node->initiator > numa_state->num_nodes) { -+ error_setg(errp, "Invalid initiator=%d, it should be less than %d", -+ node->initiator, numa_state->num_nodes); -+ return; -+ } -+ if (node->target > numa_state->num_nodes) { -+ error_setg(errp, "Invalid target=%d, it should be less than %d", -+ node->target, numa_state->num_nodes); -+ return; -+ } -+ if (!numa_info[node->initiator].has_cpu) { -+ error_setg(errp, "Invalid initiator=%d, it isn't an " -+ "initiator proximity domain", node->initiator); -+ return; -+ } -+ if (!numa_info[node->target].present) { -+ error_setg(errp, "The target=%d should point to an existing node", -+ node->target); -+ return; -+ } -+ -+ if (!hmat_lb) { -+ hmat_lb = g_malloc0(sizeof(*hmat_lb)); -+ 
numa_state->hmat_lb[node->hierarchy][node->data_type] = hmat_lb; -+ hmat_lb->list = g_array_new(false, true, sizeof(HMAT_LB_Data)); -+ } -+ hmat_lb->hierarchy = node->hierarchy; -+ hmat_lb->data_type = node->data_type; -+ lb_data.initiator = node->initiator; -+ lb_data.target = node->target; -+ -+ if (node->data_type <= HMATLB_DATA_TYPE_WRITE_LATENCY) { -+ /* Input latency data */ -+ -+ if (!node->has_latency) { -+ error_setg(errp, "Missing 'latency' option"); -+ return; -+ } -+ if (node->has_bandwidth) { -+ error_setg(errp, "Invalid option 'bandwidth' since " -+ "the data type is latency"); -+ return; -+ } -+ -+ /* Detect duplicate configuration */ -+ for (i = 0; i < hmat_lb->list->len; i++) { -+ lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i); -+ -+ if (node->initiator == lb_temp->initiator && -+ node->target == lb_temp->target) { -+ error_setg(errp, "Duplicate configuration of the latency for " -+ "initiator=%d and target=%d", node->initiator, -+ node->target); -+ return; -+ } -+ } -+ -+ hmat_lb->base = hmat_lb->base ? 
hmat_lb->base : UINT64_MAX; -+ -+ if (node->latency) { -+ /* Calculate the temporary base and compressed latency */ -+ max_entry = node->latency; -+ temp_base = 1; -+ while (QEMU_IS_ALIGNED(max_entry, 10)) { -+ max_entry /= 10; -+ temp_base *= 10; -+ } -+ -+ /* Calculate the max compressed latency */ -+ temp_base = MIN(hmat_lb->base, temp_base); -+ max_entry = node->latency / hmat_lb->base; -+ max_entry = MAX(hmat_lb->range_bitmap, max_entry); -+ -+ /* -+ * For latency hmat_lb->range_bitmap record the max compressed -+ * latency which should be less than 0xFFFF (UINT16_MAX) -+ */ -+ if (max_entry >= UINT16_MAX) { -+ error_setg(errp, "Latency %" PRIu64 " between initiator=%d and " -+ "target=%d should not differ from previously entered " -+ "min or max values on more than %d", node->latency, -+ node->initiator, node->target, UINT16_MAX - 1); -+ return; -+ } else { -+ hmat_lb->base = temp_base; -+ hmat_lb->range_bitmap = max_entry; -+ } -+ -+ /* -+ * Set lb_info_provided bit 0 as 1, -+ * latency information is provided -+ */ -+ numa_info[node->target].lb_info_provided |= BIT(0); -+ } -+ lb_data.data = node->latency; -+ } else if (node->data_type >= HMATLB_DATA_TYPE_ACCESS_BANDWIDTH) { -+ /* Input bandwidth data */ -+ if (!node->has_bandwidth) { -+ error_setg(errp, "Missing 'bandwidth' option"); -+ return; -+ } -+ if (node->has_latency) { -+ error_setg(errp, "Invalid option 'latency' since " -+ "the data type is bandwidth"); -+ return; -+ } -+ if (!QEMU_IS_ALIGNED(node->bandwidth, MiB)) { -+ error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d and " -+ "target=%d should be 1MB aligned", node->bandwidth, -+ node->initiator, node->target); -+ return; -+ } -+ -+ /* Detect duplicate configuration */ -+ for (i = 0; i < hmat_lb->list->len; i++) { -+ lb_temp = &g_array_index(hmat_lb->list, HMAT_LB_Data, i); -+ -+ if (node->initiator == lb_temp->initiator && -+ node->target == lb_temp->target) { -+ error_setg(errp, "Duplicate configuration of the bandwidth for " -+ 
"initiator=%d and target=%d", node->initiator, -+ node->target); -+ return; -+ } -+ } -+ -+ hmat_lb->base = hmat_lb->base ? hmat_lb->base : 1; -+ -+ if (node->bandwidth) { -+ /* Keep bitmap unchanged when bandwidth out of range */ -+ bitmap_copy = hmat_lb->range_bitmap; -+ bitmap_copy |= node->bandwidth; -+ first_bit = ctz64(bitmap_copy); -+ temp_base = UINT64_C(1) << first_bit; -+ max_entry = node->bandwidth / temp_base; -+ last_bit = 64 - clz64(bitmap_copy); -+ -+ /* -+ * For bandwidth, first_bit record the base unit of bandwidth bits, -+ * last_bit record the last bit of the max bandwidth. The max -+ * compressed bandwidth should be less than 0xFFFF (UINT16_MAX) -+ */ -+ if ((last_bit - first_bit) > UINT16_BITS || -+ max_entry >= UINT16_MAX) { -+ error_setg(errp, "Bandwidth %" PRIu64 " between initiator=%d " -+ "and target=%d should not differ from previously " -+ "entered values on more than %d", node->bandwidth, -+ node->initiator, node->target, UINT16_MAX - 1); -+ return; -+ } else { -+ hmat_lb->base = temp_base; -+ hmat_lb->range_bitmap = bitmap_copy; -+ } -+ -+ /* -+ * Set lb_info_provided bit 1 as 1, -+ * bandwidth information is provided -+ */ -+ numa_info[node->target].lb_info_provided |= BIT(1); -+ } -+ lb_data.data = node->bandwidth; -+ } else { -+ assert(0); -+ } -+ -+ g_array_append_val(hmat_lb->list, lb_data); -+} -+ - void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp) - { - Error *err = NULL; -@@ -231,6 +412,19 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp) - machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu), - &err); - break; -+ case NUMA_OPTIONS_TYPE_HMAT_LB: -+ if (!ms->numa_state->hmat_enabled) { -+ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " -+ "(HMAT) is disabled, enable it with -machine hmat=on " -+ "before using any of hmat specific options"); -+ return; -+ } -+ -+ parse_numa_hmat_lb(ms->numa_state, &object->u.hmat_lb, &err); -+ if (err) { -+ goto 
end; -+ } -+ break; - default: - abort(); - } -diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h -index 788cbec..70f93c8 100644 ---- a/include/sysemu/numa.h -+++ b/include/sysemu/numa.h -@@ -14,11 +14,34 @@ struct CPUArchId; - #define NUMA_DISTANCE_MAX 254 - #define NUMA_DISTANCE_UNREACHABLE 255 - -+/* the value of AcpiHmatLBInfo flags */ -+enum { -+ HMAT_LB_MEM_MEMORY = 0, -+ HMAT_LB_MEM_CACHE_1ST_LEVEL = 1, -+ HMAT_LB_MEM_CACHE_2ND_LEVEL = 2, -+ HMAT_LB_MEM_CACHE_3RD_LEVEL = 3, -+ HMAT_LB_LEVELS /* must be the last entry */ -+}; -+ -+/* the value of AcpiHmatLBInfo data type */ -+enum { -+ HMAT_LB_DATA_ACCESS_LATENCY = 0, -+ HMAT_LB_DATA_READ_LATENCY = 1, -+ HMAT_LB_DATA_WRITE_LATENCY = 2, -+ HMAT_LB_DATA_ACCESS_BANDWIDTH = 3, -+ HMAT_LB_DATA_READ_BANDWIDTH = 4, -+ HMAT_LB_DATA_WRITE_BANDWIDTH = 5, -+ HMAT_LB_TYPES /* must be the last entry */ -+}; -+ -+#define UINT16_BITS 16 -+ - struct NodeInfo { - uint64_t node_mem; - struct HostMemoryBackend *node_memdev; - bool present; - bool has_cpu; -+ uint8_t lb_info_provided; - uint16_t initiator; - uint8_t distance[MAX_NODES]; - }; -@@ -28,6 +51,31 @@ struct NumaNodeMem { - uint64_t node_plugged_mem; - }; - -+struct HMAT_LB_Data { -+ uint8_t initiator; -+ uint8_t target; -+ uint64_t data; -+}; -+typedef struct HMAT_LB_Data HMAT_LB_Data; -+ -+struct HMAT_LB_Info { -+ /* Indicates it's memory or the specified level memory side cache. */ -+ uint8_t hierarchy; -+ -+ /* Present the type of data, access/read/write latency or bandwidth. 
*/ -+ uint8_t data_type; -+ -+ /* The range bitmap of bandwidth for calculating common base */ -+ uint64_t range_bitmap; -+ -+ /* The common base unit for latencies or bandwidths */ -+ uint64_t base; -+ -+ /* Array to store the latencies or bandwidths */ -+ GArray *list; -+}; -+typedef struct HMAT_LB_Info HMAT_LB_Info; -+ - struct NumaState { - /* Number of NUMA nodes */ - int num_nodes; -@@ -40,11 +88,16 @@ struct NumaState { - - /* NUMA nodes information */ - NodeInfo nodes[MAX_NODES]; -+ -+ /* NUMA nodes HMAT Locality Latency and Bandwidth Information */ -+ HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES]; - }; - typedef struct NumaState NumaState; - - void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp); - void parse_numa_opts(MachineState *ms); -+void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node, -+ Error **errp); - void numa_complete_configuration(MachineState *ms); - void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms); - extern QemuOptsList qemu_numa_opts; -diff --git a/qapi/machine.json b/qapi/machine.json -index 27d0e37..cf8faf5 100644 ---- a/qapi/machine.json -+++ b/qapi/machine.json -@@ -426,10 +426,12 @@ - # - # @cpu: property based CPU(s) to node mapping (Since: 2.10) - # -+# @hmat-lb: memory latency and bandwidth information (Since: 5.0) -+# - # Since: 2.1 - ## - { 'enum': 'NumaOptionsType', -- 'data': [ 'node', 'dist', 'cpu' ] } -+ 'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] } - - ## - # @NumaOptions: -@@ -444,7 +446,8 @@ - 'data': { - 'node': 'NumaNodeOptions', - 'dist': 'NumaDistOptions', -- 'cpu': 'NumaCpuOptions' }} -+ 'cpu': 'NumaCpuOptions', -+ 'hmat-lb': 'NumaHmatLBOptions' }} - - ## - # @NumaNodeOptions: -@@ -558,6 +561,92 @@ - 'data' : {} } - - ## -+# @HmatLBMemoryHierarchy: -+# -+# The memory hierarchy in the System Locality Latency and Bandwidth -+# Information Structure of HMAT (Heterogeneous Memory Attribute Table) -+# -+# For more information about @HmatLBMemoryHierarchy, 
see chapter -+# 5.2.27.4: Table 5-146: Field "Flags" of ACPI 6.3 spec. -+# -+# @memory: the structure represents the memory performance -+# -+# @first-level: first level of memory side cache -+# -+# @second-level: second level of memory side cache -+# -+# @third-level: third level of memory side cache -+# -+# Since: 5.0 -+## -+{ 'enum': 'HmatLBMemoryHierarchy', -+ 'data': [ 'memory', 'first-level', 'second-level', 'third-level' ] } -+ -+## -+# @HmatLBDataType: -+# -+# Data type in the System Locality Latency and Bandwidth -+# Information Structure of HMAT (Heterogeneous Memory Attribute Table) -+# -+# For more information about @HmatLBDataType, see chapter -+# 5.2.27.4: Table 5-146: Field "Data Type" of ACPI 6.3 spec. -+# -+# @access-latency: access latency (nanoseconds) -+# -+# @read-latency: read latency (nanoseconds) -+# -+# @write-latency: write latency (nanoseconds) -+# -+# @access-bandwidth: access bandwidth (Bytes per second) -+# -+# @read-bandwidth: read bandwidth (Bytes per second) -+# -+# @write-bandwidth: write bandwidth (Bytes per second) -+# -+# Since: 5.0 -+## -+{ 'enum': 'HmatLBDataType', -+ 'data': [ 'access-latency', 'read-latency', 'write-latency', -+ 'access-bandwidth', 'read-bandwidth', 'write-bandwidth' ] } -+ -+## -+# @NumaHmatLBOptions: -+# -+# Set the system locality latency and bandwidth information -+# between Initiator and Target proximity Domains. -+# -+# For more information about @NumaHmatLBOptions, see chapter -+# 5.2.27.4: Table 5-146 of ACPI 6.3 spec. -+# -+# @initiator: the Initiator Proximity Domain. -+# -+# @target: the Target Proximity Domain. -+# -+# @hierarchy: the Memory Hierarchy. Indicates the performance -+# of memory or side cache. -+# -+# @data-type: presents the type of data, access/read/write -+# latency or hit latency. -+# -+# @latency: the value of latency from @initiator to @target -+# proximity domain, the latency unit is "ns(nanosecond)". 
-+# -+# @bandwidth: the value of bandwidth between @initiator and @target -+# proximity domain, the bandwidth unit is -+# "Bytes per second". -+# -+# Since: 5.0 -+## -+{ 'struct': 'NumaHmatLBOptions', -+ 'data': { -+ 'initiator': 'uint16', -+ 'target': 'uint16', -+ 'hierarchy': 'HmatLBMemoryHierarchy', -+ 'data-type': 'HmatLBDataType', -+ '*latency': 'uint64', -+ '*bandwidth': 'size' }} -+ -+## - # @HostMemPolicy: - # - # Host memory policy types -diff --git a/qemu-options.hx b/qemu-options.hx -index e2ce754..86d9d8a 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -168,16 +168,19 @@ DEF("numa", HAS_ARG, QEMU_OPTION_numa, - "-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n" - "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n" - "-numa dist,src=source,dst=destination,val=distance\n" -- "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n", -+ "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n" -+ "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n", - QEMU_ARCH_ALL) - STEXI - @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}] - @itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}] - @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance} - @itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}] -+@itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{tpye}[,latency=@var{lat}][,bandwidth=@var{bw}] - @findex -numa - Define a NUMA node and assign RAM and VCPUs to it. - Set the NUMA distance from a source node to a destination node. -+Set the ACPI Heterogeneous Memory Attributes for the given nodes. 
- - Legacy VCPU assignment uses @samp{cpus} option where - @var{firstcpu} and @var{lastcpu} are CPU indexes. Each -@@ -256,6 +259,48 @@ specified resources, it just assigns existing resources to NUMA - nodes. This means that one still has to use the @option{-m}, - @option{-smp} options to allocate RAM and VCPUs respectively. - -+Use @samp{hmat-lb} to set System Locality Latency and Bandwidth Information -+between initiator and target NUMA nodes in ACPI Heterogeneous Attribute Memory Table (HMAT). -+Initiator NUMA node can create memory requests, usually it has one or more processors. -+Target NUMA node contains addressable memory. -+ -+In @samp{hmat-lb} option, @var{node} are NUMA node IDs. @var{hierarchy} is the memory -+hierarchy of the target NUMA node: if @var{hierarchy} is 'memory', the structure -+represents the memory performance; if @var{hierarchy} is 'first-level|second-level|third-level', -+this structure represents aggregated performance of memory side caches for each domain. -+@var{type} of 'data-type' is type of data represented by this structure instance: -+if 'hierarchy' is 'memory', 'data-type' is 'access|read|write' latency or 'access|read|write' -+bandwidth of the target memory; if 'hierarchy' is 'first-level|second-level|third-level', -+'data-type' is 'access|read|write' hit latency or 'access|read|write' hit bandwidth of the -+target memory side cache. -+ -+@var{lat} is latency value in nanoseconds. @var{bw} is bandwidth value, -+the possible value and units are NUM[M|G|T], mean that the bandwidth value are -+NUM byte per second (or MB/s, GB/s or TB/s depending on used suffix). -+Note that if latency or bandwidth value is 0, means the corresponding latency or -+bandwidth information is not provided. -+ -+For example, the following options describe 2 NUMA nodes. Node 0 has 2 cpus and -+a ram, node 1 has only a ram. 
The processors in node 0 access memory in node -+0 with access-latency 5 nanoseconds, access-bandwidth is 200 MB/s; -+The processors in NUMA node 0 access memory in NUMA node 1 with access-latency 10 -+nanoseconds, access-bandwidth is 100 MB/s. -+@example -+-machine hmat=on \ -+-m 2G \ -+-object memory-backend-ram,size=1G,id=m0 \ -+-object memory-backend-ram,size=1G,id=m1 \ -+-smp 2 \ -+-numa node,nodeid=0,memdev=m0 \ -+-numa node,nodeid=1,memdev=m1,initiator=0 \ -+-numa cpu,node-id=0,socket-id=0 \ -+-numa cpu,node-id=0,socket-id=1 \ -+-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \ -+-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \ -+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \ -+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M -+@end example -+ - ETEXI - - DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd, --- -1.8.3.1 - diff --git a/SOURCES/kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch b/SOURCES/kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch deleted file mode 100644 index a17db22..0000000 --- a/SOURCES/kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch +++ /dev/null @@ -1,326 +0,0 @@ -From 8cd3544b1347b248b9d04eb3d6c9b9bde3a13655 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 21 May 2020 23:56:49 +0100 -Subject: [PATCH 06/12] numa: Extend CLI to provide memory side cache - information - -RH-Author: plai@redhat.com -Message-id: <20200521235655.27141-6-plai@redhat.com> -Patchwork-id: 96740 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 05/11] numa: Extend CLI to provide memory side cache information -Bugzilla: 1600217 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -From: Liu Jingqi - -Add -numa hmat-cache option to provide Memory Side Cache Information. 
-These memory attributes help to build Memory Side Cache Information -Structure(s) in ACPI Heterogeneous Memory Attribute Table (HMAT). -Before using hmat-cache option, enable HMAT with -machine hmat=on. - -Acked-by: Markus Armbruster -Signed-off-by: Liu Jingqi -Signed-off-by: Tao Xu -Message-Id: <20191213011929.2520-4-tao3.xu@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Igor Mammedov -(cherry picked from commit c412a48d4d91e8f8b89aae02de0f44f1f0b729e5) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. de Paula ---- - hw/core/numa.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++ - include/sysemu/numa.h | 5 ++++ - qapi/machine.json | 81 +++++++++++++++++++++++++++++++++++++++++++++++++-- - qemu-options.hx | 17 +++++++++-- - 4 files changed, 179 insertions(+), 4 deletions(-) - -diff --git a/hw/core/numa.c b/hw/core/numa.c -index 58fe713..0d1b4be 100644 ---- a/hw/core/numa.c -+++ b/hw/core/numa.c -@@ -375,6 +375,73 @@ void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node, - g_array_append_val(hmat_lb->list, lb_data); - } - -+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node, -+ Error **errp) -+{ -+ int nb_numa_nodes = ms->numa_state->num_nodes; -+ NodeInfo *numa_info = ms->numa_state->nodes; -+ NumaHmatCacheOptions *hmat_cache = NULL; -+ -+ if (node->node_id >= nb_numa_nodes) { -+ error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less " -+ "than %d", node->node_id, nb_numa_nodes); -+ return; -+ } -+ -+ if (numa_info[node->node_id].lb_info_provided != (BIT(0) | BIT(1))) { -+ error_setg(errp, "The latency and bandwidth information of " -+ "node-id=%" PRIu32 " should be provided before memory side " -+ "cache attributes", node->node_id); -+ return; -+ } -+ -+ if (node->level < 1 || node->level >= HMAT_LB_LEVELS) { -+ error_setg(errp, "Invalid level=%" PRIu8 ", it should be larger than 0 " -+ "and less than or equal to %d", node->level, -+ HMAT_LB_LEVELS - 1); -+ 
return; -+ } -+ -+ assert(node->associativity < HMAT_CACHE_ASSOCIATIVITY__MAX); -+ assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX); -+ if (ms->numa_state->hmat_cache[node->node_id][node->level]) { -+ error_setg(errp, "Duplicate configuration of the side cache for " -+ "node-id=%" PRIu32 " and level=%" PRIu8, -+ node->node_id, node->level); -+ return; -+ } -+ -+ if ((node->level > 1) && -+ ms->numa_state->hmat_cache[node->node_id][node->level - 1] && -+ (node->size >= -+ ms->numa_state->hmat_cache[node->node_id][node->level - 1]->size)) { -+ error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8 -+ " should be less than the size(%" PRIu64 ") of " -+ "level=%u", node->size, node->level, -+ ms->numa_state->hmat_cache[node->node_id] -+ [node->level - 1]->size, -+ node->level - 1); -+ return; -+ } -+ -+ if ((node->level < HMAT_LB_LEVELS - 1) && -+ ms->numa_state->hmat_cache[node->node_id][node->level + 1] && -+ (node->size <= -+ ms->numa_state->hmat_cache[node->node_id][node->level + 1]->size)) { -+ error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8 -+ " should be larger than the size(%" PRIu64 ") of " -+ "level=%u", node->size, node->level, -+ ms->numa_state->hmat_cache[node->node_id] -+ [node->level + 1]->size, -+ node->level + 1); -+ return; -+ } -+ -+ hmat_cache = g_malloc0(sizeof(*hmat_cache)); -+ memcpy(hmat_cache, node, sizeof(*hmat_cache)); -+ ms->numa_state->hmat_cache[node->node_id][node->level] = hmat_cache; -+} -+ - void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp) - { - Error *err = NULL; -@@ -425,6 +492,19 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp) - goto end; - } - break; -+ case NUMA_OPTIONS_TYPE_HMAT_CACHE: -+ if (!ms->numa_state->hmat_enabled) { -+ error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " -+ "(HMAT) is disabled, enable it with -machine hmat=on " -+ "before using any of hmat specific options"); -+ return; -+ } -+ -+ 
parse_numa_hmat_cache(ms, &object->u.hmat_cache, &err); -+ if (err) { -+ goto end; -+ } -+ break; - default: - abort(); - } -diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h -index 70f93c8..ba693cc 100644 ---- a/include/sysemu/numa.h -+++ b/include/sysemu/numa.h -@@ -91,6 +91,9 @@ struct NumaState { - - /* NUMA nodes HMAT Locality Latency and Bandwidth Information */ - HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES]; -+ -+ /* Memory Side Cache Information Structure */ -+ NumaHmatCacheOptions *hmat_cache[MAX_NODES][HMAT_LB_LEVELS]; - }; - typedef struct NumaState NumaState; - -@@ -98,6 +101,8 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp); - void parse_numa_opts(MachineState *ms); - void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node, - Error **errp); -+void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node, -+ Error **errp); - void numa_complete_configuration(MachineState *ms); - void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms); - extern QemuOptsList qemu_numa_opts; -diff --git a/qapi/machine.json b/qapi/machine.json -index cf8faf5..b3d30bc 100644 ---- a/qapi/machine.json -+++ b/qapi/machine.json -@@ -428,10 +428,12 @@ - # - # @hmat-lb: memory latency and bandwidth information (Since: 5.0) - # -+# @hmat-cache: memory side cache information (Since: 5.0) -+# - # Since: 2.1 - ## - { 'enum': 'NumaOptionsType', -- 'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] } -+ 'data': [ 'node', 'dist', 'cpu', 'hmat-lb', 'hmat-cache' ] } - - ## - # @NumaOptions: -@@ -447,7 +449,8 @@ - 'node': 'NumaNodeOptions', - 'dist': 'NumaDistOptions', - 'cpu': 'NumaCpuOptions', -- 'hmat-lb': 'NumaHmatLBOptions' }} -+ 'hmat-lb': 'NumaHmatLBOptions', -+ 'hmat-cache': 'NumaHmatCacheOptions' }} - - ## - # @NumaNodeOptions: -@@ -647,6 +650,80 @@ - '*bandwidth': 'size' }} - - ## -+# @HmatCacheAssociativity: -+# -+# Cache associativity in the Memory Side Cache Information Structure -+# of HMAT -+# -+# For 
more information of @HmatCacheAssociativity, see chapter -+# 5.2.27.5: Table 5-147 of ACPI 6.3 spec. -+# -+# @none: None (no memory side cache in this proximity domain, -+# or cache associativity unknown) -+# -+# @direct: Direct Mapped -+# -+# @complex: Complex Cache Indexing (implementation specific) -+# -+# Since: 5.0 -+## -+{ 'enum': 'HmatCacheAssociativity', -+ 'data': [ 'none', 'direct', 'complex' ] } -+ -+## -+# @HmatCacheWritePolicy: -+# -+# Cache write policy in the Memory Side Cache Information Structure -+# of HMAT -+# -+# For more information of @HmatCacheWritePolicy, see chapter -+# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec. -+# -+# @none: None (no memory side cache in this proximity domain, -+# or cache write policy unknown) -+# -+# @write-back: Write Back (WB) -+# -+# @write-through: Write Through (WT) -+# -+# Since: 5.0 -+## -+{ 'enum': 'HmatCacheWritePolicy', -+ 'data': [ 'none', 'write-back', 'write-through' ] } -+ -+## -+# @NumaHmatCacheOptions: -+# -+# Set the memory side cache information for a given memory domain. -+# -+# For more information of @NumaHmatCacheOptions, see chapter -+# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec. -+# -+# @node-id: the memory proximity domain to which the memory belongs. -+# -+# @size: the size of memory side cache in bytes. -+# -+# @level: the cache level described in this structure. -+# -+# @associativity: the cache associativity, -+# none/direct-mapped/complex(complex cache indexing). -+# -+# @policy: the write policy, none/write-back/write-through. -+# -+# @line: the cache Line size in bytes. 
-+# -+# Since: 5.0 -+## -+{ 'struct': 'NumaHmatCacheOptions', -+ 'data': { -+ 'node-id': 'uint32', -+ 'size': 'size', -+ 'level': 'uint8', -+ 'associativity': 'HmatCacheAssociativity', -+ 'policy': 'HmatCacheWritePolicy', -+ 'line': 'uint16' }} -+ -+## - # @HostMemPolicy: - # - # Host memory policy types -diff --git a/qemu-options.hx b/qemu-options.hx -index 86d9d8a..8fe05b6 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -169,7 +169,8 @@ DEF("numa", HAS_ARG, QEMU_OPTION_numa, - "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n" - "-numa dist,src=source,dst=destination,val=distance\n" - "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n" -- "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n", -+ "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n" -+ "-numa hmat-cache,node-id=node,size=size,level=level[,associativity=none|direct|complex][,policy=none|write-back|write-through][,line=size]\n", - QEMU_ARCH_ALL) - STEXI - @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}] -@@ -177,6 +178,7 @@ STEXI - @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance} - @itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}] - @itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{tpye}[,latency=@var{lat}][,bandwidth=@var{bw}] -+@itemx -numa hmat-cache,node-id=@var{node},size=@var{size},level=@var{level}[,associativity=@var{str}][,policy=@var{str}][,line=@var{size}] - @findex -numa - Define a NUMA node and assign RAM and VCPUs to it. - Set the NUMA distance from a source node to a destination node. 
-@@ -280,11 +282,20 @@ NUM byte per second (or MB/s, GB/s or TB/s depending on used suffix). - Note that if latency or bandwidth value is 0, means the corresponding latency or - bandwidth information is not provided. - -+In @samp{hmat-cache} option, @var{node-id} is the NUMA-id of the memory belongs. -+@var{size} is the size of memory side cache in bytes. @var{level} is the cache -+level described in this structure, note that the cache level 0 should not be used -+with @samp{hmat-cache} option. @var{associativity} is the cache associativity, -+the possible value is 'none/direct(direct-mapped)/complex(complex cache indexing)'. -+@var{policy} is the write policy. @var{line} is the cache Line size in bytes. -+ - For example, the following options describe 2 NUMA nodes. Node 0 has 2 cpus and - a ram, node 1 has only a ram. The processors in node 0 access memory in node - 0 with access-latency 5 nanoseconds, access-bandwidth is 200 MB/s; - The processors in NUMA node 0 access memory in NUMA node 1 with access-latency 10 - nanoseconds, access-bandwidth is 100 MB/s. -+And for memory side cache information, NUMA node 0 and 1 both have 1 level memory -+cache, size is 10KB, policy is write-back, the cache Line size is 8 bytes: - @example - -machine hmat=on \ - -m 2G \ -@@ -298,7 +309,9 @@ nanoseconds, access-bandwidth is 100 MB/s. 
- -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \ - -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \ - -numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \ ---numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M -+-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M \ -+-numa hmat-cache,node-id=0,size=10K,level=1,associativity=direct,policy=write-back,line=8 \ -+-numa hmat-cache,node-id=1,size=10K,level=1,associativity=direct,policy=write-back,line=8 - @end example - - ETEXI --- -1.8.3.1 - diff --git a/SOURCES/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch b/SOURCES/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch new file mode 100644 index 0000000..659dc22 --- /dev/null +++ b/SOURCES/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch @@ -0,0 +1,210 @@ +From ea46a86ba6319ea98573c65af5186cd5399ab0ce Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:07 -0400 +Subject: [PATCH 2/7] numa: Support SGX numa in the monitor and Libvirt + interfaces + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [2/5] 403c4f98dccd023293cd3246081ae12f4782bed0 +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +Add the SGXEPCSection list into SGXInfo to show the multiple +SGX EPC sections detailed info, not the total size like before. +This patch can enable numa support for 'info sgx' command and +QMP interfaces. The new interfaces show each EPC section info +in one numa node. Libvirt can use QMP interface to get the +detailed host SGX EPC capabilities to decide how to allocate +host EPC sections to guest. 
+ +(qemu) info sgx + SGX support: enabled + SGX1 support: enabled + SGX2 support: enabled + FLC support: enabled + NUMA node #0: size=67108864 + NUMA node #1: size=29360128 + +The QMP interface show: +(QEMU) query-sgx +{"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \ +[{"node": 0, "size": 67108864}, {"node": 1, "size": 29360128}], "flc": true}} + +(QEMU) query-sgx-capabilities +{"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \ +[{"node": 0, "size": 17070817280}, {"node": 1, "size": 17079205888}], "flc": true}} + +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-4-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4755927ae12547c2e7cb22c5fa1b39038c6c11b1) +Signed-off-by: Paul Lai +--- + hw/i386/sgx.c | 51 +++++++++++++++++++++++++++++++++++-------- + qapi/misc-target.json | 19 ++++++++++++++-- + 2 files changed, 59 insertions(+), 11 deletions(-) + +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index d04299904a..5de5dd0893 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -83,11 +83,13 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + ((high & MAKE_64BIT_MASK(0, 20)) << 32); + } + +-static uint64_t sgx_calc_host_epc_section_size(void) ++static SGXEPCSectionList *sgx_calc_host_epc_sections(void) + { ++ SGXEPCSectionList *head = NULL, **tail = &head; ++ SGXEPCSection *section; + uint32_t i, type; + uint32_t eax, ebx, ecx, edx; +- uint64_t size = 0; ++ uint32_t j = 0; + + for (i = 0; i < SGX_MAX_EPC_SECTIONS; i++) { + host_cpuid(0x12, i + 2, &eax, &ebx, &ecx, &edx); +@@ -101,10 +103,13 @@ static uint64_t sgx_calc_host_epc_section_size(void) + break; + } + +- size += sgx_calc_section_metric(ecx, edx); ++ section = g_new0(SGXEPCSection, 1); ++ section->node = j++; ++ section->size = sgx_calc_section_metric(ecx, edx); ++ QAPI_LIST_APPEND(tail, section); + } + +- return size; ++ return head; + } + + static void sgx_epc_reset(void *opaque) +@@ -168,13 +173,35 @@ SGXInfo 
*qmp_query_sgx_capabilities(Error **errp) + info->sgx1 = eax & (1U << 0) ? true : false; + info->sgx2 = eax & (1U << 1) ? true : false; + +- info->section_size = sgx_calc_host_epc_section_size(); ++ info->sections = sgx_calc_host_epc_sections(); + + close(fd); + + return info; + } + ++static SGXEPCSectionList *sgx_get_epc_sections_list(void) ++{ ++ GSList *device_list = sgx_epc_get_device_list(); ++ SGXEPCSectionList *head = NULL, **tail = &head; ++ SGXEPCSection *section; ++ ++ for (; device_list; device_list = device_list->next) { ++ DeviceState *dev = device_list->data; ++ Object *obj = OBJECT(dev); ++ ++ section = g_new0(SGXEPCSection, 1); ++ section->node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, ++ &error_abort); ++ section->size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, ++ &error_abort); ++ QAPI_LIST_APPEND(tail, section); ++ } ++ g_slist_free(device_list); ++ ++ return head; ++} ++ + SGXInfo *qmp_query_sgx(Error **errp) + { + SGXInfo *info = NULL; +@@ -193,14 +220,13 @@ SGXInfo *qmp_query_sgx(Error **errp) + return NULL; + } + +- SGXEPCState *sgx_epc = &pcms->sgx_epc; + info = g_new0(SGXInfo, 1); + + info->sgx = true; + info->sgx1 = true; + info->sgx2 = true; + info->flc = true; +- info->section_size = sgx_epc->size; ++ info->sections = sgx_get_epc_sections_list(); + + return info; + } +@@ -208,6 +234,7 @@ SGXInfo *qmp_query_sgx(Error **errp) + void hmp_info_sgx(Monitor *mon, const QDict *qdict) + { + Error *err = NULL; ++ SGXEPCSectionList *section_list, *section; + g_autoptr(SGXInfo) info = qmp_query_sgx(&err); + + if (err) { +@@ -222,8 +249,14 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) + info->sgx2 ? "enabled" : "disabled"); + monitor_printf(mon, "FLC support: %s\n", + info->flc ? 
"enabled" : "disabled"); +- monitor_printf(mon, "size: %" PRIu64 "\n", +- info->section_size); ++ ++ section_list = info->sections; ++ for (section = section_list; section; section = section->next) { ++ monitor_printf(mon, "NUMA node #%" PRId64 ": ", ++ section->value->node); ++ monitor_printf(mon, "size=%" PRIu64 "\n", ++ section->value->size); ++ } + } + + bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size) +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 5aa2b95b7d..1022aa0184 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -337,6 +337,21 @@ + 'if': 'TARGET_ARM' } + + ++## ++# @SGXEPCSection: ++# ++# Information about intel SGX EPC section info ++# ++# @node: the numa node ++# ++# @size: the size of epc section ++# ++# Since: 6.2 ++## ++{ 'struct': 'SGXEPCSection', ++ 'data': { 'node': 'int', ++ 'size': 'uint64'}} ++ + ## + # @SGXInfo: + # +@@ -350,7 +365,7 @@ + # + # @flc: true if FLC is supported + # +-# @section-size: The EPC section size for guest ++# @sections: The EPC sections info for guest + # + # Since: 6.2 + ## +@@ -359,7 +374,7 @@ + 'sgx1': 'bool', + 'sgx2': 'bool', + 'flc': 'bool', +- 'section-size': 'uint64'}, ++ 'sections': ['SGXEPCSection']}, + 'if': 'TARGET_I386' } + + ## +-- +2.27.0 + diff --git a/SOURCES/kvm-numa-properly-check-if-numa-is-supported.patch b/SOURCES/kvm-numa-properly-check-if-numa-is-supported.patch deleted file mode 100644 index c602256..0000000 --- a/SOURCES/kvm-numa-properly-check-if-numa-is-supported.patch +++ /dev/null @@ -1,81 +0,0 @@ -From e3a1c2ff0d7b930b1782d59d093fd15471d3aee1 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 21 May 2020 23:56:46 +0100 -Subject: [PATCH 03/12] numa: properly check if numa is supported - -RH-Author: plai@redhat.com -Message-id: <20200521235655.27141-3-plai@redhat.com> -Patchwork-id: 96732 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 02/11] numa: properly check if numa is supported -Bugzilla: 1600217 -RH-Acked-by: Michael 
S. Tsirkin -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -From: Igor Mammedov - -Commit aa57020774b, by mistake used MachineClass::numa_mem_supported -to check if NUMA is supported by machine and also as unrelated change -set it to true for sbsa-ref board. - -Luckily change didn't break machines that support NUMA, as the field -is set to true for them. - -But the field is not intended for checking if NUMA is supported and -will be flipped to false within this release for new machine types. - -Fix it: - - by using previously used condition - !mc->cpu_index_to_instance_props || !mc->get_default_cpu_node_id - the first time and then use MachineState::numa_state down the road - to check if NUMA is supported - - dropping stray sbsa-ref chunk - -Fixes: aa57020774b690a22be72453b8e91c9b5a68c516 -Signed-off-by: Igor Mammedov -Message-Id: <1576154936-178362-3-git-send-email-imammedo@redhat.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit fcd3f2cc124600385dba46c69a80626985c15b50) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/arm/sbsa-ref.c | 1 - - hw/core/machine.c | 4 ++-- - 2 files changed, 2 insertions(+), 3 deletions(-) - -diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c -index 27046cc..c6261d4 100644 ---- a/hw/arm/sbsa-ref.c -+++ b/hw/arm/sbsa-ref.c -@@ -791,7 +791,6 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data) - mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids; - mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props; - mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id; -- mc->numa_mem_supported = true; - } - - static const TypeInfo sbsa_ref_info = { -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 5a025d1..19c78c6 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -1128,7 +1128,7 @@ static void machine_initfn(Object *obj) - NULL); - } - -- if (mc->numa_mem_supported) { -+ if (mc->cpu_index_to_instance_props && mc->get_default_cpu_node_id) { - ms->numa_state = g_new0(NumaState, 1); - } - -@@ -1272,7 +1272,7 @@ void machine_run_board_init(MachineState *machine) - { - MachineClass *machine_class = MACHINE_GET_CLASS(machine); - -- if (machine_class->numa_mem_supported) { -+ if (machine->numa_state) { - numa_complete_configuration(machine); - if (machine->numa_state->num_nodes) { - machine_numa_finish_cpu_init(machine); --- -1.8.3.1 - diff --git a/SOURCES/kvm-numa-remove-not-needed-check.patch b/SOURCES/kvm-numa-remove-not-needed-check.patch deleted file mode 100644 index cbe677f..0000000 --- a/SOURCES/kvm-numa-remove-not-needed-check.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 348115bbd0d60fada6f7d9fa27848044690a4bc3 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 21 May 2020 23:56:45 +0100 -Subject: [PATCH 02/12] numa: remove not needed check - -RH-Author: plai@redhat.com -Message-id: <20200521235655.27141-2-plai@redhat.com> -Patchwork-id: 96738 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 01/11] numa: remove not needed check -Bugzilla: 1600217 -RH-Acked-by: Michael S. 
Tsirkin -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -From: Igor Mammedov - -Currently parse_numa_node() is always called from already numa -enabled context. -Drop unnecessary check if numa is supported. - -Signed-off-by: Igor Mammedov -Message-Id: <1576154936-178362-2-git-send-email-imammedo@redhat.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 5275db59aa7ff8a26bd6aa5d07cb4d53de5cfab5) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. de Paula ---- - hw/core/numa.c | 7 +------ - 1 file changed, 1 insertion(+), 6 deletions(-) - -diff --git a/hw/core/numa.c b/hw/core/numa.c -index e3332a9..19f082d 100644 ---- a/hw/core/numa.c -+++ b/hw/core/numa.c -@@ -83,10 +83,6 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, - return; - } - -- if (!mc->cpu_index_to_instance_props || !mc->get_default_cpu_node_id) { -- error_setg(errp, "NUMA is not supported by this machine-type"); -- return; -- } - for (cpus = node->cpus; cpus; cpus = cpus->next) { - CpuInstanceProperties props; - if (cpus->value >= max_cpus) { -@@ -178,9 +174,8 @@ void parse_numa_distance(MachineState *ms, NumaDistOptions *dist, Error **errp) - void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp) - { - Error *err = NULL; -- MachineClass *mc = MACHINE_GET_CLASS(ms); - -- if (!mc->numa_mem_supported) { -+ if (!ms->numa_state) { - error_setg(errp, "NUMA is not supported by this machine-type"); - goto end; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch b/SOURCES/kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch deleted file mode 100644 index 008874f..0000000 --- a/SOURCES/kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch +++ /dev/null @@ -1,250 +0,0 @@ -From aac48d07764ce73c2ba23e3f05ccd29db190024a Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Thu, 8 Oct 2020 11:06:43 -0400 -Subject: [PATCH 04/14] nvram: Exit QEMU if NVRAM cannot contain all -prom-env 
- data - -RH-Author: Greg Kurz -Message-id: <20201008110643.155902-2-gkurz@redhat.com> -Patchwork-id: 98577 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] nvram: Exit QEMU if NVRAM cannot contain all -prom-env data -Bugzilla: 1874780 -RH-Acked-by: David Gibson -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: Greg Kurz - -Since commit 61f20b9dc5b7 ("spapr_nvram: Pre-initialize the NVRAM to -support the -prom-env parameter"), pseries machines can pre-initialize -the "system" partition in the NVRAM with the data passed to all -prom-env -parameters on the QEMU command line. - -In this case it is assumed that all the data fits in 64 KiB, but the user -can easily pass more and crash QEMU: - -$ qemu-system-ppc64 -M pseries $(for ((x=0;x<128;x++)); do \ - echo -n " -prom-env " ; printf "%0.sx" {1..1024}; \ - done) # this requires ~128 Kib -malloc(): corrupted top size -Aborted (core dumped) - -This happens because we don't check if all the prom-env data fits in -the NVRAM and chrp_nvram_set_var() happily memcpy() it passed the -buffer. - -This crash affects basically all ppc/ppc64 machine types that use -prom-env: -- pseries (all versions) -- g3beige -- mac99 - -and also sparc/sparc64 machine types: -- LX -- SPARCClassic -- SPARCbook -- SS-10 -- SS-20 -- SS-4 -- SS-5 -- SS-600MP -- Voyager -- sun4u -- sun4v - -Add a max_len argument to chrp_nvram_create_system_partition() so that -it can check the available size before writing to memory. - -Since NVRAM is populated at machine init, it seems reasonable to consider -this error as fatal. So, instead of reporting an error when we detect that -the NVRAM is too small and adapt all machine types to handle it, we simply -exit QEMU in all cases. This is still better than crashing. If someone -wants another behavior, I guess this can be reworked later. 
- -Tested with: - -$ yes q | \ - (for arch in ppc ppc64 sparc sparc64; do \ - echo == $arch ==; \ - qemu=${arch}-softmmu/qemu-system-$arch; \ - for mach in $($qemu -M help | awk '! /^Supported/ { print $1 }'); do \ - echo $mach; \ - $qemu -M $mach -monitor stdio -nodefaults -nographic \ - $(for ((x=0;x<128;x++)); do \ - echo -n " -prom-env " ; printf "%0.sx" {1..1024}; \ - done) >/dev/null; \ - done; echo; \ - done) - -Without the patch, affected machine types cause QEMU to report some -memory corruption and crash: - -malloc(): corrupted top size - -free(): invalid size - -*** stack smashing detected ***: terminated - -With the patch, QEMU prints the following message and exits: - -NVRAM is too small. Try to pass less data to -prom-env - -It seems that the conditions for the crash have always existed, but it -affects pseries, the machine type I care for, since commit 61f20b9dc5b7 -only. - -Fixes: 61f20b9dc5b7 ("spapr_nvram: Pre-initialize the NVRAM to support the -prom-env parameter") -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1867739 -Reported-by: John Snow -Reviewed-by: Laurent Vivier -Signed-off-by: Greg Kurz -Message-Id: <159736033937.350502.12402444542194031035.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit 37035df51eaabb8d26b71da75b88a1c6727de8fa) -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/nvram/chrp_nvram.c | 24 +++++++++++++++++++++--- - hw/nvram/mac_nvram.c | 2 +- - hw/nvram/spapr_nvram.c | 3 ++- - hw/sparc/sun4m.c | 2 +- - hw/sparc64/sun4u.c | 2 +- - include/hw/nvram/chrp_nvram.h | 3 ++- - 6 files changed, 28 insertions(+), 8 deletions(-) - -diff --git a/hw/nvram/chrp_nvram.c b/hw/nvram/chrp_nvram.c -index d969f26704..d4d10a7c03 100644 ---- a/hw/nvram/chrp_nvram.c -+++ b/hw/nvram/chrp_nvram.c -@@ -21,14 +21,21 @@ - - #include "qemu/osdep.h" - #include "qemu/cutils.h" -+#include "qemu/error-report.h" - #include "hw/nvram/chrp_nvram.h" - #include "sysemu/sysemu.h" - --static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str) -+static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str, -+ int max_len) - { - int len; - - len = strlen(str) + 1; -+ -+ if (max_len < len) { -+ return -1; -+ } -+ - memcpy(&nvram[addr], str, len); - - return addr + len; -@@ -38,19 +45,26 @@ static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str) - * Create a "system partition", used for the Open Firmware - * environment variables. 
- */ --int chrp_nvram_create_system_partition(uint8_t *data, int min_len) -+int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len) - { - ChrpNvramPartHdr *part_header; - unsigned int i; - int end; - -+ if (max_len < sizeof(*part_header)) { -+ goto fail; -+ } -+ - part_header = (ChrpNvramPartHdr *)data; - part_header->signature = CHRP_NVPART_SYSTEM; - pstrcpy(part_header->name, sizeof(part_header->name), "system"); - - end = sizeof(ChrpNvramPartHdr); - for (i = 0; i < nb_prom_envs; i++) { -- end = chrp_nvram_set_var(data, end, prom_envs[i]); -+ end = chrp_nvram_set_var(data, end, prom_envs[i], max_len - end); -+ if (end == -1) { -+ goto fail; -+ } - } - - /* End marker */ -@@ -65,6 +79,10 @@ int chrp_nvram_create_system_partition(uint8_t *data, int min_len) - chrp_nvram_finish_partition(part_header, end); - - return end; -+ -+fail: -+ error_report("NVRAM is too small. Try to pass less data to -prom-env"); -+ exit(EXIT_FAILURE); - } - - /** -diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c -index 9a47e35b8e..ecfb36182f 100644 ---- a/hw/nvram/mac_nvram.c -+++ b/hw/nvram/mac_nvram.c -@@ -152,7 +152,7 @@ static void pmac_format_nvram_partition_of(MacIONVRAMState *nvr, int off, - - /* OpenBIOS nvram variables partition */ - sysp_end = chrp_nvram_create_system_partition(&nvr->data[off], -- DEF_SYSTEM_SIZE) + off; -+ DEF_SYSTEM_SIZE, len) + off; - - /* Free space partition */ - chrp_nvram_create_free_partition(&nvr->data[sysp_end], len - sysp_end); -diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c -index 838082b451..225cd69b49 100644 ---- a/hw/nvram/spapr_nvram.c -+++ b/hw/nvram/spapr_nvram.c -@@ -188,7 +188,8 @@ static void spapr_nvram_realize(SpaprVioDevice *dev, Error **errp) - } - } else if (nb_prom_envs > 0) { - /* Create a system partition to pass the -prom-env variables */ -- chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4); -+ chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4, -+ 
nvram->size); - chrp_nvram_create_free_partition(&nvram->buf[MIN_NVRAM_SIZE / 4], - nvram->size - MIN_NVRAM_SIZE / 4); - } -diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c -index 2aaa5bf1ae..cf2d0762d9 100644 ---- a/hw/sparc/sun4m.c -+++ b/hw/sparc/sun4m.c -@@ -142,7 +142,7 @@ static void nvram_init(Nvram *nvram, uint8_t *macaddr, - memset(image, '\0', sizeof(image)); - - /* OpenBIOS nvram variables partition */ -- sysp_end = chrp_nvram_create_system_partition(image, 0); -+ sysp_end = chrp_nvram_create_system_partition(image, 0, 0x1fd0); - - /* Free space partition */ - chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); -diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c -index 955082773b..f5295a687e 100644 ---- a/hw/sparc64/sun4u.c -+++ b/hw/sparc64/sun4u.c -@@ -137,7 +137,7 @@ static int sun4u_NVRAM_set_params(Nvram *nvram, uint16_t NVRAM_size, - memset(image, '\0', sizeof(image)); - - /* OpenBIOS nvram variables partition */ -- sysp_end = chrp_nvram_create_system_partition(image, 0); -+ sysp_end = chrp_nvram_create_system_partition(image, 0, 0x1fd0); - - /* Free space partition */ - chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); -diff --git a/include/hw/nvram/chrp_nvram.h b/include/hw/nvram/chrp_nvram.h -index 09941a9be4..4a0f5c21b8 100644 ---- a/include/hw/nvram/chrp_nvram.h -+++ b/include/hw/nvram/chrp_nvram.h -@@ -50,7 +50,8 @@ chrp_nvram_finish_partition(ChrpNvramPartHdr *header, uint32_t size) - header->checksum = sum & 0xff; - } - --int chrp_nvram_create_system_partition(uint8_t *data, int min_len); -+/* chrp_nvram_create_system_partition() failure is fatal */ -+int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len); - int chrp_nvram_create_free_partition(uint8_t *data, int len); - - #endif --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Allow-booting-in-case-the-first-vir.patch b/SOURCES/kvm-pc-bios-s390-ccw-Allow-booting-in-case-the-first-vir.patch deleted file mode 100644 
index 270b926..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Allow-booting-in-case-the-first-vir.patch +++ /dev/null @@ -1,112 +0,0 @@ -From e46aaac6f1ad67753face896e827ad1da920b9e5 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 9 Oct 2020 10:08:47 -0400 -Subject: [PATCH 11/14] pc-bios/s390-ccw: Allow booting in case the first - virtio-blk disk is bad - -RH-Author: Thomas Huth -Message-id: <20201009100849.264994-8-thuth@redhat.com> -Patchwork-id: 98601 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 7/9] pc-bios/s390-ccw: Allow booting in case the first virtio-blk disk is bad -Bugzilla: 1846975 -RH-Acked-by: Jens Freimann -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -If you try to boot with two virtio-blk disks (without bootindex), and -only the second one is bootable, the s390-ccw bios currently stops at -the first disk and does not continue booting from the second one. This -is annoying - and all other major QEMU firmwares succeed to boot from -the second disk in this case, so we should do the same in the s390-ccw -bios, too. - -Reviewed-by: Cornelia Huck -Message-Id: <20200806105349.632-8-thuth@redhat.com> -Signed-off-by: Thomas Huth -(cherry picked from commit 5dc739f343cd06ecb9b058294564ce7504856f3f) -Signed-off-by: Danilo C. L. 
de Paula ---- - pc-bios/s390-ccw/bootmap.c | 34 +++++++++++++++++++++++----------- - pc-bios/s390-ccw/main.c | 2 +- - 2 files changed, 24 insertions(+), 12 deletions(-) - -diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c -index d13b7cbd15..e91ea719ff 100644 ---- a/pc-bios/s390-ccw/bootmap.c -+++ b/pc-bios/s390-ccw/bootmap.c -@@ -289,11 +289,18 @@ static void ipl_eckd_cdl(void) - read_block(1, ipl2, "Cannot read IPL2 record at block 1"); - - mbr = &ipl2->mbr; -- IPL_assert(magic_match(mbr, ZIPL_MAGIC), "No zIPL section in IPL2 record."); -- IPL_assert(block_size_ok(mbr->blockptr.xeckd.bptr.size), -- "Bad block size in zIPL section of IPL2 record."); -- IPL_assert(mbr->dev_type == DEV_TYPE_ECKD, -- "Non-ECKD device type in zIPL section of IPL2 record."); -+ if (!magic_match(mbr, ZIPL_MAGIC)) { -+ sclp_print("No zIPL section in IPL2 record.\n"); -+ return; -+ } -+ if (!block_size_ok(mbr->blockptr.xeckd.bptr.size)) { -+ sclp_print("Bad block size in zIPL section of IPL2 record.\n"); -+ return; -+ } -+ if (!mbr->dev_type == DEV_TYPE_ECKD) { -+ sclp_print("Non-ECKD device type in zIPL section of IPL2 record.\n"); -+ return; -+ } - - /* save pointer to Boot Map Table */ - bmt_block_nr = eckd_block_num(&mbr->blockptr.xeckd.bptr.chs); -@@ -303,10 +310,14 @@ static void ipl_eckd_cdl(void) - - memset(sec, FREE_SPACE_FILLER, sizeof(sec)); - read_block(2, vlbl, "Cannot read Volume Label at block 2"); -- IPL_assert(magic_match(vlbl->key, VOL1_MAGIC), -- "Invalid magic of volume label block"); -- IPL_assert(magic_match(vlbl->f.key, VOL1_MAGIC), -- "Invalid magic of volser block"); -+ if (!magic_match(vlbl->key, VOL1_MAGIC)) { -+ sclp_print("Invalid magic of volume label block.\n"); -+ return; -+ } -+ if (!magic_match(vlbl->f.key, VOL1_MAGIC)) { -+ sclp_print("Invalid magic of volser block.\n"); -+ return; -+ } - print_volser(vlbl->f.volser); - - run_eckd_boot_script(bmt_block_nr, s1b_block_nr); -@@ -400,7 +411,8 @@ static void ipl_eckd(void) - read_block(0, 
mbr, "Cannot read block 0 on DASD"); - - if (magic_match(mbr->magic, IPL1_MAGIC)) { -- ipl_eckd_cdl(); /* no return */ -+ ipl_eckd_cdl(); /* only returns in case of error */ -+ return; - } - - /* LDL/CMS? */ -@@ -827,5 +839,5 @@ void zipl_load(void) - panic("\n! Unknown IPL device type !\n"); - } - -- panic("\n* this can never happen *\n"); -+ sclp_print("zIPL load failed.\n"); - } -diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c -index 5c1c98341d..b5c721c395 100644 ---- a/pc-bios/s390-ccw/main.c -+++ b/pc-bios/s390-ccw/main.c -@@ -249,7 +249,7 @@ static void ipl_boot_device(void) - break; - case CU_TYPE_VIRTIO: - if (virtio_setup() == 0) { -- zipl_load(); /* no return */ -+ zipl_load(); /* Only returns in case of errors */ - } - break; - default: --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Do-not-bail-out-early-if-not-findin.patch b/SOURCES/kvm-pc-bios-s390-ccw-Do-not-bail-out-early-if-not-findin.patch deleted file mode 100644 index 4a295ca..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Do-not-bail-out-early-if-not-findin.patch +++ /dev/null @@ -1,214 +0,0 @@ -From 6f44767aeda52048e7c9ee4b5fcc30353c71cbc1 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 9 Oct 2020 10:08:45 -0400 -Subject: [PATCH 09/14] pc-bios/s390-ccw: Do not bail out early if not finding - a SCSI disk - -RH-Author: Thomas Huth -Message-id: <20201009100849.264994-6-thuth@redhat.com> -Patchwork-id: 98599 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 5/9] pc-bios/s390-ccw: Do not bail out early if not finding a SCSI disk -Bugzilla: 1846975 -RH-Acked-by: Jens Freimann -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -In case the user did not specify a boot device, we want to continue -looking for other devices if there are no valid SCSI disks on a virtio- -scsi controller. As a first step, do not panic in this case and let -the control flow carry the error to the upper functions instead. 
- -Message-Id: <20200806105349.632-6-thuth@redhat.com> -Reviewed-by: Cornelia Huck -Signed-off-by: Thomas Huth -(cherry picked from commit 605751b5a5334e187761b0b8a8266a216897bf70) -Signed-off-by: Danilo C. L. de Paula ---- - pc-bios/s390-ccw/main.c | 14 ++++++++++---- - pc-bios/s390-ccw/s390-ccw.h | 2 +- - pc-bios/s390-ccw/virtio-blkdev.c | 7 +++++-- - pc-bios/s390-ccw/virtio-scsi.c | 28 ++++++++++++++++++++-------- - pc-bios/s390-ccw/virtio-scsi.h | 2 +- - 5 files changed, 37 insertions(+), 16 deletions(-) - -diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c -index d6fd218074..456733fbee 100644 ---- a/pc-bios/s390-ccw/main.c -+++ b/pc-bios/s390-ccw/main.c -@@ -227,7 +227,7 @@ static void find_boot_device(void) - IPL_assert(found, "Boot device not found\n"); - } - --static void virtio_setup(void) -+static int virtio_setup(void) - { - VDev *vdev = virtio_get_device(); - QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS; -@@ -242,9 +242,14 @@ static void virtio_setup(void) - sclp_print("Network boot device detected\n"); - vdev->netboot_start_addr = qipl.netboot_start_addr; - } else { -- virtio_blk_setup_device(blk_schid); -+ int ret = virtio_blk_setup_device(blk_schid); -+ if (ret) { -+ return ret; -+ } - IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); - } -+ -+ return 0; - } - - static void ipl_boot_device(void) -@@ -255,8 +260,9 @@ static void ipl_boot_device(void) - dasd_ipl(blk_schid, cutype); /* no return */ - break; - case CU_TYPE_VIRTIO: -- virtio_setup(); -- zipl_load(); /* no return */ -+ if (virtio_setup() == 0) { -+ zipl_load(); /* no return */ -+ } - break; - default: - print_int("Attempting to boot from unexpected device type", cutype); -diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h -index ae432c40b8..e7cf36eb91 100644 ---- a/pc-bios/s390-ccw/s390-ccw.h -+++ b/pc-bios/s390-ccw/s390-ccw.h -@@ -70,7 +70,7 @@ int sclp_read(char *str, size_t count); - unsigned long 
virtio_load_direct(ulong rec_list1, ulong rec_list2, - ulong subchan_id, void *load_addr); - bool virtio_is_supported(SubChannelId schid); --void virtio_blk_setup_device(SubChannelId schid); -+int virtio_blk_setup_device(SubChannelId schid); - int virtio_read(ulong sector, void *load_addr); - u64 get_clock(void); - ulong get_second(void); -diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c -index 11c56261ca..7d35050292 100644 ---- a/pc-bios/s390-ccw/virtio-blkdev.c -+++ b/pc-bios/s390-ccw/virtio-blkdev.c -@@ -263,9 +263,10 @@ uint64_t virtio_get_blocks(void) - return 0; - } - --void virtio_blk_setup_device(SubChannelId schid) -+int virtio_blk_setup_device(SubChannelId schid) - { - VDev *vdev = virtio_get_device(); -+ int ret = 0; - - vdev->schid = schid; - virtio_setup_ccw(vdev); -@@ -288,9 +289,11 @@ void virtio_blk_setup_device(SubChannelId schid) - "Config: CDB size mismatch"); - - sclp_print("Using virtio-scsi.\n"); -- virtio_scsi_setup(vdev); -+ ret = virtio_scsi_setup(vdev); - break; - default: - panic("\n! No IPL device available !\n"); - } -+ -+ return ret; - } -diff --git a/pc-bios/s390-ccw/virtio-scsi.c b/pc-bios/s390-ccw/virtio-scsi.c -index 4fe4b9d261..88691edb89 100644 ---- a/pc-bios/s390-ccw/virtio-scsi.c -+++ b/pc-bios/s390-ccw/virtio-scsi.c -@@ -192,7 +192,12 @@ static bool scsi_read_capacity(VDev *vdev, - - /* virtio-scsi routines */ - --static void virtio_scsi_locate_device(VDev *vdev) -+/* -+ * Tries to locate a SCSI device and and adds the information for the found -+ * device to the vdev->scsi_device structure. 
-+ * Returns 0 if SCSI device could be located, or a error code < 0 otherwise -+ */ -+static int virtio_scsi_locate_device(VDev *vdev) - { - const uint16_t channel = 0; /* again, it's what QEMU does */ - uint16_t target; -@@ -218,7 +223,7 @@ static void virtio_scsi_locate_device(VDev *vdev) - IPL_check(sdev->channel == 0, "non-zero channel requested"); - IPL_check(sdev->target <= vdev->config.scsi.max_target, "target# high"); - IPL_check(sdev->lun <= vdev->config.scsi.max_lun, "LUN# high"); -- return; -+ return 0; - } - - for (target = 0; target <= vdev->config.scsi.max_target; target++) { -@@ -245,18 +250,20 @@ static void virtio_scsi_locate_device(VDev *vdev) - */ - sdev->lun = r->lun[0].v16[0]; /* it's returned this way */ - debug_print_int("Have to use LUN", sdev->lun); -- return; /* we have to use this device */ -+ return 0; /* we have to use this device */ - } - for (i = 0; i < luns; i++) { - if (r->lun[i].v64) { - /* Look for non-zero LUN - we have where to choose from */ - sdev->lun = r->lun[i].v16[0]; - debug_print_int("Will use LUN", sdev->lun); -- return; /* we have found a device */ -+ return 0; /* we have found a device */ - } - } - } -- panic("\n! 
Cannot locate virtio-scsi device !\n"); -+ -+ sclp_print("Warning: Could not locate a usable virtio-scsi device\n"); -+ return -ENODEV; - } - - int virtio_scsi_read_many(VDev *vdev, -@@ -320,17 +327,20 @@ static void scsi_parse_capacity_report(void *data, - } - } - --void virtio_scsi_setup(VDev *vdev) -+int virtio_scsi_setup(VDev *vdev) - { - int retry_test_unit_ready = 3; - uint8_t data[256]; - uint32_t data_size = sizeof(data); - ScsiInquiryEvpdPages *evpd = &scsi_inquiry_evpd_pages_response; - ScsiInquiryEvpdBl *evpd_bl = &scsi_inquiry_evpd_bl_response; -- int i; -+ int i, ret; - - vdev->scsi_device = &default_scsi_device; -- virtio_scsi_locate_device(vdev); -+ ret = virtio_scsi_locate_device(vdev); -+ if (ret < 0) { -+ return ret; -+ } - - /* We have to "ping" the device before it becomes readable */ - while (!scsi_test_unit_ready(vdev)) { -@@ -415,4 +425,6 @@ void virtio_scsi_setup(VDev *vdev) - } - scsi_parse_capacity_report(data, &vdev->scsi_last_block, - (uint32_t *) &vdev->scsi_block_size); -+ -+ return 0; - } -diff --git a/pc-bios/s390-ccw/virtio-scsi.h b/pc-bios/s390-ccw/virtio-scsi.h -index 4c4f4bbc31..4b14c2c2f9 100644 ---- a/pc-bios/s390-ccw/virtio-scsi.h -+++ b/pc-bios/s390-ccw/virtio-scsi.h -@@ -67,7 +67,7 @@ static inline bool virtio_scsi_response_ok(const VirtioScsiCmdResp *r) - return r->response == VIRTIO_SCSI_S_OK && r->status == CDB_STATUS_GOOD; - } - --void virtio_scsi_setup(VDev *vdev); -+int virtio_scsi_setup(VDev *vdev); - int virtio_scsi_read_many(VDev *vdev, - ulong sector, void *load_addr, int sec_num); - --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Introduce-ENODEV-define-and-remove-.patch b/SOURCES/kvm-pc-bios-s390-ccw-Introduce-ENODEV-define-and-remove-.patch deleted file mode 100644 index 4385267..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Introduce-ENODEV-define-and-remove-.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 7b3a7cbfc5872e088f13e11f5c38dc5ac80c3330 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 9 Oct 
2020 10:08:43 -0400 -Subject: [PATCH 07/14] pc-bios/s390-ccw: Introduce ENODEV define and remove - guards of others - -RH-Author: Thomas Huth -Message-id: <20201009100849.264994-4-thuth@redhat.com> -Patchwork-id: 98597 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 3/9] pc-bios/s390-ccw: Introduce ENODEV define and remove guards of others -Bugzilla: 1846975 -RH-Acked-by: Jens Freimann -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Remove the "#ifndef E..." guards from the defines here - the header -guard S390_CCW_H at the top of the file should avoid double definition, -and if the error code is defined in a different file already, we're in -trouble anyway, then it's better to see the error at compile time instead -of hunting weird behavior during runtime later. -Also define ENODEV - we will use this in a later patch. - -Message-Id: <20200806105349.632-4-thuth@redhat.com> -Reviewed-by: Cornelia Huck -Reviewed-by: Janosch Frank -Signed-off-by: Thomas Huth -(cherry picked from commit f3180b0266386b31deb7bb83fcaea68af7d1bcee) -Signed-off-by: Danilo C. L. 
de Paula ---- - pc-bios/s390-ccw/s390-ccw.h | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - -diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h -index 21f27e7990..ae432c40b8 100644 ---- a/pc-bios/s390-ccw/s390-ccw.h -+++ b/pc-bios/s390-ccw/s390-ccw.h -@@ -27,12 +27,10 @@ typedef unsigned long long __u64; - #define false 0 - #define PAGE_SIZE 4096 - --#ifndef EIO - #define EIO 1 --#endif --#ifndef EBUSY - #define EBUSY 2 --#endif -+#define ENODEV 3 -+ - #ifndef NULL - #define NULL 0 - #endif --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Compile-with-std-gnu99-fwr.patch b/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Compile-with-std-gnu99-fwr.patch deleted file mode 100644 index 8f44646..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Compile-with-std-gnu99-fwr.patch +++ /dev/null @@ -1,60 +0,0 @@ -From eda3b6620e779ff89df46a0fb9022016bffd7f44 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 9 Oct 2020 10:08:41 -0400 -Subject: [PATCH 05/14] pc-bios/s390-ccw/Makefile: Compile with -std=gnu99, - -fwrapv and -fno-common - -RH-Author: Thomas Huth -Message-id: <20201009100849.264994-2-thuth@redhat.com> -Patchwork-id: 98595 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/9] pc-bios/s390-ccw/Makefile: Compile with -std=gnu99, -fwrapv and -fno-common -Bugzilla: 1846975 -RH-Acked-by: Jens Freimann -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -The main QEMU code is compiled with -std=gnu99, -fwrapv and -fno-common. -We should use the same flags for the s390-ccw bios, too, to avoid that -we get different behavior with different compiler versions that changed -their default settings in the course of time (it happened at least with --std=... and -fno-common in the past already). - -While we're at it, also group the other flags here in a little bit nicer -fashion: Move the two "-m" flags out of the "-f" area and specify them on -a separate line. 
- -Reviewed-by: Claudio Imbrenda -Acked-by: Cornelia Huck -Acked-by: Janosch Frank -Message-Id: <20200806105349.632-2-thuth@redhat.com> -Signed-off-by: Thomas Huth -(cherry picked from commit 4f6a1eb886961f1f9da2d553c4b0e5ef69cd3801) -Conflicts: Simple contextual conflict due to meson reworks in upstream -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. de Paula ---- - pc-bios/s390-ccw/Makefile | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile -index a048b6b077..e776a2a5ec 100644 ---- a/pc-bios/s390-ccw/Makefile -+++ b/pc-bios/s390-ccw/Makefile -@@ -13,10 +13,11 @@ OBJECTS = start.o main.o bootmap.o jump2ipl.o sclp.o menu.o \ - virtio.o virtio-scsi.o virtio-blkdev.o libc.o cio.o dasd-ipl.o - - QEMU_CFLAGS := $(filter -W%, $(QEMU_CFLAGS)) --QEMU_CFLAGS += -ffreestanding -fno-delete-null-pointer-checks -msoft-float --QEMU_CFLAGS += -march=z900 -fPIE -fno-strict-aliasing --QEMU_CFLAGS += -fno-asynchronous-unwind-tables -+QEMU_CFLAGS += -ffreestanding -fno-delete-null-pointer-checks -fno-common -fPIE -+QEMU_CFLAGS += -fwrapv -fno-strict-aliasing -fno-asynchronous-unwind-tables - QEMU_CFLAGS += $(call cc-option, $(QEMU_CFLAGS), -fno-stack-protector) -+QEMU_CFLAGS += -msoft-float -march=z900 -+QEMU_CFLAGS += -std=gnu99 - LDFLAGS += -Wl,-pie -nostdlib - - build-all: s390-ccw.img s390-netboot.img --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Move-ipl-related-code-from-main-int.patch b/SOURCES/kvm-pc-bios-s390-ccw-Move-ipl-related-code-from-main-int.patch deleted file mode 100644 index bbeac9e..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Move-ipl-related-code-from-main-int.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 740590240bec03dc6ca208963112d3c2999f353e Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 9 Oct 2020 10:08:42 -0400 -Subject: [PATCH 06/14] pc-bios/s390-ccw: Move ipl-related code from main() - into a separate function - -RH-Author: Thomas Huth -Message-id: 
<20201009100849.264994-3-thuth@redhat.com> -Patchwork-id: 98596 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 2/9] pc-bios/s390-ccw: Move ipl-related code from main() into a separate function -Bugzilla: 1846975 -RH-Acked-by: Jens Freimann -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Let's move this part of the code into a separate function to be able -to use it from multiple spots later. - -Reviewed-by: Claudio Imbrenda -Reviewed-by: Cornelia Huck -Reviewed-by: Janosch Frank -Message-Id: <20200806105349.632-3-thuth@redhat.com> -Signed-off-by: Thomas Huth -(cherry picked from commit d1f060a8b515a0b1d14c38f2c8f86ab54e79c3dc) -Signed-off-by: Danilo C. L. de Paula ---- - pc-bios/s390-ccw/main.c | 20 ++++++++++++-------- - 1 file changed, 12 insertions(+), 8 deletions(-) - -diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c -index 4e65b411e1..5e565be5b1 100644 ---- a/pc-bios/s390-ccw/main.c -+++ b/pc-bios/s390-ccw/main.c -@@ -232,14 +232,8 @@ static void virtio_setup(void) - } - } - --int main(void) -+static void ipl_boot_device(void) - { -- sclp_setup(); -- css_setup(); -- boot_setup(); -- find_boot_device(); -- enable_subchannel(blk_schid); -- - switch (cutype) { - case CU_TYPE_DASD_3990: - case CU_TYPE_DASD_2107: -@@ -251,8 +245,18 @@ int main(void) - break; - default: - print_int("Attempting to boot from unexpected device type", cutype); -- panic(""); -+ panic("\nBoot failed.\n"); - } -+} -+ -+int main(void) -+{ -+ sclp_setup(); -+ css_setup(); -+ boot_setup(); -+ find_boot_device(); -+ enable_subchannel(blk_schid); -+ ipl_boot_device(); - - panic("Failed to load OS from hard disk\n"); - return 0; /* make compiler happy */ --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Move-the-inner-logic-of-find_subch-.patch b/SOURCES/kvm-pc-bios-s390-ccw-Move-the-inner-logic-of-find_subch-.patch deleted file mode 100644 index 3aa5dfd..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Move-the-inner-logic-of-find_subch-.patch +++ /dev/null @@ -1,154 +0,0 
@@ -From d90cbb55fe3ec232091a24137cab45419aac8bc5 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 9 Oct 2020 10:08:44 -0400 -Subject: [PATCH 08/14] pc-bios/s390-ccw: Move the inner logic of find_subch() - to a separate function - -RH-Author: Thomas Huth -Message-id: <20201009100849.264994-5-thuth@redhat.com> -Patchwork-id: 98598 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 4/9] pc-bios/s390-ccw: Move the inner logic of find_subch() to a separate function -Bugzilla: 1846975 -RH-Acked-by: Jens Freimann -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Move the code to a separate function to be able to re-use it from a -different spot later. - -Reviewed-by: Claudio Imbrenda -Message-Id: <20200806105349.632-5-thuth@redhat.com> -Reviewed-by: Cornelia Huck -Reviewed-by: Janosch Frank -Signed-off-by: Thomas Huth -(cherry picked from commit d2cf4af1f4af02f6f2d5827d9a06c31690084d3b) -Signed-off-by: Danilo C. L. de Paula ---- - pc-bios/s390-ccw/main.c | 99 ++++++++++++++++++++++++----------------- - 1 file changed, 57 insertions(+), 42 deletions(-) - -diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c -index 5e565be5b1..d6fd218074 100644 ---- a/pc-bios/s390-ccw/main.c -+++ b/pc-bios/s390-ccw/main.c -@@ -60,6 +60,60 @@ unsigned int get_loadparm_index(void) - return atoui(loadparm_str); - } - -+static int is_dev_possibly_bootable(int dev_no, int sch_no) -+{ -+ bool is_virtio; -+ Schib schib; -+ int r; -+ -+ blk_schid.sch_no = sch_no; -+ r = stsch_err(blk_schid, &schib); -+ if (r == 3 || r == -EIO) { -+ return -ENODEV; -+ } -+ if (!schib.pmcw.dnv) { -+ return false; -+ } -+ -+ enable_subchannel(blk_schid); -+ cutype = cu_type(blk_schid); -+ -+ /* -+ * Note: we always have to run virtio_is_supported() here to make -+ * sure that the vdev.senseid data gets pre-initialized correctly -+ */ -+ is_virtio = virtio_is_supported(blk_schid); -+ -+ /* No specific devno given, just return whether the device is possibly bootable */ -+ if (dev_no < 0) { -+ switch 
(cutype) { -+ case CU_TYPE_VIRTIO: -+ if (is_virtio) { -+ /* -+ * Skip net devices since no IPLB is created and therefore -+ * no network bootloader has been loaded -+ */ -+ if (virtio_get_device_type() != VIRTIO_ID_NET) { -+ return true; -+ } -+ } -+ return false; -+ case CU_TYPE_DASD_3990: -+ case CU_TYPE_DASD_2107: -+ return true; -+ default: -+ return false; -+ } -+ } -+ -+ /* Caller asked for a specific devno */ -+ if (schib.pmcw.dev == dev_no) { -+ return true; -+ } -+ -+ return false; -+} -+ - /* - * Find the subchannel connected to the given device (dev_no) and fill in the - * subchannel information block (schib) with the connected subchannel's info. -@@ -71,53 +125,14 @@ unsigned int get_loadparm_index(void) - */ - static bool find_subch(int dev_no) - { -- Schib schib; - int i, r; -- bool is_virtio; - - for (i = 0; i < 0x10000; i++) { -- blk_schid.sch_no = i; -- r = stsch_err(blk_schid, &schib); -- if ((r == 3) || (r == -EIO)) { -+ r = is_dev_possibly_bootable(dev_no, i); -+ if (r < 0) { - break; - } -- if (!schib.pmcw.dnv) { -- continue; -- } -- -- enable_subchannel(blk_schid); -- cutype = cu_type(blk_schid); -- -- /* -- * Note: we always have to run virtio_is_supported() here to make -- * sure that the vdev.senseid data gets pre-initialized correctly -- */ -- is_virtio = virtio_is_supported(blk_schid); -- -- /* No specific devno given, just return 1st possibly bootable device */ -- if (dev_no < 0) { -- switch (cutype) { -- case CU_TYPE_VIRTIO: -- if (is_virtio) { -- /* -- * Skip net devices since no IPLB is created and therefore -- * no network bootloader has been loaded -- */ -- if (virtio_get_device_type() != VIRTIO_ID_NET) { -- return true; -- } -- } -- continue; -- case CU_TYPE_DASD_3990: -- case CU_TYPE_DASD_2107: -- return true; -- default: -- continue; -- } -- } -- -- /* Caller asked for a specific devno */ -- if (schib.pmcw.dev == dev_no) { -+ if (r == true) { - return true; - } - } --- -2.27.0 - diff --git 
a/SOURCES/kvm-pc-bios-s390-ccw-Scan-through-all-devices-if-no-boot.patch b/SOURCES/kvm-pc-bios-s390-ccw-Scan-through-all-devices-if-no-boot.patch deleted file mode 100644 index c8e3017..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Scan-through-all-devices-if-no-boot.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 911dc631f9ab68c6acfd4b401fbcfaa3b58a4fb6 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 9 Oct 2020 10:08:46 -0400 -Subject: [PATCH 10/14] pc-bios/s390-ccw: Scan through all devices if no boot - device specified - -RH-Author: Thomas Huth -Message-id: <20201009100849.264994-7-thuth@redhat.com> -Patchwork-id: 98600 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 6/9] pc-bios/s390-ccw: Scan through all devices if no boot device specified -Bugzilla: 1846975 -RH-Acked-by: Jens Freimann -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -If no boot device has been specified (via "bootindex=..."), the s390-ccw -bios scans through all devices to find a bootable device. But so far, it -stops at the very first block device (including virtio-scsi controllers -without attached devices) that it finds, no matter whether it is bootable -or not. That leads to some weird situation where it is e.g. possible -to boot via: - - qemu-system-s390x -hda /path/to/disk.qcow2 - -but not if there is e.g. a virtio-scsi controller specified before: - - qemu-system-s390x -device virtio-scsi -hda /path/to/disk.qcow2 - -While using "bootindex=..." is clearly the preferred way of booting -on s390x, we still can make the life for the users at least a little -bit easier if we look at all available devices to find a bootable one. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1846975 -Reviewed-by: Cornelia Huck -Message-Id: <20200806105349.632-7-thuth@redhat.com> -Signed-off-by: Thomas Huth -(cherry picked from commit 869d0e2f593dd37297c366203f006b9acd1b7b45) -Signed-off-by: Danilo C. L. 
de Paula ---- - pc-bios/s390-ccw/main.c | 46 +++++++++++++++++++++++++++-------------- - 1 file changed, 31 insertions(+), 15 deletions(-) - -diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c -index 456733fbee..5c1c98341d 100644 ---- a/pc-bios/s390-ccw/main.c -+++ b/pc-bios/s390-ccw/main.c -@@ -191,20 +191,8 @@ static void boot_setup(void) - static void find_boot_device(void) - { - VDev *vdev = virtio_get_device(); -- int ssid; - bool found; - -- if (!have_iplb) { -- for (ssid = 0; ssid < 0x3; ssid++) { -- blk_schid.ssid = ssid; -- found = find_subch(-1); -- if (found) { -- return; -- } -- } -- panic("Could not find a suitable boot device (none specified)\n"); -- } -- - switch (iplb.pbt) { - case S390_IPL_TYPE_CCW: - debug_print_int("device no. ", iplb.ccw.devno); -@@ -270,14 +258,42 @@ static void ipl_boot_device(void) - } - } - -+/* -+ * No boot device has been specified, so we have to scan through the -+ * channels to find one. -+ */ -+static void probe_boot_device(void) -+{ -+ int ssid, sch_no, ret; -+ -+ for (ssid = 0; ssid < 0x3; ssid++) { -+ blk_schid.ssid = ssid; -+ for (sch_no = 0; sch_no < 0x10000; sch_no++) { -+ ret = is_dev_possibly_bootable(-1, sch_no); -+ if (ret < 0) { -+ break; -+ } -+ if (ret == true) { -+ ipl_boot_device(); /* Only returns if unsuccessful */ -+ } -+ } -+ } -+ -+ sclp_print("Could not find a suitable boot device (none specified)\n"); -+} -+ - int main(void) - { - sclp_setup(); - css_setup(); - boot_setup(); -- find_boot_device(); -- enable_subchannel(blk_schid); -- ipl_boot_device(); -+ if (have_iplb) { -+ find_boot_device(); -+ enable_subchannel(blk_schid); -+ ipl_boot_device(); -+ } else { -+ probe_boot_device(); -+ } - - panic("Failed to load OS from hard disk\n"); - return 0; /* make compiler happy */ --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-break-loop-if-a-null-block-number-i.patch b/SOURCES/kvm-pc-bios-s390-ccw-break-loop-if-a-null-block-number-i.patch deleted file mode 100644 index 
414cc13..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-break-loop-if-a-null-block-number-i.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 56ae2d8a1ee3a35e2eed4f4baa61f97184189b47 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 18 May 2021 13:51:24 -0400 -Subject: [PATCH 4/5] pc-bios/s390-ccw: break loop if a null block number is - reached -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20210518135125.191329-3-thuth@redhat.com> -Patchwork-id: 101549 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 2/3] pc-bios/s390-ccw: break loop if a null block number is reached -Bugzilla: 1942880 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Break the loop if `cur_block_nr` is a null block number because this -means that the end of chunk is reached. In this case we will try to -boot the default entry. - -Fixes: ba831b25262a ("s390-ccw: read stage2 boot loader data to find menu") -Reviewed-by: Collin Walling -Signed-off-by: Marc Hartmayer -Message-Id: <20200924085926.21709-3-mhartmay@linux.ibm.com> -Signed-off-by: Thomas Huth -(cherry picked from commit 468184ec9024f4f7b55247f70ec57554e8a500d7) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. 
de Paula ---- - pc-bios/s390-ccw/bootmap.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c -index bb6e003270..624f524331 100644 ---- a/pc-bios/s390-ccw/bootmap.c -+++ b/pc-bios/s390-ccw/bootmap.c -@@ -192,7 +192,7 @@ static int eckd_get_boot_menu_index(block_number_t s1b_block_nr) - for (i = 0; i < STAGE2_BLK_CNT_MAX; i++) { - cur_block_nr = eckd_block_num(&s1b->seek[i].chs); - -- if (!cur_block_nr) { -+ if (!cur_block_nr || is_null_block_number(cur_block_nr)) { - break; - } - --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch b/SOURCES/kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch deleted file mode 100644 index 2597118..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 52ba1903b2c8ce69e8cd1de2a78c2c63cc60383b Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 18 May 2021 13:51:25 -0400 -Subject: [PATCH 5/5] pc-bios/s390-ccw: don't try to read the next block if end - of chunk is reached -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20210518135125.191329-4-thuth@redhat.com> -Patchwork-id: 101550 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 3/3] pc-bios/s390-ccw: don't try to read the next block if end of chunk is reached -Bugzilla: 1942880 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Don't read the block if a null block number is reached, because this means that -the end of chunk is reached. - -Reviewed-by: Collin Walling -Signed-off-by: Marc Hartmayer -Message-Id: <20210416074736.17409-1-mhartmay@linux.ibm.com> -Signed-off-by: Thomas Huth -(cherry picked from commit a6625d38cce3901a7c1cba069f0abcf743a293f1) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. 
de Paula ---- - pc-bios/s390-ccw/bootmap.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c -index 624f524331..8458b15cb6 100644 ---- a/pc-bios/s390-ccw/bootmap.c -+++ b/pc-bios/s390-ccw/bootmap.c -@@ -212,7 +212,7 @@ static int eckd_get_boot_menu_index(block_number_t s1b_block_nr) - next_block_nr = eckd_block_num(&s1b->seek[i + 1].chs); - } - -- if (next_block_nr) { -+ if (next_block_nr && !is_null_block_number(next_block_nr)) { - read_block(next_block_nr, s2_next_blk, - "Cannot read stage2 boot loader"); - } --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-fix-off-by-one-error.patch b/SOURCES/kvm-pc-bios-s390-ccw-fix-off-by-one-error.patch deleted file mode 100644 index 691bed4..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-fix-off-by-one-error.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 0e9bdb960045f98d70f765bbb585f1647e5fea08 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 18 May 2021 13:51:23 -0400 -Subject: [PATCH 3/5] pc-bios/s390-ccw: fix off-by-one error -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20210518135125.191329-2-thuth@redhat.com> -Patchwork-id: 101548 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/3] pc-bios/s390-ccw: fix off-by-one error -Bugzilla: 1942880 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -This error takes effect when the magic value "zIPL" is located at the -end of a block. For example if s2_cur_blk = 0x7fe18000 and the magic -value "zIPL" is located at 0x7fe18ffc - 0x7fe18fff. - -Fixes: ba831b25262a ("s390-ccw: read stage2 boot loader data to find menu") -Reviewed-by: Collin Walling -Signed-off-by: Marc Hartmayer -Message-Id: <20200924085926.21709-2-mhartmay@linux.ibm.com> -Reviewed-by: Thomas Huth -[thuth: Use "<= ... - 4" instead of "< ... 
- 3"] -Signed-off-by: Thomas Huth -(cherry picked from commit 5f97ba0c74ccace0a4014460de9751ff3c6f454a) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. de Paula ---- - pc-bios/s390-ccw/bootmap.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c -index e91ea719ff..bb6e003270 100644 ---- a/pc-bios/s390-ccw/bootmap.c -+++ b/pc-bios/s390-ccw/bootmap.c -@@ -163,7 +163,7 @@ static bool find_zipl_boot_menu_banner(int *offset) - int i; - - /* Menu banner starts with "zIPL" */ -- for (i = 0; i < virtio_get_block_size() - 4; i++) { -+ for (i = 0; i <= virtio_get_block_size() - 4; i++) { - if (magic_match(s2_cur_blk + i, ZIPL_MAGIC_EBCDIC)) { - *offset = i; - return true; --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-main-Remove-superfluous-call-to-ena.patch b/SOURCES/kvm-pc-bios-s390-ccw-main-Remove-superfluous-call-to-ena.patch deleted file mode 100644 index cf1466a..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-main-Remove-superfluous-call-to-ena.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 541d06b7dc1cd3ad4722850f3a7f5df12b8d6fba Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 9 Oct 2020 10:08:48 -0400 -Subject: [PATCH 12/14] pc-bios/s390-ccw/main: Remove superfluous call to - enable_subchannel() - -RH-Author: Thomas Huth -Message-id: <20201009100849.264994-9-thuth@redhat.com> -Patchwork-id: 98602 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 8/9] pc-bios/s390-ccw/main: Remove superfluous call to enable_subchannel() -Bugzilla: 1846975 -RH-Acked-by: Jens Freimann -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -enable_subchannel() is already done during is_dev_possibly_bootable() -(which is called from find_boot_device() -> find_subch()), so there -is no need to do this again in the main() function. 
- -Message-Id: <20200806105349.632-9-thuth@redhat.com> -Reviewed-by: Cornelia Huck -Signed-off-by: Thomas Huth -(cherry picked from commit 49d4388ec03fd8c7701b907a4e11c437a28f8572) -Signed-off-by: Danilo C. L. de Paula ---- - pc-bios/s390-ccw/main.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c -index b5c721c395..e3a1a3053d 100644 ---- a/pc-bios/s390-ccw/main.c -+++ b/pc-bios/s390-ccw/main.c -@@ -289,7 +289,6 @@ int main(void) - boot_setup(); - if (have_iplb) { - find_boot_device(); -- enable_subchannel(blk_schid); - ipl_boot_device(); - } else { - probe_boot_device(); --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390x-Clear-out-leftover-S390EP-string.patch b/SOURCES/kvm-pc-bios-s390x-Clear-out-leftover-S390EP-string.patch deleted file mode 100644 index 8334b7b..0000000 --- a/SOURCES/kvm-pc-bios-s390x-Clear-out-leftover-S390EP-string.patch +++ /dev/null @@ -1,87 +0,0 @@ -From c6f62870f27ece45e944d1818f6aa04b3e024959 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 10 Dec 2020 08:32:41 -0500 -Subject: [PATCH 5/5] pc-bios: s390x: Clear out leftover S390EP string - -RH-Author: Thomas Huth -Message-id: <20201210083241.173509-5-thuth@redhat.com> -Patchwork-id: 100369 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 4/4] pc-bios: s390x: Clear out leftover S390EP string -Bugzilla: 1903135 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Jens Freimann -RH-Acked-by: David Hildenbrand - -From: Eric Farman - -A Linux binary will have the string "S390EP" at address 0x10008, -which is important in getting the guest up off the ground. In the -case of a reboot (specifically chreipl going to a new device), -we should defer to the PSW at address zero for the new config, -which will re-write "S390EP" from the new image. - -Let's clear it out at this point so that a reipl to, say, a DASD -passthrough device drives the IPL path from scratch without -disrupting the order of operations for other boots. 
- -Rather than hardcoding the address of this magic (again), let's -define it somewhere so that the two users are visibly related. - -Signed-off-by: Eric Farman -Message-Id: <20201120160117.59366-3-farman@linux.ibm.com> -Signed-off-by: Thomas Huth -(cherry picked from commit 3d6519968bb10260fc724c491fb4275f7c0b78ac) -Signed-off-by: Danilo C. L. de Paula ---- - pc-bios/s390-ccw/jump2ipl.c | 2 +- - pc-bios/s390-ccw/main.c | 6 ++++++ - pc-bios/s390-ccw/s390-arch.h | 3 +++ - 3 files changed, 10 insertions(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/jump2ipl.c b/pc-bios/s390-ccw/jump2ipl.c -index 767012bf0c9..6c6823b5db8 100644 ---- a/pc-bios/s390-ccw/jump2ipl.c -+++ b/pc-bios/s390-ccw/jump2ipl.c -@@ -78,7 +78,7 @@ void jump_to_low_kernel(void) - * kernel start address (when jumping to the PSW-at-zero address instead, - * the kernel startup code fails when we booted from a network device). - */ -- if (!memcmp((char *)0x10008, "S390EP", 6)) { -+ if (!memcmp((char *)S390EP, "S390EP", 6)) { - jump_to_IPL_code(KERN_IMAGE_START); - } - -diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c -index e3a1a3053d0..c04b910082b 100644 ---- a/pc-bios/s390-ccw/main.c -+++ b/pc-bios/s390-ccw/main.c -@@ -185,6 +185,12 @@ static void boot_setup(void) - memcpy(lpmsg + 10, loadparm_str, 8); - sclp_print(lpmsg); - -+ /* -+ * Clear out any potential S390EP magic (see jump_to_low_kernel()), -+ * so we don't taint our decision-making process during a reboot. 
-+ */ -+ memset((char *)S390EP, 0, 6); -+ - have_iplb = store_iplb(&iplb); - } - -diff --git a/pc-bios/s390-ccw/s390-arch.h b/pc-bios/s390-ccw/s390-arch.h -index 6da44d4436c..a741488aaa1 100644 ---- a/pc-bios/s390-ccw/s390-arch.h -+++ b/pc-bios/s390-ccw/s390-arch.h -@@ -95,6 +95,9 @@ typedef struct LowCore { - - extern LowCore *lowcore; - -+/* Location of "S390EP" in a Linux binary (see arch/s390/boot/head.S) */ -+#define S390EP 0x10008 -+ - static inline void set_prefix(uint32_t address) - { - asm volatile("spx %0" : : "m" (address) : "memory"); --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390x-Ensure-Read-IPL-memory-is-clean.patch b/SOURCES/kvm-pc-bios-s390x-Ensure-Read-IPL-memory-is-clean.patch deleted file mode 100644 index 9d09be3..0000000 --- a/SOURCES/kvm-pc-bios-s390x-Ensure-Read-IPL-memory-is-clean.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 6b19062226ecebf63d2d0b0ff05b5bcfa7a05818 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 10 Dec 2020 08:32:40 -0500 -Subject: [PATCH 4/5] pc-bios: s390x: Ensure Read IPL memory is clean - -RH-Author: Thomas Huth -Message-id: <20201210083241.173509-4-thuth@redhat.com> -Patchwork-id: 100372 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 3/4] pc-bios: s390x: Ensure Read IPL memory is clean -Bugzilla: 1903135 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Jens Freimann -RH-Acked-by: David Hildenbrand - -From: Eric Farman - -If, for example, we boot off a virtio device and chreipl to a vfio-ccw -device, the space at lowcore will be non-zero. We build a Read IPL CCW -at address zero, but it will have leftover PSW data that will conflict -with the Format-0 CCW being generated: - -0x0: 00080000 80010000 - ------ Ccw0.cda - -- Ccw0.chainData - -- Reserved bits - -The data address will be overwritten with the correct value (0x0), but -the apparent data chain bit will cause subsequent memory to be used as -the target of the data store, which may not be where we expect (0x0). 
- -Clear out this space when we boot from DASD, so that we know it exists -exactly as we expect. - -Signed-off-by: Eric Farman -Reviewed-by: Jason J. Herne -Reviewed-by: Janosch Frank -Acked-by: Christian Borntraeger -Acked-by: Cornelia Huck -Message-Id: <20201120160117.59366-2-farman@linux.ibm.com> -Signed-off-by: Thomas Huth -(cherry picked from commit d8e5bbdd0d6fa8d9b5ac15de62c87105d92ff558) -Signed-off-by: Danilo C. L. de Paula ---- - pc-bios/s390-ccw/dasd-ipl.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/pc-bios/s390-ccw/dasd-ipl.c b/pc-bios/s390-ccw/dasd-ipl.c -index 0fc879bb8e8..71cbae2f16e 100644 ---- a/pc-bios/s390-ccw/dasd-ipl.c -+++ b/pc-bios/s390-ccw/dasd-ipl.c -@@ -100,6 +100,9 @@ static void make_readipl(void) - { - Ccw0 *ccwIplRead = (Ccw0 *)0x00; - -+ /* Clear out any existing data */ -+ memset(ccwIplRead, 0, sizeof(Ccw0)); -+ - /* Create Read IPL ccw at address 0 */ - ccwIplRead->cmd_code = CCW_CMD_READ_IPL; - ccwIplRead->cda = 0x00; /* Read into address 0x00 in main memory */ --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390x-Fix-reset-psw-mask.patch b/SOURCES/kvm-pc-bios-s390x-Fix-reset-psw-mask.patch deleted file mode 100644 index 9c45e92..0000000 --- a/SOURCES/kvm-pc-bios-s390x-Fix-reset-psw-mask.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 38ba55dd27a3b8308f0ce2e82a4c3eba3f197d20 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:53 -0400 -Subject: [PATCH 11/42] pc-bios/s390x: Fix reset psw mask - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-12-thuth@redhat.com> -Patchwork-id: 97034 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 11/38] pc-bios/s390x: Fix reset psw mask -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -We need to set the short psw indication bit in the reset psw, as it is -a short psw. - -Exposed by "s390x: Properly fetch and test the short psw on diag308 -subc 0/1". 
- -Fixes: 962982329029 ("pc-bios/s390-ccw: do a subsystem reset before running the guest") -Signed-off-by: Janosch Frank -Message-Id: <20191203132813.2734-5-frankja@linux.ibm.com> -Acked-by: Christian Borntraeger -Signed-off-by: Cornelia Huck -(cherry picked from commit 5c6f0d5f46a77d77460dfb518cf1e1e4145c276e) -Signed-off-by: Danilo C. L. de Paula ---- - pc-bios/s390-ccw/jump2ipl.c | 12 +++++++----- - 1 file changed, 7 insertions(+), 5 deletions(-) - -diff --git a/pc-bios/s390-ccw/jump2ipl.c b/pc-bios/s390-ccw/jump2ipl.c -index 266f1502b9..da13c43cc0 100644 ---- a/pc-bios/s390-ccw/jump2ipl.c -+++ b/pc-bios/s390-ccw/jump2ipl.c -@@ -12,11 +12,11 @@ - #define KERN_IMAGE_START 0x010000UL - #define PSW_MASK_64 0x0000000100000000ULL - #define PSW_MASK_32 0x0000000080000000ULL --#define IPL_PSW_MASK (PSW_MASK_32 | PSW_MASK_64) -+#define PSW_MASK_SHORTPSW 0x0008000000000000ULL -+#define RESET_PSW_MASK (PSW_MASK_SHORTPSW | PSW_MASK_32 | PSW_MASK_64) - - typedef struct ResetInfo { -- uint32_t ipl_mask; -- uint32_t ipl_addr; -+ uint64_t ipl_psw; - uint32_t ipl_continue; - } ResetInfo; - -@@ -50,7 +50,9 @@ void jump_to_IPL_code(uint64_t address) - ResetInfo *current = 0; - - save = *current; -- current->ipl_addr = (uint32_t) (uint64_t) &jump_to_IPL_2; -+ -+ current->ipl_psw = (uint64_t) &jump_to_IPL_2; -+ current->ipl_psw |= RESET_PSW_MASK; - current->ipl_continue = address & 0x7fffffff; - - debug_print_int("set IPL addr to", current->ipl_continue); -@@ -82,7 +84,7 @@ void jump_to_low_kernel(void) - } - - /* Trying to get PSW at zero address */ -- if (*((uint64_t *)0) & IPL_PSW_MASK) { -+ if (*((uint64_t *)0) & RESET_PSW_MASK) { - jump_to_IPL_code((*((uint64_t *)0)) & 0x7fffffff); - } - --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390x-Rename-PSW_MASK_ZMODE-to-PSW_MASK_64.patch b/SOURCES/kvm-pc-bios-s390x-Rename-PSW_MASK_ZMODE-to-PSW_MASK_64.patch deleted file mode 100644 index 8ba4530..0000000 --- a/SOURCES/kvm-pc-bios-s390x-Rename-PSW_MASK_ZMODE-to-PSW_MASK_64.patch +++ 
/dev/null @@ -1,45 +0,0 @@ -From 494ce6ed658a806af36d4f50600e44740a446011 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 10 Dec 2020 08:32:38 -0500 -Subject: [PATCH 2/5] pc-bios: s390x: Rename PSW_MASK_ZMODE to PSW_MASK_64 - -RH-Author: Thomas Huth -Message-id: <20201210083241.173509-2-thuth@redhat.com> -Patchwork-id: 100370 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/4] pc-bios: s390x: Rename PSW_MASK_ZMODE to PSW_MASK_64 -Bugzilla: 1903135 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Jens Freimann -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -This constant enables 64 bit addressing, not the ESAME architecture, -so it shouldn't be named ZMODE. - -Signed-off-by: Janosch Frank -Reviewed-by: Thomas Huth -Message-Id: <20200624075226.92728-7-frankja@linux.ibm.com> -Signed-off-by: Thomas Huth -(cherry picked from commit b88faa1c899db2fae8b5b168aeb6c47bef090f27) -Signed-off-by: Danilo C. L. de Paula ---- - pc-bios/s390-ccw/s390-arch.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/s390-arch.h b/pc-bios/s390-ccw/s390-arch.h -index 5f36361c022..73852029d4e 100644 ---- a/pc-bios/s390-ccw/s390-arch.h -+++ b/pc-bios/s390-ccw/s390-arch.h -@@ -29,7 +29,7 @@ _Static_assert(sizeof(struct PSWLegacy) == 8, "PSWLegacy size incorrect"); - #define PSW_MASK_WAIT 0x0002000000000000ULL - #define PSW_MASK_EAMODE 0x0000000100000000ULL - #define PSW_MASK_BAMODE 0x0000000080000000ULL --#define PSW_MASK_ZMODE (PSW_MASK_EAMODE | PSW_MASK_BAMODE) -+#define PSW_MASK_64 (PSW_MASK_EAMODE | PSW_MASK_BAMODE) - - /* Low core mapping */ - typedef struct LowCore { --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390x-Save-iplb-location-in-lowcore.patch b/SOURCES/kvm-pc-bios-s390x-Save-iplb-location-in-lowcore.patch deleted file mode 100644 index 2db2f93..0000000 --- a/SOURCES/kvm-pc-bios-s390x-Save-iplb-location-in-lowcore.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 8350ad9c0f54519a06ec396c2997330615f4b470 Mon Sep 17 00:00:00 2001 -From: Thomas 
Huth -Date: Fri, 29 May 2020 05:53:58 -0400 -Subject: [PATCH 16/42] pc-bios: s390x: Save iplb location in lowcore - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-17-thuth@redhat.com> -Patchwork-id: 97027 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 16/38] pc-bios: s390x: Save iplb location in lowcore -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -The POP states that for a list directed IPL the IPLB is stored into -memory by the machine loader and its address is stored at offset 0x14 -of the lowcore. - -ZIPL currently uses the address in offset 0x14 to access the IPLB and -acquire flags about secure boot. If the IPLB address points into -memory which has an unsupported mix of flags set, ZIPL will panic -instead of booting the OS. - -As the lowcore can have quite a high entropy for a guest that did drop -out of protected mode (i.e. rebooted) we encountered the ZIPL panic -quite often. - -Signed-off-by: Janosch Frank -Tested-by: Marc Hartmayer -Message-Id: <20200304114231.23493-19-frankja@linux.ibm.com> -Reviewed-by: Christian Borntraeger -Reviewed-by: David Hildenbrand -Signed-off-by: Christian Borntraeger -(cherry picked from commit 9bfc04f9ef6802fff0fc77130ff345a541783363) -Signed-off-by: Danilo C. L. 
de Paula ---- - pc-bios/s390-ccw/jump2ipl.c | 1 + - pc-bios/s390-ccw/main.c | 8 +++++++- - pc-bios/s390-ccw/netmain.c | 1 + - pc-bios/s390-ccw/s390-arch.h | 10 ++++++++-- - pc-bios/s390-ccw/s390-ccw.h | 1 + - 5 files changed, 18 insertions(+), 3 deletions(-) - -diff --git a/pc-bios/s390-ccw/jump2ipl.c b/pc-bios/s390-ccw/jump2ipl.c -index da13c43cc0..4eba2510b0 100644 ---- a/pc-bios/s390-ccw/jump2ipl.c -+++ b/pc-bios/s390-ccw/jump2ipl.c -@@ -35,6 +35,7 @@ void jump_to_IPL_code(uint64_t address) - { - /* store the subsystem information _after_ the bootmap was loaded */ - write_subsystem_identification(); -+ write_iplb_location(); - - /* prevent unknown IPL types in the guest */ - if (iplb.pbt == S390_IPL_TYPE_QEMU_SCSI) { -diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c -index a21b386280..4e65b411e1 100644 ---- a/pc-bios/s390-ccw/main.c -+++ b/pc-bios/s390-ccw/main.c -@@ -9,6 +9,7 @@ - */ - - #include "libc.h" -+#include "helper.h" - #include "s390-arch.h" - #include "s390-ccw.h" - #include "cio.h" -@@ -22,7 +23,7 @@ QemuIplParameters qipl; - IplParameterBlock iplb __attribute__((__aligned__(PAGE_SIZE))); - static bool have_iplb; - static uint16_t cutype; --LowCore const *lowcore; /* Yes, this *is* a pointer to address 0 */ -+LowCore *lowcore; /* Yes, this *is* a pointer to address 0 */ - - #define LOADPARM_PROMPT "PROMPT " - #define LOADPARM_EMPTY " " -@@ -42,6 +43,11 @@ void write_subsystem_identification(void) - *zeroes = 0; - } - -+void write_iplb_location(void) -+{ -+ lowcore->ptr_iplb = ptr2u32(&iplb); -+} -+ - void panic(const char *string) - { - sclp_print(string); -diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c -index f2dcc01e27..309ffa30d9 100644 ---- a/pc-bios/s390-ccw/netmain.c -+++ b/pc-bios/s390-ccw/netmain.c -@@ -40,6 +40,7 @@ - #define DEFAULT_TFTP_RETRIES 20 - - extern char _start[]; -+void write_iplb_location(void) {} - - #define KERNEL_ADDR ((void *)0L) - #define KERNEL_MAX_SIZE ((long)_start) -diff --git 
a/pc-bios/s390-ccw/s390-arch.h b/pc-bios/s390-ccw/s390-arch.h -index 504fc7c2f0..5f36361c02 100644 ---- a/pc-bios/s390-ccw/s390-arch.h -+++ b/pc-bios/s390-ccw/s390-arch.h -@@ -36,7 +36,13 @@ typedef struct LowCore { - /* prefix area: defined by architecture */ - PSWLegacy ipl_psw; /* 0x000 */ - uint32_t ccw1[2]; /* 0x008 */ -- uint32_t ccw2[2]; /* 0x010 */ -+ union { -+ uint32_t ccw2[2]; /* 0x010 */ -+ struct { -+ uint32_t reserved10; -+ uint32_t ptr_iplb; -+ }; -+ }; - uint8_t pad1[0x80 - 0x18]; /* 0x018 */ - uint32_t ext_params; /* 0x080 */ - uint16_t cpu_addr; /* 0x084 */ -@@ -85,7 +91,7 @@ typedef struct LowCore { - PSW io_new_psw; /* 0x1f0 */ - } __attribute__((packed, aligned(8192))) LowCore; - --extern LowCore const *lowcore; -+extern LowCore *lowcore; - - static inline void set_prefix(uint32_t address) - { -diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h -index 11bce7d73c..21f27e7990 100644 ---- a/pc-bios/s390-ccw/s390-ccw.h -+++ b/pc-bios/s390-ccw/s390-ccw.h -@@ -57,6 +57,7 @@ void consume_io_int(void); - /* main.c */ - void panic(const char *string); - void write_subsystem_identification(void); -+void write_iplb_location(void); - extern char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE))); - unsigned int get_loadparm_index(void); - --- -2.27.0 - diff --git a/SOURCES/kvm-pc-bios-s390x-Use-PSW-masks-where-possible-and-intro.patch b/SOURCES/kvm-pc-bios-s390x-Use-PSW-masks-where-possible-and-intro.patch deleted file mode 100644 index 576447d..0000000 --- a/SOURCES/kvm-pc-bios-s390x-Use-PSW-masks-where-possible-and-intro.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 35891c9334058c02f3ee83eee1a986802387c18b Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 10 Dec 2020 08:32:39 -0500 -Subject: [PATCH 3/5] pc-bios: s390x: Use PSW masks where possible and - introduce PSW_MASK_SHORT_ADDR - -RH-Author: Thomas Huth -Message-id: <20201210083241.173509-3-thuth@redhat.com> -Patchwork-id: 100371 -O-Subject: [RHEL-8.4.0 qemu-kvm 
PATCH 2/4] pc-bios: s390x: Use PSW masks where possible and introduce PSW_MASK_SHORT_ADDR -Bugzilla: 1903135 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Jens Freimann -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -Let's move some of the PSW mask defines into s390-arch.h and use them -in jump2ipl.c. Also let's introduce a new constant for the address -mask of 8 byte (short) PSWs. - -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Reviewed-by: Thomas Huth -Message-Id: <20200624075226.92728-8-frankja@linux.ibm.com> -Signed-off-by: Thomas Huth -(cherry picked from commit fe75c657b8ee962da79f5d3518b139e26dc69c24) -Signed-off-by: Danilo C. L. de Paula ---- - pc-bios/s390-ccw/jump2ipl.c | 10 ++++------ - pc-bios/s390-ccw/s390-arch.h | 2 ++ - 2 files changed, 6 insertions(+), 6 deletions(-) - -diff --git a/pc-bios/s390-ccw/jump2ipl.c b/pc-bios/s390-ccw/jump2ipl.c -index 4eba2510b04..767012bf0c9 100644 ---- a/pc-bios/s390-ccw/jump2ipl.c -+++ b/pc-bios/s390-ccw/jump2ipl.c -@@ -8,12 +8,10 @@ - - #include "libc.h" - #include "s390-ccw.h" -+#include "s390-arch.h" - - #define KERN_IMAGE_START 0x010000UL --#define PSW_MASK_64 0x0000000100000000ULL --#define PSW_MASK_32 0x0000000080000000ULL --#define PSW_MASK_SHORTPSW 0x0008000000000000ULL --#define RESET_PSW_MASK (PSW_MASK_SHORTPSW | PSW_MASK_32 | PSW_MASK_64) -+#define RESET_PSW_MASK (PSW_MASK_SHORTPSW | PSW_MASK_64) - - typedef struct ResetInfo { - uint64_t ipl_psw; -@@ -54,7 +52,7 @@ void jump_to_IPL_code(uint64_t address) - - current->ipl_psw = (uint64_t) &jump_to_IPL_2; - current->ipl_psw |= RESET_PSW_MASK; -- current->ipl_continue = address & 0x7fffffff; -+ current->ipl_continue = address & PSW_MASK_SHORT_ADDR; - - debug_print_int("set IPL addr to", current->ipl_continue); - -@@ -86,7 +84,7 @@ void jump_to_low_kernel(void) - - /* Trying to get PSW at zero address */ - if (*((uint64_t *)0) & RESET_PSW_MASK) { -- jump_to_IPL_code((*((uint64_t *)0)) & 0x7fffffff); -+ jump_to_IPL_code((*((uint64_t *)0)) 
& PSW_MASK_SHORT_ADDR); - } - - /* No other option left, so use the Linux kernel start address */ -diff --git a/pc-bios/s390-ccw/s390-arch.h b/pc-bios/s390-ccw/s390-arch.h -index 73852029d4e..6da44d4436c 100644 ---- a/pc-bios/s390-ccw/s390-arch.h -+++ b/pc-bios/s390-ccw/s390-arch.h -@@ -26,9 +26,11 @@ _Static_assert(sizeof(struct PSWLegacy) == 8, "PSWLegacy size incorrect"); - - /* s390 psw bit masks */ - #define PSW_MASK_IOINT 0x0200000000000000ULL -+#define PSW_MASK_SHORTPSW 0x0008000000000000ULL - #define PSW_MASK_WAIT 0x0002000000000000ULL - #define PSW_MASK_EAMODE 0x0000000100000000ULL - #define PSW_MASK_BAMODE 0x0000000080000000ULL -+#define PSW_MASK_SHORT_ADDR 0x000000007fffffffULL - #define PSW_MASK_64 (PSW_MASK_EAMODE | PSW_MASK_BAMODE) - - /* Low core mapping */ --- -2.27.0 - diff --git a/SOURCES/kvm-pcie_root_port-Add-hotplug-disabling-option.patch b/SOURCES/kvm-pcie_root_port-Add-hotplug-disabling-option.patch deleted file mode 100644 index 57f3c3b..0000000 --- a/SOURCES/kvm-pcie_root_port-Add-hotplug-disabling-option.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 8587278a20283851081d4d282d11ef6bafd17dc2 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Tue, 17 Mar 2020 13:56:39 -0400 -Subject: [PATCH 1/2] pcie_root_port: Add hotplug disabling option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Julia Suvorova -Message-id: <20200317135639.65085-1-jusual@redhat.com> -Patchwork-id: 94367 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/1] pcie_root_port: Add hotplug disabling option -Bugzilla: 1790899 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Peter Xu - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1790899 -BRANCH: rhel-av-8.2.1 -UPSTREAM: merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27302449 - -Make hot-plug/hot-unplug on PCIe Root Ports optional to allow libvirt -manage it and restrict unplug for the whole machine. 
This is going to -prevent user-initiated unplug in guests (Windows mostly). -Hotplug is enabled by default. -Usage: - -device pcie-root-port,hotplug=off,... - -If you want to disable hot-unplug on some downstream ports of one -switch, disable hot-unplug on PCIe Root Port connected to the upstream -port as well as on the selected downstream ports. - -Discussion related: - https://lists.gnu.org/archive/html/qemu-devel/2020-02/msg00530.html - -Signed-off-by: Julia Suvorova -Message-Id: <20200226174607.205941-1-jusual@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Ján Tomko -(cherry picked from commit 530a0963184e57e71a5b538e9161f115df533e96) -Signed-off-by: Jon Maloy ---- - hw/pci-bridge/pcie_root_port.c | 2 +- - hw/pci-bridge/xio3130_downstream.c | 2 +- - hw/pci/pcie.c | 11 +++++++---- - hw/pci/pcie_port.c | 1 + - include/hw/pci/pcie.h | 2 +- - include/hw/pci/pcie_port.h | 3 +++ - 6 files changed, 14 insertions(+), 7 deletions(-) - -diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c -index 012c2cb12c..db80e2ec23 100644 ---- a/hw/pci-bridge/pcie_root_port.c -+++ b/hw/pci-bridge/pcie_root_port.c -@@ -94,7 +94,7 @@ static void rp_realize(PCIDevice *d, Error **errp) - - pcie_cap_arifwd_init(d); - pcie_cap_deverr_init(d); -- pcie_cap_slot_init(d, s->slot); -+ pcie_cap_slot_init(d, s); - pcie_cap_root_init(d); - - pcie_chassis_create(s->chassis); -diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c -index a9f084b863..4489ce4a40 100644 ---- a/hw/pci-bridge/xio3130_downstream.c -+++ b/hw/pci-bridge/xio3130_downstream.c -@@ -94,7 +94,7 @@ static void xio3130_downstream_realize(PCIDevice *d, Error **errp) - } - pcie_cap_flr_init(d); - pcie_cap_deverr_init(d); -- pcie_cap_slot_init(d, s->slot); -+ pcie_cap_slot_init(d, s); - pcie_cap_arifwd_init(d); - - pcie_chassis_create(s->chassis); -diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c -index 08718188bb..0eb3a2a5d2 100644 ---- 
a/hw/pci/pcie.c -+++ b/hw/pci/pcie.c -@@ -495,7 +495,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, - - /* pci express slot for pci express root/downstream port - PCI express capability slot registers */ --void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot) -+void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) - { - uint32_t pos = dev->exp.exp_cap; - -@@ -505,13 +505,16 @@ void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot) - pci_long_test_and_clear_mask(dev->config + pos + PCI_EXP_SLTCAP, - ~PCI_EXP_SLTCAP_PSN); - pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCAP, -- (slot << PCI_EXP_SLTCAP_PSN_SHIFT) | -+ (s->slot << PCI_EXP_SLTCAP_PSN_SHIFT) | - PCI_EXP_SLTCAP_EIP | -- PCI_EXP_SLTCAP_HPS | -- PCI_EXP_SLTCAP_HPC | - PCI_EXP_SLTCAP_PIP | - PCI_EXP_SLTCAP_AIP | - PCI_EXP_SLTCAP_ABP); -+ if (s->hotplug) { -+ pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCAP, -+ PCI_EXP_SLTCAP_HPS | -+ PCI_EXP_SLTCAP_HPC); -+ } - - if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) { - pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCAP, -diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c -index c19a9be592..36dac33d98 100644 ---- a/hw/pci/pcie_port.c -+++ b/hw/pci/pcie_port.c -@@ -147,6 +147,7 @@ static const TypeInfo pcie_port_type_info = { - static Property pcie_slot_props[] = { - DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0), - DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0), -+ DEFINE_PROP_BOOL("hotplug", PCIESlot, hotplug, true), - DEFINE_PROP_END_OF_LIST() - }; - -diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h -index 7064875835..14c58ebdb6 100644 ---- a/include/hw/pci/pcie.h -+++ b/include/hw/pci/pcie.h -@@ -104,7 +104,7 @@ void pcie_cap_deverr_reset(PCIDevice *dev); - void pcie_cap_lnkctl_init(PCIDevice *dev); - void pcie_cap_lnkctl_reset(PCIDevice *dev); - --void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot); -+void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s); - void 
pcie_cap_slot_reset(PCIDevice *dev); - void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta); - void pcie_cap_slot_write_config(PCIDevice *dev, -diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h -index 7515430087..7072cc8731 100644 ---- a/include/hw/pci/pcie_port.h -+++ b/include/hw/pci/pcie_port.h -@@ -55,6 +55,9 @@ struct PCIESlot { - - /* Disable ACS (really for a pcie_root_port) */ - bool disable_acs; -+ -+ /* Indicates whether hot-plug is enabled on the slot */ -+ bool hotplug; - QLIST_ENTRY(PCIESlot) next; - }; - --- -2.18.2 - diff --git a/SOURCES/kvm-pcnet-switch-to-use-qemu_receive_packet-for-loopback.patch b/SOURCES/kvm-pcnet-switch-to-use-qemu_receive_packet-for-loopback.patch deleted file mode 100644 index 8c33334..0000000 --- a/SOURCES/kvm-pcnet-switch-to-use-qemu_receive_packet-for-loopback.patch +++ /dev/null @@ -1,54 +0,0 @@ -From b36a9259e085b4d32532d896e485889181b130ae Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 29 Jun 2021 03:42:45 -0400 -Subject: [PATCH 7/9] pcnet: switch to use qemu_receive_packet() for loopback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210629034247.3286477-8-jmaloy@redhat.com> -Patchwork-id: 101791 -O-Subject: [RHEL-8.4.0.z qemu-kvm PATCH v2 7/9] pcnet: switch to use qemu_receive_packet() for loopback -Bugzilla: 1932917 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Alexander Bulekov - -This patch switches to use qemu_receive_packet() which can detect -reentrancy and return early. - -This is intended to address CVE-2021-3416. - -Cc: Prasad J Pandit -Cc: qemu-stable@nongnu.org -Buglink: https://bugs.launchpad.net/qemu/+bug/1917085 -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Jason Wang - -(cherry picked from commit 99ccfaa1edafd79f7a3a0ff7b58ae4da7c514928) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/pcnet.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c -index f3f18d8598..dcd3fc4948 100644 ---- a/hw/net/pcnet.c -+++ b/hw/net/pcnet.c -@@ -1250,7 +1250,7 @@ txagain: - if (BCR_SWSTYLE(s) == 1) - add_crc = !GET_FIELD(tmd.status, TMDS, NOFCS); - s->looptest = add_crc ? PCNET_LOOPTEST_CRC : PCNET_LOOPTEST_NOCRC; -- pcnet_receive(qemu_get_queue(s->nic), s->buffer, s->xmit_pos); -+ qemu_receive_packet(qemu_get_queue(s->nic), s->buffer, s->xmit_pos); - s->looptest = 0; - } else { - if (s->nic) { --- -2.27.0 - diff --git a/SOURCES/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch b/SOURCES/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch deleted file mode 100644 index 2dbdb16..0000000 --- a/SOURCES/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 22fc9bd7e7ae0b72c6f6e483eb66cf996f519766 Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Tue, 21 Jan 2020 05:16:11 +0000 -Subject: [PATCH 01/15] ppc: Deassert the external interrupt pin in KVM on - reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20200121051613.388295-2-dgibson@redhat.com> -Patchwork-id: 93429 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 1/3] ppc: Deassert the external interrupt pin in KVM on reset -Bugzilla: 1776638 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: Greg Kurz - -When a CPU is reset, QEMU makes sure no interrupt is pending by clearing -CPUPPCstate::pending_interrupts in ppc_cpu_reset(). In the case of a -complete machine emulation, eg. a sPAPR machine, an external interrupt -request could still be pending in KVM though, eg. an IPI. It will be -eventually presented to the guest, which is supposed to acknowledge it at -the interrupt controller. 
If the interrupt controller is emulated in QEMU, -either XICS or XIVE, ppc_set_irq() won't deassert the external interrupt -pin in KVM since it isn't pending anymore for QEMU. When the vCPU re-enters -the guest, the interrupt request is still pending and the vCPU will try -again to acknowledge it. This causes an infinite loop and eventually hangs -the guest. - -The code has been broken since the beginning. The issue wasn't hit before -because accel=kvm,kernel-irqchip=off is an awkward setup that never got -used until recently with the LC92x IBM systems (aka, Boston). - -Add a ppc_irq_reset() function to do the necessary cleanup, ie. deassert -the IRQ pins of the CPU in QEMU and most importantly the external interrupt -pin for this vCPU in KVM. - -Reported-by: Satheesh Rajendran -Signed-off-by: Greg Kurz -Message-Id: <157548861740.3650476.16879693165328764758.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit 401774387aeb37f2ada9bb18f7c7e307b21a3e93) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/ppc.c | 8 ++++++++ - include/hw/ppc/ppc.h | 2 ++ - target/ppc/translate_init.inc.c | 1 + - 3 files changed, 11 insertions(+) - -diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c -index 52a18eb..d554b64 100644 ---- a/hw/ppc/ppc.c -+++ b/hw/ppc/ppc.c -@@ -1510,3 +1510,11 @@ PowerPCCPU *ppc_get_vcpu_by_pir(int pir) - - return NULL; - } -+ -+void ppc_irq_reset(PowerPCCPU *cpu) -+{ -+ CPUPPCState *env = &cpu->env; -+ -+ env->irq_input_state = 0; -+ kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0); -+} -diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h -index 4bdcb8b..5dd7531 100644 ---- a/include/hw/ppc/ppc.h -+++ b/include/hw/ppc/ppc.h -@@ -76,6 +76,7 @@ static inline void ppc970_irq_init(PowerPCCPU *cpu) {} - static inline void ppcPOWER7_irq_init(PowerPCCPU *cpu) {} - static inline void ppcPOWER9_irq_init(PowerPCCPU *cpu) {} - static inline void ppce500_irq_init(PowerPCCPU *cpu) {} -+static inline void ppc_irq_reset(PowerPCCPU *cpu) {} - #else - void ppc40x_irq_init(PowerPCCPU *cpu); - void ppce500_irq_init(PowerPCCPU *cpu); -@@ -83,6 +84,7 @@ void ppc6xx_irq_init(PowerPCCPU *cpu); - void ppc970_irq_init(PowerPCCPU *cpu); - void ppcPOWER7_irq_init(PowerPCCPU *cpu); - void ppcPOWER9_irq_init(PowerPCCPU *cpu); -+void ppc_irq_reset(PowerPCCPU *cpu); - #endif - - /* PPC machines for OpenBIOS */ -diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c -index ba726de..64a8380 100644 ---- a/target/ppc/translate_init.inc.c -+++ b/target/ppc/translate_init.inc.c -@@ -10461,6 +10461,7 @@ static void ppc_cpu_reset(CPUState *s) - env->pending_interrupts = 0; - s->exception_index = POWERPC_EXCP_NONE; - env->error_code = 0; -+ ppc_irq_reset(cpu); - - /* tininess for underflow is detected before rounding */ - set_float_detect_tininess(float_tininess_before_rounding, --- -1.8.3.1 - diff --git a/SOURCES/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch b/SOURCES/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch 
deleted file mode 100644 index 457d149..0000000 --- a/SOURCES/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch +++ /dev/null @@ -1,112 +0,0 @@ -From f2f57c1ed926384e074d2048cdbdc30ee2f426eb Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Tue, 21 Jan 2020 05:16:13 +0000 -Subject: [PATCH 03/15] ppc: Don't use CPUPPCState::irq_input_state with modern - Book3s CPU models -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20200121051613.388295-4-dgibson@redhat.com> -Patchwork-id: 93431 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 3/3] ppc: Don't use CPUPPCState::irq_input_state with modern Book3s CPU models -Bugzilla: 1776638 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: Greg Kurz - -The power7_set_irq() and power9_set_irq() functions set this but it is -never used actually. Modern Book3s compatible CPUs are only supported -by the pnv and spapr machines. They have an interrupt controller, XICS -for POWER7/8 and XIVE for POWER9, whose models don't require to track -IRQ input states at the CPU level. - -Drop these lines to avoid confusion. - -Signed-off-by: Greg Kurz -Message-Id: <157548862861.3650476.16622818876928044450.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit c1ad0b892ce20cf2b5e619c79e8a0c4c66b235dc) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/ppc.c | 16 ++-------------- - target/ppc/cpu.h | 4 +++- - 2 files changed, 5 insertions(+), 15 deletions(-) - -diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c -index d554b64..730a41f 100644 ---- a/hw/ppc/ppc.c -+++ b/hw/ppc/ppc.c -@@ -275,10 +275,9 @@ void ppc970_irq_init(PowerPCCPU *cpu) - static void power7_set_irq(void *opaque, int pin, int level) - { - PowerPCCPU *cpu = opaque; -- CPUPPCState *env = &cpu->env; - - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, -- env, pin, level); -+ &cpu->env, pin, level); - - switch (pin) { - case POWER7_INPUT_INT: -@@ -292,11 +291,6 @@ static void power7_set_irq(void *opaque, int pin, int level) - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; - } -- if (level) { -- env->irq_input_state |= 1 << pin; -- } else { -- env->irq_input_state &= ~(1 << pin); -- } - } - - void ppcPOWER7_irq_init(PowerPCCPU *cpu) -@@ -311,10 +305,9 @@ void ppcPOWER7_irq_init(PowerPCCPU *cpu) - static void power9_set_irq(void *opaque, int pin, int level) - { - PowerPCCPU *cpu = opaque; -- CPUPPCState *env = &cpu->env; - - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, -- env, pin, level); -+ &cpu->env, pin, level); - - switch (pin) { - case POWER9_INPUT_INT: -@@ -334,11 +327,6 @@ static void power9_set_irq(void *opaque, int pin, int level) - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; - } -- if (level) { -- env->irq_input_state |= 1 << pin; -- } else { -- env->irq_input_state &= ~(1 << pin); -- } - } - - void ppcPOWER9_irq_init(PowerPCCPU *cpu) -diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 5c53801..8887f76 100644 ---- a/target/ppc/cpu.h -+++ b/target/ppc/cpu.h -@@ -1090,7 +1090,9 @@ struct CPUPPCState { - #if !defined(CONFIG_USER_ONLY) - /* - * This is the IRQ controller, which is implementation dependent -- * and only relevant when emulating a complete machine. -+ * and only relevant when emulating a complete machine. 
Note that -+ * this isn't used by recent Book3s compatible CPUs (POWER7 and -+ * newer). - */ - uint32_t irq_input_state; - void **irq_inputs; --- -1.8.3.1 - diff --git a/SOURCES/kvm-ppc-spapr-Add-hotremovable-flag-on-DIMM-LMBs-on-drme.patch b/SOURCES/kvm-ppc-spapr-Add-hotremovable-flag-on-DIMM-LMBs-on-drme.patch deleted file mode 100644 index 380007c..0000000 --- a/SOURCES/kvm-ppc-spapr-Add-hotremovable-flag-on-DIMM-LMBs-on-drme.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 5b826e7ed09ecf3b2837d147fec6b593f629e450 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Fri, 4 Dec 2020 15:07:59 -0500 -Subject: [PATCH 01/14] ppc/spapr: Add hotremovable flag on DIMM LMBs on - drmem_v2 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Greg Kurz -Message-id: <20201204150800.264829-2-gkurz@redhat.com> -Patchwork-id: 100217 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/2] ppc/spapr: Add hotremovable flag on DIMM LMBs on drmem_v2 -Bugzilla: 1901837 -RH-Acked-by: Danilo de Paula -RH-Acked-by: David Gibson -RH-Acked-by: Laurent Vivier - -From: Leonardo Bras - -On reboot, all memory that was previously added using object_add and -device_add is placed in this DIMM area. - -The new SPAPR_LMB_FLAGS_HOTREMOVABLE flag helps Linux to put this memory in -the correct memory zone, so no unmovable allocations are made there, -allowing the object to be easily hot-removed by device_del and -object_del. - -This new flag was accepted in Power Architecture documentation. - -Signed-off-by: Leonardo Bras -Reviewed-by: Bharata B Rao -Message-Id: <20200511200201.58537-1-leobras.c@gmail.com> -[dwg: Fixed syntax error spotted by Cédric Le Goater] -Signed-off-by: David Gibson -(cherry picked from commit 0911a60c76b8598f1863c6951b2b690059465153) -Signed-off-by: Greg Kurz - -Conflicts: - hw/ppc/pnv.c - -The changes in this file clearly don't belong to this -patch. Same goes for the changes in target/ppc/cpu.h and -target/ppc/excp_helper.c. 
Something went wrong when the -patch was applied. Anyway, downstream doesn't especially -care for pnv, so just drop the changes. - -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr.c | 3 ++- - include/hw/ppc/spapr.h | 1 + - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index a330f038b95..c74079702d0 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -690,7 +690,8 @@ static int spapr_populate_drmem_v2(SpaprMachineState *spapr, void *fdt, - g_assert(drc); - elem = spapr_get_drconf_cell(size / lmb_size, addr, - spapr_drc_index(drc), node, -- SPAPR_LMB_FLAGS_ASSIGNED); -+ (SPAPR_LMB_FLAGS_ASSIGNED | -+ SPAPR_LMB_FLAGS_HOTREMOVABLE)); - QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry); - nr_entries++; - cur_addr = addr + size; -diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index aa89cc4a95c..e047dabf300 100644 ---- a/include/hw/ppc/spapr.h -+++ b/include/hw/ppc/spapr.h -@@ -847,6 +847,7 @@ int spapr_rtc_import_offset(SpaprRtcState *rtc, int64_t legacy_offset); - #define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008 - #define SPAPR_LMB_FLAGS_DRC_INVALID 0x00000020 - #define SPAPR_LMB_FLAGS_RESERVED 0x00000080 -+#define SPAPR_LMB_FLAGS_HOTREMOVABLE 0x00000100 - - void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg); - --- -2.27.0 - diff --git a/SOURCES/kvm-ppc-spapr-re-assert-IRQs-during-event-scan-if-there-.patch b/SOURCES/kvm-ppc-spapr-re-assert-IRQs-during-event-scan-if-there-.patch deleted file mode 100644 index ee0b19a..0000000 --- a/SOURCES/kvm-ppc-spapr-re-assert-IRQs-during-event-scan-if-there-.patch +++ /dev/null @@ -1,67 +0,0 @@ -From e4065c7739c8ea3f6f88898295ed899a1059806e Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Fri, 4 Dec 2020 15:08:00 -0500 -Subject: [PATCH 02/14] ppc/spapr: re-assert IRQs during event-scan if there - are pending - -RH-Author: Greg Kurz -Message-id: <20201204150800.264829-3-gkurz@redhat.com> -Patchwork-id: 100216 -O-Subject: [RHEL-8.4.0 qemu-kvm 
PATCH 2/2] ppc/spapr: re-assert IRQs during event-scan if there are pending -Bugzilla: 1901837 -RH-Acked-by: Danilo de Paula -RH-Acked-by: David Gibson -RH-Acked-by: Laurent Vivier - -From: Laurent Vivier - -If we hotplug a CPU during the first second of the kernel boot, -the IRQ can be sent to the kernel while the RTAS event handler -is not installed. The event is queued, but the kernel doesn't -collect it and ignores the new CPU. - -As the code relies on edge-triggered IRQ, we can re-assert it -during the event-scan RTAS call if there are still pending -events (as it is already done in check-exception). - -Signed-off-by: Laurent Vivier -Message-Id: <20201015210318.117386-1-lvivier@redhat.com> -Reviewed-by: Greg Kurz -Signed-off-by: David Gibson -(cherry picked from commit dff669d6a15fb92b063cb5aa691b4bb498727404) -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr_events.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c -index e355e000d07..15b92b63adb 100644 ---- a/hw/ppc/spapr_events.c -+++ b/hw/ppc/spapr_events.c -@@ -692,10 +692,22 @@ static void event_scan(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong args, - uint32_t nret, target_ulong rets) - { -+ int i; - if (nargs != 4 || nret != 1) { - rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); - return; - } -+ -+ for (i = 0; i < EVENT_CLASS_MAX; i++) { -+ if (rtas_event_log_contains(EVENT_CLASS_MASK(i))) { -+ const SpaprEventSource *source = -+ spapr_event_sources_get_source(spapr->event_sources, i); -+ -+ g_assert(source->enabled); -+ qemu_irq_pulse(spapr_qirq(spapr, source->irq)); -+ } -+ } -+ - rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); - } - --- -2.27.0 - diff --git a/SOURCES/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch b/SOURCES/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch deleted file mode 100644 index 9c25b76..0000000 --- 
a/SOURCES/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 428eb7260718b69b1f3f421d03bce10b8785fc49 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:39 +0000 -Subject: [PATCH 19/20] qapi: Add '@allow-write-only-overlay' feature for - 'blockdev-snapshot' - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-14-kwolf@redhat.com> -Patchwork-id: 94290 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 13/13] qapi: Add '@allow-write-only-overlay' feature for 'blockdev-snapshot' -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -From: Peter Krempa - -Anounce that 'blockdev-snapshot' command's permissions allow changing -of the backing file if the 'consistent_read' permission is not required. - -This is useful for libvirt to allow late opening of the backing chain -during a blockdev-mirror. - -Signed-off-by: Peter Krempa -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-8-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit c6bdc312f30d5c7326aa2fdca3e0f98c15eb541a) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - qapi/block-core.json | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/qapi/block-core.json b/qapi/block-core.json -index a1e85b0..a64ad81 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -1541,6 +1541,12 @@ - # - # For the arguments, see the documentation of BlockdevSnapshot. - # -+# Features: -+# @allow-write-only-overlay: If present, the check whether this operation is safe -+# was relaxed so that it can be used to change -+# backing file of a destination of a blockdev-mirror. 
-+# (since 5.0) -+# - # Since: 2.5 - # - # Example: -@@ -1561,7 +1567,8 @@ - # - ## - { 'command': 'blockdev-snapshot', -- 'data': 'BlockdevSnapshot' } -+ 'data': 'BlockdevSnapshot', -+ 'features': [ 'allow-write-only-overlay' ] } - - ## - # @change-backing-file: --- -1.8.3.1 - diff --git a/SOURCES/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch b/SOURCES/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch new file mode 100644 index 0000000..5ef458c --- /dev/null +++ b/SOURCES/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch @@ -0,0 +1,214 @@ +From d0cd7be4d347ebe118eb8f3f2fc2eb3e3eb77e3a Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Thu, 20 Jan 2022 17:31:04 -0500 +Subject: [PATCH 5/7] qapi: Cleanup SGX related comments and restore + @section-size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [5/5] 497dbeaebb7b8f99f5f8a7de58000dcab0d0c22d +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +The SGX NUMA patches were merged into Qemu 7.0 release, we need +clarify detailed version history information and also change +some related comments, which make SGX related comments clearer. + +The QMP command schema promises backwards compatibility as standard. +We temporarily restore "@section-size", which can avoid incompatible +API breakage. The "@section-size" will be deprecated in 7.2 version. + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Yang Zhong +Reviewed-by: Daniel P. 
Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20220120223104.437161-1-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a66bd91f030827742778a9e0da19fe55716b4a60) +Signed-off-by: Paul Lai +--- + docs/about/deprecated.rst | 13 +++++++++++++ + hw/i386/sgx.c | 11 +++++++++-- + qapi/machine.json | 4 ++-- + qapi/misc-target.json | 22 +++++++++++++++++----- + 4 files changed, 41 insertions(+), 9 deletions(-) + +diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst +index ff7488cb63..33925edf45 100644 +--- a/docs/about/deprecated.rst ++++ b/docs/about/deprecated.rst +@@ -270,6 +270,19 @@ accepted incorrect commands will return an error. Users should make sure that + all arguments passed to ``device_add`` are consistent with the documented + property types. + ++``query-sgx`` return value member ``section-size`` (since 7.0) ++'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' ++ ++Member ``section-size`` in return value elements with meta-type ``uint64`` is ++deprecated. Use ``sections`` instead. ++ ++ ++``query-sgx-capabilities`` return value member ``section-size`` (since 7.0) ++''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' ++ ++Member ``section-size`` in return value elements with meta-type ``uint64`` is ++deprecated. Use ``sections`` instead. 
++ + System accelerators + ------------------- + +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index 5de5dd0893..a2b318dd93 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -83,7 +83,7 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + ((high & MAKE_64BIT_MASK(0, 20)) << 32); + } + +-static SGXEPCSectionList *sgx_calc_host_epc_sections(void) ++static SGXEPCSectionList *sgx_calc_host_epc_sections(uint64_t *size) + { + SGXEPCSectionList *head = NULL, **tail = &head; + SGXEPCSection *section; +@@ -106,6 +106,7 @@ static SGXEPCSectionList *sgx_calc_host_epc_sections(void) + section = g_new0(SGXEPCSection, 1); + section->node = j++; + section->size = sgx_calc_section_metric(ecx, edx); ++ *size += section->size; + QAPI_LIST_APPEND(tail, section); + } + +@@ -156,6 +157,7 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) + { + SGXInfo *info = NULL; + uint32_t eax, ebx, ecx, edx; ++ uint64_t size = 0; + + int fd = qemu_open_old("/dev/sgx_vepc", O_RDWR); + if (fd < 0) { +@@ -173,7 +175,8 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) + info->sgx1 = eax & (1U << 0) ? true : false; + info->sgx2 = eax & (1U << 1) ? true : false; + +- info->sections = sgx_calc_host_epc_sections(); ++ info->sections = sgx_calc_host_epc_sections(&size); ++ info->section_size = size; + + close(fd); + +@@ -220,12 +223,14 @@ SGXInfo *qmp_query_sgx(Error **errp) + return NULL; + } + ++ SGXEPCState *sgx_epc = &pcms->sgx_epc; + info = g_new0(SGXInfo, 1); + + info->sgx = true; + info->sgx1 = true; + info->sgx2 = true; + info->flc = true; ++ info->section_size = sgx_epc->size; + info->sections = sgx_get_epc_sections_list(); + + return info; +@@ -249,6 +254,8 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) + info->sgx2 ? "enabled" : "disabled"); + monitor_printf(mon, "FLC support: %s\n", + info->flc ? 
"enabled" : "disabled"); ++ monitor_printf(mon, "size: %" PRIu64 "\n", ++ info->section_size); + + section_list = info->sections; + for (section = section_list; section; section = section->next) { +diff --git a/qapi/machine.json b/qapi/machine.json +index 16e771affc..a9f33d0f27 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -1207,7 +1207,7 @@ + # + # @memdev: memory backend linked with device + # +-# @node: the numa node ++# @node: the numa node (Since: 7.0) + # + # Since: 6.2 + ## +@@ -1288,7 +1288,7 @@ + # + # @memdev: memory backend linked with device + # +-# @node: the numa node ++# @node: the numa node (Since: 7.0) + # + # Since: 6.2 + ## +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 1022aa0184..4bc45d2474 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -344,9 +344,9 @@ + # + # @node: the numa node + # +-# @size: the size of epc section ++# @size: the size of EPC section + # +-# Since: 6.2 ++# Since: 7.0 + ## + { 'struct': 'SGXEPCSection', + 'data': { 'node': 'int', +@@ -365,7 +365,13 @@ + # + # @flc: true if FLC is supported + # +-# @sections: The EPC sections info for guest ++# @section-size: The EPC section size for guest ++# Redundant with @sections. Just for backward compatibility. ++# ++# @sections: The EPC sections info for guest (Since: 7.0) ++# ++# Features: ++# @deprecated: Member @section-size is deprecated. Use @sections instead. 
+ # + # Since: 6.2 + ## +@@ -374,6 +380,8 @@ + 'sgx1': 'bool', + 'sgx2': 'bool', + 'flc': 'bool', ++ 'section-size': { 'type': 'uint64', ++ 'features': [ 'deprecated' ] }, + 'sections': ['SGXEPCSection']}, + 'if': 'TARGET_I386' } + +@@ -390,7 +398,9 @@ + # + # -> { "execute": "query-sgx" } + # <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true, +-# "flc": true, "section-size" : 0 } } ++# "flc": true, "section-size" : 96468992, ++# "sections": [{"node": 0, "size": 67108864}, ++# {"node": 1, "size": 29360128}]} } + # + ## + { 'command': 'query-sgx', 'returns': 'SGXInfo', 'if': 'TARGET_I386' } +@@ -408,7 +418,9 @@ + # + # -> { "execute": "query-sgx-capabilities" } + # <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true, +-# "flc": true, "section-size" : 0 } } ++# "flc": true, "section-size" : 96468992, ++# "section" : [{"node": 0, "size": 67108864}, ++# {"node": 1, "size": 29360128}]} } + # + ## + { 'command': 'query-sgx-capabilities', 'returns': 'SGXInfo', 'if': 'TARGET_I386' } +-- +2.27.0 + diff --git a/SOURCES/kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch b/SOURCES/kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch deleted file mode 100644 index bf296d8..0000000 --- a/SOURCES/kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch +++ /dev/null @@ -1,237 +0,0 @@ -From 34f664093db2a6275fcddd768684c7319cfc01b4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 16 Dec 2020 16:06:06 -0500 -Subject: [PATCH 05/14] qapi: enable use of g_autoptr with QAPI types -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201216160615.324213-2-marcandre.lureau@redhat.com> -Patchwork-id: 100472 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 01/10] qapi: enable use of g_autoptr with QAPI types -Bugzilla: 1859494 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi - -From: Daniel P. 
Berrangé - -Currently QAPI generates a type and function for free'ing it: - - typedef struct QCryptoBlockCreateOptions QCryptoBlockCreateOptions; - void qapi_free_QCryptoBlockCreateOptions(QCryptoBlockCreateOptions *obj); - -This is used in the traditional manner: - - QCryptoBlockCreateOptions *opts = NULL; - - opts = g_new0(QCryptoBlockCreateOptions, 1); - - ....do stuff with opts... - - qapi_free_QCryptoBlockCreateOptions(opts); - -Since bumping the min glib to 2.48, QEMU has incrementally adopted the -use of g_auto/g_autoptr. This allows the compiler to run a function to -free a variable when it goes out of scope, the benefit being the -compiler can guarantee it is freed in all possible code ptahs. - -This benefit is applicable to QAPI types too, and given the seriously -long method names for some qapi_free_XXXX() functions, is much less -typing. This change thus makes the code generator emit: - - G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoBlockCreateOptions, - qapi_free_QCryptoBlockCreateOptions) - -The above code example now becomes - - g_autoptr(QCryptoBlockCreateOptions) opts = NULL; - - opts = g_new0(QCryptoBlockCreateOptions, 1); - - ....do stuff with opts... - -Note, if the local pointer needs to live beyond the scope holding the -variable, then g_steal_pointer can be used. This is useful to return the -pointer to the caller in the success codepath, while letting it be freed -in all error codepaths. - - return g_steal_pointer(&opts); - -The crypto/block.h header needs updating to avoid symbol clash now that -the g_autoptr support is a standard QAPI feature. - -Signed-off-by: Daniel P. Berrangé -Message-Id: <20200723153845.2934357-1-berrange@redhat.com> -Reviewed-by: Markus Armbruster -Reviewed-by: Eric Blake -Signed-off-by: Markus Armbruster - -(cherry picked from commit 221db5daf6b3666f1c8e4ca06ae45892e99a112f) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. 
de Paula ---- - docs/devel/qapi-code-gen.txt | 2 ++ - scripts/qapi/types.py | 1 + - tests/test-qobject-input-visitor.c | 23 +++++++---------------- - 3 files changed, 10 insertions(+), 16 deletions(-) - -diff --git a/docs/devel/qapi-code-gen.txt b/docs/devel/qapi-code-gen.txt -index 45c93a43cc3..ca59c695fac 100644 ---- a/docs/devel/qapi-code-gen.txt -+++ b/docs/devel/qapi-code-gen.txt -@@ -1278,6 +1278,7 @@ Example: - }; - - void qapi_free_UserDefOne(UserDefOne *obj); -+ G_DEFINE_AUTOPTR_CLEANUP_FUNC(UserDefOne, qapi_free_UserDefOne) - - struct UserDefOneList { - UserDefOneList *next; -@@ -1285,6 +1286,7 @@ Example: - }; - - void qapi_free_UserDefOneList(UserDefOneList *obj); -+ G_DEFINE_AUTOPTR_CLEANUP_FUNC(UserDefOneList, qapi_free_UserDefOneList) - - struct q_obj_my_command_arg { - UserDefOneList *arg1; -diff --git a/scripts/qapi/types.py b/scripts/qapi/types.py -index d8751daa049..c3be141dc90 100644 ---- a/scripts/qapi/types.py -+++ b/scripts/qapi/types.py -@@ -213,6 +213,7 @@ def gen_type_cleanup_decl(name): - ret = mcgen(''' - - void qapi_free_%(c_name)s(%(c_name)s *obj); -+G_DEFINE_AUTOPTR_CLEANUP_FUNC(%(c_name)s, qapi_free_%(c_name)s) - ''', - c_name=c_name(name)) - return ret -diff --git a/tests/test-qobject-input-visitor.c b/tests/test-qobject-input-visitor.c -index 6bacabf0632..e41b91a2a6f 100644 ---- a/tests/test-qobject-input-visitor.c -+++ b/tests/test-qobject-input-visitor.c -@@ -417,7 +417,7 @@ static void test_visitor_in_struct(TestInputVisitorData *data, - static void test_visitor_in_struct_nested(TestInputVisitorData *data, - const void *unused) - { -- UserDefTwo *udp = NULL; -+ g_autoptr(UserDefTwo) udp = NULL; - Visitor *v; - - v = visitor_input_test_init(data, "{ 'string0': 'string0', " -@@ -433,8 +433,6 @@ static void test_visitor_in_struct_nested(TestInputVisitorData *data, - g_assert_cmpstr(udp->dict1->dict2->userdef->string, ==, "string"); - g_assert_cmpstr(udp->dict1->dict2->string, ==, "string2"); - g_assert(udp->dict1->has_dict3 == 
false); -- -- qapi_free_UserDefTwo(udp); - } - - static void test_visitor_in_list(TestInputVisitorData *data, -@@ -546,7 +544,7 @@ static void test_visitor_in_union_flat(TestInputVisitorData *data, - const void *unused) - { - Visitor *v; -- UserDefFlatUnion *tmp; -+ g_autoptr(UserDefFlatUnion) tmp = NULL; - UserDefUnionBase *base; - - v = visitor_input_test_init(data, -@@ -563,8 +561,6 @@ static void test_visitor_in_union_flat(TestInputVisitorData *data, - - base = qapi_UserDefFlatUnion_base(tmp); - g_assert(&base->enum1 == &tmp->enum1); -- -- qapi_free_UserDefFlatUnion(tmp); - } - - static void test_visitor_in_alternate(TestInputVisitorData *data, -@@ -690,7 +686,7 @@ static void test_list_union_integer_helper(TestInputVisitorData *data, - const void *unused, - UserDefListUnionKind kind) - { -- UserDefListUnion *cvalue = NULL; -+ g_autoptr(UserDefListUnion) cvalue = NULL; - Visitor *v; - GString *gstr_list = g_string_new(""); - GString *gstr_union = g_string_new(""); -@@ -782,7 +778,6 @@ static void test_list_union_integer_helper(TestInputVisitorData *data, - - g_string_free(gstr_union, true); - g_string_free(gstr_list, true); -- qapi_free_UserDefListUnion(cvalue); - } - - static void test_visitor_in_list_union_int(TestInputVisitorData *data, -@@ -851,7 +846,7 @@ static void test_visitor_in_list_union_uint64(TestInputVisitorData *data, - static void test_visitor_in_list_union_bool(TestInputVisitorData *data, - const void *unused) - { -- UserDefListUnion *cvalue = NULL; -+ g_autoptr(UserDefListUnion) cvalue = NULL; - boolList *elem = NULL; - Visitor *v; - GString *gstr_list = g_string_new(""); -@@ -879,13 +874,12 @@ static void test_visitor_in_list_union_bool(TestInputVisitorData *data, - - g_string_free(gstr_union, true); - g_string_free(gstr_list, true); -- qapi_free_UserDefListUnion(cvalue); - } - - static void test_visitor_in_list_union_string(TestInputVisitorData *data, - const void *unused) - { -- UserDefListUnion *cvalue = NULL; -+ 
g_autoptr(UserDefListUnion) cvalue = NULL; - strList *elem = NULL; - Visitor *v; - GString *gstr_list = g_string_new(""); -@@ -914,7 +908,6 @@ static void test_visitor_in_list_union_string(TestInputVisitorData *data, - - g_string_free(gstr_union, true); - g_string_free(gstr_list, true); -- qapi_free_UserDefListUnion(cvalue); - } - - #define DOUBLE_STR_MAX 16 -@@ -922,7 +915,7 @@ static void test_visitor_in_list_union_string(TestInputVisitorData *data, - static void test_visitor_in_list_union_number(TestInputVisitorData *data, - const void *unused) - { -- UserDefListUnion *cvalue = NULL; -+ g_autoptr(UserDefListUnion) cvalue = NULL; - numberList *elem = NULL; - Visitor *v; - GString *gstr_list = g_string_new(""); -@@ -957,7 +950,6 @@ static void test_visitor_in_list_union_number(TestInputVisitorData *data, - - g_string_free(gstr_union, true); - g_string_free(gstr_list, true); -- qapi_free_UserDefListUnion(cvalue); - } - - static void input_visitor_test_add(const char *testpath, -@@ -1253,7 +1245,7 @@ static void test_visitor_in_fail_alternate(TestInputVisitorData *data, - static void do_test_visitor_in_qmp_introspect(TestInputVisitorData *data, - const QLitObject *qlit) - { -- SchemaInfoList *schema = NULL; -+ g_autoptr(SchemaInfoList) schema = NULL; - QObject *obj = qobject_from_qlit(qlit); - Visitor *v; - -@@ -1262,7 +1254,6 @@ static void do_test_visitor_in_qmp_introspect(TestInputVisitorData *data, - visit_type_SchemaInfoList(v, NULL, &schema, &error_abort); - g_assert(schema); - -- qapi_free_SchemaInfoList(schema); - qobject_unref(obj); - visit_free(v); - } --- -2.27.0 - diff --git a/SOURCES/kvm-qcow2-Expose-bitmaps-size-during-measure.patch b/SOURCES/kvm-qcow2-Expose-bitmaps-size-during-measure.patch deleted file mode 100644 index 48c15c5..0000000 --- a/SOURCES/kvm-qcow2-Expose-bitmaps-size-during-measure.patch +++ /dev/null @@ -1,495 +0,0 @@ -From af4d66e07c86d7593f7d18ae4b6a2151123b529b Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 Jun 2020 
02:34:17 +0100 -Subject: [PATCH 12/26] qcow2: Expose bitmaps' size during measure - -RH-Author: Eric Blake -Message-id: <20200602023420.2133649-10-eblake@redhat.com> -Patchwork-id: 97072 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 09/12] qcow2: Expose bitmaps' size during measure -Bugzilla: 1779893 1779904 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -It's useful to know how much space can be occupied by qcow2 persistent -bitmaps, even though such metadata is unrelated to the guest-visible -data. Report this value as an additional QMP field, present when -measuring an existing image and output format that both support -bitmaps. Update iotest 178 and 190 to updated output, as well as new -coverage in 190 demonstrating non-zero values made possible with the -recently-added qemu-img bitmap command (see 3b51ab4b). - -The new 'bitmaps size:' field is displayed automatically as part of -'qemu-img measure' any time it is present in QMP (that is, any time -both the source image being measured and destination format support -bitmaps, even if the measurement is 0 because there are no bitmaps -present). If the field is absent, it means that no bitmaps can be -copied (source, destination, or both lack bitmaps, including when -measuring based on size rather than on a source image). This behavior -is compatible with an upcoming patch adding 'qemu-img convert ---bitmaps': that command will fail in the same situations where this -patch omits the field. - -The addition of a new field demonstrates why we should always -zero-initialize qapi C structs; while the qcow2 driver still fully -populates all fields, the raw and crypto drivers had to be tweaked to -avoid uninitialized data. 
- -Consideration was also given towards having a 'qemu-img measure ---bitmaps' which errors out when bitmaps are not possible, and -otherwise sums the bitmaps into the existing allocation totals rather -than displaying as a separate field, as a potential convenience -factor. But this was ultimately decided to be more complexity than -necessary when the QMP interface was sufficient enough with bitmaps -remaining a separate field. - -See also: https://bugzilla.redhat.com/1779904 - -Reported-by: Nir Soffer -Signed-off-by: Eric Blake -Message-Id: <20200521192137.1120211-3-eblake@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 5d72c68b49769c927e90b78af6d90f6a384b26ac) - -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - block/crypto.c - commit a9da6e49 not present (no measure support) - docs/tools/qemu-img.rst - changes in qemu-img.texi instead -Signed-off-by: Eric Blake - -Signed-off-by: Danilo C. L. de Paula ---- - block/qcow2-bitmap.c | 36 ++++++++++++++++++++++++++++++ - block/qcow2.c | 14 +++++++++--- - block/qcow2.h | 2 ++ - block/raw-format.c | 2 +- - qapi/block-core.json | 16 +++++++++----- - qemu-img.c | 3 +++ - qemu-img.texi | 7 ++++++ - tests/qemu-iotests/178.out.qcow2 | 16 ++++++++++++++ - tests/qemu-iotests/190 | 47 ++++++++++++++++++++++++++++++++++++++-- - tests/qemu-iotests/190.out | 27 ++++++++++++++++++++++- - 10 files changed, 158 insertions(+), 12 deletions(-) - -diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c -index cbac905..10d1297 100644 ---- a/block/qcow2-bitmap.c -+++ b/block/qcow2-bitmap.c -@@ -1766,3 +1766,39 @@ bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs) - - return s->qcow_version >= 3; - } -+ -+/* -+ * Compute the space required for bitmaps in @bs. -+ * -+ * The computation is based as if copying to a new image with the -+ * given @cluster_size, which may differ from the cluster size in @bs. 
-+ */ -+uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *bs, -+ uint32_t cluster_size) -+{ -+ uint64_t bitmaps_size = 0; -+ BdrvDirtyBitmap *bm; -+ size_t bitmap_dir_size = 0; -+ -+ FOR_EACH_DIRTY_BITMAP(bs, bm) { -+ if (bdrv_dirty_bitmap_get_persistence(bm)) { -+ const char *name = bdrv_dirty_bitmap_name(bm); -+ uint32_t granularity = bdrv_dirty_bitmap_granularity(bm); -+ uint64_t bmbytes = -+ get_bitmap_bytes_needed(bdrv_dirty_bitmap_size(bm), -+ granularity); -+ uint64_t bmclusters = DIV_ROUND_UP(bmbytes, cluster_size); -+ -+ /* Assume the entire bitmap is allocated */ -+ bitmaps_size += bmclusters * cluster_size; -+ /* Also reserve space for the bitmap table entries */ -+ bitmaps_size += ROUND_UP(bmclusters * sizeof(uint64_t), -+ cluster_size); -+ /* And space for contribution to bitmap directory size */ -+ bitmap_dir_size += calc_dir_entry_size(strlen(name), 0); -+ } -+ } -+ bitmaps_size += ROUND_UP(bitmap_dir_size, cluster_size); -+ -+ return bitmaps_size; -+} -diff --git a/block/qcow2.c b/block/qcow2.c -index 36b0f7d..dbd870a 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -4751,16 +4751,24 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, - required = virtual_size; - } - -- info = g_new(BlockMeasureInfo, 1); -+ info = g_new0(BlockMeasureInfo, 1); - info->fully_allocated = - qcow2_calc_prealloc_size(virtual_size, cluster_size, - ctz32(refcount_bits)) + luks_payload_size; - -- /* Remove data clusters that are not required. This overestimates the -+ /* -+ * Remove data clusters that are not required. This overestimates the - * required size because metadata needed for the fully allocated file is -- * still counted. -+ * still counted. Show bitmaps only if both source and destination -+ * would support them. 
- */ - info->required = info->fully_allocated - virtual_size + required; -+ info->has_bitmaps = version >= 3 && in_bs && -+ bdrv_supports_persistent_dirty_bitmap(in_bs); -+ if (info->has_bitmaps) { -+ info->bitmaps = qcow2_get_persistent_dirty_bitmap_size(in_bs, -+ cluster_size); -+ } - return info; - - err: -diff --git a/block/qcow2.h b/block/qcow2.h -index ceb1ceb..3297e6b 100644 ---- a/block/qcow2.h -+++ b/block/qcow2.h -@@ -768,6 +768,8 @@ int qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, - const char *name, - Error **errp); - bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs); -+uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *bs, -+ uint32_t cluster_size); - - ssize_t coroutine_fn - qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size, -diff --git a/block/raw-format.c b/block/raw-format.c -index 93b25e1..4bb54f4 100644 ---- a/block/raw-format.c -+++ b/block/raw-format.c -@@ -346,7 +346,7 @@ static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs, - BDRV_SECTOR_SIZE); - } - -- info = g_new(BlockMeasureInfo, 1); -+ info = g_new0(BlockMeasureInfo, 1); - info->required = required; - - /* Unallocated sectors count towards the file size in raw images */ -diff --git a/qapi/block-core.json b/qapi/block-core.json -index a64ad81..2893209 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -689,18 +689,24 @@ - # efficiently so file size may be smaller than virtual disk size. - # - # The values are upper bounds that are guaranteed to fit the new image file. --# Subsequent modification, such as internal snapshot or bitmap creation, may --# require additional space and is not covered here. -+# Subsequent modification, such as internal snapshot or further bitmap -+# creation, may require additional space and is not covered here. - # --# @required: Size required for a new image file, in bytes. 
-+# @required: Size required for a new image file, in bytes, when copying just -+# allocated guest-visible contents. - # - # @fully-allocated: Image file size, in bytes, once data has been written --# to all sectors. -+# to all sectors, when copying just guest-visible contents. -+# -+# @bitmaps: Additional size required if all the top-level bitmap metadata -+# in the source image were to be copied to the destination, -+# present only when source and destination both support -+# persistent bitmaps. (since 5.1) - # - # Since: 2.10 - ## - { 'struct': 'BlockMeasureInfo', -- 'data': {'required': 'int', 'fully-allocated': 'int'} } -+ 'data': {'required': 'int', 'fully-allocated': 'int', '*bitmaps': 'int'} } - - ## - # @query-block: -diff --git a/qemu-img.c b/qemu-img.c -index 11a4537..b57856e 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -5212,6 +5212,9 @@ static int img_measure(int argc, char **argv) - if (output_format == OFORMAT_HUMAN) { - printf("required size: %" PRIu64 "\n", info->required); - printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated); -+ if (info->has_bitmaps) { -+ printf("bitmaps size: %" PRIu64 "\n", info->bitmaps); -+ } - } else { - dump_json_block_measure_info(info); - } -diff --git a/qemu-img.texi b/qemu-img.texi -index abf2771..3670b96 100644 ---- a/qemu-img.texi -+++ b/qemu-img.texi -@@ -576,6 +576,7 @@ The following fields are reported: - @example - required size: 524288 - fully allocated size: 1074069504 -+bitmaps size: 0 - @end example - - The @code{required size} is the file size of the new image. It may be smaller -@@ -586,6 +587,12 @@ been written to all sectors. This is the maximum size that the image file can - occupy with the exception of internal snapshots, dirty bitmaps, vmstate data, - and other advanced image format features. 
- -+The @code{bitmaps size} is the additional size required in order to -+copy bitmaps from a source image in addition to the guest-visible -+data; the line is omitted if either source or destination lacks -+bitmap support, or 0 if bitmaps are supported but there is nothing to -+copy. -+ - @item snapshot [--object @var{objectdef}] [--image-opts] [-U] [-q] [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot}] @var{filename} - - List, apply, create or delete snapshots in image @var{filename}. -diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2 -index 345eab3..b9ed41b 100644 ---- a/tests/qemu-iotests/178.out.qcow2 -+++ b/tests/qemu-iotests/178.out.qcow2 -@@ -37,6 +37,7 @@ qemu-img: The image size is too large (try using a larger cluster size) - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=0 - required size: 196608 - fully allocated size: 196608 -+bitmaps size: 0 - - converted image file size in bytes: 196608 - -@@ -45,6 +46,7 @@ converted image file size in bytes: 196608 - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 - required size: 393216 - fully allocated size: 1074135040 -+bitmaps size: 0 - wrote 512/512 bytes at offset 512 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - wrote 65536/65536 bytes at offset 65536 -@@ -53,6 +55,7 @@ wrote 64512/64512 bytes at offset 134217728 - 63 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - required size: 589824 - fully allocated size: 1074135040 -+bitmaps size: 0 - - converted image file size in bytes: 524288 - -@@ -60,6 +63,7 @@ converted image file size in bytes: 524288 - - required size: 524288 - fully allocated size: 1074135040 -+bitmaps size: 0 - - converted image file size in bytes: 458752 - -@@ -67,16 +71,19 @@ converted image file size in bytes: 458752 - - required size: 1074135040 - fully allocated size: 1074135040 -+bitmaps size: 0 - - == qcow2 input image and LUKS encryption == - - required size: 2686976 - fully allocated size: 1076232192 
-+bitmaps size: 0 - - == qcow2 input image and preallocation (human) == - - required size: 1074135040 - fully allocated size: 1074135040 -+bitmaps size: 0 - - converted image file size in bytes: 1074135040 - -@@ -87,6 +94,7 @@ wrote 8388608/8388608 bytes at offset 0 - 8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - required size: 8716288 - fully allocated size: 8716288 -+bitmaps size: 0 - - converted image file size in bytes: 8716288 - -@@ -173,6 +181,7 @@ qemu-img: The image size is too large (try using a larger cluster size) - - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=0 - { -+ "bitmaps": 0, - "required": 196608, - "fully-allocated": 196608 - } -@@ -183,6 +192,7 @@ converted image file size in bytes: 196608 - - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 - { -+ "bitmaps": 0, - "required": 393216, - "fully-allocated": 1074135040 - } -@@ -193,6 +203,7 @@ wrote 65536/65536 bytes at offset 65536 - wrote 64512/64512 bytes at offset 134217728 - 63 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { -+ "bitmaps": 0, - "required": 589824, - "fully-allocated": 1074135040 - } -@@ -202,6 +213,7 @@ converted image file size in bytes: 524288 - == qcow2 input image with internal snapshot (json) == - - { -+ "bitmaps": 0, - "required": 524288, - "fully-allocated": 1074135040 - } -@@ -211,6 +223,7 @@ converted image file size in bytes: 458752 - == qcow2 input image and a backing file (json) == - - { -+ "bitmaps": 0, - "required": 1074135040, - "fully-allocated": 1074135040 - } -@@ -218,6 +231,7 @@ converted image file size in bytes: 458752 - == qcow2 input image and LUKS encryption == - - { -+ "bitmaps": 0, - "required": 2686976, - "fully-allocated": 1076232192 - } -@@ -225,6 +239,7 @@ converted image file size in bytes: 458752 - == qcow2 input image and preallocation (json) == - - { -+ "bitmaps": 0, - "required": 1074135040, - "fully-allocated": 1074135040 - } -@@ -237,6 +252,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=8388608 - wrote 
8388608/8388608 bytes at offset 0 - 8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - { -+ "bitmaps": 0, - "required": 8716288, - "fully-allocated": 8716288 - } -diff --git a/tests/qemu-iotests/190 b/tests/qemu-iotests/190 -index eb766ad..5084ccd 100755 ---- a/tests/qemu-iotests/190 -+++ b/tests/qemu-iotests/190 -@@ -2,7 +2,7 @@ - # - # qemu-img measure sub-command tests on huge qcow2 files - # --# Copyright (C) 2017 Red Hat, Inc. -+# Copyright (C) 2017-2020 Red Hat, Inc. - # - # This program is free software; you can redistribute it and/or modify - # it under the terms of the GNU General Public License as published by -@@ -42,7 +42,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 - _supported_fmt qcow2 - _supported_proto file - --echo "== Huge file ==" -+echo "== Huge file without bitmaps ==" - echo - - IMGOPTS='cluster_size=2M' _make_test_img 2T -@@ -51,6 +51,49 @@ $QEMU_IMG measure -O raw -f qcow2 "$TEST_IMG" - $QEMU_IMG measure -O qcow2 -o cluster_size=64k -f qcow2 "$TEST_IMG" - $QEMU_IMG measure -O qcow2 -o cluster_size=2M -f qcow2 "$TEST_IMG" - -+echo -+echo "== Huge file with bitmaps ==" -+echo -+ -+$QEMU_IMG bitmap --add --granularity 512 -f qcow2 "$TEST_IMG" b1 -+$QEMU_IMG bitmap --add -g 2M -f qcow2 "$TEST_IMG" b2 -+ -+# No bitmap without a source -+$QEMU_IMG measure -O qcow2 --size 10M -+# No bitmap output, since raw does not support it -+$QEMU_IMG measure -O raw -f qcow2 "$TEST_IMG" -+# No bitmap output, since no bitmaps on raw source. 
Munge required size, as -+# some filesystems store the qcow2 file with less sparseness than others -+$QEMU_IMG measure -O qcow2 -f raw "$TEST_IMG" | -+ sed '/^required size:/ s/[0-9][0-9]*/SIZE/' -+# No bitmap output, since v2 does not support it -+$QEMU_IMG measure -O qcow2 -o compat=0.10 -f qcow2 "$TEST_IMG" -+ -+# Compute expected output: bitmap clusters + bitmap tables + bitmaps directory -+echo -+val2T=$((2*1024*1024*1024*1024)) -+cluster=$((64*1024)) -+b1clusters=$(( (val2T/512/8 + cluster - 1) / cluster )) -+b2clusters=$(( (val2T/2/1024/1024/8 + cluster - 1) / cluster )) -+echo expected bitmap $((b1clusters * cluster + -+ (b1clusters * 8 + cluster - 1) / cluster * cluster + -+ b2clusters * cluster + -+ (b2clusters * 8 + cluster - 1) / cluster * cluster + -+ cluster)) -+$QEMU_IMG measure -O qcow2 -o cluster_size=64k -f qcow2 "$TEST_IMG" -+ -+# Compute expected output: bitmap clusters + bitmap tables + bitmaps directory -+echo -+cluster=$((2*1024*1024)) -+b1clusters=$(( (val2T/512/8 + cluster - 1) / cluster )) -+b2clusters=$(( (val2T/2/1024/1024/8 + cluster - 1) / cluster )) -+echo expected bitmap $((b1clusters * cluster + -+ (b1clusters * 8 + cluster - 1) / cluster * cluster + -+ b2clusters * cluster + -+ (b2clusters * 8 + cluster - 1) / cluster * cluster + -+ cluster)) -+$QEMU_IMG measure --output=json -O qcow2 -o cluster_size=2M -f qcow2 "$TEST_IMG" -+ - # success, all done - echo "*** done" - rm -f $seq.full -diff --git a/tests/qemu-iotests/190.out b/tests/qemu-iotests/190.out -index d001942..ed9d821 100644 ---- a/tests/qemu-iotests/190.out -+++ b/tests/qemu-iotests/190.out -@@ -1,11 +1,36 @@ - QA output created by 190 --== Huge file == -+== Huge file without bitmaps == - - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2199023255552 - required size: 2199023255552 - fully allocated size: 2199023255552 - required size: 335806464 - fully allocated size: 2199359062016 -+bitmaps size: 0 - required size: 18874368 - fully allocated size: 2199042129920 -+bitmaps 
size: 0 -+ -+== Huge file with bitmaps == -+ -+required size: 327680 -+fully allocated size: 10813440 -+required size: 2199023255552 -+fully allocated size: 2199023255552 -+required size: SIZE -+fully allocated size: 17170432 -+required size: 335806464 -+fully allocated size: 2199359062016 -+ -+expected bitmap 537198592 -+required size: 335806464 -+fully allocated size: 2199359062016 -+bitmaps size: 537198592 -+ -+expected bitmap 545259520 -+{ -+ "bitmaps": 545259520, -+ "required": 18874368, -+ "fully-allocated": 2199042129920 -+} - *** done --- -1.8.3.1 - diff --git a/SOURCES/kvm-qcow2-Fix-alloc_cluster_abort-for-pre-existing-clust.patch b/SOURCES/kvm-qcow2-Fix-alloc_cluster_abort-for-pre-existing-clust.patch deleted file mode 100644 index 43ff282..0000000 --- a/SOURCES/kvm-qcow2-Fix-alloc_cluster_abort-for-pre-existing-clust.patch +++ /dev/null @@ -1,47 +0,0 @@ -From bd97bbbce54da301407d51cae35e09ba2a12b160 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Mon, 13 Jul 2020 14:24:48 -0400 -Subject: [PATCH 1/4] qcow2: Fix alloc_cluster_abort() for pre-existing - clusters - -RH-Author: Max Reitz -Message-id: <20200713142451.289703-2-mreitz@redhat.com> -Patchwork-id: 97954 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/4] qcow2: Fix alloc_cluster_abort() for pre-existing clusters -Bugzilla: 1807057 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Kevin Wolf - -handle_alloc() reuses preallocated zero clusters. If anything goes -wrong during the data write, we do not change their L2 entry, so we -must not let qcow2_alloc_cluster_abort() free them. - -Fixes: 8b24cd141549b5b264baeddd4e72902cfb5de23b -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Message-Id: <20200225143130.111267-2-mreitz@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 3ede935fdbbd5f7b24b4724bbfb8938acb5956d8) -Signed-off-by: Max Reitz -Signed-off-by: Danilo C. L. 
de Paula ---- - block/qcow2-cluster.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c -index 9d04f8d77b..1970797ce5 100644 ---- a/block/qcow2-cluster.c -+++ b/block/qcow2-cluster.c -@@ -1015,7 +1015,7 @@ err: - void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) - { - BDRVQcow2State *s = bs->opaque; -- if (!has_data_file(bs)) { -+ if (!has_data_file(bs) && !m->keep_old_clusters) { - qcow2_free_clusters(bs, m->alloc_offset, - m->nb_clusters << s->cluster_bits, - QCOW2_DISCARD_NEVER); --- -2.27.0 - diff --git a/SOURCES/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch b/SOURCES/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch deleted file mode 100644 index 1a7ace5..0000000 --- a/SOURCES/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch +++ /dev/null @@ -1,52 +0,0 @@ -From ecc4fb6e1941035e1d9def1f69b779fbea216caf Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 24 Feb 2020 16:13:07 +0000 -Subject: [PATCH 7/9] qcow2: Fix qcow2_alloc_cluster_abort() for external data - file - -RH-Author: Kevin Wolf -Message-id: <20200224161307.29783-2-kwolf@redhat.com> -Patchwork-id: 94042 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] qcow2: Fix qcow2_alloc_cluster_abort() for external data file -Bugzilla: 1703907 -RH-Acked-by: John Snow -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -For external data file, cluster allocations return an offset in the data -file and are not refcounted. In this case, there is nothing to do for -qcow2_alloc_cluster_abort(). Freeing the same offset in the qcow2 file -is wrong and causes crashes in the better case or image corruption in -the worse case. - -Signed-off-by: Kevin Wolf -Message-Id: <20200211094900.17315-3-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit c3b6658c1a5a3fb24d6c27b2594cf86146f75b22) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/qcow2-cluster.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c -index 8982b7b..dc3c270 100644 ---- a/block/qcow2-cluster.c -+++ b/block/qcow2-cluster.c -@@ -1015,8 +1015,11 @@ err: - void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) - { - BDRVQcow2State *s = bs->opaque; -- qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits, -- QCOW2_DISCARD_NEVER); -+ if (!has_data_file(bs)) { -+ qcow2_free_clusters(bs, m->alloc_offset, -+ m->nb_clusters << s->cluster_bits, -+ QCOW2_DISCARD_NEVER); -+ } - } - - /* --- -1.8.3.1 - diff --git a/SOURCES/kvm-qcow2-Forward-ZERO_WRITE-flag-for-full-preallocation.patch b/SOURCES/kvm-qcow2-Forward-ZERO_WRITE-flag-for-full-preallocation.patch deleted file mode 100644 index 522ba60..0000000 --- a/SOURCES/kvm-qcow2-Forward-ZERO_WRITE-flag-for-full-preallocation.patch +++ /dev/null @@ -1,98 +0,0 @@ -From 4290173219e15065e9a7c2e95774ac979b5fd869 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 8 Jun 2020 15:01:40 +0100 -Subject: [PATCH 12/17] qcow2: Forward ZERO_WRITE flag for full preallocation - -RH-Author: Kevin Wolf -Message-id: <20200608150140.38218-12-kwolf@redhat.com> -Patchwork-id: 97456 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 11/11] qcow2: Forward ZERO_WRITE flag for full preallocation -Bugzilla: 1780574 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz - -The BDRV_REQ_ZERO_WRITE is currently implemented in a way that first the -image is possibly preallocated and then the zero flag is added to all -clusters. This means that a copy-on-write operation may be needed when -writing to these clusters, despite having used preallocation, negating -one of the major benefits of preallocation. 
- -Instead, try to forward the BDRV_REQ_ZERO_WRITE to the protocol driver, -and if the protocol driver can ensure that the new area reads as zeros, -we can skip setting the zero flag in the qcow2 layer. - -Unfortunately, the same approach doesn't work for metadata -preallocation, so we'll still set the zero flag there. - -Signed-off-by: Kevin Wolf -Reviewed-by: Max Reitz -Message-Id: <20200424142701.67053-1-kwolf@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit eb8a0cf3ba26611f3981f8f45ac6a868975a68cc) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/qcow2.c | 22 +++++++++++++++++++--- - tests/qemu-iotests/274.out | 4 ++-- - 2 files changed, 21 insertions(+), 5 deletions(-) - -diff --git a/block/qcow2.c b/block/qcow2.c -index f3d6cb0..b783662 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -4153,9 +4153,25 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, - /* Allocate the data area */ - new_file_size = allocation_start + - nb_new_data_clusters * s->cluster_size; -- /* Image file grows, so @exact does not matter */ -- ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, -- errp); -+ /* -+ * Image file grows, so @exact does not matter. -+ * -+ * If we need to zero out the new area, try first whether the protocol -+ * driver can already take care of this. 
-+ */ -+ if (flags & BDRV_REQ_ZERO_WRITE) { -+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, -+ BDRV_REQ_ZERO_WRITE, NULL); -+ if (ret >= 0) { -+ flags &= ~BDRV_REQ_ZERO_WRITE; -+ } -+ } else { -+ ret = -1; -+ } -+ if (ret < 0) { -+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0, -+ errp); -+ } - if (ret < 0) { - error_prepend(errp, "Failed to resize underlying file: "); - qcow2_free_clusters(bs, allocation_start, -diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out -index 1a796fd..9d6fdeb 100644 ---- a/tests/qemu-iotests/274.out -+++ b/tests/qemu-iotests/274.out -@@ -187,7 +187,7 @@ read 65536/65536 bytes at offset 9437184 - 10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000) - - [{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false}, --{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}] -+{ "start": 5242880, "length": 10485760, "depth": 0, "zero": false, "data": true, "offset": 327680}] - - === preallocation=full === - Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 -@@ -206,7 +206,7 @@ read 65536/65536 bytes at offset 11534336 - 4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000) - - [{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false}, --{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}] -+{ "start": 8388608, "length": 4194304, "depth": 0, "zero": false, "data": true, "offset": 327680}] - - === preallocation=off === - Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16 --- -1.8.3.1 - diff --git a/SOURCES/kvm-qcow2-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch b/SOURCES/kvm-qcow2-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch deleted file mode 100644 index 454759e..0000000 --- 
a/SOURCES/kvm-qcow2-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 3e603e344b81b3ecfea6fb9589ba91f70a22139d Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 8 Jun 2020 15:01:33 +0100 -Subject: [PATCH 05/17] qcow2: Support BDRV_REQ_ZERO_WRITE for truncate - -RH-Author: Kevin Wolf -Message-id: <20200608150140.38218-5-kwolf@redhat.com> -Patchwork-id: 97449 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 04/11] qcow2: Support BDRV_REQ_ZERO_WRITE for truncate -Bugzilla: 1780574 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz - -If BDRV_REQ_ZERO_WRITE is set and we're extending the image, calling -qcow2_cluster_zeroize() with flags=0 does the right thing: It doesn't -undo any previous preallocation, but just adds the zero flag to all -relevant L2 entries. If an external data file is in use, a write_zeroes -request to the data file is made instead. - -Signed-off-by: Kevin Wolf -Message-Id: <20200424125448.63318-5-kwolf@redhat.com> -Reviewed-by: Eric Blake -Reviewed-by: Max Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit f01643fb8b47e8a70c04bbf45e0f12a9e5bc54de) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/qcow2-cluster.c | 2 +- - block/qcow2.c | 34 ++++++++++++++++++++++++++++++++++ - 2 files changed, 35 insertions(+), 1 deletion(-) - -diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c -index dc3c270..9d04f8d 100644 ---- a/block/qcow2-cluster.c -+++ b/block/qcow2-cluster.c -@@ -1784,7 +1784,7 @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset, - /* Caller must pass aligned values, except at image end */ - assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); - assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || -- end_offset == bs->total_sectors << BDRV_SECTOR_BITS); -+ end_offset >= bs->total_sectors << BDRV_SECTOR_BITS); - - /* The zero flag is only supported by version 3 and newer */ - if (s->qcow_version < 3) { -diff --git a/block/qcow2.c b/block/qcow2.c -index 86aa74a..f3d6cb0 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -1726,6 +1726,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, - } - - bs->supported_zero_flags = header.version >= 3 ? BDRV_REQ_MAY_UNMAP : 0; -+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; - - /* Repair image if dirty */ - if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && -@@ -4197,6 +4198,39 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, - g_assert_not_reached(); - } - -+ if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) { -+ uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->cluster_size); -+ -+ /* -+ * Use zero clusters as much as we can. qcow2_cluster_zeroize() -+ * requires a cluster-aligned start. The end may be unaligned if it is -+ * at the end of the image (which it is here). 
-+ */ -+ ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "Failed to zero out new clusters"); -+ goto fail; -+ } -+ -+ /* Write explicit zeros for the unaligned head */ -+ if (zero_start > old_length) { -+ uint64_t len = zero_start - old_length; -+ uint8_t *buf = qemu_blockalign0(bs, len); -+ QEMUIOVector qiov; -+ qemu_iovec_init_buf(&qiov, buf, len); -+ -+ qemu_co_mutex_unlock(&s->lock); -+ ret = qcow2_co_pwritev_part(bs, old_length, len, &qiov, 0, 0); -+ qemu_co_mutex_lock(&s->lock); -+ -+ qemu_vfree(buf); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "Failed to zero out the new area"); -+ goto fail; -+ } -+ } -+ } -+ - if (prealloc != PREALLOC_MODE_OFF) { - /* Flush metadata before actually changing the image size */ - ret = qcow2_write_caches(bs); --- -1.8.3.1 - diff --git a/SOURCES/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch b/SOURCES/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch deleted file mode 100644 index 88a6e31..0000000 --- a/SOURCES/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch +++ /dev/null @@ -1,92 +0,0 @@ -From d84814e298e3b05fb5bc61cc8e641a5e104d32d5 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:39 +0000 -Subject: [PATCH 07/18] qemu-file: Don't do IO after shutdown - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-7-quintela@redhat.com> -Patchwork-id: 94116 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 06/10] qemu-file: Don't do IO after shutdown -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -Be sure that we are not doing neither read/write after shutdown of the -QEMUFile. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -(cherry picked from commit a555b8092abc6f1bbe4b64c516679cbd68fcfbd8) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/qemu-file.c | 22 +++++++++++++++++++++- - 1 file changed, 21 insertions(+), 1 deletion(-) - -diff --git a/migration/qemu-file.c b/migration/qemu-file.c -index 26fb25d..bbb2b63 100644 ---- a/migration/qemu-file.c -+++ b/migration/qemu-file.c -@@ -53,6 +53,8 @@ struct QEMUFile { - - int last_error; - Error *last_error_obj; -+ /* has the file has been shutdown */ -+ bool shutdown; - }; - - /* -@@ -61,10 +63,18 @@ struct QEMUFile { - */ - int qemu_file_shutdown(QEMUFile *f) - { -+ int ret; -+ -+ f->shutdown = true; - if (!f->ops->shut_down) { - return -ENOSYS; - } -- return f->ops->shut_down(f->opaque, true, true, NULL); -+ ret = f->ops->shut_down(f->opaque, true, true, NULL); -+ -+ if (!f->last_error) { -+ qemu_file_set_error(f, -EIO); -+ } -+ return ret; - } - - /* -@@ -214,6 +224,9 @@ void qemu_fflush(QEMUFile *f) - return; - } - -+ if (f->shutdown) { -+ return; -+ } - if (f->iovcnt > 0) { - expect = iov_size(f->iov, f->iovcnt); - ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos, -@@ -328,6 +341,10 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) - f->buf_index = 0; - f->buf_size = pending; - -+ if (f->shutdown) { -+ return 0; -+ } -+ - len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, - IO_BUF_SIZE - pending, &local_error); - if (len > 0) { -@@ -642,6 +659,9 @@ int64_t qemu_ftell(QEMUFile *f) - - int qemu_file_rate_limit(QEMUFile *f) - { -+ if (f->shutdown) { -+ return 1; -+ } - if (qemu_file_get_error(f)) { - return 1; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-qemu-img-Add-bitmap-sub-command.patch b/SOURCES/kvm-qemu-img-Add-bitmap-sub-command.patch deleted file mode 100644 index eb80188..0000000 --- a/SOURCES/kvm-qemu-img-Add-bitmap-sub-command.patch +++ /dev/null @@ -1,398 +0,0 @@ -From 53baacb72e8561391841363b2acbd85a783cbc66 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 Jun 2020 02:34:15 +0100 -Subject: [PATCH 10/26] qemu-img: Add bitmap sub-command - -RH-Author: Eric Blake -Message-id: 
<20200602023420.2133649-8-eblake@redhat.com> -Patchwork-id: 97074 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 07/12] qemu-img: Add bitmap sub-command -Bugzilla: 1779893 1779904 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Include actions for --add, --remove, --clear, --enable, --disable, and ---merge (note that --clear is a bit of fluff, because the same can be -accomplished by removing a bitmap and then adding a new one in its -place, but it matches what QMP commands exist). Listing is omitted, -because it does not require a bitmap name and because it was already -possible with 'qemu-img info'. A single command line can play one or -more bitmap commands in sequence on the same bitmap name (although all -added bitmaps share the same granularity, and and all merged bitmaps -come from the same source file). Merge defaults to other bitmaps in -the primary image, but can also be told to merge bitmaps from a -distinct image. - -While this supports --image-opts for the file being modified, I did -not think it worth the extra complexity to support that for the source -file in a cross-file merges. Likewise, I chose to have --merge only -take a single source rather than following the QMP support for -multiple merges in one go (although you can still use more than one ---merge in the command line); in part because qemu-img is offline and -therefore atomicity is not an issue. - -Upcoming patches will add iotest coverage of these commands while -also testing other features. - -Signed-off-by: Eric Blake -Reviewed-by: Max Reitz -Message-Id: <20200513011648.166876-7-eblake@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 3b51ab4bf0f49a01cc2db7b954e0669e081719b5) - -Signed-off-by: Danilo C. L. 
de Paula - -Conflicts: - docs/tools/qemu-img.rst - lives in qemu-img.texi instead; plus - fix a typo in the text for --merge rather than waiting for - a one-line upstream followup patch - qemu-img-cmds.hx - context, use texi instead of rst - qemu-img.c - context -Signed-off-by: Eric Blake - -Signed-off-by: Danilo C. L. de Paula ---- - qemu-img-cmds.hx | 6 ++ - qemu-img.c | 248 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ - qemu-img.texi | 27 ++++++ - 3 files changed, 281 insertions(+) - -diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx -index 1c93e6d..1a6a8e9 100644 ---- a/qemu-img-cmds.hx -+++ b/qemu-img-cmds.hx -@@ -25,6 +25,12 @@ STEXI - @item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] [-U] @var{filename} - ETEXI - -+DEF("bitmap", img_bitmap, -+ "bitmap (--merge SOURCE | --add | --remove | --clear | --enable | --disable)... [-b source_file [-F source_fmt]] [-g granularity] [--object objectdef] [--image-opts | -f fmt] filename bitmap") -+STEXI -+.. option:: bitmap (--merge @var{source} | --add | --remove | --clear | --enable | --disable)... 
[-b @var{source_file} [-F @var{source_fmt}]] [-g @var{granularity}] [--object @var{objectdef}] [--image-opts | -f @var{fmt}] @var{filename} @var{bitmap} -+ETEXI -+ - DEF("check", img_check, - "check [--object objectdef] [--image-opts] [-q] [-f fmt] [--output=ofmt] [-r [leaks | all]] [-T src_cache] [-U] filename") - STEXI -diff --git a/qemu-img.c b/qemu-img.c -index e69529b..11a4537 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -28,6 +28,7 @@ - #include "qemu-common.h" - #include "qemu-version.h" - #include "qapi/error.h" -+#include "qapi/qapi-commands-block-core.h" - #include "qapi/qapi-visit-block-core.h" - #include "qapi/qobject-output-visitor.h" - #include "qapi/qmp/qjson.h" -@@ -70,6 +71,12 @@ enum { - OPTION_PREALLOCATION = 265, - OPTION_SHRINK = 266, - OPTION_SALVAGE = 267, -+ OPTION_ADD = 269, -+ OPTION_REMOVE = 270, -+ OPTION_CLEAR = 271, -+ OPTION_ENABLE = 272, -+ OPTION_DISABLE = 273, -+ OPTION_MERGE = 274, - }; - - typedef enum OutputFormat { -@@ -168,6 +175,14 @@ static void QEMU_NORETURN help(void) - " '-n' skips the target volume creation (useful if the volume is created\n" - " prior to running qemu-img)\n" - "\n" -+ "Parameters to bitmap subcommand:\n" -+ " 'bitmap' is the name of the bitmap to manipulate, through one or more\n" -+ " actions from '--add', '--remove', '--clear', '--enable', '--disable',\n" -+ " or '--merge source'\n" -+ " '-g granularity' sets the granularity for '--add' actions\n" -+ " '-b source' and '-F src_fmt' tell '--merge' actions to find the source\n" -+ " bitmaps from an alternative file\n" -+ "\n" - "Parameters to check subcommand:\n" - " '-r' tries to repair any inconsistencies that are found during the check.\n" - " '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n" -@@ -4402,6 +4417,239 @@ out: - return 0; - } - -+enum ImgBitmapAct { -+ BITMAP_ADD, -+ BITMAP_REMOVE, -+ BITMAP_CLEAR, -+ BITMAP_ENABLE, -+ BITMAP_DISABLE, -+ BITMAP_MERGE, -+}; -+typedef struct ImgBitmapAction { -+ enum ImgBitmapAct act; -+ 
const char *src; /* only used for merge */ -+ QSIMPLEQ_ENTRY(ImgBitmapAction) next; -+} ImgBitmapAction; -+ -+static int img_bitmap(int argc, char **argv) -+{ -+ Error *err = NULL; -+ int c, ret = 1; -+ QemuOpts *opts = NULL; -+ const char *fmt = NULL, *src_fmt = NULL, *src_filename = NULL; -+ const char *filename, *bitmap; -+ BlockBackend *blk = NULL, *src = NULL; -+ BlockDriverState *bs = NULL, *src_bs = NULL; -+ bool image_opts = false; -+ int64_t granularity = 0; -+ bool add = false, merge = false; -+ QSIMPLEQ_HEAD(, ImgBitmapAction) actions; -+ ImgBitmapAction *act, *act_next; -+ const char *op; -+ -+ QSIMPLEQ_INIT(&actions); -+ -+ for (;;) { -+ static const struct option long_options[] = { -+ {"help", no_argument, 0, 'h'}, -+ {"object", required_argument, 0, OPTION_OBJECT}, -+ {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, -+ {"add", no_argument, 0, OPTION_ADD}, -+ {"remove", no_argument, 0, OPTION_REMOVE}, -+ {"clear", no_argument, 0, OPTION_CLEAR}, -+ {"enable", no_argument, 0, OPTION_ENABLE}, -+ {"disable", no_argument, 0, OPTION_DISABLE}, -+ {"merge", required_argument, 0, OPTION_MERGE}, -+ {"granularity", required_argument, 0, 'g'}, -+ {"source-file", required_argument, 0, 'b'}, -+ {"source-format", required_argument, 0, 'F'}, -+ {0, 0, 0, 0} -+ }; -+ c = getopt_long(argc, argv, ":b:f:F:g:h", long_options, NULL); -+ if (c == -1) { -+ break; -+ } -+ -+ switch (c) { -+ case ':': -+ missing_argument(argv[optind - 1]); -+ break; -+ case '?': -+ unrecognized_option(argv[optind - 1]); -+ break; -+ case 'h': -+ help(); -+ break; -+ case 'b': -+ src_filename = optarg; -+ break; -+ case 'f': -+ fmt = optarg; -+ break; -+ case 'F': -+ src_fmt = optarg; -+ break; -+ case 'g': -+ granularity = cvtnum("granularity", optarg); -+ if (granularity < 0) { -+ return 1; -+ } -+ break; -+ case OPTION_ADD: -+ act = g_new0(ImgBitmapAction, 1); -+ act->act = BITMAP_ADD; -+ QSIMPLEQ_INSERT_TAIL(&actions, act, next); -+ add = true; -+ break; -+ case OPTION_REMOVE: -+ act = 
g_new0(ImgBitmapAction, 1); -+ act->act = BITMAP_REMOVE; -+ QSIMPLEQ_INSERT_TAIL(&actions, act, next); -+ break; -+ case OPTION_CLEAR: -+ act = g_new0(ImgBitmapAction, 1); -+ act->act = BITMAP_CLEAR; -+ QSIMPLEQ_INSERT_TAIL(&actions, act, next); -+ break; -+ case OPTION_ENABLE: -+ act = g_new0(ImgBitmapAction, 1); -+ act->act = BITMAP_ENABLE; -+ QSIMPLEQ_INSERT_TAIL(&actions, act, next); -+ break; -+ case OPTION_DISABLE: -+ act = g_new0(ImgBitmapAction, 1); -+ act->act = BITMAP_DISABLE; -+ QSIMPLEQ_INSERT_TAIL(&actions, act, next); -+ break; -+ case OPTION_MERGE: -+ act = g_new0(ImgBitmapAction, 1); -+ act->act = BITMAP_MERGE; -+ act->src = optarg; -+ QSIMPLEQ_INSERT_TAIL(&actions, act, next); -+ merge = true; -+ break; -+ case OPTION_OBJECT: -+ opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true); -+ if (!opts) { -+ goto out; -+ } -+ break; -+ case OPTION_IMAGE_OPTS: -+ image_opts = true; -+ break; -+ } -+ } -+ -+ if (qemu_opts_foreach(&qemu_object_opts, -+ user_creatable_add_opts_foreach, -+ qemu_img_object_print_help, &error_fatal)) { -+ goto out; -+ } -+ -+ if (QSIMPLEQ_EMPTY(&actions)) { -+ error_report("Need at least one of --add, --remove, --clear, " -+ "--enable, --disable, or --merge"); -+ goto out; -+ } -+ -+ if (granularity && !add) { -+ error_report("granularity only supported with --add"); -+ goto out; -+ } -+ if (src_fmt && !src_filename) { -+ error_report("-F only supported with -b"); -+ goto out; -+ } -+ if (src_filename && !merge) { -+ error_report("Merge bitmap source file only supported with " -+ "--merge"); -+ goto out; -+ } -+ -+ if (optind != argc - 2) { -+ error_report("Expecting filename and bitmap name"); -+ goto out; -+ } -+ -+ filename = argv[optind]; -+ bitmap = argv[optind + 1]; -+ -+ blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR, false, false, -+ false); -+ if (!blk) { -+ goto out; -+ } -+ bs = blk_bs(blk); -+ if (src_filename) { -+ src = img_open(false, src_filename, src_fmt, 0, false, false, false); -+ if (!src) { 
-+ goto out; -+ } -+ src_bs = blk_bs(src); -+ } else { -+ src_bs = bs; -+ } -+ -+ QSIMPLEQ_FOREACH_SAFE(act, &actions, next, act_next) { -+ switch (act->act) { -+ case BITMAP_ADD: -+ qmp_block_dirty_bitmap_add(bs->node_name, bitmap, -+ !!granularity, granularity, true, true, -+ false, false, &err); -+ op = "add"; -+ break; -+ case BITMAP_REMOVE: -+ qmp_block_dirty_bitmap_remove(bs->node_name, bitmap, &err); -+ op = "remove"; -+ break; -+ case BITMAP_CLEAR: -+ qmp_block_dirty_bitmap_clear(bs->node_name, bitmap, &err); -+ op = "clear"; -+ break; -+ case BITMAP_ENABLE: -+ qmp_block_dirty_bitmap_enable(bs->node_name, bitmap, &err); -+ op = "enable"; -+ break; -+ case BITMAP_DISABLE: -+ qmp_block_dirty_bitmap_disable(bs->node_name, bitmap, &err); -+ op = "disable"; -+ break; -+ case BITMAP_MERGE: { -+ BlockDirtyBitmapMergeSource *merge_src; -+ BlockDirtyBitmapMergeSourceList *list; -+ -+ merge_src = g_new0(BlockDirtyBitmapMergeSource, 1); -+ merge_src->type = QTYPE_QDICT; -+ merge_src->u.external.node = g_strdup(src_bs->node_name); -+ merge_src->u.external.name = g_strdup(act->src); -+ list = g_new0(BlockDirtyBitmapMergeSourceList, 1); -+ list->value = merge_src; -+ qmp_block_dirty_bitmap_merge(bs->node_name, bitmap, list, &err); -+ qapi_free_BlockDirtyBitmapMergeSourceList(list); -+ op = "merge"; -+ break; -+ } -+ default: -+ g_assert_not_reached(); -+ } -+ -+ if (err) { -+ error_reportf_err(err, "Operation %s on bitmap %s failed: ", -+ op, bitmap); -+ goto out; -+ } -+ g_free(act); -+ } -+ -+ ret = 0; -+ -+ out: -+ blk_unref(src); -+ blk_unref(blk); -+ qemu_opts_del(opts); -+ return ret; -+} -+ - #define C_BS 01 - #define C_COUNT 02 - #define C_IF 04 -diff --git a/qemu-img.texi b/qemu-img.texi -index b5156d6..abf2771 100644 ---- a/qemu-img.texi -+++ b/qemu-img.texi -@@ -230,6 +230,33 @@ specified as well. - For write tests, by default a buffer filled with zeros is written. This can be - overridden with a pattern byte specified by @var{pattern}. 
- -+@item bitmap (--merge @var{source} | --add | --remove | --clear | --enable | --disable)... [-b @var{source_file} [-F @var{source_fmt}]] [-g @var{granularity}] [--object @var{objectdef}] [--image-opts | -f @var{fmt}] @var{filename} @var{bitmap} -+ -+Perform one or more modifications of the persistent bitmap @var{bitmap} -+in the disk image @var{filename}. The various modifications are: -+ -+@table @option -+@item add -+create @var{bitmap}, enabled to record future edits. -+@item remove -+remove @var{bitmap}. -+@item clear -+clear @var{bitmap}. -+@item enable -+change @var{bitmap} to start recording future edits. -+@item disable -+change @var{bitmap} to stop recording future edits. -+@item merge @var{source} -+merge the contents of the @var{source} bitmap into @var{bitmap}. -+@end table -+ -+Additional options include @option{-g} which sets a non-default -+@var{granularity} for @option{--add}, and @option{-b} and @option{-F} -+which select an alternative source file for all @var{source} bitmaps used by -+@option{--merge}. -+ -+To see what bitmaps are present in an image, use @code{qemu-img info}. -+ - @item check [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] [-T @var{src_cache}] [-U] @var{filename} - - Perform a consistency check on the disk image @var{filename}. 
The command can --- -1.8.3.1 - diff --git a/SOURCES/kvm-qemu-img-Add-convert-bitmaps-option.patch b/SOURCES/kvm-qemu-img-Add-convert-bitmaps-option.patch deleted file mode 100644 index 20eca9f..0000000 --- a/SOURCES/kvm-qemu-img-Add-convert-bitmaps-option.patch +++ /dev/null @@ -1,244 +0,0 @@ -From f2add7d5955770318824c3eee774bec2dd850936 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 Jun 2020 02:34:19 +0100 -Subject: [PATCH 14/26] qemu-img: Add convert --bitmaps option - -RH-Author: Eric Blake -Message-id: <20200602023420.2133649-12-eblake@redhat.com> -Patchwork-id: 97076 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 11/12] qemu-img: Add convert --bitmaps option -Bugzilla: 1779893 1779904 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Make it easier to copy all the persistent bitmaps of (the top layer -of) a source image along with its guest-visible contents, by adding a -boolean flag for use with qemu-img convert. This is basically -shorthand, as the same effect could be accomplished with a series of -'qemu-img bitmap --add' and 'qemu-img bitmap --merge -b source' -commands, or by their corresponding QMP commands. - -Note that this command will fail in the same scenarios where 'qemu-img -measure' omits a 'bitmaps size:' line, namely, when either the source -or the destination lacks persistent bitmap support altogether. - -See also https://bugzilla.redhat.com/show_bug.cgi?id=1779893 - -While touching this, clean up a couple coding issues spotted in the -same function: an extra blank line, and merging back-to-back 'if -(!skip_create)' blocks. - -Signed-off-by: Eric Blake -Message-Id: <20200521192137.1120211-5-eblake@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 15e39ad95078d528dfb9a75417453cab60332b77) - -Signed-off-by: Danilo C. L. 
de Paula - -Conflicts: - docs/tools/qemu-img.rst - qemu-img.texi instead - qemu-img.c - context: no --target-is-zero - qemu-img-cmds.hx - context: texi instead of rst -Signed-off-by: Eric Blake - -Signed-off-by: Danilo C. L. de Paula ---- - qemu-img-cmds.hx | 4 ++-- - qemu-img.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- - qemu-img.texi | 4 +++- - 3 files changed, 72 insertions(+), 6 deletions(-) - -diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx -index 1a6a8e9..48144aa 100644 ---- a/qemu-img-cmds.hx -+++ b/qemu-img-cmds.hx -@@ -50,9 +50,9 @@ STEXI - ETEXI - - DEF("convert", img_convert, -- "convert [--object objectdef] [--image-opts] [--target-image-opts] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] [--salvage] filename [filename2 [...]] output_filename") -+ "convert [--object objectdef] [--image-opts] [--target-image-opts] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] [--salvage] filename [filename2 [...]] output_filename") - STEXI --@item convert [--object @var{objectdef}] [--image-opts] [--target-image-opts] [-U] [-C] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-B @var{backing_file}] [-o @var{options}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] [--salvage] @var{filename} [@var{filename2} [...]] @var{output_filename} -+@item convert [--object @var{objectdef}] [--image-opts] [--target-image-opts] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-B @var{backing_file}] [-o @var{options}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] [--salvage] @var{filename} [@var{filename2} [...]] 
@var{output_filename} - ETEXI - - DEF("create", img_create, -diff --git a/qemu-img.c b/qemu-img.c -index 39e1586..6dc881b 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -77,6 +77,7 @@ enum { - OPTION_ENABLE = 272, - OPTION_DISABLE = 273, - OPTION_MERGE = 274, -+ OPTION_BITMAPS = 275, - }; - - typedef enum OutputFormat { -@@ -190,6 +191,7 @@ static void QEMU_NORETURN help(void) - " hiding corruption that has already occurred.\n" - "\n" - "Parameters to convert subcommand:\n" -+ " '--bitmaps' copies all top-level persistent bitmaps to destination\n" - " '-m' specifies how many coroutines work in parallel during the convert\n" - " process (defaults to 8)\n" - " '-W' allow to write to the target out of order rather than sequential\n" -@@ -2084,6 +2086,39 @@ static int convert_do_copy(ImgConvertState *s) - return s->ret; - } - -+static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst) -+{ -+ BdrvDirtyBitmap *bm; -+ Error *err = NULL; -+ -+ FOR_EACH_DIRTY_BITMAP(src, bm) { -+ const char *name; -+ -+ if (!bdrv_dirty_bitmap_get_persistence(bm)) { -+ continue; -+ } -+ name = bdrv_dirty_bitmap_name(bm); -+ qmp_block_dirty_bitmap_add(dst->node_name, name, -+ true, bdrv_dirty_bitmap_granularity(bm), -+ true, true, -+ true, !bdrv_dirty_bitmap_enabled(bm), -+ &err); -+ if (err) { -+ error_reportf_err(err, "Failed to create bitmap %s: ", name); -+ return -1; -+ } -+ -+ do_dirty_bitmap_merge(dst->node_name, name, src->node_name, name, -+ &err); -+ if (err) { -+ error_reportf_err(err, "Failed to populate bitmap %s: ", name); -+ return -1; -+ } -+ } -+ -+ return 0; -+} -+ - #define MAX_BUF_SECTORS 32768 - - static int img_convert(int argc, char **argv) -@@ -2105,6 +2140,7 @@ static int img_convert(int argc, char **argv) - int64_t ret = -EINVAL; - bool force_share = false; - bool explict_min_sparse = false; -+ bool bitmaps = false; - - ImgConvertState s = (ImgConvertState) { - /* Need at least 4k of zeros for sparse detection */ -@@ -2123,6 +2159,7 @@ static int 
img_convert(int argc, char **argv) - {"force-share", no_argument, 0, 'U'}, - {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS}, - {"salvage", no_argument, 0, OPTION_SALVAGE}, -+ {"bitmaps", no_argument, 0, OPTION_BITMAPS}, - {0, 0, 0, 0} - }; - c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WU", -@@ -2248,6 +2285,9 @@ static int img_convert(int argc, char **argv) - case OPTION_TARGET_IMAGE_OPTS: - tgt_image_opts = true; - break; -+ case OPTION_BITMAPS: -+ bitmaps = true; -+ break; - } - } - -@@ -2304,7 +2344,6 @@ static int img_convert(int argc, char **argv) - goto fail_getopt; - } - -- - /* ret is still -EINVAL until here */ - ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough); - if (ret < 0) { -@@ -2458,6 +2497,20 @@ static int img_convert(int argc, char **argv) - } - } - -+ /* Determine if bitmaps need copying */ -+ if (bitmaps) { -+ if (s.src_num > 1) { -+ error_report("Copying bitmaps only possible with single source"); -+ ret = -1; -+ goto out; -+ } -+ if (!bdrv_supports_persistent_dirty_bitmap(blk_bs(s.src[0]))) { -+ error_report("Source lacks bitmap support"); -+ ret = -1; -+ goto out; -+ } -+ } -+ - /* - * The later open call will need any decryption secrets, and - * bdrv_create() will purge "opts", so extract them now before -@@ -2466,9 +2519,7 @@ static int img_convert(int argc, char **argv) - if (!skip_create) { - open_opts = qdict_new(); - qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort); -- } - -- if (!skip_create) { - /* Create the new image */ - ret = bdrv_create(drv, out_filename, opts, &local_err); - if (ret < 0) { -@@ -2506,6 +2557,13 @@ static int img_convert(int argc, char **argv) - } - out_bs = blk_bs(s.target); - -+ if (bitmaps && !bdrv_supports_persistent_dirty_bitmap(out_bs)) { -+ error_report("Format driver '%s' does not support bitmaps", -+ out_bs->drv->format_name); -+ ret = -1; -+ goto out; -+ } -+ - if (s.compressed && !block_driver_can_compress(out_bs->drv)) { - 
error_report("Compression not supported for this file format"); - ret = -1; -@@ -2565,6 +2623,12 @@ static int img_convert(int argc, char **argv) - } - - ret = convert_do_copy(&s); -+ -+ /* Now copy the bitmaps */ -+ if (bitmaps && ret == 0) { -+ ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs); -+ } -+ - out: - if (!ret) { - qemu_progress_print(100, 0); -diff --git a/qemu-img.texi b/qemu-img.texi -index 3670b96..b95d019 100644 ---- a/qemu-img.texi -+++ b/qemu-img.texi -@@ -161,6 +161,8 @@ Parameters to convert subcommand: - - @table @option - -+@item --bitmaps -+Additionally copy all persistent bitmaps from the top layer of the source - @item -n - Skip the creation of the target volume - @item -m -@@ -357,7 +359,7 @@ Error on reading data - - @end table - --@item convert [--object @var{objectdef}] [--image-opts] [--target-image-opts] [-U] [-C] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-B @var{backing_file}] [-o @var{options}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} -+@item convert [--object @var{objectdef}] [--image-opts] [--target-image-opts] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-B @var{backing_file}] [-o @var{options}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} - - Convert the disk image @var{filename} or a snapshot @var{snapshot_param} - to disk image @var{output_filename} using format @var{output_fmt}. 
It can be optionally compressed (@code{-c} --- -1.8.3.1 - diff --git a/SOURCES/kvm-qemu-img-Factor-out-code-for-merging-bitmaps.patch b/SOURCES/kvm-qemu-img-Factor-out-code-for-merging-bitmaps.patch deleted file mode 100644 index c4012b7..0000000 --- a/SOURCES/kvm-qemu-img-Factor-out-code-for-merging-bitmaps.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 58816c3709e5058e8805333ca011cc4e793d67ff Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 Jun 2020 02:34:18 +0100 -Subject: [PATCH 13/26] qemu-img: Factor out code for merging bitmaps -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Blake -Message-id: <20200602023420.2133649-11-eblake@redhat.com> -Patchwork-id: 97078 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 10/12] qemu-img: Factor out code for merging bitmaps -Bugzilla: 1779893 1779904 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -The next patch will add another client that wants to merge dirty -bitmaps; it will be easier to refactor the code to construct the QAPI -struct correctly into a helper function. - -Signed-off-by: Eric Blake -Message-Id: <20200521192137.1120211-4-eblake@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 6c729dd832207d7347ecb074912f538e2942f269) -Signed-off-by: Eric Blake -Signed-off-by: Danilo C. L. 
de Paula ---- - qemu-img.c | 34 +++++++++++++++++++++------------- - 1 file changed, 21 insertions(+), 13 deletions(-) - -diff --git a/qemu-img.c b/qemu-img.c -index b57856e..39e1586 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -1582,6 +1582,24 @@ out4: - return ret; - } - -+/* Convenience wrapper around qmp_block_dirty_bitmap_merge */ -+static void do_dirty_bitmap_merge(const char *dst_node, const char *dst_name, -+ const char *src_node, const char *src_name, -+ Error **errp) -+{ -+ BlockDirtyBitmapMergeSource *merge_src; -+ BlockDirtyBitmapMergeSourceList *list; -+ -+ merge_src = g_new0(BlockDirtyBitmapMergeSource, 1); -+ merge_src->type = QTYPE_QDICT; -+ merge_src->u.external.node = g_strdup(src_node); -+ merge_src->u.external.name = g_strdup(src_name); -+ list = g_new0(BlockDirtyBitmapMergeSourceList, 1); -+ list->value = merge_src; -+ qmp_block_dirty_bitmap_merge(dst_node, dst_name, list, errp); -+ qapi_free_BlockDirtyBitmapMergeSourceList(list); -+} -+ - enum ImgConvertBlockStatus { - BLK_DATA, - BLK_ZERO, -@@ -4614,21 +4632,11 @@ static int img_bitmap(int argc, char **argv) - qmp_block_dirty_bitmap_disable(bs->node_name, bitmap, &err); - op = "disable"; - break; -- case BITMAP_MERGE: { -- BlockDirtyBitmapMergeSource *merge_src; -- BlockDirtyBitmapMergeSourceList *list; -- -- merge_src = g_new0(BlockDirtyBitmapMergeSource, 1); -- merge_src->type = QTYPE_QDICT; -- merge_src->u.external.node = g_strdup(src_bs->node_name); -- merge_src->u.external.name = g_strdup(act->src); -- list = g_new0(BlockDirtyBitmapMergeSourceList, 1); -- list->value = merge_src; -- qmp_block_dirty_bitmap_merge(bs->node_name, bitmap, list, &err); -- qapi_free_BlockDirtyBitmapMergeSourceList(list); -+ case BITMAP_MERGE: -+ do_dirty_bitmap_merge(bs->node_name, bitmap, src_bs->node_name, -+ act->src, &err); - op = "merge"; - break; -- } - default: - g_assert_not_reached(); - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-qemu-img-convert-Don-t-pre-zero-images.patch 
b/SOURCES/kvm-qemu-img-convert-Don-t-pre-zero-images.patch deleted file mode 100644 index 28311f4..0000000 --- a/SOURCES/kvm-qemu-img-convert-Don-t-pre-zero-images.patch +++ /dev/null @@ -1,73 +0,0 @@ -From eea45924903f03dc6d8f20576be0a4a84d5acce4 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Feb 2021 10:16:11 -0500 -Subject: [PATCH 4/5] qemu-img convert: Don't pre-zero images - -RH-Author: Kevin Wolf -Message-id: <20210210101611.137928-2-kwolf@redhat.com> -Patchwork-id: 101030 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] qemu-img convert: Don't pre-zero images -Bugzilla: 1855250 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Max Reitz - -Since commit 5a37b60a61c, qemu-img create will pre-zero the target image -if it isn't already zero-initialised (most importantly, for host block -devices, but also iscsi etc.), so that writing explicit zeros wouldn't -be necessary later. - -This could speed up the operation significantly, in particular when the -source image file was only sparsely populated. However, it also means -that some block are written twice: Once when pre-zeroing them, and then -when they are overwritten with actual data. On a full image, the -pre-zeroing is wasted work because everything will be overwritten. - -In practice, write_zeroes typically turns out faster than writing -explicit zero buffers, but slow enough that first zeroing everything and -then overwriting parts can be a significant net loss. - -Meanwhile, qemu-img convert was rewritten in 690c7301600 and zero blocks -are now written to the target using bdrv_co_pwrite_zeroes() if the -target could be pre-zeroed. This way we already make use of the faster -write_zeroes operation, but avoid writing any blocks twice. - -Remove the pre-zeroing because these days this former optimisation has -actually turned into a pessimisation in the common case. 
- -Reported-by: Nir Soffer -Signed-off-by: Kevin Wolf -Message-Id: <20200622151203.35624-1-kwolf@redhat.com> -Tested-by: Nir Soffer -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit edafc70c0c8510862f2f213a3acf7067113bcd08) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - qemu-img.c | 9 --------- - 1 file changed, 9 deletions(-) - -diff --git a/qemu-img.c b/qemu-img.c -index a27ad70851..b10dc5129b 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2029,15 +2029,6 @@ static int convert_do_copy(ImgConvertState *s) - s->has_zero_init = false; - } - -- if (!s->has_zero_init && !s->target_has_backing && -- bdrv_can_write_zeroes_with_unmap(blk_bs(s->target))) -- { -- ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK); -- if (ret == 0) { -- s->has_zero_init = true; -- } -- } -- - /* Allocate buffer for copied data. For compressed images, only one cluster - * can be copied at a time. */ - if (s->compressed) { --- -2.27.0 - diff --git a/SOURCES/kvm-qemu_img-add-cvtnum_full-to-print-error-reports.patch b/SOURCES/kvm-qemu_img-add-cvtnum_full-to-print-error-reports.patch deleted file mode 100644 index b4180b9..0000000 --- a/SOURCES/kvm-qemu_img-add-cvtnum_full-to-print-error-reports.patch +++ /dev/null @@ -1,241 +0,0 @@ -From 1a8a4ece5def912e7cfa5ef8565fc8ecef6e72c3 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 Jun 2020 02:34:11 +0100 -Subject: [PATCH 06/26] qemu_img: add cvtnum_full to print error reports - -RH-Author: Eric Blake -Message-id: <20200602023420.2133649-4-eblake@redhat.com> -Patchwork-id: 97067 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 03/12] qemu_img: add cvtnum_full to print error reports -Bugzilla: 1779893 1779904 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -From: Eyal Moscovici - -All calls to cvtnum check the return value and print the same error -message more or less. 
And so error reporting moved to cvtnum_full to -reduce code duplication and provide a single error -message. Additionally, cvtnum now wraps cvtnum_full with the existing -default range of 0 to MAX_INT64. - -Acked-by: Mark Kanda -Signed-off-by: Eyal Moscovici -Message-Id: <20200513133629.18508-2-eyal.moscovici@oracle.com> -Reviewed-by: Eric Blake -[eblake: fix printf formatting, avoid trailing space, change error wording, -reformat commit message] -Signed-off-by: Eric Blake -(cherry picked from commit 43d589b074370ebc9b340340b5f641b385da9df8) -Signed-off-by: Eric Blake - -Signed-off-by: Danilo C. L. de Paula ---- - qemu-img.c | 76 +++++++++++++++++++++------------------------- - tests/qemu-iotests/049.out | 8 ++--- - 2 files changed, 38 insertions(+), 46 deletions(-) - -diff --git a/qemu-img.c b/qemu-img.c -index 95a24b9..e69529b 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -422,19 +422,31 @@ static int add_old_style_options(const char *fmt, QemuOpts *opts, - return 0; - } - --static int64_t cvtnum(const char *s) -+static int64_t cvtnum_full(const char *name, const char *value, int64_t min, -+ int64_t max) - { - int err; -- uint64_t value; -- -- err = qemu_strtosz(s, NULL, &value); -- if (err < 0) { -+ uint64_t res; -+ -+ err = qemu_strtosz(value, NULL, &res); -+ if (err < 0 && err != -ERANGE) { -+ error_report("Invalid %s specified. You may use " -+ "k, M, G, T, P or E suffixes for", name); -+ error_report("kilobytes, megabytes, gigabytes, terabytes, " -+ "petabytes and exabytes."); - return err; - } -- if (value > INT64_MAX) { -+ if (err == -ERANGE || res > max || res < min) { -+ error_report("Invalid %s specified. 
Must be between %" PRId64 -+ " and %" PRId64 ".", name, min, max); - return -ERANGE; - } -- return value; -+ return res; -+} -+ -+static int64_t cvtnum(const char *name, const char *value) -+{ -+ return cvtnum_full(name, value, 0, INT64_MAX); - } - - static int img_create(int argc, char **argv) -@@ -532,16 +544,8 @@ static int img_create(int argc, char **argv) - if (optind < argc) { - int64_t sval; - -- sval = cvtnum(argv[optind++]); -+ sval = cvtnum("image size", argv[optind++]); - if (sval < 0) { -- if (sval == -ERANGE) { -- error_report("Image size must be less than 8 EiB!"); -- } else { -- error_report("Invalid image size specified! You may use k, M, " -- "G, T, P or E suffixes for "); -- error_report("kilobytes, megabytes, gigabytes, terabytes, " -- "petabytes and exabytes."); -- } - goto fail; - } - img_size = (uint64_t)sval; -@@ -2148,8 +2152,10 @@ static int img_convert(int argc, char **argv) - { - int64_t sval; - -- sval = cvtnum(optarg); -- if (sval < 0 || !QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) || -+ sval = cvtnum("buffer size for sparse output", optarg); -+ if (sval < 0) { -+ goto fail_getopt; -+ } else if (!QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) || - sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) { - error_report("Invalid buffer size for sparse output specified. " - "Valid sizes are multiples of %llu up to %llu. 
Select " -@@ -4229,9 +4235,8 @@ static int img_bench(int argc, char **argv) - break; - case 'o': - { -- offset = cvtnum(optarg); -+ offset = cvtnum("offset", optarg); - if (offset < 0) { -- error_report("Invalid offset specified"); - return 1; - } - break; -@@ -4244,9 +4249,8 @@ static int img_bench(int argc, char **argv) - { - int64_t sval; - -- sval = cvtnum(optarg); -- if (sval < 0 || sval > INT_MAX) { -- error_report("Invalid buffer size specified"); -+ sval = cvtnum_full("buffer size", optarg, 0, INT_MAX); -+ if (sval < 0) { - return 1; - } - -@@ -4257,9 +4261,8 @@ static int img_bench(int argc, char **argv) - { - int64_t sval; - -- sval = cvtnum(optarg); -- if (sval < 0 || sval > INT_MAX) { -- error_report("Invalid step size specified"); -+ sval = cvtnum_full("step_size", optarg, 0, INT_MAX); -+ if (sval < 0) { - return 1; - } - -@@ -4429,10 +4432,9 @@ static int img_dd_bs(const char *arg, - { - int64_t res; - -- res = cvtnum(arg); -+ res = cvtnum_full("bs", arg, 1, INT_MAX); - -- if (res <= 0 || res > INT_MAX) { -- error_report("invalid number: '%s'", arg); -+ if (res < 0) { - return 1; - } - in->bsz = out->bsz = res; -@@ -4444,10 +4446,9 @@ static int img_dd_count(const char *arg, - struct DdIo *in, struct DdIo *out, - struct DdInfo *dd) - { -- dd->count = cvtnum(arg); -+ dd->count = cvtnum("count", arg); - - if (dd->count < 0) { -- error_report("invalid number: '%s'", arg); - return 1; - } - -@@ -4476,10 +4477,9 @@ static int img_dd_skip(const char *arg, - struct DdIo *in, struct DdIo *out, - struct DdInfo *dd) - { -- in->offset = cvtnum(arg); -+ in->offset = cvtnum("skip", arg); - - if (in->offset < 0) { -- error_report("invalid number: '%s'", arg); - return 1; - } - -@@ -4869,16 +4869,8 @@ static int img_measure(int argc, char **argv) - { - int64_t sval; - -- sval = cvtnum(optarg); -+ sval = cvtnum("image size", optarg); - if (sval < 0) { -- if (sval == -ERANGE) { -- error_report("Image size must be less than 8 EiB!"); -- } else { -- 
error_report("Invalid image size specified! You may use " -- "k, M, G, T, P or E suffixes for "); -- error_report("kilobytes, megabytes, gigabytes, terabytes, " -- "petabytes and exabytes."); -- } - goto out; - } - img_size = (uint64_t)sval; -diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out -index 6b50540..8b35f3d 100644 ---- a/tests/qemu-iotests/049.out -+++ b/tests/qemu-iotests/049.out -@@ -92,19 +92,19 @@ Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1649267441664 cluster_size=65536 l - == 3. Invalid sizes == - - qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1024 --qemu-img: Image size must be less than 8 EiB! -+qemu-img: Invalid image size specified. Must be between 0 and 9223372036854775807. - - qemu-img create -f qcow2 -o size=-1024 TEST_DIR/t.qcow2 - qemu-img: TEST_DIR/t.qcow2: Value '-1024' is out of range for parameter 'size' - - qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1k --qemu-img: Image size must be less than 8 EiB! -+qemu-img: Invalid image size specified. Must be between 0 and 9223372036854775807. - - qemu-img create -f qcow2 -o size=-1k TEST_DIR/t.qcow2 - qemu-img: TEST_DIR/t.qcow2: Value '-1k' is out of range for parameter 'size' - - qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1kilobyte --qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for -+qemu-img: Invalid image size specified. You may use k, M, G, T, P or E suffixes for - qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. - - qemu-img create -f qcow2 -o size=1kilobyte TEST_DIR/t.qcow2 -@@ -113,7 +113,7 @@ Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta- - and exabytes, respectively. - - qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- foobar --qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for -+qemu-img: Invalid image size specified. You may use k, M, G, T, P or E suffixes for - qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. 
- - qemu-img create -f qcow2 -o size=foobar TEST_DIR/t.qcow2 --- -1.8.3.1 - diff --git a/SOURCES/kvm-qga-Use-qemu_get_host_name-instead-of-g_get_host_nam.patch b/SOURCES/kvm-qga-Use-qemu_get_host_name-instead-of-g_get_host_nam.patch deleted file mode 100644 index 3b533a5..0000000 --- a/SOURCES/kvm-qga-Use-qemu_get_host_name-instead-of-g_get_host_nam.patch +++ /dev/null @@ -1,73 +0,0 @@ -From c5f90436555d7ab2c1c28bf1cfdb5f5f8ca97816 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 24 Dec 2020 12:53:04 -0500 -Subject: [PATCH 4/5] qga: Use qemu_get_host_name() instead of - g_get_host_name() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201224125304.62697-4-marcandre.lureau@redhat.com> -Patchwork-id: 100500 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 3/3] qga: Use qemu_get_host_name() instead of g_get_host_name() -Bugzilla: 1910326 -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Philippe Mathieu-Daudé - -From: Michal Privoznik - -Problem with g_get_host_name() is that on the first call it saves -the hostname into a global variable and from then on, every -subsequent call returns the saved hostname. Even if the hostname -changes. This doesn't play nicely with guest agent, because if -the hostname is acquired before the guest is set up (e.g. on the -first boot, or before DHCP) we will report old, invalid hostname. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1845127 - -Signed-off-by: Michal Privoznik -Reviewed-by: Daniel P. Berrangé -Cc: qemu-stable@nongnu.org -Signed-off-by: Michael Roth - -(cherry picked from commit 0d3a8f32b1e0eca279da1b0cc793efc7250c3daf) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. 
de Paula ---- - qga/commands.c | 17 +++++++++++++---- - 1 file changed, 13 insertions(+), 4 deletions(-) - -diff --git a/qga/commands.c b/qga/commands.c -index 43c323ceada..93bed292d08 100644 ---- a/qga/commands.c -+++ b/qga/commands.c -@@ -502,11 +502,20 @@ int ga_parse_whence(GuestFileWhence *whence, Error **errp) - GuestHostName *qmp_guest_get_host_name(Error **errp) - { - GuestHostName *result = NULL; -- gchar const *hostname = g_get_host_name(); -- if (hostname != NULL) { -- result = g_new0(GuestHostName, 1); -- result->host_name = g_strdup(hostname); -+ g_autofree char *hostname = qemu_get_host_name(errp); -+ -+ /* -+ * We want to avoid using g_get_host_name() because that -+ * caches the result and we wouldn't reflect changes in the -+ * host name. -+ */ -+ -+ if (!hostname) { -+ hostname = g_strdup("localhost"); - } -+ -+ result = g_new0(GuestHostName, 1); -+ result->host_name = g_steal_pointer(&hostname); - return result; - } - --- -2.27.0 - diff --git a/SOURCES/kvm-qga-add-command-guest-get-disks.patch b/SOURCES/kvm-qga-add-command-guest-get-disks.patch deleted file mode 100644 index 360301d..0000000 --- a/SOURCES/kvm-qga-add-command-guest-get-disks.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 58688d868656e77f67ea915544b0bb3bb60f33d8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 16 Dec 2020 16:06:11 -0500 -Subject: [PATCH 10/14] qga: add command guest-get-disks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201216160615.324213-7-marcandre.lureau@redhat.com> -Patchwork-id: 100475 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 06/10] qga: add command guest-get-disks -Bugzilla: 1859494 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi - -From: Tomáš Golembiovský - -Add API and stubs for new guest-get-disks command. 
- -The command guest-get-fsinfo can be used to list information about disks -and partitions but it is limited only to mounted disks with filesystem. -This new command should allow listing information about disks of the VM -regardles whether they are mounted or not. This can be usefull for -management applications for mapping virtualized devices or pass-through -devices to device names in the guest OS. - -Signed-off-by: Tomáš Golembiovský -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Marc-André Lureau -Signed-off-by: Michael Roth - -(cherry-picked from commit c27ea3f9ef7c7f29e55bde91879f8514abce9c38) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. de Paula ---- - qga/commands-posix.c | 6 ++++++ - qga/commands-win32.c | 6 ++++++ - qga/qapi-schema.json | 31 +++++++++++++++++++++++++++++++ - 3 files changed, 43 insertions(+) - -diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index c86c87ed522..5095104afc0 100644 ---- a/qga/commands-posix.c -+++ b/qga/commands-posix.c -@@ -3039,3 +3039,9 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp) - - return info; - } -+ -+GuestDiskInfoList *qmp_guest_get_disks(Error **errp) -+{ -+ error_setg(errp, QERR_UNSUPPORTED); -+ return NULL; -+} -diff --git a/qga/commands-win32.c b/qga/commands-win32.c -index 55ba5b263af..be63fa2b208 100644 ---- a/qga/commands-win32.c -+++ b/qga/commands-win32.c -@@ -2234,3 +2234,9 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp) - - return info; - } -+ -+GuestDiskInfoList *qmp_guest_get_disks(Error **errp) -+{ -+ error_setg(errp, QERR_UNSUPPORTED); -+ return NULL; -+} -diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json -index fb4605cc19c..22df375c92f 100644 ---- a/qga/qapi-schema.json -+++ b/qga/qapi-schema.json -@@ -852,6 +852,37 @@ - 'bus': 'int', 'target': 'int', 'unit': 'int', - '*serial': 'str', '*dev': 'str'} } - -+## -+# @GuestDiskInfo: -+# -+# @name: device node (Linux) or device UNC (Windows) -+# @partition: whether this is a partition or disk -+# 
@dependents: list of dependent devices; e.g. for LVs of the LVM this will -+# hold the list of PVs, for LUKS encrypted volume this will -+# contain the disk where the volume is placed. (Linux) -+# @address: disk address information (only for non-virtual devices) -+# @alias: optional alias assigned to the disk, on Linux this is a name assigned -+# by device mapper -+# -+# Since 5.2 -+## -+{ 'struct': 'GuestDiskInfo', -+ 'data': {'name': 'str', 'partition': 'bool', 'dependents': ['str'], -+ '*address': 'GuestDiskAddress', '*alias': 'str'} } -+ -+## -+# @guest-get-disks: -+# -+# Returns: The list of disks in the guest. For Windows these are only the -+# physical disks. On Linux these are all root block devices of -+# non-zero size including e.g. removable devices, loop devices, -+# NBD, etc. -+# -+# Since: 5.2 -+## -+{ 'command': 'guest-get-disks', -+ 'returns': ['GuestDiskInfo'] } -+ - ## - # @GuestFilesystemInfo: - # --- -2.27.0 - diff --git a/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch b/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch deleted file mode 100644 index 939a212..0000000 --- a/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch +++ /dev/null @@ -1,427 +0,0 @@ -From 086957b970a8f4165249589e2bc0cc08d1800db3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 16 Dec 2020 16:06:12 -0500 -Subject: [PATCH 11/14] qga: add implementation of guest-get-disks for Linux -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201216160615.324213-8-marcandre.lureau@redhat.com> -Patchwork-id: 100478 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 07/10] qga: add implementation of guest-get-disks for Linux -Bugzilla: 1859494 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi - -From: Tomáš Golembiovský - -The command lists all disks (real and virtual) as 
well as disk -partitions. For each disk the list of dependent disks is also listed and -/dev path is used as a handle so it can be matched with "name" field of -other returned disk entries. For disk partitions the "dependents" list -is populated with the the parent device for easier tracking of -hierarchy. - -Example output: -{ - "return": [ - ... - { - "name": "/dev/dm-0", - "partition": false, - "dependents": [ - "/dev/sda2" - ], - "alias": "luks-7062202e-5b9b-433e-81e8-6628c40da9f7" - }, - { - "name": "/dev/sda2", - "partition": true, - "dependents": [ - "/dev/sda" - ] - }, - { - "name": "/dev/sda", - "partition": false, - "address": { - "serial": "SAMSUNG_MZ7LN512HCHP-000L1_S1ZKNXAG822493", - "bus-type": "sata", - ... - "dev": "/dev/sda", - "target": 0 - }, - "dependents": [] - }, - ... - ] -} - -Signed-off-by: Tomáš Golembiovský -Reviewed-by: Marc-André Lureau -*add missing stub for !defined(CONFIG_FSFREEZE) -*remove unused deps_dir variable -Signed-off-by: Michael Roth -(cherry picked from commit fed3956429d560a06fc2d2fcf1a01efb58659f87) -Signed-off-by: Danilo C. L. 
de Paula ---- - qga/commands-posix.c | 303 +++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 292 insertions(+), 11 deletions(-) - -diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index 5095104afc0..96f5ddafd3a 100644 ---- a/qga/commands-posix.c -+++ b/qga/commands-posix.c -@@ -1152,13 +1152,27 @@ static void build_guest_fsinfo_for_virtual_device(char const *syspath, - closedir(dir); - } - -+static bool is_disk_virtual(const char *devpath, Error **errp) -+{ -+ g_autofree char *syspath = realpath(devpath, NULL); -+ -+ if (!syspath) { -+ error_setg_errno(errp, errno, "realpath(\"%s\")", devpath); -+ return false; -+ } -+ return strstr(syspath, "/devices/virtual/block/") != NULL; -+} -+ - /* Dispatch to functions for virtual/real device */ - static void build_guest_fsinfo_for_device(char const *devpath, - GuestFilesystemInfo *fs, - Error **errp) - { -- char *syspath = realpath(devpath, NULL); -+ ERRP_GUARD(); -+ g_autofree char *syspath = NULL; -+ bool is_virtual = false; - -+ syspath = realpath(devpath, NULL); - if (!syspath) { - error_setg_errno(errp, errno, "realpath(\"%s\")", devpath); - return; -@@ -1169,16 +1183,281 @@ static void build_guest_fsinfo_for_device(char const *devpath, - } - - g_debug(" parse sysfs path '%s'", syspath); -- -- if (strstr(syspath, "/devices/virtual/block/")) { -+ is_virtual = is_disk_virtual(syspath, errp); -+ if (*errp != NULL) { -+ return; -+ } -+ if (is_virtual) { - build_guest_fsinfo_for_virtual_device(syspath, fs, errp); - } else { - build_guest_fsinfo_for_real_device(syspath, fs, errp); - } -+} -+ -+#ifdef CONFIG_LIBUDEV -+ -+/* -+ * Wrapper around build_guest_fsinfo_for_device() for getting just -+ * the disk address. 
-+ */ -+static GuestDiskAddress *get_disk_address(const char *syspath, Error **errp) -+{ -+ g_autoptr(GuestFilesystemInfo) fs = NULL; - -- free(syspath); -+ fs = g_new0(GuestFilesystemInfo, 1); -+ build_guest_fsinfo_for_device(syspath, fs, errp); -+ if (fs->disk != NULL) { -+ return g_steal_pointer(&fs->disk->value); -+ } -+ return NULL; - } - -+static char *get_alias_for_syspath(const char *syspath) -+{ -+ struct udev *udev = NULL; -+ struct udev_device *udevice = NULL; -+ char *ret = NULL; -+ -+ udev = udev_new(); -+ if (udev == NULL) { -+ g_debug("failed to query udev"); -+ goto out; -+ } -+ udevice = udev_device_new_from_syspath(udev, syspath); -+ if (udevice == NULL) { -+ g_debug("failed to query udev for path: %s", syspath); -+ goto out; -+ } else { -+ const char *alias = udev_device_get_property_value( -+ udevice, "DM_NAME"); -+ /* -+ * NULL means there was an error and empty string means there is no -+ * alias. In case of no alias we return NULL instead of empty string. -+ */ -+ if (alias == NULL) { -+ g_debug("failed to query udev for device alias for: %s", -+ syspath); -+ } else if (*alias != 0) { -+ ret = g_strdup(alias); -+ } -+ } -+ -+out: -+ udev_unref(udev); -+ udev_device_unref(udevice); -+ return ret; -+} -+ -+static char *get_device_for_syspath(const char *syspath) -+{ -+ struct udev *udev = NULL; -+ struct udev_device *udevice = NULL; -+ char *ret = NULL; -+ -+ udev = udev_new(); -+ if (udev == NULL) { -+ g_debug("failed to query udev"); -+ goto out; -+ } -+ udevice = udev_device_new_from_syspath(udev, syspath); -+ if (udevice == NULL) { -+ g_debug("failed to query udev for path: %s", syspath); -+ goto out; -+ } else { -+ ret = g_strdup(udev_device_get_devnode(udevice)); -+ } -+ -+out: -+ udev_unref(udev); -+ udev_device_unref(udevice); -+ return ret; -+} -+ -+static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk) -+{ -+ g_autofree char *deps_dir = NULL; -+ const gchar *dep; -+ GDir *dp_deps = NULL; -+ -+ /* List dependent disks */ 
-+ deps_dir = g_strdup_printf("%s/slaves", disk_dir); -+ g_debug(" listing entries in: %s", deps_dir); -+ dp_deps = g_dir_open(deps_dir, 0, NULL); -+ if (dp_deps == NULL) { -+ g_debug("failed to list entries in %s", deps_dir); -+ return; -+ } -+ while ((dep = g_dir_read_name(dp_deps)) != NULL) { -+ g_autofree char *dep_dir = NULL; -+ strList *dep_item = NULL; -+ char *dev_name; -+ -+ /* Add dependent disks */ -+ dep_dir = g_strdup_printf("%s/%s", deps_dir, dep); -+ dev_name = get_device_for_syspath(dep_dir); -+ if (dev_name != NULL) { -+ g_debug(" adding dependent device: %s", dev_name); -+ dep_item = g_new0(strList, 1); -+ dep_item->value = dev_name; -+ dep_item->next = disk->dependents; -+ disk->dependents = dep_item; -+ } -+ } -+ g_dir_close(dp_deps); -+} -+ -+/* -+ * Detect partitions subdirectory, name is "" or -+ * "p" -+ * -+ * @disk_name -- last component of /sys path (e.g. sda) -+ * @disk_dir -- sys path of the disk (e.g. /sys/block/sda) -+ * @disk_dev -- device node of the disk (e.g. 
/dev/sda) -+ */ -+static GuestDiskInfoList *get_disk_partitions( -+ GuestDiskInfoList *list, -+ const char *disk_name, const char *disk_dir, -+ const char *disk_dev) -+{ -+ GuestDiskInfoList *item, *ret = list; -+ struct dirent *de_disk; -+ DIR *dp_disk = NULL; -+ size_t len = strlen(disk_name); -+ -+ dp_disk = opendir(disk_dir); -+ while ((de_disk = readdir(dp_disk)) != NULL) { -+ g_autofree char *partition_dir = NULL; -+ char *dev_name; -+ GuestDiskInfo *partition; -+ -+ if (!(de_disk->d_type & DT_DIR)) { -+ continue; -+ } -+ -+ if (!(strncmp(disk_name, de_disk->d_name, len) == 0 && -+ ((*(de_disk->d_name + len) == 'p' && -+ isdigit(*(de_disk->d_name + len + 1))) || -+ isdigit(*(de_disk->d_name + len))))) { -+ continue; -+ } -+ -+ partition_dir = g_strdup_printf("%s/%s", -+ disk_dir, de_disk->d_name); -+ dev_name = get_device_for_syspath(partition_dir); -+ if (dev_name == NULL) { -+ g_debug("Failed to get device name for syspath: %s", -+ disk_dir); -+ continue; -+ } -+ partition = g_new0(GuestDiskInfo, 1); -+ partition->name = dev_name; -+ partition->partition = true; -+ /* Add parent disk as dependent for easier tracking of hierarchy */ -+ partition->dependents = g_new0(strList, 1); -+ partition->dependents->value = g_strdup(disk_dev); -+ -+ item = g_new0(GuestDiskInfoList, 1); -+ item->value = partition; -+ item->next = ret; -+ ret = item; -+ -+ } -+ closedir(dp_disk); -+ -+ return ret; -+} -+ -+GuestDiskInfoList *qmp_guest_get_disks(Error **errp) -+{ -+ GuestDiskInfoList *item, *ret = NULL; -+ GuestDiskInfo *disk; -+ DIR *dp = NULL; -+ struct dirent *de = NULL; -+ -+ g_debug("listing /sys/block directory"); -+ dp = opendir("/sys/block"); -+ if (dp == NULL) { -+ error_setg_errno(errp, errno, "Can't open directory \"/sys/block\""); -+ return NULL; -+ } -+ while ((de = readdir(dp)) != NULL) { -+ g_autofree char *disk_dir = NULL, *line = NULL, -+ *size_path = NULL; -+ char *dev_name; -+ Error *local_err = NULL; -+ if (de->d_type != DT_LNK) { -+ g_debug(" skipping 
entry: %s", de->d_name); -+ continue; -+ } -+ -+ /* Check size and skip zero-sized disks */ -+ g_debug(" checking disk size"); -+ size_path = g_strdup_printf("/sys/block/%s/size", de->d_name); -+ if (!g_file_get_contents(size_path, &line, NULL, NULL)) { -+ g_debug(" failed to read disk size"); -+ continue; -+ } -+ if (g_strcmp0(line, "0\n") == 0) { -+ g_debug(" skipping zero-sized disk"); -+ continue; -+ } -+ -+ g_debug(" adding %s", de->d_name); -+ disk_dir = g_strdup_printf("/sys/block/%s", de->d_name); -+ dev_name = get_device_for_syspath(disk_dir); -+ if (dev_name == NULL) { -+ g_debug("Failed to get device name for syspath: %s", -+ disk_dir); -+ continue; -+ } -+ disk = g_new0(GuestDiskInfo, 1); -+ disk->name = dev_name; -+ disk->partition = false; -+ disk->alias = get_alias_for_syspath(disk_dir); -+ disk->has_alias = (disk->alias != NULL); -+ item = g_new0(GuestDiskInfoList, 1); -+ item->value = disk; -+ item->next = ret; -+ ret = item; -+ -+ /* Get address for non-virtual devices */ -+ bool is_virtual = is_disk_virtual(disk_dir, &local_err); -+ if (local_err != NULL) { -+ g_debug(" failed to check disk path, ignoring error: %s", -+ error_get_pretty(local_err)); -+ error_free(local_err); -+ local_err = NULL; -+ /* Don't try to get the address */ -+ is_virtual = true; -+ } -+ if (!is_virtual) { -+ disk->address = get_disk_address(disk_dir, &local_err); -+ if (local_err != NULL) { -+ g_debug(" failed to get device info, ignoring error: %s", -+ error_get_pretty(local_err)); -+ error_free(local_err); -+ local_err = NULL; -+ } else if (disk->address != NULL) { -+ disk->has_address = true; -+ } -+ } -+ -+ get_disk_deps(disk_dir, disk); -+ ret = get_disk_partitions(ret, de->d_name, disk_dir, dev_name); -+ } -+ return ret; -+} -+ -+#else -+ -+GuestDiskInfoList *qmp_guest_get_disks(Error **errp) -+{ -+ error_setg(errp, QERR_UNSUPPORTED); -+ return NULL; -+} -+ -+#endif -+ - /* Return a list of the disk device(s)' info which @mount lies on */ - static 
GuestFilesystemInfo *build_guest_fsinfo(struct FsMount *mount, - Error **errp) -@@ -2770,6 +3049,13 @@ int64_t qmp_guest_fsfreeze_thaw(Error **errp) - - return 0; - } -+ -+GuestDiskInfoList *qmp_guest_get_disks(Error **errp) -+{ -+ error_setg(errp, QERR_UNSUPPORTED); -+ return NULL; -+} -+ - #endif /* CONFIG_FSFREEZE */ - - #if !defined(CONFIG_FSTRIM) -@@ -2806,7 +3092,8 @@ GList *ga_command_blacklist_init(GList *blacklist) - const char *list[] = { - "guest-get-fsinfo", "guest-fsfreeze-status", - "guest-fsfreeze-freeze", "guest-fsfreeze-freeze-list", -- "guest-fsfreeze-thaw", "guest-get-fsinfo", NULL}; -+ "guest-fsfreeze-thaw", "guest-get-fsinfo", -+ "guest-get-disks", NULL}; - char **p = (char **)list; - - while (*p) { -@@ -3039,9 +3326,3 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp) - - return info; - } -- --GuestDiskInfoList *qmp_guest_get_disks(Error **errp) --{ -- error_setg(errp, QERR_UNSUPPORTED); -- return NULL; --} --- -2.27.0 - diff --git a/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch b/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch deleted file mode 100644 index f82d95d..0000000 --- a/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch +++ /dev/null @@ -1,181 +0,0 @@ -From 925163bf8498e26c19742dbd34b6b324e49c07b6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 16 Dec 2020 16:06:13 -0500 -Subject: [PATCH 12/14] qga: add implementation of guest-get-disks for Windows -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201216160615.324213-9-marcandre.lureau@redhat.com> -Patchwork-id: 100479 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 08/10] qga: add implementation of guest-get-disks for Windows -Bugzilla: 1859494 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi - -From: Tomáš Golembiovský - -The command lists all the 
physical disk drives. Unlike for Linux -partitions and virtual volumes are not listed. - -Example output: - -{ - "return": [ - { - "name": "\\\\.\\PhysicalDrive0", - "partition": false, - "address": { - "serial": "QM00001", - "bus-type": "sata", - ... - }, - "dependents": [] - } - ] -} - -Signed-off-by: Tomáš Golembiovský -Signed-off-by: Michael Roth - -(cherry picked from commit c67d2efd9d1771fd886e3b58771adaa62897f3d9) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. de Paula ---- - qga/commands-win32.c | 107 ++++++++++++++++++++++++++++++++++++++++--- - 1 file changed, 101 insertions(+), 6 deletions(-) - -diff --git a/qga/commands-win32.c b/qga/commands-win32.c -index be63fa2b208..a07725e874b 100644 ---- a/qga/commands-win32.c -+++ b/qga/commands-win32.c -@@ -960,6 +960,101 @@ out: - return list; - } - -+GuestDiskInfoList *qmp_guest_get_disks(Error **errp) -+{ -+ ERRP_GUARD(); -+ GuestDiskInfoList *new = NULL, *ret = NULL; -+ HDEVINFO dev_info; -+ SP_DEVICE_INTERFACE_DATA dev_iface_data; -+ int i; -+ -+ dev_info = SetupDiGetClassDevs(&GUID_DEVINTERFACE_DISK, 0, 0, -+ DIGCF_PRESENT | DIGCF_DEVICEINTERFACE); -+ if (dev_info == INVALID_HANDLE_VALUE) { -+ error_setg_win32(errp, GetLastError(), "failed to get device tree"); -+ return NULL; -+ } -+ -+ g_debug("enumerating devices"); -+ dev_iface_data.cbSize = sizeof(SP_DEVICE_INTERFACE_DATA); -+ for (i = 0; -+ SetupDiEnumDeviceInterfaces(dev_info, NULL, &GUID_DEVINTERFACE_DISK, -+ i, &dev_iface_data); -+ i++) { -+ GuestDiskAddress *address = NULL; -+ GuestDiskInfo *disk = NULL; -+ Error *local_err = NULL; -+ g_autofree PSP_DEVICE_INTERFACE_DETAIL_DATA -+ pdev_iface_detail_data = NULL; -+ STORAGE_DEVICE_NUMBER sdn; -+ HANDLE dev_file; -+ DWORD size = 0; -+ BOOL result; -+ int attempt; -+ -+ g_debug(" getting device path"); -+ for (attempt = 0, result = FALSE; attempt < 2 && !result; attempt++) { -+ result = SetupDiGetDeviceInterfaceDetail(dev_info, -+ &dev_iface_data, pdev_iface_detail_data, size, &size, 
NULL); -+ if (result) { -+ break; -+ } -+ if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { -+ pdev_iface_detail_data = g_realloc(pdev_iface_detail_data, -+ size); -+ pdev_iface_detail_data->cbSize = -+ sizeof(*pdev_iface_detail_data); -+ } else { -+ g_debug("failed to get device interface details"); -+ break; -+ } -+ } -+ if (!result) { -+ g_debug("skipping device"); -+ continue; -+ } -+ -+ g_debug(" device: %s", pdev_iface_detail_data->DevicePath); -+ dev_file = CreateFile(pdev_iface_detail_data->DevicePath, 0, -+ FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL); -+ if (!DeviceIoControl(dev_file, IOCTL_STORAGE_GET_DEVICE_NUMBER, -+ NULL, 0, &sdn, sizeof(sdn), &size, NULL)) { -+ CloseHandle(dev_file); -+ debug_error("failed to get storage device number"); -+ continue; -+ } -+ CloseHandle(dev_file); -+ -+ disk = g_new0(GuestDiskInfo, 1); -+ disk->name = g_strdup_printf("\\\\.\\PhysicalDrive%lu", -+ sdn.DeviceNumber); -+ -+ g_debug(" number: %lu", sdn.DeviceNumber); -+ address = g_malloc0(sizeof(GuestDiskAddress)); -+ address->has_dev = true; -+ address->dev = g_strdup(disk->name); -+ get_single_disk_info(sdn.DeviceNumber, address, &local_err); -+ if (local_err) { -+ g_debug("failed to get disk info: %s", -+ error_get_pretty(local_err)); -+ error_free(local_err); -+ qapi_free_GuestDiskAddress(address); -+ address = NULL; -+ } else { -+ disk->address = address; -+ disk->has_address = true; -+ } -+ -+ new = g_malloc0(sizeof(GuestDiskInfoList)); -+ new->value = disk; -+ new->next = ret; -+ ret = new; -+ } -+ -+ SetupDiDestroyDeviceInfoList(dev_info); -+ return ret; -+} -+ - #else - - static GuestDiskAddressList *build_guest_disk_info(char *guid, Error **errp) -@@ -967,6 +1062,12 @@ static GuestDiskAddressList *build_guest_disk_info(char *guid, Error **errp) - return NULL; - } - -+GuestDiskInfoList *qmp_guest_get_disks(Error **errp) -+{ -+ error_setg(errp, QERR_UNSUPPORTED); -+ return NULL; -+} -+ - #endif /* CONFIG_QGA_NTDDSCSI */ - - static GuestFilesystemInfo 
*build_guest_fsinfo(char *guid, Error **errp) -@@ -2234,9 +2335,3 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp) - - return info; - } -- --GuestDiskInfoList *qmp_guest_get_disks(Error **errp) --{ -- error_setg(errp, QERR_UNSUPPORTED); -- return NULL; --} --- -2.27.0 - diff --git a/SOURCES/kvm-qga-add-reset-argument-to-ssh-add-authorized-keys.patch b/SOURCES/kvm-qga-add-reset-argument-to-ssh-add-authorized-keys.patch deleted file mode 100644 index dec7f7b..0000000 --- a/SOURCES/kvm-qga-add-reset-argument-to-ssh-add-authorized-keys.patch +++ /dev/null @@ -1,176 +0,0 @@ -From 7f8888f2c53060c4536856859d5ea94d23ea9e45 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 29 Jul 2021 04:55:54 -0400 -Subject: [PATCH 03/14] qga: add *reset argument to ssh-add-authorized-keys -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20210609100615.2501448-4-marcandre.lureau@redhat.com> -Patchwork-id: 101689 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 3/4] qga: add *reset argument to ssh-add-authorized-keys -Bugzilla: 1967716 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Michal Privoznik - -From: Michael Roth - -I prefer 'reset' over 'clear', since 'clear' and keys may have some -other relations or meaning. 
- -Signed-off-by: Marc-André Lureau -*fix disallowed g_assert* usage reported by checkpatch -Signed-off-by: Michael Roth - -(cherry picked from commit 0e3c94758e3851f0ab30d2a1e63a73284499775d) -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - qga/commands-posix-ssh.c | 53 ++++++++++++++++++++++++++++++++++++---- - qga/qapi-schema.json | 3 ++- - 2 files changed, 50 insertions(+), 6 deletions(-) - -diff --git a/qga/commands-posix-ssh.c b/qga/commands-posix-ssh.c -index f74d89679c..362c9e8816 100644 ---- a/qga/commands-posix-ssh.c -+++ b/qga/commands-posix-ssh.c -@@ -168,6 +168,7 @@ read_authkeys(const char *path, Error **errp) - - void - qmp_guest_ssh_add_authorized_keys(const char *username, strList *keys, -+ bool has_reset, bool reset, - Error **errp) - { - g_autofree struct passwd *p = NULL; -@@ -178,6 +179,7 @@ qmp_guest_ssh_add_authorized_keys(const char *username, strList *keys, - size_t nkeys, nauthkeys; - - ERRP_GUARD(); -+ reset = has_reset && reset; - - if (!check_openssh_pub_keys(keys, &nkeys, errp)) { - return; -@@ -191,7 +193,9 @@ qmp_guest_ssh_add_authorized_keys(const char *username, strList *keys, - ssh_path = g_build_filename(p->pw_dir, ".ssh", NULL); - authkeys_path = g_build_filename(ssh_path, "authorized_keys", NULL); - -- authkeys = read_authkeys(authkeys_path, NULL); -+ if (!reset) { -+ authkeys = read_authkeys(authkeys_path, NULL); -+ } - if (authkeys == NULL) { - if (!g_file_test(ssh_path, G_FILE_TEST_IS_DIR) && - !mkdir_for_user(ssh_path, p, 0700, errp)) { -@@ -318,7 +322,7 @@ test_invalid_user(void) - { - Error *err = NULL; - -- qmp_guest_ssh_add_authorized_keys("", NULL, &err); -+ qmp_guest_ssh_add_authorized_keys("", NULL, FALSE, FALSE, &err); - error_free_or_abort(&err); - - qmp_guest_ssh_remove_authorized_keys("", NULL, &err); -@@ -333,7 +337,8 @@ test_invalid_key(void) - }; - Error *err = NULL; - -- qmp_guest_ssh_add_authorized_keys(g_get_user_name(), &key, &err); -+ 
qmp_guest_ssh_add_authorized_keys(g_get_user_name(), &key, -+ FALSE, FALSE, &err); - error_free_or_abort(&err); - - qmp_guest_ssh_remove_authorized_keys(g_get_user_name(), &key, &err); -@@ -346,13 +351,17 @@ test_add_keys(void) - Error *err = NULL; - - qmp_guest_ssh_add_authorized_keys(g_get_user_name(), -- (strList *)&test_key2, &err); -+ (strList *)&test_key2, -+ FALSE, FALSE, -+ &err); - g_assert(err == NULL); - - test_authorized_keys_equal("algo key2 comments"); - - qmp_guest_ssh_add_authorized_keys(g_get_user_name(), -- (strList *)&test_key1_2, &err); -+ (strList *)&test_key1_2, -+ FALSE, FALSE, -+ &err); - g_assert(err == NULL); - - /* key2 came first, and should'nt be duplicated */ -@@ -360,6 +369,39 @@ test_add_keys(void) - "algo key1 comments"); - } - -+static void -+test_add_reset_keys(void) -+{ -+ Error *err = NULL; -+ -+ qmp_guest_ssh_add_authorized_keys(g_get_user_name(), -+ (strList *)&test_key1_2, -+ FALSE, FALSE, -+ &err); -+ g_assert(err == NULL); -+ -+ /* reset with key2 only */ -+ test_authorized_keys_equal("algo key1 comments\n" -+ "algo key2 comments"); -+ -+ qmp_guest_ssh_add_authorized_keys(g_get_user_name(), -+ (strList *)&test_key2, -+ TRUE, TRUE, -+ &err); -+ g_assert(err == NULL); -+ -+ test_authorized_keys_equal("algo key2 comments"); -+ -+ /* empty should clear file */ -+ qmp_guest_ssh_add_authorized_keys(g_get_user_name(), -+ (strList *)NULL, -+ TRUE, TRUE, -+ &err); -+ g_assert(err == NULL); -+ -+ test_authorized_keys_equal(""); -+} -+ - static void - test_remove_keys(void) - { -@@ -393,6 +435,7 @@ int main(int argc, char *argv[]) - g_test_add_func("/qga/ssh/invalid_user", test_invalid_user); - g_test_add_func("/qga/ssh/invalid_key", test_invalid_key); - g_test_add_func("/qga/ssh/add_keys", test_add_keys); -+ g_test_add_func("/qga/ssh/add_reset_keys", test_add_reset_keys); - g_test_add_func("/qga/ssh/remove_keys", test_remove_keys); - - return g_test_run(); -diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json -index 
3b85f5a03f..a70ea5da77 100644 ---- a/qga/qapi-schema.json -+++ b/qga/qapi-schema.json -@@ -1279,6 +1279,7 @@ - # - # @username: the user account to add the authorized keys - # @keys: the public keys to add (in OpenSSH/sshd(8) authorized_keys format) -+# @reset: ignore the existing content, set it with the given keys only - # - # Append public keys to user .ssh/authorized_keys on Unix systems (not - # implemented for other systems). -@@ -1288,7 +1289,7 @@ - # Since: 5.2 - ## - { 'command': 'guest-ssh-add-authorized-keys', -- 'data': { 'username': 'str', 'keys': ['str'] }, -+ 'data': { 'username': 'str', 'keys': ['str'], '*reset': 'bool' }, - 'if': 'defined(CONFIG_POSIX)' } - - ## --- -2.27.0 - diff --git a/SOURCES/kvm-qga-add-ssh-add-remove-authorized-keys.patch b/SOURCES/kvm-qga-add-ssh-add-remove-authorized-keys.patch deleted file mode 100644 index b767d42..0000000 --- a/SOURCES/kvm-qga-add-ssh-add-remove-authorized-keys.patch +++ /dev/null @@ -1,525 +0,0 @@ -From 4be6cb23235b29d6ce450c2dacaef09c52d1aeea Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 29 Jul 2021 04:55:52 -0400 -Subject: [PATCH 02/14] qga: add ssh-{add, remove}-authorized-keys -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20210609100615.2501448-3-marcandre.lureau@redhat.com> -Patchwork-id: 101688 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 2/4] qga: add ssh-{add, remove}-authorized-keys -Bugzilla: 1967716 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Michal Privoznik - -From: Marc-André Lureau - -Add new commands to add and remove SSH public keys from -~/.ssh/authorized_keys. - -I took a different approach for testing, including the unit tests right -with the code. I wanted to overwrite the function to get the user -details, I couldn't easily do that over QMP. 
Furthermore, I prefer -having unit tests very close to the code, and unit files that are domain -specific (commands-posix is too crowded already). FWIW, that -coding/testing style is Rust-style (where tests can or should even be -part of the documentation!). - -Fixes: -https://bugzilla.redhat.com/show_bug.cgi?id=1885332 - -Signed-off-by: Marc-André Lureau -Reviewed-by: Michal Privoznik -Reviewed-by: Daniel P. Berrangé -*squashed in fix-ups for setting file ownership and use of QAPI - conditionals for CONFIG_POSIX instead of stub definitions -*disable qga-ssh-test for now due to G_TEST_OPTION_ISOLATE_DIRS - triggering leak detector in build-oss-fuzz -*fix disallowed g_assert* usage reported by checkpatch -Signed-off-by: Michael Roth - -(cherry picked from commit 8d769ec777dccbff199711aba43aa6297fe4a0e0) -[ Fixes trivial backport conflicts and use Makefile.objs build-sys ] -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - qga/Makefile.objs | 2 +- - qga/commands-posix-ssh.c | 407 +++++++++++++++++++++++++++++++++++++++ - qga/qapi-schema.json | 35 ++++ - 3 files changed, 443 insertions(+), 1 deletion(-) - create mode 100644 qga/commands-posix-ssh.c - -diff --git a/qga/Makefile.objs b/qga/Makefile.objs -index 80e6bb3c2e..c8da634db0 100644 ---- a/qga/Makefile.objs -+++ b/qga/Makefile.objs -@@ -1,6 +1,6 @@ - commands-posix.o-libs := $(LIBUDEV_LIBS) - qga-obj-y = commands.o guest-agent-command-state.o main.o --qga-obj-$(CONFIG_POSIX) += commands-posix.o channel-posix.o -+qga-obj-$(CONFIG_POSIX) += commands-posix.o channel-posix.o commands-posix-ssh.o - qga-obj-$(CONFIG_WIN32) += commands-win32.o channel-win32.o service-win32.o - qga-obj-$(CONFIG_WIN32) += vss-win32.o - qga-obj-y += qapi-generated/qga-qapi-types.o qapi-generated/qga-qapi-visit.o -diff --git a/qga/commands-posix-ssh.c b/qga/commands-posix-ssh.c -new file mode 100644 -index 0000000000..f74d89679c ---- /dev/null -+++ b/qga/commands-posix-ssh.c -@@ -0,0 +1,407 @@ -+ /* -+ * This work is 
licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+#include "qemu/osdep.h" -+ -+#include -+#include -+#include -+#include -+ -+#include "qapi/error.h" -+#include "qga-qapi-commands.h" -+ -+#ifdef QGA_BUILD_UNIT_TEST -+static struct passwd * -+test_get_passwd_entry(const gchar *user_name, GError **error) -+{ -+ struct passwd *p; -+ int ret; -+ -+ if (!user_name || g_strcmp0(user_name, g_get_user_name())) { -+ g_set_error(error, G_UNIX_ERROR, 0, "Invalid user name"); -+ return NULL; -+ } -+ -+ p = g_new0(struct passwd, 1); -+ p->pw_dir = (char *)g_get_home_dir(); -+ p->pw_uid = geteuid(); -+ p->pw_gid = getegid(); -+ -+ ret = g_mkdir_with_parents(p->pw_dir, 0700); -+ g_assert(ret == 0); -+ -+ return p; -+} -+ -+#define g_unix_get_passwd_entry_qemu(username, err) \ -+ test_get_passwd_entry(username, err) -+#endif -+ -+static struct passwd * -+get_passwd_entry(const char *username, Error **errp) -+{ -+ g_autoptr(GError) err = NULL; -+ struct passwd *p; -+ -+ ERRP_GUARD(); -+ -+ p = g_unix_get_passwd_entry_qemu(username, &err); -+ if (p == NULL) { -+ error_setg(errp, "failed to lookup user '%s': %s", -+ username, err->message); -+ return NULL; -+ } -+ -+ return p; -+} -+ -+static bool -+mkdir_for_user(const char *path, const struct passwd *p, -+ mode_t mode, Error **errp) -+{ -+ ERRP_GUARD(); -+ -+ if (g_mkdir(path, mode) == -1) { -+ error_setg(errp, "failed to create directory '%s': %s", -+ path, g_strerror(errno)); -+ return false; -+ } -+ -+ if (chown(path, p->pw_uid, p->pw_gid) == -1) { -+ error_setg(errp, "failed to set ownership of directory '%s': %s", -+ path, g_strerror(errno)); -+ return false; -+ } -+ -+ if (chmod(path, mode) == -1) { -+ error_setg(errp, "failed to set permissions of directory '%s': %s", -+ path, g_strerror(errno)); -+ return false; -+ } -+ -+ return true; -+} -+ -+static bool -+check_openssh_pub_key(const char *key, Error **errp) -+{ -+ ERRP_GUARD(); -+ -+ /* simple 
sanity-check, we may want more? */ -+ if (!key || key[0] == '#' || strchr(key, '\n')) { -+ error_setg(errp, "invalid OpenSSH public key: '%s'", key); -+ return false; -+ } -+ -+ return true; -+} -+ -+static bool -+check_openssh_pub_keys(strList *keys, size_t *nkeys, Error **errp) -+{ -+ size_t n = 0; -+ strList *k; -+ -+ ERRP_GUARD(); -+ -+ for (k = keys; k != NULL; k = k->next) { -+ if (!check_openssh_pub_key(k->value, errp)) { -+ return false; -+ } -+ n++; -+ } -+ -+ if (nkeys) { -+ *nkeys = n; -+ } -+ return true; -+} -+ -+static bool -+write_authkeys(const char *path, const GStrv keys, -+ const struct passwd *p, Error **errp) -+{ -+ g_autofree char *contents = NULL; -+ g_autoptr(GError) err = NULL; -+ -+ ERRP_GUARD(); -+ -+ contents = g_strjoinv("\n", keys); -+ if (!g_file_set_contents(path, contents, -1, &err)) { -+ error_setg(errp, "failed to write to '%s': %s", path, err->message); -+ return false; -+ } -+ -+ if (chown(path, p->pw_uid, p->pw_gid) == -1) { -+ error_setg(errp, "failed to set ownership of directory '%s': %s", -+ path, g_strerror(errno)); -+ return false; -+ } -+ -+ if (chmod(path, 0600) == -1) { -+ error_setg(errp, "failed to set permissions of '%s': %s", -+ path, g_strerror(errno)); -+ return false; -+ } -+ -+ return true; -+} -+ -+static GStrv -+read_authkeys(const char *path, Error **errp) -+{ -+ g_autoptr(GError) err = NULL; -+ g_autofree char *contents = NULL; -+ -+ ERRP_GUARD(); -+ -+ if (!g_file_get_contents(path, &contents, NULL, &err)) { -+ error_setg(errp, "failed to read '%s': %s", path, err->message); -+ return NULL; -+ } -+ -+ return g_strsplit(contents, "\n", -1); -+ -+} -+ -+void -+qmp_guest_ssh_add_authorized_keys(const char *username, strList *keys, -+ Error **errp) -+{ -+ g_autofree struct passwd *p = NULL; -+ g_autofree char *ssh_path = NULL; -+ g_autofree char *authkeys_path = NULL; -+ g_auto(GStrv) authkeys = NULL; -+ strList *k; -+ size_t nkeys, nauthkeys; -+ -+ ERRP_GUARD(); -+ -+ if (!check_openssh_pub_keys(keys, &nkeys, 
errp)) { -+ return; -+ } -+ -+ p = get_passwd_entry(username, errp); -+ if (p == NULL) { -+ return; -+ } -+ -+ ssh_path = g_build_filename(p->pw_dir, ".ssh", NULL); -+ authkeys_path = g_build_filename(ssh_path, "authorized_keys", NULL); -+ -+ authkeys = read_authkeys(authkeys_path, NULL); -+ if (authkeys == NULL) { -+ if (!g_file_test(ssh_path, G_FILE_TEST_IS_DIR) && -+ !mkdir_for_user(ssh_path, p, 0700, errp)) { -+ return; -+ } -+ } -+ -+ nauthkeys = authkeys ? g_strv_length(authkeys) : 0; -+ authkeys = g_realloc_n(authkeys, nauthkeys + nkeys + 1, sizeof(char *)); -+ memset(authkeys + nauthkeys, 0, (nkeys + 1) * sizeof(char *)); -+ -+ for (k = keys; k != NULL; k = k->next) { -+ if (g_strv_contains((const gchar * const *)authkeys, k->value)) { -+ continue; -+ } -+ authkeys[nauthkeys++] = g_strdup(k->value); -+ } -+ -+ write_authkeys(authkeys_path, authkeys, p, errp); -+} -+ -+void -+qmp_guest_ssh_remove_authorized_keys(const char *username, strList *keys, -+ Error **errp) -+{ -+ g_autofree struct passwd *p = NULL; -+ g_autofree char *authkeys_path = NULL; -+ g_autofree GStrv new_keys = NULL; /* do not own the strings */ -+ g_auto(GStrv) authkeys = NULL; -+ GStrv a; -+ size_t nkeys = 0; -+ -+ ERRP_GUARD(); -+ -+ if (!check_openssh_pub_keys(keys, NULL, errp)) { -+ return; -+ } -+ -+ p = get_passwd_entry(username, errp); -+ if (p == NULL) { -+ return; -+ } -+ -+ authkeys_path = g_build_filename(p->pw_dir, ".ssh", -+ "authorized_keys", NULL); -+ if (!g_file_test(authkeys_path, G_FILE_TEST_EXISTS)) { -+ return; -+ } -+ authkeys = read_authkeys(authkeys_path, errp); -+ if (authkeys == NULL) { -+ return; -+ } -+ -+ new_keys = g_new0(char *, g_strv_length(authkeys) + 1); -+ for (a = authkeys; *a != NULL; a++) { -+ strList *k; -+ -+ for (k = keys; k != NULL; k = k->next) { -+ if (g_str_equal(k->value, *a)) { -+ break; -+ } -+ } -+ if (k != NULL) { -+ continue; -+ } -+ -+ new_keys[nkeys++] = *a; -+ } -+ -+ write_authkeys(authkeys_path, new_keys, p, errp); -+} -+ -+ -+#ifdef 
QGA_BUILD_UNIT_TEST -+#if GLIB_CHECK_VERSION(2, 60, 0) -+static const strList test_key2 = { -+ .value = (char *)"algo key2 comments" -+}; -+ -+static const strList test_key1_2 = { -+ .value = (char *)"algo key1 comments", -+ .next = (strList *)&test_key2, -+}; -+ -+static char * -+test_get_authorized_keys_path(void) -+{ -+ return g_build_filename(g_get_home_dir(), ".ssh", "authorized_keys", NULL); -+} -+ -+static void -+test_authorized_keys_set(const char *contents) -+{ -+ g_autoptr(GError) err = NULL; -+ g_autofree char *path = NULL; -+ int ret; -+ -+ path = g_build_filename(g_get_home_dir(), ".ssh", NULL); -+ ret = g_mkdir_with_parents(path, 0700); -+ g_assert(ret == 0); -+ g_free(path); -+ -+ path = test_get_authorized_keys_path(); -+ g_file_set_contents(path, contents, -1, &err); -+ g_assert(err == NULL); -+} -+ -+static void -+test_authorized_keys_equal(const char *expected) -+{ -+ g_autoptr(GError) err = NULL; -+ g_autofree char *path = NULL; -+ g_autofree char *contents = NULL; -+ -+ path = test_get_authorized_keys_path(); -+ g_file_get_contents(path, &contents, NULL, &err); -+ g_assert(err == NULL); -+ -+ g_assert(g_strcmp0(contents, expected) == 0); -+} -+ -+static void -+test_invalid_user(void) -+{ -+ Error *err = NULL; -+ -+ qmp_guest_ssh_add_authorized_keys("", NULL, &err); -+ error_free_or_abort(&err); -+ -+ qmp_guest_ssh_remove_authorized_keys("", NULL, &err); -+ error_free_or_abort(&err); -+} -+ -+static void -+test_invalid_key(void) -+{ -+ strList key = { -+ .value = (char *)"not a valid\nkey" -+ }; -+ Error *err = NULL; -+ -+ qmp_guest_ssh_add_authorized_keys(g_get_user_name(), &key, &err); -+ error_free_or_abort(&err); -+ -+ qmp_guest_ssh_remove_authorized_keys(g_get_user_name(), &key, &err); -+ error_free_or_abort(&err); -+} -+ -+static void -+test_add_keys(void) -+{ -+ Error *err = NULL; -+ -+ qmp_guest_ssh_add_authorized_keys(g_get_user_name(), -+ (strList *)&test_key2, &err); -+ g_assert(err == NULL); -+ -+ test_authorized_keys_equal("algo 
key2 comments"); -+ -+ qmp_guest_ssh_add_authorized_keys(g_get_user_name(), -+ (strList *)&test_key1_2, &err); -+ g_assert(err == NULL); -+ -+ /* key2 came first, and should'nt be duplicated */ -+ test_authorized_keys_equal("algo key2 comments\n" -+ "algo key1 comments"); -+} -+ -+static void -+test_remove_keys(void) -+{ -+ Error *err = NULL; -+ static const char *authkeys = -+ "algo key1 comments\n" -+ /* originally duplicated */ -+ "algo key1 comments\n" -+ "# a commented line\n" -+ "algo some-key another\n"; -+ -+ test_authorized_keys_set(authkeys); -+ qmp_guest_ssh_remove_authorized_keys(g_get_user_name(), -+ (strList *)&test_key2, &err); -+ g_assert(err == NULL); -+ test_authorized_keys_equal(authkeys); -+ -+ qmp_guest_ssh_remove_authorized_keys(g_get_user_name(), -+ (strList *)&test_key1_2, &err); -+ g_assert(err == NULL); -+ test_authorized_keys_equal("# a commented line\n" -+ "algo some-key another\n"); -+} -+ -+int main(int argc, char *argv[]) -+{ -+ setlocale(LC_ALL, ""); -+ -+ g_test_init(&argc, &argv, G_TEST_OPTION_ISOLATE_DIRS, NULL); -+ -+ g_test_add_func("/qga/ssh/invalid_user", test_invalid_user); -+ g_test_add_func("/qga/ssh/invalid_key", test_invalid_key); -+ g_test_add_func("/qga/ssh/add_keys", test_add_keys); -+ g_test_add_func("/qga/ssh/remove_keys", test_remove_keys); -+ -+ return g_test_run(); -+} -+#else -+int main(int argc, char *argv[]) -+{ -+ g_test_message("test skipped, needs glib >= 2.60"); -+ return 0; -+} -+#endif /* GLIB_2_60 */ -+#endif /* BUILD_UNIT_TEST */ -diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json -index 4222cb92d3..3b85f5a03f 100644 ---- a/qga/qapi-schema.json -+++ b/qga/qapi-schema.json -@@ -1273,3 +1273,38 @@ - ## - { 'command': 'guest-get-osinfo', - 'returns': 'GuestOSInfo' } -+ -+## -+# @guest-ssh-add-authorized-keys: -+# -+# @username: the user account to add the authorized keys -+# @keys: the public keys to add (in OpenSSH/sshd(8) authorized_keys format) -+# -+# Append public keys to user 
.ssh/authorized_keys on Unix systems (not -+# implemented for other systems). -+# -+# Returns: Nothing on success. -+# -+# Since: 5.2 -+## -+{ 'command': 'guest-ssh-add-authorized-keys', -+ 'data': { 'username': 'str', 'keys': ['str'] }, -+ 'if': 'defined(CONFIG_POSIX)' } -+ -+## -+# @guest-ssh-remove-authorized-keys: -+# -+# @username: the user account to remove the authorized keys -+# @keys: the public keys to remove (in OpenSSH/sshd(8) authorized_keys format) -+# -+# Remove public keys from the user .ssh/authorized_keys on Unix systems (not -+# implemented for other systems). It's not an error if the key is already -+# missing. -+# -+# Returns: Nothing on success. -+# -+# Since: 5.2 -+## -+{ 'command': 'guest-ssh-remove-authorized-keys', -+ 'data': { 'username': 'str', 'keys': ['str'] }, -+ 'if': 'defined(CONFIG_POSIX)' } --- -2.27.0 - diff --git a/SOURCES/kvm-qga-add-ssh-get-authorized-keys.patch b/SOURCES/kvm-qga-add-ssh-get-authorized-keys.patch deleted file mode 100644 index 2b4c377..0000000 --- a/SOURCES/kvm-qga-add-ssh-get-authorized-keys.patch +++ /dev/null @@ -1,170 +0,0 @@ -From 1ed102f5489e6cf3168d9014e9a082909193b6fc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 29 Jul 2021 04:55:57 -0400 -Subject: [PATCH 04/14] qga: add ssh-get-authorized-keys -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20210609100615.2501448-5-marcandre.lureau@redhat.com> -Patchwork-id: 101690 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 4/4] qga: add ssh-get-authorized-keys -Bugzilla: 1967716 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Michal Privoznik - -From: Marc-André Lureau - -Signed-off-by: Marc-André Lureau -*fix-up merge conflicts due to qga-ssh-test being disabled in earlier - patch due to G_TEST_OPTION_ISOLATE_DIRS triggering build-oss-fuzz - leak detector. 
-*fix up style and disallowed g_assert* usage reported by checkpatch -Signed-off-by: Michael Roth - -(cherry picked from commit cad97c08a1c17830d77a46780088bc0199df89d1) -[ Fix trivial schema conflict ] -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - qga/commands-posix-ssh.c | 66 ++++++++++++++++++++++++++++++++++++++++ - qga/qapi-schema.json | 30 ++++++++++++++++++ - 2 files changed, 96 insertions(+) - -diff --git a/qga/commands-posix-ssh.c b/qga/commands-posix-ssh.c -index 362c9e8816..749167e82d 100644 ---- a/qga/commands-posix-ssh.c -+++ b/qga/commands-posix-ssh.c -@@ -268,6 +268,46 @@ qmp_guest_ssh_remove_authorized_keys(const char *username, strList *keys, - write_authkeys(authkeys_path, new_keys, p, errp); - } - -+GuestAuthorizedKeys * -+qmp_guest_ssh_get_authorized_keys(const char *username, Error **errp) -+{ -+ g_autofree struct passwd *p = NULL; -+ g_autofree char *authkeys_path = NULL; -+ g_auto(GStrv) authkeys = NULL; -+ g_autoptr(GuestAuthorizedKeys) ret = NULL; -+ int i; -+ -+ ERRP_GUARD(); -+ -+ p = get_passwd_entry(username, errp); -+ if (p == NULL) { -+ return NULL; -+ } -+ -+ authkeys_path = g_build_filename(p->pw_dir, ".ssh", -+ "authorized_keys", NULL); -+ authkeys = read_authkeys(authkeys_path, errp); -+ if (authkeys == NULL) { -+ return NULL; -+ } -+ -+ ret = g_new0(GuestAuthorizedKeys, 1); -+ for (i = 0; authkeys[i] != NULL; i++) { -+ strList *new; -+ -+ g_strstrip(authkeys[i]); -+ if (!authkeys[i][0] || authkeys[i][0] == '#') { -+ continue; -+ } -+ -+ new = g_new0(strList, 1); -+ new->value = g_strdup(authkeys[i]); -+ new->next = ret->keys; -+ ret->keys = new; -+ } -+ -+ return g_steal_pointer(&ret); -+} - - #ifdef QGA_BUILD_UNIT_TEST - #if GLIB_CHECK_VERSION(2, 60, 0) -@@ -426,6 +466,31 @@ test_remove_keys(void) - "algo some-key another\n"); - } - -+static void -+test_get_keys(void) -+{ -+ Error *err = NULL; -+ static const char *authkeys = -+ "algo key1 comments\n" -+ "# a commented line\n" -+ "algo some-key 
another\n"; -+ g_autoptr(GuestAuthorizedKeys) ret = NULL; -+ strList *k; -+ size_t len = 0; -+ -+ test_authorized_keys_set(authkeys); -+ -+ ret = qmp_guest_ssh_get_authorized_keys(g_get_user_name(), &err); -+ g_assert(err == NULL); -+ -+ for (len = 0, k = ret->keys; k != NULL; k = k->next) { -+ g_assert(g_str_has_prefix(k->value, "algo ")); -+ len++; -+ } -+ -+ g_assert(len == 2); -+} -+ - int main(int argc, char *argv[]) - { - setlocale(LC_ALL, ""); -@@ -437,6 +502,7 @@ int main(int argc, char *argv[]) - g_test_add_func("/qga/ssh/add_keys", test_add_keys); - g_test_add_func("/qga/ssh/add_reset_keys", test_add_reset_keys); - g_test_add_func("/qga/ssh/remove_keys", test_remove_keys); -+ g_test_add_func("/qga/ssh/get_keys", test_get_keys); - - return g_test_run(); - } -diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json -index a70ea5da77..97bf96712e 100644 ---- a/qga/qapi-schema.json -+++ b/qga/qapi-schema.json -@@ -1274,6 +1274,36 @@ - { 'command': 'guest-get-osinfo', - 'returns': 'GuestOSInfo' } - -+## -+# @GuestAuthorizedKeys: -+# -+# @keys: public keys (in OpenSSH/sshd(8) authorized_keys format) -+# -+# Since: 5.2 -+## -+{ 'struct': 'GuestAuthorizedKeys', -+ 'data': { -+ 'keys': ['str'] -+ }, -+ 'if': 'defined(CONFIG_POSIX)' } -+ -+## -+# @guest-ssh-get-authorized-keys: -+# -+# @username: the user account to add the authorized keys -+# -+# Return the public keys from user .ssh/authorized_keys on Unix systems (not -+# implemented for other systems). 
-+# -+# Returns: @GuestAuthorizedKeys -+# -+# Since: 5.2 -+## -+{ 'command': 'guest-ssh-get-authorized-keys', -+ 'data': { 'username': 'str' }, -+ 'returns': 'GuestAuthorizedKeys', -+ 'if': 'defined(CONFIG_POSIX)' } -+ - ## - # @guest-ssh-add-authorized-keys: - # --- -2.27.0 - diff --git a/SOURCES/kvm-qga-commands-posix-Move-the-udev-code-from-the-pci-t.patch b/SOURCES/kvm-qga-commands-posix-Move-the-udev-code-from-the-pci-t.patch deleted file mode 100644 index 0aa2440..0000000 --- a/SOURCES/kvm-qga-commands-posix-Move-the-udev-code-from-the-pci-t.patch +++ /dev/null @@ -1,140 +0,0 @@ -From 3a63e2d29bb2fd92577d42aeb8fa956ae18df22e Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 2 Oct 2020 10:17:41 -0400 -Subject: [PATCH 02/14] qga/commands-posix: Move the udev code from the pci to - the generic function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20201002101742.249169-3-thuth@redhat.com> -Patchwork-id: 98526 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 2/3] qga/commands-posix: Move the udev code from the pci to the generic function -Bugzilla: 1755075 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -The libudev-related code is independent from the other pci-related code -and can be re-used for non-pci devices (like ccw devices on s390x). Thus -move this part to the generic function. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1755075 -Signed-off-by: Thomas Huth -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Michael Roth -(cherry picked from commit 43dadc431bacbc5a5baee7e256288a98a3e95ce3) -Signed-off-by: Danilo C. L. 
de Paula ---- - qga/commands-posix.c | 62 +++++++++++++++++++++++--------------------- - 1 file changed, 33 insertions(+), 29 deletions(-) - -diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index 99d6b1c8c1..6db76aadd1 100644 ---- a/qga/commands-posix.c -+++ b/qga/commands-posix.c -@@ -878,10 +878,6 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, - GuestPCIAddress *pciaddr = disk->pci_controller; - bool has_ata = false, has_host = false, has_tgt = false; - char *p, *q, *driver = NULL; --#ifdef CONFIG_LIBUDEV -- struct udev *udev = NULL; -- struct udev_device *udevice = NULL; --#endif - bool ret = false; - - p = strstr(syspath, "/devices/pci"); -@@ -940,26 +936,6 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, - pciaddr->slot = pci[2]; - pciaddr->function = pci[3]; - --#ifdef CONFIG_LIBUDEV -- udev = udev_new(); -- udevice = udev_device_new_from_syspath(udev, syspath); -- if (udev == NULL || udevice == NULL) { -- g_debug("failed to query udev"); -- } else { -- const char *devnode, *serial; -- devnode = udev_device_get_devnode(udevice); -- if (devnode != NULL) { -- disk->dev = g_strdup(devnode); -- disk->has_dev = true; -- } -- serial = udev_device_get_property_value(udevice, "ID_SERIAL"); -- if (serial != NULL && *serial != 0) { -- disk->serial = g_strdup(serial); -- disk->has_serial = true; -- } -- } --#endif -- - if (strcmp(driver, "ata_piix") == 0) { - /* a host per ide bus, target*:0::0 */ - if (!has_host || !has_tgt) { -@@ -1021,10 +997,6 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, - - cleanup: - g_free(driver); --#ifdef CONFIG_LIBUDEV -- udev_unref(udev); -- udev_device_unref(udevice); --#endif - return ret; - } - -@@ -1037,18 +1009,50 @@ static void build_guest_fsinfo_for_real_device(char const *syspath, - GuestPCIAddress *pciaddr; - GuestDiskAddressList *list = NULL; - bool has_hwinf; -+#ifdef CONFIG_LIBUDEV -+ struct udev *udev = NULL; -+ struct udev_device *udevice = NULL; -+#endif - - 
pciaddr = g_new0(GuestPCIAddress, 1); -+ pciaddr->domain = -1; /* -1 means field is invalid */ -+ pciaddr->bus = -1; -+ pciaddr->slot = -1; -+ pciaddr->function = -1; - - disk = g_new0(GuestDiskAddress, 1); - disk->pci_controller = pciaddr; -+ disk->bus_type = GUEST_DISK_BUS_TYPE_UNKNOWN; - - list = g_new0(GuestDiskAddressList, 1); - list->value = disk; - -+#ifdef CONFIG_LIBUDEV -+ udev = udev_new(); -+ udevice = udev_device_new_from_syspath(udev, syspath); -+ if (udev == NULL || udevice == NULL) { -+ g_debug("failed to query udev"); -+ } else { -+ const char *devnode, *serial; -+ devnode = udev_device_get_devnode(udevice); -+ if (devnode != NULL) { -+ disk->dev = g_strdup(devnode); -+ disk->has_dev = true; -+ } -+ serial = udev_device_get_property_value(udevice, "ID_SERIAL"); -+ if (serial != NULL && *serial != 0) { -+ disk->serial = g_strdup(serial); -+ disk->has_serial = true; -+ } -+ } -+ -+ udev_unref(udev); -+ udev_device_unref(udevice); -+#endif -+ - has_hwinf = build_guest_fsinfo_for_pci_dev(syspath, disk, errp); - -- if (has_hwinf) { -+ if (has_hwinf || disk->has_dev || disk->has_serial) { - list->next = fs->disk; - fs->disk = list; - } else { --- -2.27.0 - diff --git a/SOURCES/kvm-qga-commands-posix-Rework-build_guest_fsinfo_for_rea.patch b/SOURCES/kvm-qga-commands-posix-Rework-build_guest_fsinfo_for_rea.patch deleted file mode 100644 index 9915334..0000000 --- a/SOURCES/kvm-qga-commands-posix-Rework-build_guest_fsinfo_for_rea.patch +++ /dev/null @@ -1,156 +0,0 @@ -From 84bc86fdf47729bca77957a04161862ffbedbf2f Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 2 Oct 2020 10:17:40 -0400 -Subject: [PATCH 01/14] qga/commands-posix: Rework - build_guest_fsinfo_for_real_device() function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Message-id: <20201002101742.249169-2-thuth@redhat.com> -Patchwork-id: 98527 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/3] qga/commands-posix: Rework 
build_guest_fsinfo_for_real_device() function -Bugzilla: 1755075 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -We are going to support non-PCI devices soon. For this we need to split -the generic GuestDiskAddress and GuestDiskAddressList memory allocation -and list chaining into a separate function first. - -Signed-off-by: Thomas Huth -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Michael Roth -(cherry picked from commit d9fe4f0fea31f0560dc40d3576bc6c48ad97109f) -Signed-off-by: Danilo C. L. de Paula ---- - qga/commands-posix.c | 65 ++++++++++++++++++++++++++++---------------- - 1 file changed, 41 insertions(+), 24 deletions(-) - -diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index 1c1a165dae..99d6b1c8c1 100644 ---- a/qga/commands-posix.c -+++ b/qga/commands-posix.c -@@ -865,28 +865,30 @@ static int build_hosts(char const *syspath, char const *host, bool ata, - return i; - } - --/* Store disk device info specified by @sysfs into @fs */ --static void build_guest_fsinfo_for_real_device(char const *syspath, -- GuestFilesystemInfo *fs, -- Error **errp) -+/* -+ * Store disk device info for devices on the PCI bus. -+ * Returns true if information has been stored, or false for failure. 
-+ */ -+static bool build_guest_fsinfo_for_pci_dev(char const *syspath, -+ GuestDiskAddress *disk, -+ Error **errp) - { - unsigned int pci[4], host, hosts[8], tgt[3]; - int i, nhosts = 0, pcilen; -- GuestDiskAddress *disk; -- GuestPCIAddress *pciaddr; -- GuestDiskAddressList *list = NULL; -+ GuestPCIAddress *pciaddr = disk->pci_controller; - bool has_ata = false, has_host = false, has_tgt = false; - char *p, *q, *driver = NULL; - #ifdef CONFIG_LIBUDEV - struct udev *udev = NULL; - struct udev_device *udevice = NULL; - #endif -+ bool ret = false; - - p = strstr(syspath, "/devices/pci"); - if (!p || sscanf(p + 12, "%*x:%*x/%x:%x:%x.%x%n", - pci, pci + 1, pci + 2, pci + 3, &pcilen) < 4) { - g_debug("only pci device is supported: sysfs path '%s'", syspath); -- return; -+ return false; - } - - p += 12 + pcilen; -@@ -907,7 +909,7 @@ static void build_guest_fsinfo_for_real_device(char const *syspath, - } - - g_debug("unsupported driver or sysfs path '%s'", syspath); -- return; -+ return false; - } - - p = strstr(syspath, "/target"); -@@ -933,18 +935,11 @@ static void build_guest_fsinfo_for_real_device(char const *syspath, - } - } - -- pciaddr = g_malloc0(sizeof(*pciaddr)); - pciaddr->domain = pci[0]; - pciaddr->bus = pci[1]; - pciaddr->slot = pci[2]; - pciaddr->function = pci[3]; - -- disk = g_malloc0(sizeof(*disk)); -- disk->pci_controller = pciaddr; -- -- list = g_malloc0(sizeof(*list)); -- list->value = disk; -- - #ifdef CONFIG_LIBUDEV - udev = udev_new(); - udevice = udev_device_new_from_syspath(udev, syspath); -@@ -1022,21 +1017,43 @@ static void build_guest_fsinfo_for_real_device(char const *syspath, - goto cleanup; - } - -- list->next = fs->disk; -- fs->disk = list; -- goto out; -+ ret = true; - - cleanup: -- if (list) { -- qapi_free_GuestDiskAddressList(list); -- } --out: - g_free(driver); - #ifdef CONFIG_LIBUDEV - udev_unref(udev); - udev_device_unref(udevice); - #endif -- return; -+ return ret; -+} -+ -+/* Store disk device info specified by @sysfs into @fs */ 
-+static void build_guest_fsinfo_for_real_device(char const *syspath, -+ GuestFilesystemInfo *fs, -+ Error **errp) -+{ -+ GuestDiskAddress *disk; -+ GuestPCIAddress *pciaddr; -+ GuestDiskAddressList *list = NULL; -+ bool has_hwinf; -+ -+ pciaddr = g_new0(GuestPCIAddress, 1); -+ -+ disk = g_new0(GuestDiskAddress, 1); -+ disk->pci_controller = pciaddr; -+ -+ list = g_new0(GuestDiskAddressList, 1); -+ list->value = disk; -+ -+ has_hwinf = build_guest_fsinfo_for_pci_dev(syspath, disk, errp); -+ -+ if (has_hwinf) { -+ list->next = fs->disk; -+ fs->disk = list; -+ } else { -+ qapi_free_GuestDiskAddressList(list); -+ } - } - - static void build_guest_fsinfo_for_device(char const *devpath, --- -2.27.0 - diff --git a/SOURCES/kvm-qga-commands-posix-Support-fsinfo-for-non-PCI-virtio.patch b/SOURCES/kvm-qga-commands-posix-Support-fsinfo-for-non-PCI-virtio.patch deleted file mode 100644 index 0d37a64..0000000 --- a/SOURCES/kvm-qga-commands-posix-Support-fsinfo-for-non-PCI-virtio.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 250227a53c1d43d2bd8346922edb3452f3534be6 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 2 Oct 2020 10:17:42 -0400 -Subject: [PATCH 03/14] qga/commands-posix: Support fsinfo for non-PCI virtio - devices, too -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20201002101742.249169-4-thuth@redhat.com> -Patchwork-id: 98528 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 3/3] qga/commands-posix: Support fsinfo for non-PCI virtio devices, too -Bugzilla: 1755075 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -QEMU on s390x uses virtio via channel I/O instead of PCI by default. -Add a function to detect and provide information for virtio-scsi and -virtio-block devices here, too. - -Signed-off-by: Thomas Huth -Reviewed-by: Daniel P. 
Berrangé -Signed-off-by: Michael Roth -(cherry picked from commit 23843c129d5e1ca360605e511a43a34faebb47c4) -Signed-off-by: Danilo C. L. de Paula ---- - qga/commands-posix.c | 42 +++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 41 insertions(+), 1 deletion(-) - -diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index 6db76aadd1..c86c87ed52 100644 ---- a/qga/commands-posix.c -+++ b/qga/commands-posix.c -@@ -1000,6 +1000,39 @@ cleanup: - return ret; - } - -+/* -+ * Store disk device info for non-PCI virtio devices (for example s390x -+ * channel I/O devices). Returns true if information has been stored, or -+ * false for failure. -+ */ -+static bool build_guest_fsinfo_for_nonpci_virtio(char const *syspath, -+ GuestDiskAddress *disk, -+ Error **errp) -+{ -+ unsigned int tgt[3]; -+ char *p; -+ -+ if (!strstr(syspath, "/virtio") || !strstr(syspath, "/block")) { -+ g_debug("Unsupported virtio device '%s'", syspath); -+ return false; -+ } -+ -+ p = strstr(syspath, "/target"); -+ if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u", -+ &tgt[0], &tgt[1], &tgt[2]) == 3) { -+ /* virtio-scsi: target*:0:: */ -+ disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI; -+ disk->bus = tgt[0]; -+ disk->target = tgt[1]; -+ disk->unit = tgt[2]; -+ } else { -+ /* virtio-blk: 1 disk per 1 device */ -+ disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO; -+ } -+ -+ return true; -+} -+ - /* Store disk device info specified by @sysfs into @fs */ - static void build_guest_fsinfo_for_real_device(char const *syspath, - GuestFilesystemInfo *fs, -@@ -1050,7 +1083,14 @@ static void build_guest_fsinfo_for_real_device(char const *syspath, - udev_device_unref(udevice); - #endif - -- has_hwinf = build_guest_fsinfo_for_pci_dev(syspath, disk, errp); -+ if (strstr(syspath, "/devices/pci")) { -+ has_hwinf = build_guest_fsinfo_for_pci_dev(syspath, disk, errp); -+ } else if (strstr(syspath, "/virtio")) { -+ has_hwinf = build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp); -+ } else { -+ 
g_debug("Unsupported device type for '%s'", syspath); -+ has_hwinf = false; -+ } - - if (has_hwinf || disk->has_dev || disk->has_serial) { - list->next = fs->disk; --- -2.27.0 - diff --git a/SOURCES/kvm-qga-fix-assert-regression-on-guest-shutdown.patch b/SOURCES/kvm-qga-fix-assert-regression-on-guest-shutdown.patch deleted file mode 100644 index 7db6e1f..0000000 --- a/SOURCES/kvm-qga-fix-assert-regression-on-guest-shutdown.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 93b37bad75d14ed4b9e96cc3587d8ae16cb96ba3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Fri, 2 Oct 2020 17:46:08 -0400 -Subject: [PATCH 01/18] qga: fix assert regression on guest-shutdown -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201002174608.943992-2-marcandre.lureau@redhat.com> -Patchwork-id: 98534 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] qga: fix assert regression on guest-shutdown -Bugzilla: 1884531 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé - -From: Marc-André Lureau - -Since commit 781f2b3d1e ("qga: process_event() simplification"), -send_response() is called unconditionally, but will assert when "rsp" is -NULL. This may happen with QCO_NO_SUCCESS_RESP commands, such as -"guest-shutdown". - -Fixes: 781f2b3d1e5ef389b44016a897fd55e7a780bf35 -Cc: Michael Roth -Reported-by: Christian Ehrhardt -Signed-off-by: Marc-André Lureau -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Christian Ehrhardt -Tested-by: Christian Ehrhardt -Cc: qemu-stable@nongnu.org -Signed-off-by: Michael Roth - -(cherry picked from commit 844bd70b5652f30bbace89499f513e3fbbb6457a) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. 
de Paula ---- - qga/main.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/qga/main.c b/qga/main.c -index c35c2a21209..12fa463f4cd 100644 ---- a/qga/main.c -+++ b/qga/main.c -@@ -529,7 +529,11 @@ static int send_response(GAState *s, const QDict *rsp) - QString *payload_qstr, *response_qstr; - GIOStatus status; - -- g_assert(rsp && s->channel); -+ g_assert(s->channel); -+ -+ if (!rsp) { -+ return 0; -+ } - - payload_qstr = qobject_to_json(QOBJECT(rsp)); - if (!payload_qstr) { --- -2.27.0 - diff --git a/SOURCES/kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch b/SOURCES/kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch deleted file mode 100644 index 6ffc5bd..0000000 --- a/SOURCES/kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch +++ /dev/null @@ -1,54 +0,0 @@ -From c9b1eb9d6c0da9098d5410d90d290d6fca6ea7dc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 16 Dec 2020 16:06:14 -0500 -Subject: [PATCH 13/14] qga: fix missing closedir() in qmp_guest_get_disks() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201216160615.324213-10-marcandre.lureau@redhat.com> -Patchwork-id: 100481 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 09/10] qga: fix missing closedir() in qmp_guest_get_disks() -Bugzilla: 1859494 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi - -From: Michael Roth - -We opendir("/sys/block") at the beginning of the function, but we never -close it prior to returning. - -Fixes: Coverity CID 1436130 -Fixes: fed3956429d5 ("qga: add implementation of guest-get-disks for Linux") -Reported-by: Peter Maydell -Cc: Marc-André Lureau -Cc: Tomáš Golembiovský -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Michael Roth - -(cherry-picked from commit b1b9ab1c04d560f86d8da3dfca4d8b21de75fee6) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. 
de Paula ---- - qga/commands-posix.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index 96f5ddafd3a..9a170dee14c 100644 ---- a/qga/commands-posix.c -+++ b/qga/commands-posix.c -@@ -1445,6 +1445,9 @@ GuestDiskInfoList *qmp_guest_get_disks(Error **errp) - get_disk_deps(disk_dir, disk); - ret = get_disk_partitions(ret, de->d_name, disk_dir, dev_name); - } -+ -+ closedir(dp); -+ - return ret; - } - --- -2.27.0 - diff --git a/SOURCES/kvm-qga-rename-Error-parameter-to-more-common-errp.patch b/SOURCES/kvm-qga-rename-Error-parameter-to-more-common-errp.patch deleted file mode 100644 index 2528d26..0000000 --- a/SOURCES/kvm-qga-rename-Error-parameter-to-more-common-errp.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 457ba062cc1026a88a70ab3cb9a52acd62c5a2a8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 24 Dec 2020 12:53:02 -0500 -Subject: [PATCH 2/5] qga: rename Error ** parameter to more common errp -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201224125304.62697-2-marcandre.lureau@redhat.com> -Patchwork-id: 100498 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/3] qga: rename Error ** parameter to more common errp -Bugzilla: 1910326 -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Philippe Mathieu-Daudé - -From: Vladimir Sementsov-Ogievskiy - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20191205174635.18758-13-vsementsov@virtuozzo.com> -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Markus Armbruster -Signed-off-by: Markus Armbruster - -(cherry picked from commit b90abbac0b95f68a7ebac5545ab77b98f598a9c7) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. 
de Paula ---- - qga/commands-posix.c | 2 +- - qga/commands-win32.c | 2 +- - qga/commands.c | 12 ++++++------ - 3 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index c02373cdf7d..29353e90c8f 100644 ---- a/qga/commands-posix.c -+++ b/qga/commands-posix.c -@@ -3134,7 +3134,7 @@ static double ga_get_login_time(struct utmpx *user_info) - return seconds + useconds; - } - --GuestUserList *qmp_guest_get_users(Error **err) -+GuestUserList *qmp_guest_get_users(Error **errp) - { - GHashTable *cache = NULL; - GuestUserList *head = NULL, *cur_item = NULL; -diff --git a/qga/commands-win32.c b/qga/commands-win32.c -index a07725e874b..618ccdfadaa 100644 ---- a/qga/commands-win32.c -+++ b/qga/commands-win32.c -@@ -2047,7 +2047,7 @@ typedef struct _GA_WTSINFOA { - - } GA_WTSINFOA; - --GuestUserList *qmp_guest_get_users(Error **err) -+GuestUserList *qmp_guest_get_users(Error **errp) - { - #define QGA_NANOSECONDS 10000000 - -diff --git a/qga/commands.c b/qga/commands.c -index 0c7d1385c23..43c323ceada 100644 ---- a/qga/commands.c -+++ b/qga/commands.c -@@ -143,7 +143,7 @@ static GuestExecInfo *guest_exec_info_find(int64_t pid_numeric) - return NULL; - } - --GuestExecStatus *qmp_guest_exec_status(int64_t pid, Error **err) -+GuestExecStatus *qmp_guest_exec_status(int64_t pid, Error **errp) - { - GuestExecInfo *gei; - GuestExecStatus *ges; -@@ -152,7 +152,7 @@ GuestExecStatus *qmp_guest_exec_status(int64_t pid, Error **err) - - gei = guest_exec_info_find(pid); - if (gei == NULL) { -- error_setg(err, QERR_INVALID_PARAMETER, "pid"); -+ error_setg(errp, QERR_INVALID_PARAMETER, "pid"); - return NULL; - } - -@@ -385,7 +385,7 @@ GuestExec *qmp_guest_exec(const char *path, - bool has_env, strList *env, - bool has_input_data, const char *input_data, - bool has_capture_output, bool capture_output, -- Error **err) -+ Error **errp) - { - GPid pid; - GuestExec *ge = NULL; -@@ -405,7 +405,7 @@ GuestExec *qmp_guest_exec(const char *path, - 
arglist.next = has_arg ? arg : NULL; - - if (has_input_data) { -- input = qbase64_decode(input_data, -1, &ninput, err); -+ input = qbase64_decode(input_data, -1, &ninput, errp); - if (!input) { - return NULL; - } -@@ -424,7 +424,7 @@ GuestExec *qmp_guest_exec(const char *path, - guest_exec_task_setup, NULL, &pid, has_input_data ? &in_fd : NULL, - has_output ? &out_fd : NULL, has_output ? &err_fd : NULL, &gerr); - if (!ret) { -- error_setg(err, QERR_QGA_COMMAND_FAILED, gerr->message); -+ error_setg(errp, QERR_QGA_COMMAND_FAILED, gerr->message); - g_error_free(gerr); - goto done; - } -@@ -499,7 +499,7 @@ int ga_parse_whence(GuestFileWhence *whence, Error **errp) - return -1; - } - --GuestHostName *qmp_guest_get_host_name(Error **err) -+GuestHostName *qmp_guest_get_host_name(Error **errp) - { - GuestHostName *result = NULL; - gchar const *hostname = g_get_host_name(); --- -2.27.0 - diff --git a/SOURCES/kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch b/SOURCES/kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch deleted file mode 100644 index 727015e..0000000 --- a/SOURCES/kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch +++ /dev/null @@ -1,113 +0,0 @@ -From ff881d64d3f29825ab093eb2be183658226ccba3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 16 Dec 2020 16:06:15 -0500 -Subject: [PATCH 14/14] qga: update schema for guest-get-disks 'dependents' - field -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201216160615.324213-11-marcandre.lureau@redhat.com> -Patchwork-id: 100480 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 10/10] qga: update schema for guest-get-disks 'dependents' field -Bugzilla: 1859494 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi - -From: Michael Roth - -The recently-added 'guest-get-disk' command returns a list of -GuestDiskInfo entries, which 
in turn have a 'dependents' field which -lists devices these entries are dependent upon. Thus, 'dependencies' -is a better name for this field. Address this by renaming the field -accordingly. - -Additionally, 'dependents' is specified as non-optional, even though -it's not implemented for w32. This is misleading, since it gives users -the impression that a particular disk might not have dependencies, -when in reality that information is simply not known to the guest -agent. Address this by making 'dependents' an optional field, and only -marking it as in-use when the facilities to obtain this information are -available to the guest agent. - -Cc: Eric Blake -Cc: Tomáš Golembiovský -Cc: Marc-André Lureau -Reviewed-by: Eric Blake -Reviewed-by: Marc-André Lureau -Signed-off-by: Michael Roth - -(cherry-picked from commit a8aa94b5f8427cc2924d8cdd417c8014db1c86c0) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. de Paula ---- - qga/commands-posix.c | 10 ++++++---- - qga/qapi-schema.json | 8 ++++---- - 2 files changed, 10 insertions(+), 8 deletions(-) - -diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index 9a170dee14c..c02373cdf7d 100644 ---- a/qga/commands-posix.c -+++ b/qga/commands-posix.c -@@ -1287,6 +1287,7 @@ static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk) - g_debug("failed to list entries in %s", deps_dir); - return; - } -+ disk->has_dependencies = true; - while ((dep = g_dir_read_name(dp_deps)) != NULL) { - g_autofree char *dep_dir = NULL; - strList *dep_item = NULL; -@@ -1299,8 +1300,8 @@ static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk) - g_debug(" adding dependent device: %s", dev_name); - dep_item = g_new0(strList, 1); - dep_item->value = dev_name; -- dep_item->next = disk->dependents; -- disk->dependents = dep_item; -+ dep_item->next = disk->dependencies; -+ disk->dependencies = dep_item; - } - } - g_dir_close(dp_deps); -@@ -1353,8 +1354,9 @@ static GuestDiskInfoList *get_disk_partitions( - 
partition->name = dev_name; - partition->partition = true; - /* Add parent disk as dependent for easier tracking of hierarchy */ -- partition->dependents = g_new0(strList, 1); -- partition->dependents->value = g_strdup(disk_dev); -+ partition->dependencies = g_new0(strList, 1); -+ partition->dependencies->value = g_strdup(disk_dev); -+ partition->has_dependencies = true; - - item = g_new0(GuestDiskInfoList, 1); - item->value = partition; -diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json -index 22df375c92f..4222cb92d34 100644 ---- a/qga/qapi-schema.json -+++ b/qga/qapi-schema.json -@@ -857,9 +857,9 @@ - # - # @name: device node (Linux) or device UNC (Windows) - # @partition: whether this is a partition or disk --# @dependents: list of dependent devices; e.g. for LVs of the LVM this will --# hold the list of PVs, for LUKS encrypted volume this will --# contain the disk where the volume is placed. (Linux) -+# @dependencies: list of device dependencies; e.g. for LVs of the LVM this will -+# hold the list of PVs, for LUKS encrypted volume this will -+# contain the disk where the volume is placed. 
(Linux) - # @address: disk address information (only for non-virtual devices) - # @alias: optional alias assigned to the disk, on Linux this is a name assigned - # by device mapper -@@ -867,7 +867,7 @@ - # Since 5.2 - ## - { 'struct': 'GuestDiskInfo', -- 'data': {'name': 'str', 'partition': 'bool', 'dependents': ['str'], -+ 'data': {'name': 'str', 'partition': 'bool', '*dependencies': ['str'], - '*address': 'GuestDiskAddress', '*alias': 'str'} } - - ## --- -2.27.0 - diff --git a/SOURCES/kvm-raw-format-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch b/SOURCES/kvm-raw-format-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch deleted file mode 100644 index 5384b51..0000000 --- a/SOURCES/kvm-raw-format-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 5d590d354e42515ea074bf2110a2ab236dbabba1 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 8 Jun 2020 15:01:34 +0100 -Subject: [PATCH 06/17] raw-format: Support BDRV_REQ_ZERO_WRITE for truncate - -RH-Author: Kevin Wolf -Message-id: <20200608150140.38218-6-kwolf@redhat.com> -Patchwork-id: 97447 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 05/11] raw-format: Support BDRV_REQ_ZERO_WRITE for truncate -Bugzilla: 1780574 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz - -The raw format driver can simply forward the flag and let its bs->file -child take care of actually providing the zeros. - -Signed-off-by: Kevin Wolf -Reviewed-by: Max Reitz -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200424125448.63318-6-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 1ddaabaecb7eaeb6d8948a32340af95db44c54a1) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/raw-format.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/block/raw-format.c b/block/raw-format.c -index c3acf9a..bdec466 100644 ---- a/block/raw-format.c -+++ b/block/raw-format.c -@@ -387,7 +387,7 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, - - s->size = offset; - offset += s->offset; -- return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp); -+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp); - } - - static void raw_eject(BlockDriverState *bs, bool eject_flag) -@@ -445,6 +445,8 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, - bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | - ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & - bs->file->bs->supported_zero_flags); -+ bs->supported_truncate_flags = bs->file->bs->supported_truncate_flags & -+ BDRV_REQ_ZERO_WRITE; - - if (bs->probed && !bdrv_is_read_only(bs)) { - bdrv_refresh_filename(bs->file->bs); --- -1.8.3.1 - diff --git a/SOURCES/kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch b/SOURCES/kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch new file mode 100644 index 0000000..edf8ec9 --- /dev/null +++ b/SOURCES/kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch @@ -0,0 +1,69 @@ +From 3541c9fc2c2dd5cf7dd583bc5645d82ea928d9e8 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 10 Dec 2021 10:07:40 +0100 +Subject: [PATCH 1/2] redhat: Add rhel8.6.0 machine type for s390x +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 90: Add rhel8.6.0 machine type for s390x +RH-Commit: [1/1] 91961fc52d708e6b30d7361fbab3572c5b5c1859 +RH-Bugzilla: 2005325 +RH-Acked-by: Greg Kurz +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2005325 + +The new machine type has 
better default values for the upcoming +"generation 16" mainframe. + +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index cf13c457d6..9795eb9406 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1103,10 +1103,21 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + DEFINE_CCW_MACHINE(2_4, "2.4", false); + #endif + ++static void ccw_machine_rhel860_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel860_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", true); ++ + static void ccw_machine_rhel850_instance_options(MachineState *machine) + { + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; + ++ ccw_machine_rhel860_instance_options(machine); ++ + s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); + + s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); +@@ -1118,10 +1129,11 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { ++ ccw_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); + mc->smp_props.prefer_sockets = true; + } +-DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); ++DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); + + static void ccw_machine_rhel840_instance_options(MachineState *machine) + { +-- +2.27.0 + diff --git a/SOURCES/kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch b/SOURCES/kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch new file mode 100644 index 0000000..760a5fd --- /dev/null +++ b/SOURCES/kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch @@ -0,0 +1,76 @@ +From 300cdf7f5b8b34e111c5e4141684af7329be46d9 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Mon, 13 Dec 2021 15:42:41 +0100 
+Subject: [PATCH 2/2] redhat: Define pseries-rhel8.6.0 machine type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Greg Kurz +RH-MergeRequest: 92: redhat: Define pseries-rhel8.6.0 machine type +RH-Commit: [1/1] 3c0f59d7ddf4bb22f382b5df7daa136730b9e866 +RH-Bugzilla: 2031041 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: David Gibson (Red Hat) +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Laurent Vivier + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2031041 + +BRANCH: rhel-8.6.0 + +UPSTREAM: RHEL only + +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=41989147 + +Signed-off-by: Greg Kurz +--- + hw/ppc/spapr.c | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 2f27888d8a..32cfe8f006 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5170,6 +5170,19 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) + mc->max_cpus = 384; + } + ++/* ++ * pseries-rhel8.6.0 ++ * like pseries-6.2 ++ */ ++ ++static void spapr_machine_rhel860_class_options(MachineClass *mc) ++{ ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel860, "rhel8.6.0", true); ++ + /* + * pseries-rhel8.5.0 + * like pseries-6.0 +@@ -5179,15 +5192,14 @@ static void spapr_machine_rhel850_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + +- /* The default machine type must apply the RHEL specific defaults */ +- spapr_machine_rhel_default_class_options(mc); ++ spapr_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, + hw_compat_rhel_8_5_len); + smc->pre_6_2_numa_affinity = true; + mc->smp_props.prefer_sockets = true; + } + +-DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); ++DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", false); + 
+ /* + * pseries-rhel8.4.0 +-- +2.27.0 + diff --git a/SOURCES/kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch b/SOURCES/kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch deleted file mode 100644 index 55be349..0000000 --- a/SOURCES/kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch +++ /dev/null @@ -1,72 +0,0 @@ -From b07219611480dd4a37b2476604a1cec35c812216 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 23 Dec 2020 12:29:24 -0500 -Subject: [PATCH 1/5] redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201223122924.341944-1-marcandre.lureau@redhat.com> -Patchwork-id: 100496 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH] redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ -Bugzilla: 1910267 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Danilo de Paula - -From: Danilo de Paula - -BZ: 1910267 -BRANCH: rhel-8.4.0 -UPSTREAM: RHEL-only -BREW: 33929331 - -When qemu-ga was introduced to RHEL-8, we used the qemu-guest-agent -from RHEL-7 as base. - -In RHEL-7, qemu-guest-agent is built as standalone package. -It's built as "qemu-ga", hence the "qemu-ga" folders. - -For RHEL-8, that should have been renamed to qemu-kvm, but I missed it. -Renaming those folders to /etc/qemu-kvm is a no go today, because -users might have populated the /etc/qemu-ga/fsfreeze-hook.d folder. - -So, in order to make qemu-ga -F works in RHEL-8, a link is being -created in the expected place, pointing to the real one. - -Also, fsfreeze-hook opens up the fsfreeze-hook.d on the same PATH where -it is stored. However, it doesn't follow symlinks. In order to fix this, -I had to change it to make sure it follows the link. 
- -An option would be to also link the fsfreeze-hook.d folder, but I choose -not to do so as it creates a permanent/visible change in users -environments. The downside is to keep another downstream-only change. - -Signed-off-by: Danilo C. L. de Paula - -[ cherry-picked from commit 020501879841afb788087f0455df79367c0337a0 ] -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. de Paula ---- - redhat/qemu-kvm.spec.template | 6 ++++++ - scripts/qemu-guest-agent/fsfreeze-hook | 2 +- - 2 files changed, 7 insertions(+), 1 deletion(-) - - -diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook -index 13aafd48451..e9b84ec0284 100755 ---- a/scripts/qemu-guest-agent/fsfreeze-hook -+++ b/scripts/qemu-guest-agent/fsfreeze-hook -@@ -8,7 +8,7 @@ - # request, it is issued with "thaw" argument after filesystem is thawed. - - LOGFILE=/var/log/qga-fsfreeze-hook.log --FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d -+FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d - - # Check whether file $1 is a backup or rpm-generated file and should be ignored - is_ignored_file() { --- -2.27.0 - diff --git a/SOURCES/kvm-replication-assert-we-own-context-before-job_cancel_.patch b/SOURCES/kvm-replication-assert-we-own-context-before-job_cancel_.patch deleted file mode 100644 index 09ef4de..0000000 --- a/SOURCES/kvm-replication-assert-we-own-context-before-job_cancel_.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 46887feac666d0d7633ff3f5af5721fe2a80a8ab Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:13 +0100 -Subject: [PATCH 2/6] replication: assert we own context before job_cancel_sync - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-3-kwolf@redhat.com> -Patchwork-id: 94595 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/6] replication: assert we own context before job_cancel_sync -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -From: Stefan Reiter - 
-job_cancel_sync requires the job's lock to be held, all other callers -already do this (replication_stop, drive_backup_abort, -blockdev_backup_abort, job_cancel_sync_all, cancel_common). - -In this case we're in a BlockDriver handler, so we already have a lock, -just assert that it is the same as the one used for the commit_job. - -Signed-off-by: Stefan Reiter -Message-Id: <20200407115651.69472-3-s.reiter@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 08558e33257ec796594bd411261028a93414a70c) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/replication.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/block/replication.c b/block/replication.c -index 99532ce..0ce27ee 100644 ---- a/block/replication.c -+++ b/block/replication.c -@@ -144,12 +144,15 @@ fail: - static void replication_close(BlockDriverState *bs) - { - BDRVReplicationState *s = bs->opaque; -+ Job *commit_job; - - if (s->stage == BLOCK_REPLICATION_RUNNING) { - replication_stop(s->rs, false, NULL); - } - if (s->stage == BLOCK_REPLICATION_FAILOVER) { -- job_cancel_sync(&s->commit_job->job); -+ commit_job = &s->commit_job->job; -+ assert(commit_job->aio_context == qemu_get_current_aio_context()); -+ job_cancel_sync(commit_job); - } - - if (s->mode == REPLICATION_MODE_SECONDARY) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-rhel-machine-types-x86-set-prefer_sockets.patch b/SOURCES/kvm-rhel-machine-types-x86-set-prefer_sockets.patch new file mode 100644 index 0000000..d7bfc96 --- /dev/null +++ b/SOURCES/kvm-rhel-machine-types-x86-set-prefer_sockets.patch @@ -0,0 +1,52 @@ +From 0f0cbd57a8fe8f463941656f5bc75ae5754c3d2b Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 7 Dec 2021 18:39:47 +0000 +Subject: [PATCH 6/6] rhel machine types/x86: set prefer_sockets + +RH-Author: Dr. 
David Alan Gilbert +RH-MergeRequest: 96: Fixup x86 prefer_sockets +RH-Commit: [1/1] 29578bcc2f5d3408c155c155cdfa10b7a12faf4d +RH-Bugzilla: 2029582 +RH-Acked-by: Igor Mammedov +RH-Acked-by: quintela1 +RH-Acked-by: Cornelia Huck + +When I fixed up the machine types for 8.5 I missed the + prefer_sockets = true + +add them in; it looks like Power, ARM already have them, and I see them +in thuth's s390 patch. + +Signed-off-by: Dr. David Alan Gilbert +--- + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 37fab00733..c30057c443 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1020,6 +1020,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + m->alias = "pc"; + m->is_default = 1; ++ m->smp_props.prefer_sockets = true; + } + + static void pc_init_rhel760(MachineState *machine) +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 78876e1101..f6e77bca0e 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -662,6 +662,7 @@ static void pc_q35_machine_rhel850_options(MachineClass *m) + hw_compat_rhel_8_5_len); + compat_props_add(m->compat_props, pc_rhel_8_5_compat, + pc_rhel_8_5_compat_len); ++ m->smp_props.prefer_sockets = true; + } + + DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, +-- +2.27.0 + diff --git a/SOURCES/kvm-rtl8139-switch-to-use-qemu_receive_packet-for-loopba.patch b/SOURCES/kvm-rtl8139-switch-to-use-qemu_receive_packet-for-loopba.patch deleted file mode 100644 index 917e3ff..0000000 --- a/SOURCES/kvm-rtl8139-switch-to-use-qemu_receive_packet-for-loopba.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 4079c4e96f910fe7e57af13feb433f06246f1d79 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 29 Jun 2021 03:42:44 -0400 -Subject: [PATCH 6/9] rtl8139: switch to use qemu_receive_packet() for loopback -MIME-Version: 1.0 -Content-Type: text/plain; 
charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210629034247.3286477-7-jmaloy@redhat.com> -Patchwork-id: 101792 -O-Subject: [RHEL-8.4.0.z qemu-kvm PATCH v2 6/9] rtl8139: switch to use qemu_receive_packet() for loopback -Bugzilla: 1932917 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Alexander Bulekov - -This patch switches to use qemu_receive_packet() which can detect -reentrancy and return early. - -This is intended to address CVE-2021-3416. - -Cc: Prasad J Pandit -Cc: qemu-stable@nongnu.org -Buglink: https://bugs.launchpad.net/qemu/+bug/1910826 -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Jason Wang - -(cherry picked from commit 5311fb805a4403bba024e83886fa0e7572265de4) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/rtl8139.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 21d80e96cf..ccb04faa4c 100644 ---- a/hw/net/rtl8139.c -+++ b/hw/net/rtl8139.c -@@ -1793,7 +1793,7 @@ static void rtl8139_transfer_frame(RTL8139State *s, uint8_t *buf, int size, - } - - DPRINTF("+++ transmit loopback mode\n"); -- rtl8139_do_receive(qemu_get_queue(s->nic), buf, size, do_interrupt); -+ qemu_receive_packet(qemu_get_queue(s->nic), buf, size); - - if (iov) { - g_free(buf2); --- -2.27.0 - diff --git a/SOURCES/kvm-s390-guest-support-for-diagnose-0x318.patch b/SOURCES/kvm-s390-guest-support-for-diagnose-0x318.patch deleted file mode 100644 index 84fc7bc..0000000 --- a/SOURCES/kvm-s390-guest-support-for-diagnose-0x318.patch +++ /dev/null @@ -1,282 +0,0 @@ -From 7ad1c4aaea6cd202449c05fc0034af6b108def4f Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 11 Nov 2020 12:03:14 -0500 -Subject: [PATCH 14/18] s390: guest support for diagnose 0x318 - -RH-Author: Thomas Huth -Message-id: <20201111120316.707489-11-thuth@redhat.com> -Patchwork-id: 99507 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 10/12] 
s390: guest support for diagnose 0x318 -Bugzilla: 1798506 -RH-Acked-by: Jens Freimann -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Collin Walling - -DIAGNOSE 0x318 (diag318) is an s390 instruction that allows the storage -of diagnostic information that is collected by the firmware in the case -of hardware/firmware service events. - -QEMU handles the instruction by storing the info in the CPU state. A -subsequent register sync will communicate the data to the hypervisor. - -QEMU handles the migration via a VM State Description. - -This feature depends on the Extended-Length SCCB (els) feature. If -els is not present, then a warning will be printed and the SCLP bit -that allows the Linux kernel to execute the instruction will not be -set. - -Availability of this instruction is determined by byte 134 (aka fac134) -bit 0 of the SCLP Read Info block. This coincidentally expands into the -space used for CPU entries, which means VMs running with the diag318 -capability may not be able to read information regarding all CPUs -unless the guest kernel supports an extended-length SCCB. - -This feature is not supported in protected virtualization mode. - -Signed-off-by: Collin Walling -Acked-by: Janosch Frank -Acked-by: Thomas Huth -Acked-by: David Hildenbrand -Acked-by: Claudio Imbrenda -Message-Id: <20200915194416.107460-9-walling@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit fabdada9357b9cfd980c7744ddce47e34600bbef) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/sclp.c | 5 ++++ - include/hw/s390x/sclp.h | 8 ++++++ - target/s390x/cpu.h | 2 ++ - target/s390x/cpu_features.h | 1 + - target/s390x/cpu_features_def.inc.h | 3 +++ - target/s390x/cpu_models.c | 1 + - target/s390x/gen-features.c | 1 + - target/s390x/kvm.c | 39 +++++++++++++++++++++++++++++ - target/s390x/machine.c | 17 +++++++++++++ - 9 files changed, 77 insertions(+) - -diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c -index 8d111628e04..2931046f456 100644 ---- a/hw/s390x/sclp.c -+++ b/hw/s390x/sclp.c -@@ -139,6 +139,11 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) - s390_get_feat_block(S390_FEAT_TYPE_SCLP_CONF_CHAR_EXT, - read_info->conf_char_ext); - -+ if (s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB)) { -+ s390_get_feat_block(S390_FEAT_TYPE_SCLP_FAC134, -+ &read_info->fac134); -+ } -+ - read_info->facilities = cpu_to_be64(SCLP_HAS_CPU_INFO | - SCLP_HAS_IOA_RECONFIG); - -diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h -index 62e2aa1d9f1..addd904e5f4 100644 ---- a/include/hw/s390x/sclp.h -+++ b/include/hw/s390x/sclp.h -@@ -133,7 +133,15 @@ typedef struct ReadInfo { - uint16_t highest_cpu; - uint8_t _reserved5[124 - 122]; /* 122-123 */ - uint32_t hmfai; -+ uint8_t _reserved7[134 - 128]; /* 128-133 */ -+ uint8_t fac134; -+ uint8_t _reserved8[144 - 135]; /* 135-143 */ - struct CPUEntry entries[]; -+ /* -+ * When the Extended-Length SCCB (ELS) feature is enabled the -+ * start of the entries field begins at an offset denoted by the -+ * offset_cpu field, otherwise it's at an offset of 128. 
-+ */ - } QEMU_PACKED ReadInfo; - - typedef struct ReadCpuInfo { -diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h -index a48e655c4d4..1dc21cd311d 100644 ---- a/target/s390x/cpu.h -+++ b/target/s390x/cpu.h -@@ -117,6 +117,8 @@ struct CPUS390XState { - uint16_t external_call_addr; - DECLARE_BITMAP(emergency_signals, S390_MAX_CPUS); - -+ uint64_t diag318_info; -+ - /* Fields up to this point are cleared by a CPU reset */ - struct {} end_reset_fields; - -diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h -index da695a8346e..f74f7fc3a11 100644 ---- a/target/s390x/cpu_features.h -+++ b/target/s390x/cpu_features.h -@@ -23,6 +23,7 @@ typedef enum { - S390_FEAT_TYPE_STFL, - S390_FEAT_TYPE_SCLP_CONF_CHAR, - S390_FEAT_TYPE_SCLP_CONF_CHAR_EXT, -+ S390_FEAT_TYPE_SCLP_FAC134, - S390_FEAT_TYPE_SCLP_CPU, - S390_FEAT_TYPE_MISC, - S390_FEAT_TYPE_PLO, -diff --git a/target/s390x/cpu_features_def.inc.h b/target/s390x/cpu_features_def.inc.h -index 3548d65a69a..cf7e04ee44f 100644 ---- a/target/s390x/cpu_features_def.inc.h -+++ b/target/s390x/cpu_features_def.inc.h -@@ -122,6 +122,9 @@ DEF_FEAT(SIE_CMMA, "cmma", SCLP_CONF_CHAR_EXT, 1, "SIE: Collaborative-memory-man - DEF_FEAT(SIE_PFMFI, "pfmfi", SCLP_CONF_CHAR_EXT, 9, "SIE: PFMF interpretation facility") - DEF_FEAT(SIE_IBS, "ibs", SCLP_CONF_CHAR_EXT, 10, "SIE: Interlock-and-broadcast-suppression facility") - -+/* Features exposed via SCLP SCCB Facilities byte 134 (bit numbers relative to byte-134) */ -+DEF_FEAT(DIAG_318, "diag318", SCLP_FAC134, 0, "Control program name and version codes") -+ - /* Features exposed via SCLP CPU info. 
*/ - DEF_FEAT(SIE_F2, "sief2", SCLP_CPU, 4, "SIE: interception format 2 (Virtual SIE)") - DEF_FEAT(SIE_SKEY, "skey", SCLP_CPU, 5, "SIE: Storage-key facility") -diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index be718220d79..bf6a3faba9e 100644 ---- a/target/s390x/cpu_models.c -+++ b/target/s390x/cpu_models.c -@@ -823,6 +823,7 @@ static void check_consistency(const S390CPUModel *model) - { S390_FEAT_PTFF_STOE, S390_FEAT_MULTIPLE_EPOCH }, - { S390_FEAT_PTFF_STOUE, S390_FEAT_MULTIPLE_EPOCH }, - { S390_FEAT_AP_QUEUE_INTERRUPT_CONTROL, S390_FEAT_AP }, -+ { S390_FEAT_DIAG_318, S390_FEAT_EXTENDED_LENGTH_SCCB }, - }; - int i; - -diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c -index 6857f657fba..a1f0a6f3c6f 100644 ---- a/target/s390x/gen-features.c -+++ b/target/s390x/gen-features.c -@@ -523,6 +523,7 @@ static uint16_t full_GEN12_GA1[] = { - S390_FEAT_AP_FACILITIES_TEST, - S390_FEAT_AP, - S390_FEAT_EXTENDED_LENGTH_SCCB, -+ S390_FEAT_DIAG_318, - }; - - static uint16_t full_GEN12_GA2[] = { -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index ef437acb5c1..e5e190d21c9 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -105,6 +105,7 @@ - - #define DIAG_TIMEREVENT 0x288 - #define DIAG_IPL 0x308 -+#define DIAG_SET_CONTROL_PROGRAM_CODES 0x318 - #define DIAG_KVM_HYPERCALL 0x500 - #define DIAG_KVM_BREAKPOINT 0x501 - -@@ -602,6 +603,11 @@ int kvm_arch_put_registers(CPUState *cs, int level) - cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_ETOKEN; - } - -+ if (can_sync_regs(cs, KVM_SYNC_DIAG318)) { -+ cs->kvm_run->s.regs.diag318 = env->diag318_info; -+ cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_DIAG318; -+ } -+ - /* Finally the prefix */ - if (can_sync_regs(cs, KVM_SYNC_PREFIX)) { - cs->kvm_run->s.regs.prefix = env->psa; -@@ -741,6 +747,10 @@ int kvm_arch_get_registers(CPUState *cs) - } - } - -+ if (can_sync_regs(cs, KVM_SYNC_DIAG318)) { -+ env->diag318_info = cs->kvm_run->s.regs.diag318; -+ } -+ - return 0; - } - -@@ -1601,6 
+1611,27 @@ static int handle_sw_breakpoint(S390CPU *cpu, struct kvm_run *run) - return -ENOENT; - } - -+static void handle_diag_318(S390CPU *cpu, struct kvm_run *run) -+{ -+ uint64_t reg = (run->s390_sieic.ipa & 0x00f0) >> 4; -+ uint64_t diag318_info = run->s.regs.gprs[reg]; -+ -+ /* -+ * DIAG 318 can only be enabled with KVM support. As such, let's -+ * ensure a guest cannot execute this instruction erroneously. -+ */ -+ if (!s390_has_feat(S390_FEAT_DIAG_318)) { -+ kvm_s390_program_interrupt(cpu, PGM_SPECIFICATION); -+ } -+ -+ cpu->env.diag318_info = diag318_info; -+ -+ if (can_sync_regs(CPU(cpu), KVM_SYNC_DIAG318)) { -+ run->s.regs.diag318 = diag318_info; -+ run->kvm_dirty_regs |= KVM_SYNC_DIAG318; -+ } -+} -+ - #define DIAG_KVM_CODE_MASK 0x000000000000ffff - - static int handle_diag(S390CPU *cpu, struct kvm_run *run, uint32_t ipb) -@@ -1620,6 +1651,9 @@ static int handle_diag(S390CPU *cpu, struct kvm_run *run, uint32_t ipb) - case DIAG_IPL: - kvm_handle_diag_308(cpu, run); - break; -+ case DIAG_SET_CONTROL_PROGRAM_CODES: -+ handle_diag_318(cpu, run); -+ break; - case DIAG_KVM_HYPERCALL: - r = handle_hypercall(cpu, run); - break; -@@ -2449,6 +2483,11 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) - */ - set_bit(S390_FEAT_EXTENDED_LENGTH_SCCB, model->features); - -+ /* DIAGNOSE 0x318 is not supported under protected virtualization */ -+ if (!s390_is_pv() && kvm_check_extension(kvm_state, KVM_CAP_S390_DIAG318)) { -+ set_bit(S390_FEAT_DIAG_318, model->features); -+ } -+ - /* strip of features that are not part of the maximum model */ - bitmap_and(model->features, model->features, model->def->full_feat, - S390_FEAT_MAX); -diff --git a/target/s390x/machine.c b/target/s390x/machine.c -index 549bb6c2808..5b4e82f1ab9 100644 ---- a/target/s390x/machine.c -+++ b/target/s390x/machine.c -@@ -234,6 +234,22 @@ const VMStateDescription vmstate_etoken = { - } - }; - -+static bool diag318_needed(void *opaque) -+{ -+ return 
s390_has_feat(S390_FEAT_DIAG_318); -+} -+ -+const VMStateDescription vmstate_diag318 = { -+ .name = "cpu/diag318", -+ .version_id = 1, -+ .minimum_version_id = 1, -+ .needed = diag318_needed, -+ .fields = (VMStateField[]) { -+ VMSTATE_UINT64(env.diag318_info, S390CPU), -+ VMSTATE_END_OF_LIST() -+ } -+}; -+ - const VMStateDescription vmstate_s390_cpu = { - .name = "cpu", - .post_load = cpu_post_load, -@@ -270,6 +286,7 @@ const VMStateDescription vmstate_s390_cpu = { - &vmstate_gscb, - &vmstate_bpbc, - &vmstate_etoken, -+ &vmstate_diag318, - NULL - }, - }; --- -2.27.0 - diff --git a/SOURCES/kvm-s390-ipl-fix-off-by-one-in-update_machine_ipl_proper.patch b/SOURCES/kvm-s390-ipl-fix-off-by-one-in-update_machine_ipl_proper.patch deleted file mode 100644 index c45158a..0000000 --- a/SOURCES/kvm-s390-ipl-fix-off-by-one-in-update_machine_ipl_proper.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 1769600e1e3bd5ca48450de8ce8a118bf0af96f3 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:00 -0400 -Subject: [PATCH 18/42] s390/ipl: fix off-by-one in - update_machine_ipl_properties() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-19-thuth@redhat.com> -Patchwork-id: 97028 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 18/38] s390/ipl: fix off-by-one in update_machine_ipl_properties() -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Halil Pasic - -In update_machine_ipl_properties() the array ascii_loadparm needs to -hold the 8 char loadparm and a string terminating zero char. - -Let's increase the size of ascii_loadparm accordingly. 
- -Signed-off-by: Halil Pasic -Fixes: 0a01e082a428 ("s390/ipl: sync back loadparm") -Fixes: Coverity CID 1421966 -Reported-by: Peter Maydell -Message-Id: <20200320143101.41764-1-pasic@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 7722837369eb1c7e808021d79da68afa0c01c26f) -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/ipl.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c -index f25339c503..fa0409dc23 100644 ---- a/hw/s390x/ipl.c -+++ b/hw/s390x/ipl.c -@@ -537,7 +537,7 @@ static void update_machine_ipl_properties(IplParameterBlock *iplb) - /* Sync loadparm */ - if (iplb->flags & DIAG308_FLAGS_LP_VALID) { - uint8_t *ebcdic_loadparm = iplb->loadparm; -- char ascii_loadparm[8]; -+ char ascii_loadparm[9]; - int i; - - for (i = 0; i < 8 && ebcdic_loadparm[i]; i++) { --- -2.27.0 - diff --git a/SOURCES/kvm-s390-ipl-sync-back-loadparm.patch b/SOURCES/kvm-s390-ipl-sync-back-loadparm.patch deleted file mode 100644 index 49f4d3f..0000000 --- a/SOURCES/kvm-s390-ipl-sync-back-loadparm.patch +++ /dev/null @@ -1,91 +0,0 @@ -From 53053ea2e6c757e5d044655c8b61c485e0aad4ed Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:59 -0400 -Subject: [PATCH 17/42] s390/ipl: sync back loadparm -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-18-thuth@redhat.com> -Patchwork-id: 97039 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 17/38] s390/ipl: sync back loadparm -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Halil Pasic - -We expose loadparm as a r/w machine property, but if loadparm is set by -the guest via DIAG 308, we don't update the property. 
Having a -disconnect between the guest view and the QEMU property is not nice in -itself, but things get even worse for SCSI, where under certain -circumstances (see 789b5a401b "s390: Ensure IPL from SCSI works as -expected" for details) we call s390_gen_initial_iplb() on resets -effectively overwriting the guest/user supplied loadparm with the stale -value. - -Signed-off-by: Halil Pasic -Fixes: 7104bae9de ("hw/s390x: provide loadparm property for the machine") -Reported-by: Marc Hartmayer -Reviewed-by: Janosch Frank -Reviewed-by: Viktor Mihajlovski -Tested-by: Marc Hartmayer -Reviewed-by: David Hildenbrand -Message-Id: <20200309133223.100491-1-pasic@linux.ibm.com> -[borntraeger@de.ibm.com: use reverse xmas tree] -Signed-off-by: Christian Borntraeger -(cherry picked from commit 0a01e082a428b921e48b5314881b1f23a7b0fe50) -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/ipl.c | 25 +++++++++++++++++++++++++ - 1 file changed, 25 insertions(+) - -diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c -index 0b7548a549..f25339c503 100644 ---- a/hw/s390x/ipl.c -+++ b/hw/s390x/ipl.c -@@ -529,6 +529,30 @@ static bool is_virtio_scsi_device(IplParameterBlock *iplb) - return is_virtio_ccw_device_of_type(iplb, VIRTIO_ID_SCSI); - } - -+static void update_machine_ipl_properties(IplParameterBlock *iplb) -+{ -+ Object *machine = qdev_get_machine(); -+ Error *err = NULL; -+ -+ /* Sync loadparm */ -+ if (iplb->flags & DIAG308_FLAGS_LP_VALID) { -+ uint8_t *ebcdic_loadparm = iplb->loadparm; -+ char ascii_loadparm[8]; -+ int i; -+ -+ for (i = 0; i < 8 && ebcdic_loadparm[i]; i++) { -+ ascii_loadparm[i] = ebcdic2ascii[(uint8_t) ebcdic_loadparm[i]]; -+ } -+ ascii_loadparm[i] = 0; -+ object_property_set_str(machine, ascii_loadparm, "loadparm", &err); -+ } else { -+ object_property_set_str(machine, "", "loadparm", &err); -+ } -+ if (err) { -+ warn_report_err(err); -+ } -+} -+ - void s390_ipl_update_diag308(IplParameterBlock *iplb) - { - S390IPLState *ipl = get_ipl_device(); -@@ -536,6 +560,7 @@ 
void s390_ipl_update_diag308(IplParameterBlock *iplb) - ipl->iplb = *iplb; - ipl->iplb_valid = true; - ipl->netboot = is_virtio_net_device(iplb); -+ update_machine_ipl_properties(iplb); - } - - IplParameterBlock *s390_ipl_get_iplb(void) --- -2.27.0 - diff --git a/SOURCES/kvm-s390-kvm-fix-diag318-propagation-and-reset-functiona.patch b/SOURCES/kvm-s390-kvm-fix-diag318-propagation-and-reset-functiona.patch deleted file mode 100644 index f0f25a5..0000000 --- a/SOURCES/kvm-s390-kvm-fix-diag318-propagation-and-reset-functiona.patch +++ /dev/null @@ -1,163 +0,0 @@ -From a0ad4344984c50939be8c99371af0988551fb776 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 20 Nov 2020 11:46:09 -0500 -Subject: [PATCH 17/18] s390/kvm: fix diag318 propagation and reset - functionality - -RH-Author: Thomas Huth -Message-id: <20201120114609.408610-2-thuth@redhat.com> -Patchwork-id: 99787 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] s390/kvm: fix diag318 propagation and reset functionality -Bugzilla: 1659412 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Collin Walling - -The Control Program Name Code (CPNC) portion of the diag318 -info must be set within the SIE block of each VCPU in the -configuration. The handler will iterate through each VCPU -and dirty the diag318_info reg to be synced with KVM on a -subsequent sync_regs call. - -Additionally, the diag318 info resets must be handled via -userspace. As such, QEMU will reset this value for each -VCPU during a modified clear, load normal, and load clear -reset event. - -Fixes: fabdada9357b ("s390: guest support for diagnose 0x318") -Signed-off-by: Collin Walling -Message-Id: <20201113221022.257054-1-walling@linux.ibm.com> -Reviewed-by: Thomas Huth -Reviewed-by: Janosch Frank -Signed-off-by: Cornelia Huck -(cherry picked from commit e2c6cd567422bfa563be026b9741a1854aecdc06) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/s390-virtio-ccw.c | 4 ++++ - target/s390x/cpu.c | 7 +++++++ - target/s390x/cpu.h | 1 + - target/s390x/kvm-stub.c | 4 ++++ - target/s390x/kvm.c | 22 +++++++++++++++++----- - target/s390x/kvm_s390x.h | 1 + - 6 files changed, 34 insertions(+), 5 deletions(-) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index e6ed13b649a..5905d2b7adc 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -489,6 +489,10 @@ static void s390_machine_reset(MachineState *machine) - default: - g_assert_not_reached(); - } -+ -+ CPU_FOREACH(t) { -+ run_on_cpu(t, s390_do_cpu_set_diag318, RUN_ON_CPU_HOST_ULONG(0)); -+ } - s390_ipl_clear_reset_request(); - } - -diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c -index 371b91b2d72..820cab96e12 100644 ---- a/target/s390x/cpu.c -+++ b/target/s390x/cpu.c -@@ -445,6 +445,13 @@ void s390_enable_css_support(S390CPU *cpu) - kvm_s390_enable_css_support(cpu); - } - } -+ -+void s390_do_cpu_set_diag318(CPUState *cs, run_on_cpu_data arg) -+{ -+ if (kvm_enabled()) { -+ kvm_s390_set_diag318(cs, arg.host_ulong); -+ } -+} - #endif - - static gchar *s390_gdb_arch_name(CPUState *cs) -diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h -index 1dc21cd311d..83a23a11b96 100644 ---- a/target/s390x/cpu.h -+++ b/target/s390x/cpu.h -@@ -774,6 +774,7 @@ int s390_set_memory_limit(uint64_t new_limit, uint64_t *hw_limit); - void s390_set_max_pagesize(uint64_t pagesize, Error **errp); - void s390_cmma_reset(void); - void s390_enable_css_support(S390CPU *cpu); -+void s390_do_cpu_set_diag318(CPUState *cs, run_on_cpu_data arg); - int s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch_id, - int vq, bool assign); - #ifndef CONFIG_USER_ONLY -diff --git a/target/s390x/kvm-stub.c b/target/s390x/kvm-stub.c -index aa185017a2a..9970b5a8c70 100644 ---- a/target/s390x/kvm-stub.c -+++ b/target/s390x/kvm-stub.c -@@ -120,3 +120,7 @@ void kvm_s390_stop_interrupt(S390CPU *cpu) - void 
kvm_s390_restart_interrupt(S390CPU *cpu) - { - } -+ -+void kvm_s390_set_diag318(CPUState *cs, uint64_t diag318_info) -+{ -+} -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 6edb52f6d25..8d4406124b9 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -1611,10 +1611,23 @@ static int handle_sw_breakpoint(S390CPU *cpu, struct kvm_run *run) - return -ENOENT; - } - -+void kvm_s390_set_diag318(CPUState *cs, uint64_t diag318_info) -+{ -+ CPUS390XState *env = &S390_CPU(cs)->env; -+ -+ /* Feat bit is set only if KVM supports sync for diag318 */ -+ if (s390_has_feat(S390_FEAT_DIAG_318)) { -+ env->diag318_info = diag318_info; -+ cs->kvm_run->s.regs.diag318 = diag318_info; -+ cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_DIAG318; -+ } -+} -+ - static void handle_diag_318(S390CPU *cpu, struct kvm_run *run) - { - uint64_t reg = (run->s390_sieic.ipa & 0x00f0) >> 4; - uint64_t diag318_info = run->s.regs.gprs[reg]; -+ CPUState *t; - - /* - * DIAG 318 can only be enabled with KVM support. 
As such, let's -@@ -1622,13 +1635,12 @@ static void handle_diag_318(S390CPU *cpu, struct kvm_run *run) - */ - if (!s390_has_feat(S390_FEAT_DIAG_318)) { - kvm_s390_program_interrupt(cpu, PGM_SPECIFICATION); -+ return; - } - -- cpu->env.diag318_info = diag318_info; -- -- if (can_sync_regs(CPU(cpu), KVM_SYNC_DIAG318)) { -- run->s.regs.diag318 = diag318_info; -- run->kvm_dirty_regs |= KVM_SYNC_DIAG318; -+ CPU_FOREACH(t) { -+ run_on_cpu(t, s390_do_cpu_set_diag318, -+ RUN_ON_CPU_HOST_ULONG(diag318_info)); - } - } - -diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h -index 6ab17c81b73..25bbe98b251 100644 ---- a/target/s390x/kvm_s390x.h -+++ b/target/s390x/kvm_s390x.h -@@ -45,5 +45,6 @@ void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp); - void kvm_s390_crypto_reset(void); - void kvm_s390_restart_interrupt(S390CPU *cpu); - void kvm_s390_stop_interrupt(S390CPU *cpu); -+void kvm_s390_set_diag318(CPUState *cs, uint64_t diag318_info); - - #endif /* KVM_S390X_H */ --- -2.27.0 - diff --git a/SOURCES/kvm-s390-sclp-add-extended-length-sccb-support-for-kvm-g.patch b/SOURCES/kvm-s390-sclp-add-extended-length-sccb-support-for-kvm-g.patch deleted file mode 100644 index c05f50c..0000000 --- a/SOURCES/kvm-s390-sclp-add-extended-length-sccb-support-for-kvm-g.patch +++ /dev/null @@ -1,220 +0,0 @@ -From e1a3684f9b08fa9db35331b5c5ad11879f512e90 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 11 Nov 2020 12:03:11 -0500 -Subject: [PATCH 11/18] s390/sclp: add extended-length sccb support for kvm - guest - -RH-Author: Thomas Huth -Message-id: <20201111120316.707489-8-thuth@redhat.com> -Patchwork-id: 99504 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 07/12] s390/sclp: add extended-length sccb support for kvm guest -Bugzilla: 1798506 -RH-Acked-by: Jens Freimann -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Collin Walling - -As more features and facilities are added to the Read SCP Info (RSCPI) -response, more space is required to store them. 
The space used to store -these new features intrudes on the space originally used to store CPU -entries. This means as more features and facilities are added to the -RSCPI response, less space can be used to store CPU entries. - -With the Extended-Length SCCB (ELS) facility, a KVM guest can execute -the RSCPI command and determine if the SCCB is large enough to store a -complete reponse. If it is not large enough, then the required length -will be set in the SCCB header. - -The caller of the SCLP command is responsible for creating a -large-enough SCCB to store a complete response. Proper checking should -be in place, and the caller should execute the command once-more with -the large-enough SCCB. - -This facility also enables an extended SCCB for the Read CPU Info -(RCPUI) command. - -When this facility is enabled, the boundary violation response cannot -be a result from the RSCPI, RSCPI Forced, or RCPUI commands. - -In order to tolerate kernels that do not yet have full support for this -feature, a "fixed" offset to the start of the CPU Entries within the -Read SCP Info struct is set to allow for the original 248 max entries -when this feature is disabled. - -Additionally, this is introduced as a CPU feature to protect the guest -from migrating to a machine that does not support storing an extended -SCCB. This could otherwise hinder the VM from being able to read all -available CPU entries after migration (such as during re-ipl). - -Signed-off-by: Collin Walling -Reviewed-by: Thomas Huth -Acked-by: Cornelia Huck -Reviewed-by: Claudio Imbrenda -Message-Id: <20200915194416.107460-7-walling@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 1ecd6078f587cfadda8edc93d45b5072e35f2d17) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/sclp.c | 43 +++++++++++++++++++++++++---- - include/hw/s390x/sclp.h | 1 + - target/s390x/cpu_features_def.inc.h | 1 + - target/s390x/gen-features.c | 1 + - target/s390x/kvm.c | 8 ++++++ - 5 files changed, 48 insertions(+), 6 deletions(-) - -diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c -index 017989b3888..8d111628e04 100644 ---- a/hw/s390x/sclp.c -+++ b/hw/s390x/sclp.c -@@ -49,13 +49,30 @@ static inline bool sclp_command_code_valid(uint32_t code) - return false; - } - --static bool sccb_verify_boundary(uint64_t sccb_addr, uint16_t sccb_len) -+static bool sccb_verify_boundary(uint64_t sccb_addr, uint16_t sccb_len, -+ uint32_t code) - { - uint64_t sccb_max_addr = sccb_addr + sccb_len - 1; - uint64_t sccb_boundary = (sccb_addr & PAGE_MASK) + PAGE_SIZE; - -- if (sccb_max_addr < sccb_boundary) { -- return true; -+ switch (code & SCLP_CMD_CODE_MASK) { -+ case SCLP_CMDW_READ_SCP_INFO: -+ case SCLP_CMDW_READ_SCP_INFO_FORCED: -+ case SCLP_CMDW_READ_CPU_INFO: -+ /* -+ * An extended-length SCCB is only allowed for Read SCP/CPU Info and -+ * is allowed to exceed the 4k boundary. The respective commands will -+ * set the length field to the required length if an insufficient -+ * SCCB length is provided. 
-+ */ -+ if (s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB)) { -+ return true; -+ } -+ /* fallthrough */ -+ default: -+ if (sccb_max_addr < sccb_boundary) { -+ return true; -+ } - } - - return false; -@@ -80,6 +97,12 @@ static void prepare_cpu_entries(MachineState *ms, CPUEntry *entry, int *count) - - #define SCCB_REQ_LEN(s, max_cpus) (sizeof(s) + max_cpus * sizeof(CPUEntry)) - -+static inline bool ext_len_sccb_supported(SCCBHeader header) -+{ -+ return s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB) && -+ header.control_mask[2] & SCLP_VARIABLE_LENGTH_RESPONSE; -+} -+ - /* Provide information about the configuration, CPUs and storage */ - static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) - { -@@ -89,10 +112,15 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) - int rnsize, rnmax; - IplParameterBlock *ipib = s390_ipl_get_iplb(); - int required_len = SCCB_REQ_LEN(ReadInfo, machine->possible_cpus->len); -- int offset_cpu = offsetof(ReadInfo, entries); -+ int offset_cpu = s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB) ? 
-+ offsetof(ReadInfo, entries) : -+ SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET; - CPUEntry *entries_start = (void *)sccb + offset_cpu; - - if (be16_to_cpu(sccb->h.length) < required_len) { -+ if (ext_len_sccb_supported(sccb->h)) { -+ sccb->h.length = cpu_to_be16(required_len); -+ } - sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH); - return; - } -@@ -153,6 +181,9 @@ static void sclp_read_cpu_info(SCLPDevice *sclp, SCCB *sccb) - int required_len = SCCB_REQ_LEN(ReadCpuInfo, machine->possible_cpus->len); - - if (be16_to_cpu(sccb->h.length) < required_len) { -+ if (ext_len_sccb_supported(sccb->h)) { -+ sccb->h.length = cpu_to_be16(required_len); -+ } - sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH); - return; - } -@@ -249,7 +280,7 @@ int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, - goto out_write; - } - -- if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length))) { -+ if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length), code)) { - work_sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); - goto out_write; - } -@@ -302,7 +333,7 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) - goto out_write; - } - -- if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length))) { -+ if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length), code)) { - work_sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); - goto out_write; - } -diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h -index 55f53a46540..df2fa4169b0 100644 ---- a/include/hw/s390x/sclp.h -+++ b/include/hw/s390x/sclp.h -@@ -110,6 +110,7 @@ typedef struct CPUEntry { - uint8_t reserved1; - } QEMU_PACKED CPUEntry; - -+#define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET 128 - typedef struct ReadInfo { - SCCBHeader h; - uint16_t rnmax; -diff --git a/target/s390x/cpu_features_def.inc.h b/target/s390x/cpu_features_def.inc.h -index 60db28351d0..3548d65a69a 100644 ---- 
a/target/s390x/cpu_features_def.inc.h -+++ b/target/s390x/cpu_features_def.inc.h -@@ -97,6 +97,7 @@ DEF_FEAT(GUARDED_STORAGE, "gs", STFL, 133, "Guarded-storage facility") - DEF_FEAT(VECTOR_PACKED_DECIMAL, "vxpd", STFL, 134, "Vector packed decimal facility") - DEF_FEAT(VECTOR_ENH, "vxeh", STFL, 135, "Vector enhancements facility") - DEF_FEAT(MULTIPLE_EPOCH, "mepoch", STFL, 139, "Multiple-epoch facility") -+DEF_FEAT(EXTENDED_LENGTH_SCCB, "els", STFL, 140, "Extended-length SCCB facility") - DEF_FEAT(TEST_PENDING_EXT_INTERRUPTION, "tpei", STFL, 144, "Test-pending-external-interruption facility") - DEF_FEAT(INSERT_REFERENCE_BITS_MULT, "irbm", STFL, 145, "Insert-reference-bits-multiple facility") - DEF_FEAT(MSA_EXT_8, "msa8-base", STFL, 146, "Message-security-assist-extension-8 facility (excluding subfunctions)") -diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c -index 8ddeebc5441..6857f657fba 100644 ---- a/target/s390x/gen-features.c -+++ b/target/s390x/gen-features.c -@@ -522,6 +522,7 @@ static uint16_t full_GEN12_GA1[] = { - S390_FEAT_AP_QUEUE_INTERRUPT_CONTROL, - S390_FEAT_AP_FACILITIES_TEST, - S390_FEAT_AP, -+ S390_FEAT_EXTENDED_LENGTH_SCCB, - }; - - static uint16_t full_GEN12_GA2[] = { -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 0bbf8f81b09..ef437acb5c1 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -2441,6 +2441,14 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) - KVM_S390_VM_CRYPTO_ENABLE_APIE)) { - set_bit(S390_FEAT_AP, model->features); - } -+ -+ /* -+ * Extended-Length SCCB is handled entirely within QEMU. -+ * For PV guests this is completely fenced by the Ultravisor, as Service -+ * Call error checking and STFLE interpretation are handled via SIE. 
-+ */ -+ set_bit(S390_FEAT_EXTENDED_LENGTH_SCCB, model->features); -+ - /* strip of features that are not part of the maximum model */ - bitmap_and(model->features, model->features, model->def->full_feat, - S390_FEAT_MAX); --- -2.27.0 - diff --git a/SOURCES/kvm-s390-sclp-check-sccb-len-before-filling-in-data.patch b/SOURCES/kvm-s390-sclp-check-sccb-len-before-filling-in-data.patch deleted file mode 100644 index 6efc35f..0000000 --- a/SOURCES/kvm-s390-sclp-check-sccb-len-before-filling-in-data.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 6cc7c8dd7a6fac493c648c607bec4c38c0b275b6 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 11 Nov 2020 12:03:09 -0500 -Subject: [PATCH 09/18] s390/sclp: check sccb len before filling in data - -RH-Author: Thomas Huth -Message-id: <20201111120316.707489-6-thuth@redhat.com> -Patchwork-id: 99502 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 05/12] s390/sclp: check sccb len before filling in data -Bugzilla: 1798506 -RH-Acked-by: Jens Freimann -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Collin Walling - -The SCCB must be checked for a sufficient length before it is filled -with any data. If the length is insufficient, then the SCLP command -is suppressed and the proper response code is set in the SCCB header. - -While we're at it, let's cleanup the length check by placing the -calculation inside a macro. - -Fixes: 832be0d8a3bb ("s390x: sclp: Report insufficient SCCB length") -Signed-off-by: Collin Walling -Reviewed-by: Janosch Frank -Reviewed-by: David Hildenbrand -Reviewed-by: Cornelia Huck -Reviewed-by: Thomas Huth -Reviewed-by: Claudio Imbrenda -Message-Id: <20200915194416.107460-5-walling@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 0260b97824495ebfacfa8bbae0be10b0ef986bf6) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/sclp.c | 26 ++++++++++++++------------ - 1 file changed, 14 insertions(+), 12 deletions(-) - -diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c -index cf1292beb22..2b4c6c5cfad 100644 ---- a/hw/s390x/sclp.c -+++ b/hw/s390x/sclp.c -@@ -78,6 +78,8 @@ static void prepare_cpu_entries(MachineState *ms, CPUEntry *entry, int *count) - } - } - -+#define SCCB_REQ_LEN(s, max_cpus) (sizeof(s) + max_cpus * sizeof(CPUEntry)) -+ - /* Provide information about the configuration, CPUs and storage */ - static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) - { -@@ -86,6 +88,12 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) - int cpu_count; - int rnsize, rnmax; - IplParameterBlock *ipib = s390_ipl_get_iplb(); -+ int required_len = SCCB_REQ_LEN(ReadInfo, machine->possible_cpus->len); -+ -+ if (be16_to_cpu(sccb->h.length) < required_len) { -+ sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH); -+ return; -+ } - - /* CPU information */ - prepare_cpu_entries(machine, read_info->entries, &cpu_count); -@@ -95,12 +103,6 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) - - read_info->ibc_val = cpu_to_be32(s390_get_ibc_val()); - -- if (be16_to_cpu(sccb->h.length) < -- (sizeof(ReadInfo) + cpu_count * sizeof(CPUEntry))) { -- sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH); -- return; -- } -- - /* Configuration Characteristic (Extension) */ - s390_get_feat_block(S390_FEAT_TYPE_SCLP_CONF_CHAR, - read_info->conf_char); -@@ -146,18 +148,18 @@ static void sclp_read_cpu_info(SCLPDevice *sclp, SCCB *sccb) - MachineState *machine = MACHINE(qdev_get_machine()); - ReadCpuInfo *cpu_info = (ReadCpuInfo *) sccb; - int cpu_count; -+ int required_len = SCCB_REQ_LEN(ReadCpuInfo, machine->possible_cpus->len); -+ -+ if (be16_to_cpu(sccb->h.length) < required_len) { -+ sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH); -+ return; -+ } - - prepare_cpu_entries(machine, cpu_info->entries, &cpu_count); - 
cpu_info->nr_configured = cpu_to_be16(cpu_count); - cpu_info->offset_configured = cpu_to_be16(offsetof(ReadCpuInfo, entries)); - cpu_info->nr_standby = cpu_to_be16(0); - -- if (be16_to_cpu(sccb->h.length) < -- (sizeof(ReadCpuInfo) + cpu_count * sizeof(CPUEntry))) { -- sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH); -- return; -- } -- - /* The standby offset is 16-byte for each CPU */ - cpu_info->offset_standby = cpu_to_be16(cpu_info->offset_configured - + cpu_info->nr_configured*sizeof(CPUEntry)); --- -2.27.0 - diff --git a/SOURCES/kvm-s390-sclp-get-machine-once-during-read-scp-cpu-info.patch b/SOURCES/kvm-s390-sclp-get-machine-once-during-read-scp-cpu-info.patch deleted file mode 100644 index 09c72b6..0000000 --- a/SOURCES/kvm-s390-sclp-get-machine-once-during-read-scp-cpu-info.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 44e8cdba29b932ee6fff7a2d00b09e6e78c3a0ef Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 11 Nov 2020 12:03:06 -0500 -Subject: [PATCH 06/18] s390/sclp: get machine once during read scp/cpu info - -RH-Author: Thomas Huth -Message-id: <20201111120316.707489-3-thuth@redhat.com> -Patchwork-id: 99499 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 02/12] s390/sclp: get machine once during read scp/cpu info -Bugzilla: 1798506 -RH-Acked-by: Jens Freimann -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Collin Walling - -Functions within read scp/cpu info will need access to the machine -state. Let's make a call to retrieve the machine state once and -pass the appropriate data to the respective functions. - -Signed-off-by: Collin Walling -Reviewed-by: David Hildenbrand -Reviewed-by: Thomas Huth -Reviewed-by: Janosch Frank -Reviewed-by: Cornelia Huck -Reviewed-by: Claudio Imbrenda -Message-Id: <20200915194416.107460-2-walling@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 912d70d2755cb9b3144eeed4014580ebc5485ce6) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/sclp.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c -index d8ae207731f..fe7d0fece80 100644 ---- a/hw/s390x/sclp.c -+++ b/hw/s390x/sclp.c -@@ -49,9 +49,8 @@ static inline bool sclp_command_code_valid(uint32_t code) - return false; - } - --static void prepare_cpu_entries(SCLPDevice *sclp, CPUEntry *entry, int *count) -+static void prepare_cpu_entries(MachineState *ms, CPUEntry *entry, int *count) - { -- MachineState *ms = MACHINE(qdev_get_machine()); - uint8_t features[SCCB_CPU_FEATURE_LEN] = { 0 }; - int i; - -@@ -77,7 +76,7 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) - IplParameterBlock *ipib = s390_ipl_get_iplb(); - - /* CPU information */ -- prepare_cpu_entries(sclp, read_info->entries, &cpu_count); -+ prepare_cpu_entries(machine, read_info->entries, &cpu_count); - read_info->entries_cpu = cpu_to_be16(cpu_count); - read_info->offset_cpu = cpu_to_be16(offsetof(ReadInfo, entries)); - read_info->highest_cpu = cpu_to_be16(machine->smp.max_cpus - 1); -@@ -132,10 +131,11 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) - /* Provide information about the CPU */ - static void sclp_read_cpu_info(SCLPDevice *sclp, SCCB *sccb) - { -+ MachineState *machine = MACHINE(qdev_get_machine()); - ReadCpuInfo *cpu_info = (ReadCpuInfo *) sccb; - int cpu_count; - -- prepare_cpu_entries(sclp, cpu_info->entries, &cpu_count); -+ prepare_cpu_entries(machine, cpu_info->entries, &cpu_count); - cpu_info->nr_configured = cpu_to_be16(cpu_count); - cpu_info->offset_configured = cpu_to_be16(offsetof(ReadCpuInfo, entries)); - cpu_info->nr_standby = cpu_to_be16(0); --- -2.27.0 - diff --git a/SOURCES/kvm-s390-sclp-improve-special-wait-psw-logic.patch b/SOURCES/kvm-s390-sclp-improve-special-wait-psw-logic.patch deleted file mode 100644 index 2040d5c..0000000 --- a/SOURCES/kvm-s390-sclp-improve-special-wait-psw-logic.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 
cd7da3cf1b19fef0a497fd556562040a85e579a7 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:57 -0400 -Subject: [PATCH 15/42] s390/sclp: improve special wait psw logic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-16-thuth@redhat.com> -Patchwork-id: 97037 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 15/38] s390/sclp: improve special wait psw logic -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Christian Borntraeger - -There is a special quiesce PSW that we check for "shutdown". Otherwise disabled -wait is detected as "crashed". Architecturally we must only check PSW bits -116-127. Fix this. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Christian Borntraeger -Message-Id: <1582204582-22995-1-git-send-email-borntraeger@de.ibm.com> -Reviewed-by: David Hildenbrand -Acked-by: Janosch Frank -Signed-off-by: Cornelia Huck -(cherry picked from commit 8b51c0961cc13e55b26bb6665ec3a341abdc7658) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/s390x/helper.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/target/s390x/helper.c b/target/s390x/helper.c -index a3a49164e4..6808dfda01 100644 ---- a/target/s390x/helper.c -+++ b/target/s390x/helper.c -@@ -89,7 +89,7 @@ hwaddr s390_cpu_get_phys_addr_debug(CPUState *cs, vaddr vaddr) - static inline bool is_special_wait_psw(uint64_t psw_addr) - { - /* signal quiesce */ -- return psw_addr == 0xfffUL; -+ return (psw_addr & 0xfffUL) == 0xfffUL; - } - - void s390_handle_wait(S390CPU *cpu) --- -2.27.0 - diff --git a/SOURCES/kvm-s390-sclp-read-sccb-from-mem-based-on-provided-lengt.patch b/SOURCES/kvm-s390-sclp-read-sccb-from-mem-based-on-provided-lengt.patch deleted file mode 100644 index adb65c7..0000000 --- a/SOURCES/kvm-s390-sclp-read-sccb-from-mem-based-on-provided-lengt.patch +++ /dev/null @@ -1,170 +0,0 @@ -From 212c129b82f0a53725a4167303de2ee0a865f82d Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 11 Nov 2020 12:03:08 -0500 -Subject: [PATCH 08/18] s390/sclp: read sccb from mem based on provided length - -RH-Author: Thomas Huth -Message-id: <20201111120316.707489-5-thuth@redhat.com> -Patchwork-id: 99501 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 04/12] s390/sclp: read sccb from mem based on provided length -Bugzilla: 1798506 -RH-Acked-by: Jens Freimann -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Collin Walling - -The header contained within the SCCB passed to the SCLP service call -contains the actual length of the SCCB. Instead of allocating a static -4K size for the work sccb, let's allow for a variable size determined -by the value in the header. The proper checks are already in place to -ensure the SCCB length is sufficent to store a full response and that -the length does not cross any explicitly-set boundaries. 
- -Signed-off-by: Collin Walling -Reviewed-by: Thomas Huth -Reviewed-by: Claudio Imbrenda -Message-Id: <20200915194416.107460-4-walling@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit c1db53a5910f988eeb32f031c53a50f3373fd824) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/event-facility.c | 2 +- - hw/s390x/sclp.c | 55 ++++++++++++++++++++++----------------- - include/hw/s390x/sclp.h | 2 +- - 3 files changed, 33 insertions(+), 26 deletions(-) - -diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c -index 66205697ae7..8aa7017f06b 100644 ---- a/hw/s390x/event-facility.c -+++ b/hw/s390x/event-facility.c -@@ -215,7 +215,7 @@ static uint16_t handle_sccb_read_events(SCLPEventFacility *ef, SCCB *sccb, - - event_buf = &red->ebh; - event_buf->length = 0; -- slen = sizeof(sccb->data); -+ slen = sccb_data_len(sccb); - - rc = SCLP_RC_NO_EVENT_BUFFERS_STORED; - -diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c -index 38278497319..cf1292beb22 100644 ---- a/hw/s390x/sclp.c -+++ b/hw/s390x/sclp.c -@@ -231,25 +231,29 @@ int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, - { - SCLPDevice *sclp = get_sclp_device(); - SCLPDeviceClass *sclp_c = SCLP_GET_CLASS(sclp); -- SCCB work_sccb; -- hwaddr sccb_len = sizeof(SCCB); -+ SCCBHeader header; -+ g_autofree SCCB *work_sccb = NULL; - -- s390_cpu_pv_mem_read(env_archcpu(env), 0, &work_sccb, sccb_len); -+ s390_cpu_pv_mem_read(env_archcpu(env), 0, &header, sizeof(SCCBHeader)); -+ -+ work_sccb = g_malloc0(be16_to_cpu(header.length)); -+ s390_cpu_pv_mem_read(env_archcpu(env), 0, work_sccb, -+ be16_to_cpu(header.length)); - - if (!sclp_command_code_valid(code)) { -- work_sccb.h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); -+ work_sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); - goto out_write; - } - -- if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb.h.length))) { -- work_sccb.h.response_code = 
cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); -+ if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length))) { -+ work_sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); - goto out_write; - } - -- sclp_c->execute(sclp, &work_sccb, code); -+ sclp_c->execute(sclp, work_sccb, code); - out_write: -- s390_cpu_pv_mem_write(env_archcpu(env), 0, &work_sccb, -- be16_to_cpu(work_sccb.h.length)); -+ s390_cpu_pv_mem_write(env_archcpu(env), 0, work_sccb, -+ be16_to_cpu(work_sccb->h.length)); - sclp_c->service_interrupt(sclp, SCLP_PV_DUMMY_ADDR); - return 0; - } -@@ -258,9 +262,8 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) - { - SCLPDevice *sclp = get_sclp_device(); - SCLPDeviceClass *sclp_c = SCLP_GET_CLASS(sclp); -- SCCB work_sccb; -- -- hwaddr sccb_len = sizeof(SCCB); -+ SCCBHeader header; -+ g_autofree SCCB *work_sccb = NULL; - - /* first some basic checks on program checks */ - if (env->psw.mask & PSW_MASK_PSTATE) { -@@ -274,32 +277,36 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) - return -PGM_SPECIFICATION; - } - -+ /* the header contains the actual length of the sccb */ -+ cpu_physical_memory_read(sccb, &header, sizeof(SCCBHeader)); -+ -+ /* Valid sccb sizes */ -+ if (be16_to_cpu(header.length) < sizeof(SCCBHeader)) { -+ return -PGM_SPECIFICATION; -+ } -+ - /* - * we want to work on a private copy of the sccb, to prevent guests - * from playing dirty tricks by modifying the memory content after - * the host has checked the values - */ -- cpu_physical_memory_read(sccb, &work_sccb, sccb_len); -- -- /* Valid sccb sizes */ -- if (be16_to_cpu(work_sccb.h.length) < sizeof(SCCBHeader)) { -- return -PGM_SPECIFICATION; -- } -+ work_sccb = g_malloc0(be16_to_cpu(header.length)); -+ cpu_physical_memory_read(sccb, work_sccb, be16_to_cpu(header.length)); - - if (!sclp_command_code_valid(code)) { -- work_sccb.h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); -+ work_sccb->h.response_code = 
cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); - goto out_write; - } - -- if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb.h.length))) { -- work_sccb.h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); -+ if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length))) { -+ work_sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); - goto out_write; - } - -- sclp_c->execute(sclp, &work_sccb, code); -+ sclp_c->execute(sclp, work_sccb, code); - out_write: -- cpu_physical_memory_write(sccb, &work_sccb, -- be16_to_cpu(work_sccb.h.length)); -+ cpu_physical_memory_write(sccb, work_sccb, -+ be16_to_cpu(work_sccb->h.length)); - - sclp_c->service_interrupt(sclp, sccb); - -diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h -index c0a3faa37d7..55f53a46540 100644 ---- a/include/hw/s390x/sclp.h -+++ b/include/hw/s390x/sclp.h -@@ -177,7 +177,7 @@ typedef struct IoaCfgSccb { - - typedef struct SCCB { - SCCBHeader h; -- char data[SCCB_DATA_LEN]; -+ char data[]; - } QEMU_PACKED SCCB; - - #define TYPE_SCLP "sclp" --- -2.27.0 - diff --git a/SOURCES/kvm-s390-sclp-rework-sclp-boundary-checks.patch b/SOURCES/kvm-s390-sclp-rework-sclp-boundary-checks.patch deleted file mode 100644 index 9bb3a55..0000000 --- a/SOURCES/kvm-s390-sclp-rework-sclp-boundary-checks.patch +++ /dev/null @@ -1,80 +0,0 @@ -From bc395a979a00bb3e16f3bd92b5b2006db4a5aee3 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 11 Nov 2020 12:03:07 -0500 -Subject: [PATCH 07/18] s390/sclp: rework sclp boundary checks - -RH-Author: Thomas Huth -Message-id: <20201111120316.707489-4-thuth@redhat.com> -Patchwork-id: 99500 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 03/12] s390/sclp: rework sclp boundary checks -Bugzilla: 1798506 -RH-Acked-by: Jens Freimann -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Collin Walling - -Rework the SCLP boundary check to account for different SCLP commands -(eventually) allowing different boundary sizes. 
- -Signed-off-by: Collin Walling -Reviewed-by: Cornelia Huck -Reviewed-by: Thomas Huth -Acked-by: Janosch Frank -Reviewed-by: Claudio Imbrenda -Message-Id: <20200915194416.107460-3-walling@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit db13387ca01a69d870cc16dd232375c2603596f2) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/sclp.c | 19 ++++++++++++++++++- - 1 file changed, 18 insertions(+), 1 deletion(-) - -diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c -index fe7d0fece80..38278497319 100644 ---- a/hw/s390x/sclp.c -+++ b/hw/s390x/sclp.c -@@ -49,6 +49,18 @@ static inline bool sclp_command_code_valid(uint32_t code) - return false; - } - -+static bool sccb_verify_boundary(uint64_t sccb_addr, uint16_t sccb_len) -+{ -+ uint64_t sccb_max_addr = sccb_addr + sccb_len - 1; -+ uint64_t sccb_boundary = (sccb_addr & PAGE_MASK) + PAGE_SIZE; -+ -+ if (sccb_max_addr < sccb_boundary) { -+ return true; -+ } -+ -+ return false; -+} -+ - static void prepare_cpu_entries(MachineState *ms, CPUEntry *entry, int *count) - { - uint8_t features[SCCB_CPU_FEATURE_LEN] = { 0 }; -@@ -229,6 +241,11 @@ int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, - goto out_write; - } - -+ if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb.h.length))) { -+ work_sccb.h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); -+ goto out_write; -+ } -+ - sclp_c->execute(sclp, &work_sccb, code); - out_write: - s390_cpu_pv_mem_write(env_archcpu(env), 0, &work_sccb, -@@ -274,7 +291,7 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) - goto out_write; - } - -- if ((sccb + be16_to_cpu(work_sccb.h.length)) > ((sccb & PAGE_MASK) + PAGE_SIZE)) { -+ if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb.h.length))) { - work_sccb.h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); - goto out_write; - } --- -2.27.0 - diff --git a/SOURCES/kvm-s390-sclp-use-cpu-offset-to-locate-cpu-entries.patch 
b/SOURCES/kvm-s390-sclp-use-cpu-offset-to-locate-cpu-entries.patch deleted file mode 100644 index cb99830..0000000 --- a/SOURCES/kvm-s390-sclp-use-cpu-offset-to-locate-cpu-entries.patch +++ /dev/null @@ -1,67 +0,0 @@ -From adf66c037e60d66f864960b24c746b767efb10b9 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 11 Nov 2020 12:03:10 -0500 -Subject: [PATCH 10/18] s390/sclp: use cpu offset to locate cpu entries - -RH-Author: Thomas Huth -Message-id: <20201111120316.707489-7-thuth@redhat.com> -Patchwork-id: 99503 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 06/12] s390/sclp: use cpu offset to locate cpu entries -Bugzilla: 1798506 -RH-Acked-by: Jens Freimann -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Collin Walling - -The start of the CPU entry region in the Read SCP Info response data is -denoted by the offset_cpu field. As such, QEMU needs to begin creating -entries at this address. - -This is in preparation for when Read SCP Info inevitably introduces new -bytes that push the start of the CPUEntry field further away. - -Read CPU Info is unlikely to ever change, so let's not bother -accounting for the offset there. - -Signed-off-by: Collin Walling -Reviewed-by: Thomas Huth -Reviewed-by: Cornelia Huck -Reviewed-by: Claudio Imbrenda -Message-Id: <20200915194416.107460-6-walling@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 1a7a568859473b1cda39a015493c5c82bb200281) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/sclp.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c -index 2b4c6c5cfad..017989b3888 100644 ---- a/hw/s390x/sclp.c -+++ b/hw/s390x/sclp.c -@@ -89,6 +89,8 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) - int rnsize, rnmax; - IplParameterBlock *ipib = s390_ipl_get_iplb(); - int required_len = SCCB_REQ_LEN(ReadInfo, machine->possible_cpus->len); -+ int offset_cpu = offsetof(ReadInfo, entries); -+ CPUEntry *entries_start = (void *)sccb + offset_cpu; - - if (be16_to_cpu(sccb->h.length) < required_len) { - sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH); -@@ -96,9 +98,9 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) - } - - /* CPU information */ -- prepare_cpu_entries(machine, read_info->entries, &cpu_count); -+ prepare_cpu_entries(machine, entries_start, &cpu_count); - read_info->entries_cpu = cpu_to_be16(cpu_count); -- read_info->offset_cpu = cpu_to_be16(offsetof(ReadInfo, entries)); -+ read_info->offset_cpu = cpu_to_be16(offset_cpu); - read_info->highest_cpu = cpu_to_be16(machine->smp.max_cpus - 1); - - read_info->ibc_val = cpu_to_be32(s390_get_ibc_val()); --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-Add-SIDA-memory-ops.patch b/SOURCES/kvm-s390x-Add-SIDA-memory-ops.patch deleted file mode 100644 index 1b566d7..0000000 --- a/SOURCES/kvm-s390x-Add-SIDA-memory-ops.patch +++ /dev/null @@ -1,150 +0,0 @@ -From ebcd74c2267d69fe09ca03cb8bfed7bef5ea3a85 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:08 -0400 -Subject: [PATCH 26/42] s390x: Add SIDA memory ops - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-27-thuth@redhat.com> -Patchwork-id: 97033 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 26/38] s390x: Add SIDA memory ops -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -Protected guests save the instruction control 
blocks in the SIDA -instead of QEMU/KVM directly accessing the guest's memory. - -Let's introduce new functions to access the SIDA. - -The memops for doing so are available with KVM_CAP_S390_PROTECTED, so -let's check for that. - -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Reviewed-by: Christian Borntraeger -Reviewed-by: Claudio Imbrenda -Reviewed-by: Cornelia Huck -Message-Id: <20200319131921.2367-8-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 1cca8265499d394d9ed4bfb75bd6e7265b529f89) -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/cpu.h | 7 ++++++- - target/s390x/kvm.c | 26 ++++++++++++++++++++++++++ - target/s390x/kvm_s390x.h | 2 ++ - target/s390x/mmu_helper.c | 14 ++++++++++++++ - 4 files changed, 48 insertions(+), 1 deletion(-) - -diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h -index 1ff84e6b3a..edf8391504 100644 ---- a/target/s390x/cpu.h -+++ b/target/s390x/cpu.h -@@ -828,7 +828,12 @@ int s390_cpu_virt_mem_rw(S390CPU *cpu, vaddr laddr, uint8_t ar, void *hostbuf, - #define s390_cpu_virt_mem_check_write(cpu, laddr, ar, len) \ - s390_cpu_virt_mem_rw(cpu, laddr, ar, NULL, len, true) - void s390_cpu_virt_mem_handle_exc(S390CPU *cpu, uintptr_t ra); -- -+int s390_cpu_pv_mem_rw(S390CPU *cpu, unsigned int offset, void *hostbuf, -+ int len, bool is_write); -+#define s390_cpu_pv_mem_read(cpu, offset, dest, len) \ -+ s390_cpu_pv_mem_rw(cpu, offset, dest, len, false) -+#define s390_cpu_pv_mem_write(cpu, offset, dest, len) \ -+ s390_cpu_pv_mem_rw(cpu, offset, dest, len, true) - - /* sigp.c */ - int s390_cpu_restart(S390CPU *cpu); -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index af50b2c253..f67bb5ce2c 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -154,6 +154,7 @@ static int cap_ri; - static int cap_gs; - static int cap_hpage_1m; - static int cap_vcpu_resets; -+static int cap_protected; - - static int active_cmma; - -@@ -351,6 +352,7 @@ int kvm_arch_init(MachineState *ms, 
KVMState *s) - cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); - cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); - cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); -+ cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); - - if (!kvm_check_extension(s, KVM_CAP_S390_GMAP) - || !kvm_check_extension(s, KVM_CAP_S390_COW)) { -@@ -848,6 +850,30 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, - return ret; - } - -+int kvm_s390_mem_op_pv(S390CPU *cpu, uint64_t offset, void *hostbuf, -+ int len, bool is_write) -+{ -+ struct kvm_s390_mem_op mem_op = { -+ .sida_offset = offset, -+ .size = len, -+ .op = is_write ? KVM_S390_MEMOP_SIDA_WRITE -+ : KVM_S390_MEMOP_SIDA_READ, -+ .buf = (uint64_t)hostbuf, -+ }; -+ int ret; -+ -+ if (!cap_mem_op || !cap_protected) { -+ return -ENOSYS; -+ } -+ -+ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_S390_MEM_OP, &mem_op); -+ if (ret < 0) { -+ error_report("KVM_S390_MEM_OP failed: %s", strerror(-ret)); -+ abort(); -+ } -+ return ret; -+} -+ - /* - * Legacy layout for s390: - * Older S390 KVM requires the topmost vma of the RAM to be -diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h -index dea813f450..6ab17c81b7 100644 ---- a/target/s390x/kvm_s390x.h -+++ b/target/s390x/kvm_s390x.h -@@ -19,6 +19,8 @@ void kvm_s390_vcpu_interrupt(S390CPU *cpu, struct kvm_s390_irq *irq); - void kvm_s390_access_exception(S390CPU *cpu, uint16_t code, uint64_t te_code); - int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, - int len, bool is_write); -+int kvm_s390_mem_op_pv(S390CPU *cpu, vaddr addr, void *hostbuf, int len, -+ bool is_write); - void kvm_s390_program_interrupt(S390CPU *cpu, uint16_t code); - int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state); - void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu); -diff --git a/target/s390x/mmu_helper.c b/target/s390x/mmu_helper.c -index c9f3f34750..ec8befbdc8 100644 ---- a/target/s390x/mmu_helper.c -+++ 
b/target/s390x/mmu_helper.c -@@ -474,6 +474,20 @@ static int translate_pages(S390CPU *cpu, vaddr addr, int nr_pages, - return 0; - } - -+int s390_cpu_pv_mem_rw(S390CPU *cpu, unsigned int offset, void *hostbuf, -+ int len, bool is_write) -+{ -+ int ret; -+ -+ if (kvm_enabled()) { -+ ret = kvm_s390_mem_op_pv(cpu, offset, hostbuf, len, is_write); -+ } else { -+ /* Protected Virtualization is a KVM/Hardware only feature */ -+ g_assert_not_reached(); -+ } -+ return ret; -+} -+ - /** - * s390_cpu_virt_mem_rw: - * @laddr: the logical start address --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-Add-missing-vcpu-reset-functions.patch b/SOURCES/kvm-s390x-Add-missing-vcpu-reset-functions.patch deleted file mode 100644 index 9ce071e..0000000 --- a/SOURCES/kvm-s390x-Add-missing-vcpu-reset-functions.patch +++ /dev/null @@ -1,176 +0,0 @@ -From e11643b5363262e9f809762a1f2bb5c4a8f26c2a Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:56 -0400 -Subject: [PATCH 14/42] s390x: Add missing vcpu reset functions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-15-thuth@redhat.com> -Patchwork-id: 97023 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 14/38] s390x: Add missing vcpu reset functions -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -Up to now we only had an ioctl to reset vcpu data QEMU couldn't reach -for the initial reset, which was also called for the clear reset. To -be architecture compliant, we also need to clear local interrupts on a -normal reset. - -Because of this and the upcoming protvirt support we need to add -ioctls for the missing clear and normal resets. 
- -Signed-off-by: Janosch Frank -Reviewed-by: Thomas Huth -Acked-by: David Hildenbrand -Message-Id: <20200214151636.8764-3-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit b91a03946e0f65ddd22927dd80ca1276bf89c5af) -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/cpu.c | 14 ++++++++++++-- - target/s390x/kvm-stub.c | 10 +++++++++- - target/s390x/kvm.c | 42 ++++++++++++++++++++++++++++++++-------- - target/s390x/kvm_s390x.h | 4 +++- - 4 files changed, 58 insertions(+), 12 deletions(-) - -diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c -index e538a4a3e2..c0dd502b84 100644 ---- a/target/s390x/cpu.c -+++ b/target/s390x/cpu.c -@@ -144,8 +144,18 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) - } - - /* Reset state inside the kernel that we cannot access yet from QEMU. */ -- if (kvm_enabled() && type != S390_CPU_RESET_NORMAL) { -- kvm_s390_reset_vcpu(cpu); -+ if (kvm_enabled()) { -+ switch (type) { -+ case S390_CPU_RESET_CLEAR: -+ kvm_s390_reset_vcpu_clear(cpu); -+ break; -+ case S390_CPU_RESET_INITIAL: -+ kvm_s390_reset_vcpu_initial(cpu); -+ break; -+ case S390_CPU_RESET_NORMAL: -+ kvm_s390_reset_vcpu_normal(cpu); -+ break; -+ } - } - } - -diff --git a/target/s390x/kvm-stub.c b/target/s390x/kvm-stub.c -index 5152e2bdf1..c4cd497f85 100644 ---- a/target/s390x/kvm-stub.c -+++ b/target/s390x/kvm-stub.c -@@ -83,7 +83,15 @@ void kvm_s390_cmma_reset(void) - { - } - --void kvm_s390_reset_vcpu(S390CPU *cpu) -+void kvm_s390_reset_vcpu_initial(S390CPU *cpu) -+{ -+} -+ -+void kvm_s390_reset_vcpu_clear(S390CPU *cpu) -+{ -+} -+ -+void kvm_s390_reset_vcpu_normal(S390CPU *cpu) - { - } - -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 1c5bc7a2f9..75d82af6fc 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -151,6 +151,7 @@ static int cap_s390_irq; - static int cap_ri; - static int cap_gs; - static int cap_hpage_1m; -+static int cap_vcpu_resets; - - static int active_cmma; - -@@ -342,6 +343,7 @@ int 
kvm_arch_init(MachineState *ms, KVMState *s) - cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); - cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); - cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); -+ cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); - - if (!kvm_check_extension(s, KVM_CAP_S390_GMAP) - || !kvm_check_extension(s, KVM_CAP_S390_COW)) { -@@ -403,17 +405,41 @@ int kvm_arch_destroy_vcpu(CPUState *cs) - return 0; - } - --void kvm_s390_reset_vcpu(S390CPU *cpu) -+static void kvm_s390_reset_vcpu(S390CPU *cpu, unsigned long type) - { - CPUState *cs = CPU(cpu); - -- /* The initial reset call is needed here to reset in-kernel -- * vcpu data that we can't access directly from QEMU -- * (i.e. with older kernels which don't support sync_regs/ONE_REG). -- * Before this ioctl cpu_synchronize_state() is called in common kvm -- * code (kvm-all) */ -- if (kvm_vcpu_ioctl(cs, KVM_S390_INITIAL_RESET, NULL)) { -- error_report("Initial CPU reset failed on CPU %i", cs->cpu_index); -+ /* -+ * The reset call is needed here to reset in-kernel vcpu data that -+ * we can't access directly from QEMU (i.e. with older kernels -+ * which don't support sync_regs/ONE_REG). Before this ioctl -+ * cpu_synchronize_state() is called in common kvm code -+ * (kvm-all). 
-+ */ -+ if (kvm_vcpu_ioctl(cs, type)) { -+ error_report("CPU reset failed on CPU %i type %lx", -+ cs->cpu_index, type); -+ } -+} -+ -+void kvm_s390_reset_vcpu_initial(S390CPU *cpu) -+{ -+ kvm_s390_reset_vcpu(cpu, KVM_S390_INITIAL_RESET); -+} -+ -+void kvm_s390_reset_vcpu_clear(S390CPU *cpu) -+{ -+ if (cap_vcpu_resets) { -+ kvm_s390_reset_vcpu(cpu, KVM_S390_CLEAR_RESET); -+ } else { -+ kvm_s390_reset_vcpu(cpu, KVM_S390_INITIAL_RESET); -+ } -+} -+ -+void kvm_s390_reset_vcpu_normal(S390CPU *cpu) -+{ -+ if (cap_vcpu_resets) { -+ kvm_s390_reset_vcpu(cpu, KVM_S390_NORMAL_RESET); - } - } - -diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h -index caf985955b..0b21789796 100644 ---- a/target/s390x/kvm_s390x.h -+++ b/target/s390x/kvm_s390x.h -@@ -34,7 +34,9 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch, - int vq, bool assign); - int kvm_s390_cmma_active(void); - void kvm_s390_cmma_reset(void); --void kvm_s390_reset_vcpu(S390CPU *cpu); -+void kvm_s390_reset_vcpu_clear(S390CPU *cpu); -+void kvm_s390_reset_vcpu_normal(S390CPU *cpu); -+void kvm_s390_reset_vcpu_initial(S390CPU *cpu); - int kvm_s390_set_mem_limit(uint64_t new_limit, uint64_t *hw_limit); - void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp); - void kvm_s390_crypto_reset(void); --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-Add-unpack-facility-feature-to-GA1.patch b/SOURCES/kvm-s390x-Add-unpack-facility-feature-to-GA1.patch deleted file mode 100644 index 8ffb7b0..0000000 --- a/SOURCES/kvm-s390x-Add-unpack-facility-feature-to-GA1.patch +++ /dev/null @@ -1,76 +0,0 @@ -From ab670456375f0d9b9b2d219fd497d04ec0009e1d Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:16 -0400 -Subject: [PATCH 34/42] s390x: Add unpack facility feature to GA1 - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-35-thuth@redhat.com> -Patchwork-id: 97052 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 34/38] s390x: Add unpack facility feature to GA1 -Bugzilla: 
1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Christian Borntraeger - -The unpack facility is an indication that diagnose 308 subcodes 8-10 -are available to the guest. That means, that the guest can put itself -into protected mode. - -Once it is in protected mode, the hardware stops any attempt of VM -introspection by the hypervisor. - -Some features are currently not supported in protected mode: - * vfio devices - * Migration - * Huge page backings - -Signed-off-by: Christian Borntraeger -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Reviewed-by: Claudio Imbrenda -Reviewed-by: Cornelia Huck -Message-Id: <20200319131921.2367-17-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 572c0826615737f1c095b1b6d9e381ec40f72eb5) -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/gen-features.c | 1 + - target/s390x/kvm.c | 8 ++++++++ - 2 files changed, 9 insertions(+) - -diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c -index 6278845b12..8ddeebc544 100644 ---- a/target/s390x/gen-features.c -+++ b/target/s390x/gen-features.c -@@ -562,6 +562,7 @@ static uint16_t full_GEN15_GA1[] = { - S390_FEAT_GROUP_MSA_EXT_9, - S390_FEAT_GROUP_MSA_EXT_9_PCKMO, - S390_FEAT_ETOKEN, -+ S390_FEAT_UNPACK, - }; - - /* Default features (in order of release) -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 56fe60c49c..84d7cadd09 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -2407,6 +2407,14 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) - clear_bit(S390_FEAT_BPB, model->features); - } - -+ /* -+ * If we have support for protected virtualization, indicate -+ * the protected virtualization IPL unpack facility. 
-+ */ -+ if (cap_protected) { -+ set_bit(S390_FEAT_UNPACK, model->features); -+ } -+ - /* We emulate a zPCI bus and AEN, therefore we don't need HW support */ - set_bit(S390_FEAT_ZPCI, model->features); - set_bit(S390_FEAT_ADAPTER_EVENT_NOTIFICATION, model->features); --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-Beautify-diag308-handling.patch b/SOURCES/kvm-s390x-Beautify-diag308-handling.patch deleted file mode 100644 index 2ffe6a3..0000000 --- a/SOURCES/kvm-s390x-Beautify-diag308-handling.patch +++ /dev/null @@ -1,130 +0,0 @@ -From da81f2b579987ea12929f0ec803716bc16a93df7 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:49 -0400 -Subject: [PATCH 07/42] s390x: Beautify diag308 handling -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-8-thuth@redhat.com> -Patchwork-id: 97022 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 07/38] s390x: Beautify diag308 handling -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -Let's improve readability by: -* Using constants for the subcodes -* Moving parameter checking into a function -* Removing subcode > 6 check as the default case catches that - -Signed-off-by: Janosch Frank -Reviewed-by: Cornelia Huck -Reviewed-by: Thomas Huth -Reviewed-by: David Hildenbrand -Message-Id: <20191127175046.4911-6-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 0b7fd817e0f383760e37ca9286150d5816cf0594) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/s390x/diag.c | 54 +++++++++++++++++++++++++++------------------ - 1 file changed, 32 insertions(+), 22 deletions(-) - -diff --git a/target/s390x/diag.c b/target/s390x/diag.c -index 53c2f81f2a..b5aec06d6b 100644 ---- a/target/s390x/diag.c -+++ b/target/s390x/diag.c -@@ -53,6 +53,29 @@ int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3) - #define DIAG_308_RC_NO_CONF 0x0102 - #define DIAG_308_RC_INVALID 0x0402 - -+#define DIAG308_RESET_MOD_CLR 0 -+#define DIAG308_RESET_LOAD_NORM 1 -+#define DIAG308_LOAD_CLEAR 3 -+#define DIAG308_LOAD_NORMAL_DUMP 4 -+#define DIAG308_SET 5 -+#define DIAG308_STORE 6 -+ -+static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr, -+ uintptr_t ra, bool write) -+{ -+ if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) { -+ s390_program_interrupt(env, PGM_SPECIFICATION, ra); -+ return -1; -+ } -+ if (!address_space_access_valid(&address_space_memory, addr, -+ sizeof(IplParameterBlock), write, -+ MEMTXATTRS_UNSPECIFIED)) { -+ s390_program_interrupt(env, PGM_ADDRESSING, ra); -+ return -1; -+ } -+ return 0; -+} -+ - void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) - { - CPUState *cs = env_cpu(env); -@@ -65,30 +88,24 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) - return; - } - -- if ((subcode & ~0x0ffffULL) || (subcode > 6)) { -+ if (subcode & ~0x0ffffULL) { - s390_program_interrupt(env, PGM_SPECIFICATION, ra); - return; - } - - switch (subcode) { -- case 0: -+ case DIAG308_RESET_MOD_CLR: - s390_ipl_reset_request(cs, S390_RESET_MODIFIED_CLEAR); - break; -- case 1: -+ case DIAG308_RESET_LOAD_NORM: - s390_ipl_reset_request(cs, S390_RESET_LOAD_NORMAL); - break; -- case 3: -+ case DIAG308_LOAD_CLEAR: -+ /* Well we still lack the clearing bit... 
*/ - s390_ipl_reset_request(cs, S390_RESET_REIPL); - break; -- case 5: -- if ((r1 & 1) || (addr & 0x0fffULL)) { -- s390_program_interrupt(env, PGM_SPECIFICATION, ra); -- return; -- } -- if (!address_space_access_valid(&address_space_memory, addr, -- sizeof(IplParameterBlock), false, -- MEMTXATTRS_UNSPECIFIED)) { -- s390_program_interrupt(env, PGM_ADDRESSING, ra); -+ case DIAG308_SET: -+ if (diag308_parm_check(env, r1, addr, ra, false)) { - return; - } - iplb = g_new0(IplParameterBlock, 1); -@@ -110,15 +127,8 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) - out: - g_free(iplb); - return; -- case 6: -- if ((r1 & 1) || (addr & 0x0fffULL)) { -- s390_program_interrupt(env, PGM_SPECIFICATION, ra); -- return; -- } -- if (!address_space_access_valid(&address_space_memory, addr, -- sizeof(IplParameterBlock), true, -- MEMTXATTRS_UNSPECIFIED)) { -- s390_program_interrupt(env, PGM_ADDRESSING, ra); -+ case DIAG308_STORE: -+ if (diag308_parm_check(env, r1, addr, ra, true)) { - return; - } - iplb = s390_ipl_get_iplb(); --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-Don-t-do-a-normal-reset-on-the-initial-cpu.patch b/SOURCES/kvm-s390x-Don-t-do-a-normal-reset-on-the-initial-cpu.patch deleted file mode 100644 index dab8acc..0000000 --- a/SOURCES/kvm-s390x-Don-t-do-a-normal-reset-on-the-initial-cpu.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 511638161566d4944a572a31d787eb27bbc0bc8e Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:45 -0400 -Subject: [PATCH 03/42] s390x: Don't do a normal reset on the initial cpu -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-4-thuth@redhat.com> -Patchwork-id: 97017 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 03/38] s390x: Don't do a normal reset on the initial cpu -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: 
David Hildenbrand - -From: Janosch Frank - -The initiating cpu needs to be reset with an initial reset. While -doing a normal reset followed by a initial reset is not wrong per se, -the Ultravisor will only allow the correct reset to be performed. - -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Reviewed-by: Cornelia Huck -Message-Id: <20191127175046.4911-2-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit ec9227339fce99412830d44a37eb0bd2fadd5f75) -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/s390-virtio-ccw.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index c2c83d2fce..4ea01c53c0 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -348,6 +348,9 @@ static void s390_machine_reset(MachineState *machine) - break; - case S390_RESET_LOAD_NORMAL: - CPU_FOREACH(t) { -+ if (t == cs) { -+ continue; -+ } - run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL); - } - subsystem_reset(); --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-Fix-cpu-normal-reset-ri-clearing.patch b/SOURCES/kvm-s390x-Fix-cpu-normal-reset-ri-clearing.patch deleted file mode 100644 index 9b81586..0000000 --- a/SOURCES/kvm-s390x-Fix-cpu-normal-reset-ri-clearing.patch +++ /dev/null @@ -1,101 +0,0 @@ -From bdad28b11e36f657cb8909e7223a7d8fc0948c2e Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:51 -0400 -Subject: [PATCH 09/42] s390x: Fix cpu normal reset ri clearing -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-10-thuth@redhat.com> -Patchwork-id: 97029 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 09/38] s390x: Fix cpu normal reset ri clearing -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -As it turns out we need 
to clear the ri controls and PSW enablement -bit to be architecture compliant. - -Signed-off-by: Janosch Frank -Reviewed-by: Christian Borntraeger -Message-Id: <20191203132813.2734-4-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit e893baee70149896d1e43e341da4d6c614037d5d) -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/cpu.c | 7 ++++++- - target/s390x/cpu.h | 7 ++++++- - 2 files changed, 12 insertions(+), 2 deletions(-) - -diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c -index bd39cb54b7..99ea09085a 100644 ---- a/target/s390x/cpu.c -+++ b/target/s390x/cpu.c -@@ -100,7 +100,7 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) - case S390_CPU_RESET_INITIAL: - /* initial reset does not clear everything! */ - memset(&env->start_initial_reset_fields, 0, -- offsetof(CPUS390XState, end_reset_fields) - -+ offsetof(CPUS390XState, start_normal_reset_fields) - - offsetof(CPUS390XState, start_initial_reset_fields)); - - /* architectured initial value for Breaking-Event-Address register */ -@@ -123,6 +123,11 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) - &env->fpu_status); - /* fall through */ - case S390_CPU_RESET_NORMAL: -+ env->psw.mask &= ~PSW_MASK_RI; -+ memset(&env->start_normal_reset_fields, 0, -+ offsetof(CPUS390XState, end_reset_fields) - -+ offsetof(CPUS390XState, start_normal_reset_fields)); -+ - env->pfault_token = -1UL; - env->bpbc = false; - break; -diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h -index d2af13b345..7e1c18d596 100644 ---- a/target/s390x/cpu.h -+++ b/target/s390x/cpu.h -@@ -58,7 +58,6 @@ struct CPUS390XState { - */ - uint64_t vregs[32][2] QEMU_ALIGNED(16); /* vector registers */ - uint32_t aregs[16]; /* access registers */ -- uint8_t riccb[64]; /* runtime instrumentation control */ - uint64_t gscb[4]; /* guarded storage control */ - uint64_t etoken; /* etoken */ - uint64_t etoken_extension; /* etoken extension */ -@@ -114,6 +113,10 @@ struct CPUS390XState { - 
uint64_t gbea; - uint64_t pp; - -+ /* Fields up to this point are not cleared by normal CPU reset */ -+ struct {} start_normal_reset_fields; -+ uint8_t riccb[64]; /* runtime instrumentation control */ -+ - /* Fields up to this point are cleared by a CPU reset */ - struct {} end_reset_fields; - -@@ -252,6 +255,7 @@ extern const VMStateDescription vmstate_s390_cpu; - #undef PSW_SHIFT_ASC - #undef PSW_MASK_CC - #undef PSW_MASK_PM -+#undef PSW_MASK_RI - #undef PSW_SHIFT_MASK_PM - #undef PSW_MASK_64 - #undef PSW_MASK_32 -@@ -273,6 +277,7 @@ extern const VMStateDescription vmstate_s390_cpu; - #define PSW_MASK_CC 0x0000300000000000ULL - #define PSW_MASK_PM 0x00000F0000000000ULL - #define PSW_SHIFT_MASK_PM 40 -+#define PSW_MASK_RI 0x0000008000000000ULL - #define PSW_MASK_64 0x0000000100000000ULL - #define PSW_MASK_32 0x0000000080000000ULL - #define PSW_MASK_ESA_ADDR 0x000000007fffffffULL --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-Move-clear-reset.patch b/SOURCES/kvm-s390x-Move-clear-reset.patch deleted file mode 100644 index 7c1614c..0000000 --- a/SOURCES/kvm-s390x-Move-clear-reset.patch +++ /dev/null @@ -1,146 +0,0 @@ -From f268cc7071ecb4322c03f3183acbcf90421da3c7 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:48 -0400 -Subject: [PATCH 06/42] s390x: Move clear reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-7-thuth@redhat.com> -Patchwork-id: 97019 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 06/38] s390x: Move clear reset -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -Let's also move the clear reset function into the reset handler. 
- -Signed-off-by: Janosch Frank -Message-Id: <20191127175046.4911-5-frankja@linux.ibm.com> -Reviewed-by: David Hildenbrand -Reviewed-by: Thomas Huth -Signed-off-by: Cornelia Huck -(cherry picked from commit eb8adcc3e9e3b8405c104ede72cf9f3bb2a5e226) -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/cpu-qom.h | 1 + - target/s390x/cpu.c | 58 +++++++++++++----------------------------- - 2 files changed, 18 insertions(+), 41 deletions(-) - -diff --git a/target/s390x/cpu-qom.h b/target/s390x/cpu-qom.h -index 6f0a12042e..dbe5346ec9 100644 ---- a/target/s390x/cpu-qom.h -+++ b/target/s390x/cpu-qom.h -@@ -37,6 +37,7 @@ typedef struct S390CPUDef S390CPUDef; - typedef enum cpu_reset_type { - S390_CPU_RESET_NORMAL, - S390_CPU_RESET_INITIAL, -+ S390_CPU_RESET_CLEAR, - } cpu_reset_type; - - /** -diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c -index ca62fe7685..bd39cb54b7 100644 ---- a/target/s390x/cpu.c -+++ b/target/s390x/cpu.c -@@ -94,6 +94,9 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) - s390_cpu_set_state(S390_CPU_STATE_STOPPED, cpu); - - switch (type) { -+ case S390_CPU_RESET_CLEAR: -+ memset(env, 0, offsetof(CPUS390XState, start_initial_reset_fields)); -+ /* fall through */ - case S390_CPU_RESET_INITIAL: - /* initial reset does not clear everything! 
*/ - memset(&env->start_initial_reset_fields, 0, -@@ -107,6 +110,14 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) - env->cregs[0] = CR0_RESET; - env->cregs[14] = CR14_RESET; - -+#if defined(CONFIG_USER_ONLY) -+ /* user mode should always be allowed to use the full FPU */ -+ env->cregs[0] |= CR0_AFP; -+ if (s390_has_feat(S390_FEAT_VECTOR)) { -+ env->cregs[0] |= CR0_VECTOR; -+ } -+#endif -+ - /* tininess for underflow is detected before rounding */ - set_float_detect_tininess(float_tininess_before_rounding, - &env->fpu_status); -@@ -125,46 +136,6 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) - } - } - --/* CPUClass:reset() */ --static void s390_cpu_full_reset(CPUState *s) --{ -- S390CPU *cpu = S390_CPU(s); -- S390CPUClass *scc = S390_CPU_GET_CLASS(cpu); -- CPUS390XState *env = &cpu->env; -- -- scc->parent_reset(s); -- cpu->env.sigp_order = 0; -- s390_cpu_set_state(S390_CPU_STATE_STOPPED, cpu); -- -- memset(env, 0, offsetof(CPUS390XState, end_reset_fields)); -- -- /* architectured initial values for CR 0 and 14 */ -- env->cregs[0] = CR0_RESET; -- env->cregs[14] = CR14_RESET; -- --#if defined(CONFIG_USER_ONLY) -- /* user mode should always be allowed to use the full FPU */ -- env->cregs[0] |= CR0_AFP; -- if (s390_has_feat(S390_FEAT_VECTOR)) { -- env->cregs[0] |= CR0_VECTOR; -- } --#endif -- -- /* architectured initial value for Breaking-Event-Address register */ -- env->gbea = 1; -- -- env->pfault_token = -1UL; -- -- /* tininess for underflow is detected before rounding */ -- set_float_detect_tininess(float_tininess_before_rounding, -- &env->fpu_status); -- -- /* Reset state inside the kernel that we cannot access yet from QEMU. 
*/ -- if (kvm_enabled()) { -- kvm_s390_reset_vcpu(cpu); -- } --} -- - #if !defined(CONFIG_USER_ONLY) - static void s390_cpu_machine_reset_cb(void *opaque) - { -@@ -456,6 +427,11 @@ static Property s390x_cpu_properties[] = { - DEFINE_PROP_END_OF_LIST() - }; - -+static void s390_cpu_reset_full(CPUState *s) -+{ -+ return s390_cpu_reset(s, S390_CPU_RESET_CLEAR); -+} -+ - static void s390_cpu_class_init(ObjectClass *oc, void *data) - { - S390CPUClass *scc = S390_CPU_CLASS(oc); -@@ -472,7 +448,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) - scc->load_normal = s390_cpu_load_normal; - #endif - scc->reset = s390_cpu_reset; -- cc->reset = s390_cpu_full_reset; -+ cc->reset = s390_cpu_reset_full; - cc->class_by_name = s390_cpu_class_by_name, - cc->has_work = s390_cpu_has_work; - #ifdef CONFIG_TCG --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-Move-diagnose-308-subcodes-and-rcs-into-ipl.h.patch b/SOURCES/kvm-s390x-Move-diagnose-308-subcodes-and-rcs-into-ipl.h.patch deleted file mode 100644 index ac183cf..0000000 --- a/SOURCES/kvm-s390x-Move-diagnose-308-subcodes-and-rcs-into-ipl.h.patch +++ /dev/null @@ -1,83 +0,0 @@ -From c9eee8aeed39976293e0d857039fcf729b821e83 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:03 -0400 -Subject: [PATCH 21/42] s390x: Move diagnose 308 subcodes and rcs into ipl.h -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-22-thuth@redhat.com> -Patchwork-id: 97032 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 21/38] s390x: Move diagnose 308 subcodes and rcs into ipl.h -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -They are part of the IPL process, so let's put them into the ipl -header. 
- -Signed-off-by: Janosch Frank -Reviewed-by: Cornelia Huck -Reviewed-by: Christian Borntraeger -Reviewed-by: David Hildenbrand -Message-Id: <20200319131921.2367-2-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 9b39d29470e9dbef24ee842a44ea56bd92b855ea) -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/ipl.h | 11 +++++++++++ - target/s390x/diag.c | 11 ----------- - 2 files changed, 11 insertions(+), 11 deletions(-) - -diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h -index 3e44abe1c6..a5665e6bfd 100644 ---- a/hw/s390x/ipl.h -+++ b/hw/s390x/ipl.h -@@ -159,6 +159,17 @@ struct S390IPLState { - typedef struct S390IPLState S390IPLState; - QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, "alignment of iplb wrong"); - -+#define DIAG_308_RC_OK 0x0001 -+#define DIAG_308_RC_NO_CONF 0x0102 -+#define DIAG_308_RC_INVALID 0x0402 -+ -+#define DIAG308_RESET_MOD_CLR 0 -+#define DIAG308_RESET_LOAD_NORM 1 -+#define DIAG308_LOAD_CLEAR 3 -+#define DIAG308_LOAD_NORMAL_DUMP 4 -+#define DIAG308_SET 5 -+#define DIAG308_STORE 6 -+ - #define S390_IPL_TYPE_FCP 0x00 - #define S390_IPL_TYPE_CCW 0x02 - #define S390_IPL_TYPE_QEMU_SCSI 0xff -diff --git a/target/s390x/diag.c b/target/s390x/diag.c -index 54e5670b3f..8aba6341f9 100644 ---- a/target/s390x/diag.c -+++ b/target/s390x/diag.c -@@ -49,17 +49,6 @@ int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3) - return diag288_class->handle_timer(diag288, func, timeout); - } - --#define DIAG_308_RC_OK 0x0001 --#define DIAG_308_RC_NO_CONF 0x0102 --#define DIAG_308_RC_INVALID 0x0402 -- --#define DIAG308_RESET_MOD_CLR 0 --#define DIAG308_RESET_LOAD_NORM 1 --#define DIAG308_LOAD_CLEAR 3 --#define DIAG308_LOAD_NORMAL_DUMP 4 --#define DIAG308_SET 5 --#define DIAG308_STORE 6 -- - static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr, - uintptr_t ra, bool write) - { --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-Move-initial-reset.patch b/SOURCES/kvm-s390x-Move-initial-reset.patch deleted 
file mode 100644 index 0f2e9ab..0000000 --- a/SOURCES/kvm-s390x-Move-initial-reset.patch +++ /dev/null @@ -1,159 +0,0 @@ -From 0d1c0adf25a323be0663863ebe44a6aefb5f7baf Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:47 -0400 -Subject: [PATCH 05/42] s390x: Move initial reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-6-thuth@redhat.com> -Patchwork-id: 97024 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 05/38] s390x: Move initial reset -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -Let's move the intial reset into the reset handler and cleanup -afterwards. - -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Message-Id: <20191128083723.11937-1-frankja@linux.ibm.com> -Reviewed-by: Thomas Huth -Signed-off-by: Cornelia Huck -(cherry picked from commit 81b9222358e5c8f666f0d86057c75e40531d804c) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/s390x/cpu-qom.h | 2 +- - target/s390x/cpu.c | 46 +++++++++++++++++------------------------- - target/s390x/cpu.h | 2 +- - target/s390x/sigp.c | 2 +- - 4 files changed, 21 insertions(+), 31 deletions(-) - -diff --git a/target/s390x/cpu-qom.h b/target/s390x/cpu-qom.h -index f3b71bac67..6f0a12042e 100644 ---- a/target/s390x/cpu-qom.h -+++ b/target/s390x/cpu-qom.h -@@ -36,6 +36,7 @@ typedef struct S390CPUDef S390CPUDef; - - typedef enum cpu_reset_type { - S390_CPU_RESET_NORMAL, -+ S390_CPU_RESET_INITIAL, - } cpu_reset_type; - - /** -@@ -62,7 +63,6 @@ typedef struct S390CPUClass { - void (*parent_reset)(CPUState *cpu); - void (*load_normal)(CPUState *cpu); - void (*reset)(CPUState *cpu, cpu_reset_type type); -- void (*initial_cpu_reset)(CPUState *cpu); - } S390CPUClass; - - typedef struct S390CPU S390CPU; -diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c -index 67d6fbfa44..ca62fe7685 100644 ---- a/target/s390x/cpu.c -+++ b/target/s390x/cpu.c -@@ -94,6 +94,23 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) - s390_cpu_set_state(S390_CPU_STATE_STOPPED, cpu); - - switch (type) { -+ case S390_CPU_RESET_INITIAL: -+ /* initial reset does not clear everything! 
*/ -+ memset(&env->start_initial_reset_fields, 0, -+ offsetof(CPUS390XState, end_reset_fields) - -+ offsetof(CPUS390XState, start_initial_reset_fields)); -+ -+ /* architectured initial value for Breaking-Event-Address register */ -+ env->gbea = 1; -+ -+ /* architectured initial values for CR 0 and 14 */ -+ env->cregs[0] = CR0_RESET; -+ env->cregs[14] = CR14_RESET; -+ -+ /* tininess for underflow is detected before rounding */ -+ set_float_detect_tininess(float_tininess_before_rounding, -+ &env->fpu_status); -+ /* fall through */ - case S390_CPU_RESET_NORMAL: - env->pfault_token = -1UL; - env->bpbc = false; -@@ -101,35 +118,9 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) - default: - g_assert_not_reached(); - } --} -- --/* S390CPUClass::initial_reset() */ --static void s390_cpu_initial_reset(CPUState *s) --{ -- S390CPU *cpu = S390_CPU(s); -- CPUS390XState *env = &cpu->env; -- -- s390_cpu_reset(s, S390_CPU_RESET_NORMAL); -- /* initial reset does not clear everything! */ -- memset(&env->start_initial_reset_fields, 0, -- offsetof(CPUS390XState, end_reset_fields) - -- offsetof(CPUS390XState, start_initial_reset_fields)); -- -- /* architectured initial values for CR 0 and 14 */ -- env->cregs[0] = CR0_RESET; -- env->cregs[14] = CR14_RESET; -- -- /* architectured initial value for Breaking-Event-Address register */ -- env->gbea = 1; -- -- env->pfault_token = -1UL; -- -- /* tininess for underflow is detected before rounding */ -- set_float_detect_tininess(float_tininess_before_rounding, -- &env->fpu_status); - - /* Reset state inside the kernel that we cannot access yet from QEMU. 
*/ -- if (kvm_enabled()) { -+ if (kvm_enabled() && type != S390_CPU_RESET_NORMAL) { - kvm_s390_reset_vcpu(cpu); - } - } -@@ -481,7 +472,6 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) - scc->load_normal = s390_cpu_load_normal; - #endif - scc->reset = s390_cpu_reset; -- scc->initial_cpu_reset = s390_cpu_initial_reset; - cc->reset = s390_cpu_full_reset; - cc->class_by_name = s390_cpu_class_by_name, - cc->has_work = s390_cpu_has_work; -diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h -index 18123dfd5b..d2af13b345 100644 ---- a/target/s390x/cpu.h -+++ b/target/s390x/cpu.h -@@ -748,7 +748,7 @@ static inline void s390_do_cpu_initial_reset(CPUState *cs, run_on_cpu_data arg) - { - S390CPUClass *scc = S390_CPU_GET_CLASS(cs); - -- scc->initial_cpu_reset(cs); -+ scc->reset(cs, S390_CPU_RESET_INITIAL); - } - - static inline void s390_do_cpu_load_normal(CPUState *cs, run_on_cpu_data arg) -diff --git a/target/s390x/sigp.c b/target/s390x/sigp.c -index 850139b9cd..727875bb4a 100644 ---- a/target/s390x/sigp.c -+++ b/target/s390x/sigp.c -@@ -254,7 +254,7 @@ static void sigp_initial_cpu_reset(CPUState *cs, run_on_cpu_data arg) - SigpInfo *si = arg.host_ptr; - - cpu_synchronize_state(cs); -- scc->initial_cpu_reset(cs); -+ scc->reset(cs, S390_CPU_RESET_INITIAL); - cpu_synchronize_post_reset(cs); - si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; - } --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-Move-reset-normal-to-shared-reset-handler.patch b/SOURCES/kvm-s390x-Move-reset-normal-to-shared-reset-handler.patch deleted file mode 100644 index 81a4368..0000000 --- a/SOURCES/kvm-s390x-Move-reset-normal-to-shared-reset-handler.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 53b5a7f83f3e6b94c66cbbb97ea42bbf02cb96b4 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:46 -0400 -Subject: [PATCH 04/42] s390x: Move reset normal to shared reset handler -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth 
-Message-id: <20200529055420.16855-5-thuth@redhat.com> -Patchwork-id: 97018 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 04/38] s390x: Move reset normal to shared reset handler -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -Let's start moving the cpu reset functions into a single function with -a switch/case, so we can later use fallthroughs and share more code -between resets. - -This patch introduces the reset function by renaming cpu_reset(). - -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Message-Id: <20191127175046.4911-3-frankja@linux.ibm.com> -Reviewed-by: Thomas Huth -Signed-off-by: Cornelia Huck -(cherry picked from commit eac4f82791f1807c423e85670837db103b9d59b3) -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/cpu-qom.h | 6 +++++- - target/s390x/cpu.c | 19 +++++++++++++------ - target/s390x/cpu.h | 2 +- - target/s390x/sigp.c | 2 +- - 4 files changed, 20 insertions(+), 9 deletions(-) - -diff --git a/target/s390x/cpu-qom.h b/target/s390x/cpu-qom.h -index b809ec8418..f3b71bac67 100644 ---- a/target/s390x/cpu-qom.h -+++ b/target/s390x/cpu-qom.h -@@ -34,6 +34,10 @@ - typedef struct S390CPUModel S390CPUModel; - typedef struct S390CPUDef S390CPUDef; - -+typedef enum cpu_reset_type { -+ S390_CPU_RESET_NORMAL, -+} cpu_reset_type; -+ - /** - * S390CPUClass: - * @parent_realize: The parent class' realize handler. 
-@@ -57,7 +61,7 @@ typedef struct S390CPUClass { - DeviceRealize parent_realize; - void (*parent_reset)(CPUState *cpu); - void (*load_normal)(CPUState *cpu); -- void (*cpu_reset)(CPUState *cpu); -+ void (*reset)(CPUState *cpu, cpu_reset_type type); - void (*initial_cpu_reset)(CPUState *cpu); - } S390CPUClass; - -diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c -index 3abe7e80fd..67d6fbfa44 100644 ---- a/target/s390x/cpu.c -+++ b/target/s390x/cpu.c -@@ -82,18 +82,25 @@ static void s390_cpu_load_normal(CPUState *s) - } - #endif - --/* S390CPUClass::cpu_reset() */ --static void s390_cpu_reset(CPUState *s) -+/* S390CPUClass::reset() */ -+static void s390_cpu_reset(CPUState *s, cpu_reset_type type) - { - S390CPU *cpu = S390_CPU(s); - S390CPUClass *scc = S390_CPU_GET_CLASS(cpu); - CPUS390XState *env = &cpu->env; - -- env->pfault_token = -1UL; -- env->bpbc = false; - scc->parent_reset(s); - cpu->env.sigp_order = 0; - s390_cpu_set_state(S390_CPU_STATE_STOPPED, cpu); -+ -+ switch (type) { -+ case S390_CPU_RESET_NORMAL: -+ env->pfault_token = -1UL; -+ env->bpbc = false; -+ break; -+ default: -+ g_assert_not_reached(); -+ } - } - - /* S390CPUClass::initial_reset() */ -@@ -102,7 +109,7 @@ static void s390_cpu_initial_reset(CPUState *s) - S390CPU *cpu = S390_CPU(s); - CPUS390XState *env = &cpu->env; - -- s390_cpu_reset(s); -+ s390_cpu_reset(s, S390_CPU_RESET_NORMAL); - /* initial reset does not clear everything! 
*/ - memset(&env->start_initial_reset_fields, 0, - offsetof(CPUS390XState, end_reset_fields) - -@@ -473,7 +480,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) - #if !defined(CONFIG_USER_ONLY) - scc->load_normal = s390_cpu_load_normal; - #endif -- scc->cpu_reset = s390_cpu_reset; -+ scc->reset = s390_cpu_reset; - scc->initial_cpu_reset = s390_cpu_initial_reset; - cc->reset = s390_cpu_full_reset; - cc->class_by_name = s390_cpu_class_by_name, -diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h -index 17460ed7b3..18123dfd5b 100644 ---- a/target/s390x/cpu.h -+++ b/target/s390x/cpu.h -@@ -741,7 +741,7 @@ static inline void s390_do_cpu_reset(CPUState *cs, run_on_cpu_data arg) - { - S390CPUClass *scc = S390_CPU_GET_CLASS(cs); - -- scc->cpu_reset(cs); -+ scc->reset(cs, S390_CPU_RESET_NORMAL); - } - - static inline void s390_do_cpu_initial_reset(CPUState *cs, run_on_cpu_data arg) -diff --git a/target/s390x/sigp.c b/target/s390x/sigp.c -index 2ce22d4dc1..850139b9cd 100644 ---- a/target/s390x/sigp.c -+++ b/target/s390x/sigp.c -@@ -266,7 +266,7 @@ static void sigp_cpu_reset(CPUState *cs, run_on_cpu_data arg) - SigpInfo *si = arg.host_ptr; - - cpu_synchronize_state(cs); -- scc->cpu_reset(cs); -+ scc->reset(cs, S390_CPU_RESET_NORMAL); - cpu_synchronize_post_reset(cs); - si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; - } --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-Properly-fetch-and-test-the-short-psw-on-diag3.patch b/SOURCES/kvm-s390x-Properly-fetch-and-test-the-short-psw-on-diag3.patch deleted file mode 100644 index 9447240..0000000 --- a/SOURCES/kvm-s390x-Properly-fetch-and-test-the-short-psw-on-diag3.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 7171a794e8a7d91805516174187addc3b8e6b423 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:54 -0400 -Subject: [PATCH 12/42] s390x: Properly fetch and test the short psw on diag308 - subc 0/1 - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-13-thuth@redhat.com> -Patchwork-id: 97025 
-O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 12/38] s390x: Properly fetch and test the short psw on diag308 subc 0/1 -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -We need to actually fetch the cpu mask and set it. As we invert the -short psw indication in the mask, SIE will report a specification -exception, if it wasn't present in the reset psw. - -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Message-Id: <20191129142025.21453-2-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 104130cb7c106378dab944397c6a455c4a6d552f) -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/cpu.c | 12 ++++++++++-- - target/s390x/cpu.h | 1 + - 2 files changed, 11 insertions(+), 2 deletions(-) - -diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c -index 99ea09085a..625daeedd1 100644 ---- a/target/s390x/cpu.c -+++ b/target/s390x/cpu.c -@@ -76,8 +76,16 @@ static bool s390_cpu_has_work(CPUState *cs) - static void s390_cpu_load_normal(CPUState *s) - { - S390CPU *cpu = S390_CPU(s); -- cpu->env.psw.addr = ldl_phys(s->as, 4) & PSW_MASK_ESA_ADDR; -- cpu->env.psw.mask = PSW_MASK_32 | PSW_MASK_64; -+ uint64_t spsw = ldq_phys(s->as, 0); -+ -+ cpu->env.psw.mask = spsw & 0xffffffff80000000ULL; -+ /* -+ * Invert short psw indication, so SIE will report a specification -+ * exception if it was not set. 
-+ */ -+ cpu->env.psw.mask ^= PSW_MASK_SHORTPSW; -+ cpu->env.psw.addr = spsw & 0x7fffffffULL; -+ - s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); - } - #endif -diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h -index 7e1c18d596..7f5fa1d35b 100644 ---- a/target/s390x/cpu.h -+++ b/target/s390x/cpu.h -@@ -269,6 +269,7 @@ extern const VMStateDescription vmstate_s390_cpu; - #define PSW_MASK_EXT 0x0100000000000000ULL - #define PSW_MASK_KEY 0x00F0000000000000ULL - #define PSW_SHIFT_KEY 52 -+#define PSW_MASK_SHORTPSW 0x0008000000000000ULL - #define PSW_MASK_MCHECK 0x0004000000000000ULL - #define PSW_MASK_WAIT 0x0002000000000000ULL - #define PSW_MASK_PSTATE 0x0001000000000000ULL --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-Rename-and-use-constants-for-short-PSW-address.patch b/SOURCES/kvm-s390x-Rename-and-use-constants-for-short-PSW-address.patch deleted file mode 100644 index b1c7e01..0000000 --- a/SOURCES/kvm-s390x-Rename-and-use-constants-for-short-PSW-address.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 4bd5ae889376816238ecad1bce054b0e198cde2b Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:55 -0400 -Subject: [PATCH 13/42] s390x: Rename and use constants for short PSW address - and mask - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-14-thuth@redhat.com> -Patchwork-id: 97050 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 13/38] s390x: Rename and use constants for short PSW address and mask -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -Let's rename PSW_MASK_ESA_ADDR to PSW_MASK_SHORT_ADDR because we're -not working with a ESA PSW which would not support the extended -addressing bit. Also let's actually use it. - -Additionally we introduce PSW_MASK_SHORT_CTRL and use it throughout -the codebase. 
- -Signed-off-by: Janosch Frank -Reviewed-by: Christian Borntraeger -Reviewed-by: David Hildenbrand -Message-Id: <20200227092341.38558-1-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit b6c2dbd7214b0b2396e1dcf9668c8b48ab571115) -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/ipl.c | 2 +- - target/s390x/cpu.c | 4 ++-- - target/s390x/cpu.h | 3 ++- - 3 files changed, 5 insertions(+), 4 deletions(-) - -diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c -index ca544d64c5..0b7548a549 100644 ---- a/hw/s390x/ipl.c -+++ b/hw/s390x/ipl.c -@@ -179,7 +179,7 @@ static void s390_ipl_realize(DeviceState *dev, Error **errp) - /* if not Linux load the address of the (short) IPL PSW */ - ipl_psw = rom_ptr(4, 4); - if (ipl_psw) { -- pentry = be32_to_cpu(*ipl_psw) & 0x7fffffffUL; -+ pentry = be32_to_cpu(*ipl_psw) & PSW_MASK_SHORT_ADDR; - } else { - error_setg(&err, "Could not get IPL PSW"); - goto error; -diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c -index 625daeedd1..e538a4a3e2 100644 ---- a/target/s390x/cpu.c -+++ b/target/s390x/cpu.c -@@ -78,13 +78,13 @@ static void s390_cpu_load_normal(CPUState *s) - S390CPU *cpu = S390_CPU(s); - uint64_t spsw = ldq_phys(s->as, 0); - -- cpu->env.psw.mask = spsw & 0xffffffff80000000ULL; -+ cpu->env.psw.mask = spsw & PSW_MASK_SHORT_CTRL; - /* - * Invert short psw indication, so SIE will report a specification - * exception if it was not set. 
- */ - cpu->env.psw.mask ^= PSW_MASK_SHORTPSW; -- cpu->env.psw.addr = spsw & 0x7fffffffULL; -+ cpu->env.psw.addr = spsw & PSW_MASK_SHORT_ADDR; - - s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); - } -diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h -index 7f5fa1d35b..1ff84e6b3a 100644 ---- a/target/s390x/cpu.h -+++ b/target/s390x/cpu.h -@@ -281,7 +281,8 @@ extern const VMStateDescription vmstate_s390_cpu; - #define PSW_MASK_RI 0x0000008000000000ULL - #define PSW_MASK_64 0x0000000100000000ULL - #define PSW_MASK_32 0x0000000080000000ULL --#define PSW_MASK_ESA_ADDR 0x000000007fffffffULL -+#define PSW_MASK_SHORT_ADDR 0x000000007fffffffULL -+#define PSW_MASK_SHORT_CTRL 0xffffffff80000000ULL - - #undef PSW_ASC_PRIMARY - #undef PSW_ASC_ACCREG --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-css-Refactor-the-css_queue_crw-routine.patch b/SOURCES/kvm-s390x-css-Refactor-the-css_queue_crw-routine.patch deleted file mode 100644 index 8ce7625..0000000 --- a/SOURCES/kvm-s390x-css-Refactor-the-css_queue_crw-routine.patch +++ /dev/null @@ -1,119 +0,0 @@ -From 04d4e7eda95316b64ea9dc0f4ca8801d531652e7 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 23 Jun 2020 09:25:41 -0400 -Subject: [PATCH 07/12] s390x/css: Refactor the css_queue_crw() routine - -RH-Author: Cornelia Huck -Message-id: <20200623092543.358315-8-cohuck@redhat.com> -Patchwork-id: 97700 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 7/9] s390x/css: Refactor the css_queue_crw() routine -Bugzilla: 1660916 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth - -From: Eric Farman - -We have a use case (vfio-ccw) where a CRW is already built and -ready to use. Rather than teasing out the components just to -reassemble it later, let's rework this code so we can queue a -fully-qualified CRW directly. 
- -Signed-off-by: Eric Farman -Reviewed-by: Cornelia Huck -Message-Id: <20200505125757.98209-6-farman@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit f6dde1b012e678aa64339520ef7519ec04026cf1) -Signed-off-by: Cornelia Huck -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/css.c | 44 ++++++++++++++++++++++++++++-------------- - include/hw/s390x/css.h | 1 + - 2 files changed, 30 insertions(+), 15 deletions(-) - -diff --git a/hw/s390x/css.c b/hw/s390x/css.c -index 71fd3f9a00..a8de8a0c84 100644 ---- a/hw/s390x/css.c -+++ b/hw/s390x/css.c -@@ -2170,30 +2170,23 @@ void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid, - } - } - --void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited, -- int chain, uint16_t rsid) -+void css_crw_add_to_queue(CRW crw) - { - CrwContainer *crw_cont; - -- trace_css_crw(rsc, erc, rsid, chain ? "(chained)" : ""); -+ trace_css_crw((crw.flags & CRW_FLAGS_MASK_RSC) >> 8, -+ crw.flags & CRW_FLAGS_MASK_ERC, -+ crw.rsid, -+ (crw.flags & CRW_FLAGS_MASK_C) ? "(chained)" : ""); -+ - /* TODO: Maybe use a static crw pool? 
*/ - crw_cont = g_try_new0(CrwContainer, 1); - if (!crw_cont) { - channel_subsys.crws_lost = true; - return; - } -- crw_cont->crw.flags = (rsc << 8) | erc; -- if (solicited) { -- crw_cont->crw.flags |= CRW_FLAGS_MASK_S; -- } -- if (chain) { -- crw_cont->crw.flags |= CRW_FLAGS_MASK_C; -- } -- crw_cont->crw.rsid = rsid; -- if (channel_subsys.crws_lost) { -- crw_cont->crw.flags |= CRW_FLAGS_MASK_R; -- channel_subsys.crws_lost = false; -- } -+ -+ crw_cont->crw = crw; - - QTAILQ_INSERT_TAIL(&channel_subsys.pending_crws, crw_cont, sibling); - -@@ -2204,6 +2197,27 @@ void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited, - } - } - -+void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited, -+ int chain, uint16_t rsid) -+{ -+ CRW crw; -+ -+ crw.flags = (rsc << 8) | erc; -+ if (solicited) { -+ crw.flags |= CRW_FLAGS_MASK_S; -+ } -+ if (chain) { -+ crw.flags |= CRW_FLAGS_MASK_C; -+ } -+ crw.rsid = rsid; -+ if (channel_subsys.crws_lost) { -+ crw.flags |= CRW_FLAGS_MASK_R; -+ channel_subsys.crws_lost = false; -+ } -+ -+ css_crw_add_to_queue(crw); -+} -+ - void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid, - int hotplugged, int add) - { -diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h -index 7e3a5e7433..08c869ab0a 100644 ---- a/include/hw/s390x/css.h -+++ b/include/hw/s390x/css.h -@@ -205,6 +205,7 @@ void copy_scsw_to_guest(SCSW *dest, const SCSW *src); - void css_inject_io_interrupt(SubchDev *sch); - void css_reset(void); - void css_reset_sch(SubchDev *sch); -+void css_crw_add_to_queue(CRW crw); - void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited, - int chain, uint16_t rsid); - void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid, --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-fix-build-for-without-default-devices.patch b/SOURCES/kvm-s390x-fix-build-for-without-default-devices.patch deleted file mode 100644 index 6567c04..0000000 --- a/SOURCES/kvm-s390x-fix-build-for-without-default-devices.patch +++ /dev/null 
@@ -1,74 +0,0 @@ -From d86158eeb752242791e3f94172ed020204040250 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 19 Jan 2021 12:50:46 -0500 -Subject: [PATCH 7/7] s390x: fix build for --without-default-devices -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cornelia Huck -Message-id: <20210119125046.472811-8-cohuck@redhat.com> -Patchwork-id: 100681 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 7/7] s390x: fix build for --without-default-devices -Bugzilla: 1905391 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Auger Eric -RH-Acked-by: Thomas Huth - -s390-pci-vfio.c calls into the vfio code, so we need it to be -built conditionally on vfio (which implies CONFIG_LINUX). - -Fixes: cd7498d07fbb ("s390x/pci: Add routine to get the vfio dma available count") -Reported-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Matthew Rosato -Message-Id: <20201103123237.718242-1-cohuck@redhat.com> -Acked-by: Greg Kurz -Tested-by: Greg Kurz -Signed-off-by: Cornelia Huck -(cherry picked from commit 77280d33bc9cfdbfb5b5d462259d644f5aefe9b3) -Signed-off-by: Cornelia Huck - - Conflicts: - hw/s390x/meson.build - include/hw/s390x/s390-pci-vfio.h - --> adaptions due to missing Meson rework - -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/Makefile.objs | 2 +- - include/hw/s390x/s390-pci-vfio.h | 3 ++- - 2 files changed, 3 insertions(+), 2 deletions(-) - -diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs -index 43756c9437d..dbef4b8906c 100644 ---- a/hw/s390x/Makefile.objs -+++ b/hw/s390x/Makefile.objs -@@ -7,7 +7,7 @@ obj-y += ipl.o - obj-y += css.o - obj-$(CONFIG_S390_CCW_VIRTIO) += s390-virtio-ccw.o - obj-$(CONFIG_TERMINAL3270) += 3270-ccw.o --obj-$(CONFIG_LINUX) += s390-pci-vfio.o -+obj-$(CONFIG_VFIO) += s390-pci-vfio.o - ifeq ($(CONFIG_VIRTIO_CCW),y) - obj-y += virtio-ccw.o - obj-$(CONFIG_VIRTIO_SERIAL) += virtio-ccw-serial.o -diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h -index 539bcf04eb5..685b136d46b 100644 ---- a/include/hw/s390x/s390-pci-vfio.h -+++ b/include/hw/s390x/s390-pci-vfio.h -@@ -13,8 +13,9 @@ - #define HW_S390_PCI_VFIO_H - - #include "hw/s390x/s390-pci-bus.h" -+#include "config-devices.h" - --#ifdef CONFIG_LINUX -+#ifdef CONFIG_VFIO - bool s390_pci_update_dma_avail(int fd, unsigned int *avail); - S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, - S390PCIBusDevice *pbdev); --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-ipl-Consolidate-iplb-validity-check-into-one-f.patch b/SOURCES/kvm-s390x-ipl-Consolidate-iplb-validity-check-into-one-f.patch deleted file mode 100644 index 8b9294e..0000000 --- a/SOURCES/kvm-s390x-ipl-Consolidate-iplb-validity-check-into-one-f.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 536b6081c0739bebbb33583370f62116d0cb42da Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:01 -0400 -Subject: [PATCH 19/42] s390x: ipl: Consolidate iplb validity check into one - function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-20-thuth@redhat.com> -Patchwork-id: 97038 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 19/38] s390x: ipl: Consolidate iplb validity check into one 
function -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -It's nicer to just call one function than calling a function for each -possible iplb type. - -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Message-Id: <20200310090950.61172-1-frankja@linux.ibm.com> -Reviewed-by: Christian Borntraeger -Signed-off-by: Christian Borntraeger -(cherry picked from commit 94c21436e5a89143f8b9cb4d089d1a2f3f4fd377) -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/ipl.h | 18 +++++++++--------- - target/s390x/diag.c | 2 +- - 2 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h -index d4813105db..3e44abe1c6 100644 ---- a/hw/s390x/ipl.h -+++ b/hw/s390x/ipl.h -@@ -173,16 +173,16 @@ static inline bool iplb_valid_len(IplParameterBlock *iplb) - return be32_to_cpu(iplb->len) <= sizeof(IplParameterBlock); - } - --static inline bool iplb_valid_ccw(IplParameterBlock *iplb) -+static inline bool iplb_valid(IplParameterBlock *iplb) - { -- return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_CCW_LEN && -- iplb->pbt == S390_IPL_TYPE_CCW; --} -- --static inline bool iplb_valid_fcp(IplParameterBlock *iplb) --{ -- return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_FCP_LEN && -- iplb->pbt == S390_IPL_TYPE_FCP; -+ switch (iplb->pbt) { -+ case S390_IPL_TYPE_FCP: -+ return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_FCP_LEN; -+ case S390_IPL_TYPE_CCW: -+ return be32_to_cpu(iplb->len) >= S390_IPLB_MIN_CCW_LEN; -+ default: -+ return false; -+ } - } - - #endif -diff --git a/target/s390x/diag.c b/target/s390x/diag.c -index b5aec06d6b..54e5670b3f 100644 ---- a/target/s390x/diag.c -+++ b/target/s390x/diag.c -@@ -117,7 +117,7 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) - - cpu_physical_memory_read(addr, iplb, be32_to_cpu(iplb->len)); - -- if (!iplb_valid_ccw(iplb) && !iplb_valid_fcp(iplb)) { -+ if 
(!iplb_valid(iplb)) { - env->regs[r1 + 1] = DIAG_308_RC_INVALID; - goto out; - } --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-kvm-Make-kvm_sclp_service_call-void.patch b/SOURCES/kvm-s390x-kvm-Make-kvm_sclp_service_call-void.patch deleted file mode 100644 index 9882324..0000000 --- a/SOURCES/kvm-s390x-kvm-Make-kvm_sclp_service_call-void.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 999cf62d870ff9aa8e9609fcbbcefef9ae1aceb6 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:50 -0400 -Subject: [PATCH 08/42] s390x: kvm: Make kvm_sclp_service_call void -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-9-thuth@redhat.com> -Patchwork-id: 97030 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 08/38] s390x: kvm: Make kvm_sclp_service_call void -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -It defaults to returning 0 anyway and that return value is not -necessary, as 0 is also the default rc that the caller would return. - -While doing that we can simplify the logic a bit and return early if -we inject a PGM exception. - -Signed-off-by: Janosch Frank -Reviewed-by: Thomas Huth -Message-Id: <20191129091713.4582-1-frankja@linux.ibm.com> -Reviewed-by: David Hildenbrand -Signed-off-by: Cornelia Huck -(cherry picked from commit 15b6c0370c3e2774fd9ffda5c10c6e36952e8eb6) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/s390x/kvm.c | 12 +++++------- - 1 file changed, 5 insertions(+), 7 deletions(-) - -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index a02d569537..1c5bc7a2f9 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -1159,13 +1159,13 @@ void kvm_s390_access_exception(S390CPU *cpu, uint16_t code, uint64_t te_code) - kvm_s390_vcpu_interrupt(cpu, &irq); - } - --static int kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run, -+static void kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run, - uint16_t ipbh0) - { - CPUS390XState *env = &cpu->env; - uint64_t sccb; - uint32_t code; -- int r = 0; -+ int r; - - sccb = env->regs[ipbh0 & 0xf]; - code = env->regs[(ipbh0 & 0xf0) >> 4]; -@@ -1173,11 +1173,9 @@ static int kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run, - r = sclp_service_call(env, sccb, code); - if (r < 0) { - kvm_s390_program_interrupt(cpu, -r); -- } else { -- setcc(cpu, r); -+ return; - } -- -- return 0; -+ setcc(cpu, r); - } - - static int handle_b2(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) -@@ -1240,7 +1238,7 @@ static int handle_b2(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) - setcc(cpu, 3); - break; - case PRIV_B2_SCLP_CALL: -- rc = kvm_sclp_service_call(cpu, run, ipbh0); -+ kvm_sclp_service_call(cpu, run, ipbh0); - break; - default: - rc = -1; --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-pci-Add-routine-to-get-the-vfio-dma-available-.patch b/SOURCES/kvm-s390x-pci-Add-routine-to-get-the-vfio-dma-available-.patch deleted file mode 100644 index 5e48efb..0000000 --- a/SOURCES/kvm-s390x-pci-Add-routine-to-get-the-vfio-dma-available-.patch +++ /dev/null @@ -1,150 +0,0 @@ -From 3927f54a56e29003b84e0e3726d3a0170681128b Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 19 Jan 2021 12:50:44 -0500 -Subject: [PATCH 5/7] s390x/pci: Add routine to get the vfio dma available - count - -RH-Author: Cornelia Huck -Message-id: <20210119125046.472811-6-cohuck@redhat.com> -Patchwork-id: 100679 -O-Subject: 
[RHEL-8.4.0 qemu-kvm PATCH 5/7] s390x/pci: Add routine to get the vfio dma available count -Bugzilla: 1905391 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Auger Eric -RH-Acked-by: Thomas Huth - -From: Matthew Rosato - -Create new files for separating out vfio-specific work for s390 -pci. Add the first such routine, which issues VFIO_IOMMU_GET_INFO -ioctl to collect the current dma available count. - -Signed-off-by: Matthew Rosato -Reviewed-by: Cornelia Huck -[aw: Fix non-Linux build with CONFIG_LINUX] -Signed-off-by: Alex Williamson -(cherry picked from commit cd7498d07fbb20fa04790ff7ee168a8a8d01cb30) -Signed-off-by: Cornelia Huck - - Conflicts: - hw/s390x/meson.build - --> added the file in hw/s390x/Makefile.objs instead, - since we do not use Meson yet - hw/s390x/s390-pci-vfio.c - --> NULL-initialize "info" to avoid a downstream-only - compiler warning - -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/Makefile.objs | 1 + - hw/s390x/s390-pci-vfio.c | 54 ++++++++++++++++++++++++++++++++ - include/hw/s390x/s390-pci-vfio.h | 24 ++++++++++++++ - 3 files changed, 79 insertions(+) - create mode 100644 hw/s390x/s390-pci-vfio.c - create mode 100644 include/hw/s390x/s390-pci-vfio.h - -diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs -index c4086ec3171..43756c9437d 100644 ---- a/hw/s390x/Makefile.objs -+++ b/hw/s390x/Makefile.objs -@@ -7,6 +7,7 @@ obj-y += ipl.o - obj-y += css.o - obj-$(CONFIG_S390_CCW_VIRTIO) += s390-virtio-ccw.o - obj-$(CONFIG_TERMINAL3270) += 3270-ccw.o -+obj-$(CONFIG_LINUX) += s390-pci-vfio.o - ifeq ($(CONFIG_VIRTIO_CCW),y) - obj-y += virtio-ccw.o - obj-$(CONFIG_VIRTIO_SERIAL) += virtio-ccw-serial.o -diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c -new file mode 100644 -index 00000000000..0eb22ffec4c ---- /dev/null -+++ b/hw/s390x/s390-pci-vfio.c -@@ -0,0 +1,54 @@ -+/* -+ * s390 vfio-pci interfaces -+ * -+ * Copyright 2020 IBM Corp. 
-+ * Author(s): Matthew Rosato -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or (at -+ * your option) any later version. See the COPYING file in the top-level -+ * directory. -+ */ -+ -+#include -+ -+#include "qemu/osdep.h" -+#include "hw/s390x/s390-pci-vfio.h" -+#include "hw/vfio/vfio-common.h" -+ -+/* -+ * Get the current DMA available count from vfio. Returns true if vfio is -+ * limiting DMA requests, false otherwise. The current available count read -+ * from vfio is returned in avail. -+ */ -+bool s390_pci_update_dma_avail(int fd, unsigned int *avail) -+{ -+ g_autofree struct vfio_iommu_type1_info *info = NULL; -+ uint32_t argsz; -+ -+ assert(avail); -+ -+ argsz = sizeof(struct vfio_iommu_type1_info); -+ info = g_malloc0(argsz); -+ -+ /* -+ * If the specified argsz is not large enough to contain all capabilities -+ * it will be updated upon return from the ioctl. Retry until we have -+ * a big enough buffer to hold the entire capability chain. -+ */ -+retry: -+ info->argsz = argsz; -+ -+ if (ioctl(fd, VFIO_IOMMU_GET_INFO, info)) { -+ return false; -+ } -+ -+ if (info->argsz > argsz) { -+ argsz = info->argsz; -+ info = g_realloc(info, argsz); -+ goto retry; -+ } -+ -+ /* If the capability exists, update with the current value */ -+ return vfio_get_info_dma_avail(info, avail); -+} -+ -diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h -new file mode 100644 -index 00000000000..1727292e9b5 ---- /dev/null -+++ b/include/hw/s390x/s390-pci-vfio.h -@@ -0,0 +1,24 @@ -+/* -+ * s390 vfio-pci interfaces -+ * -+ * Copyright 2020 IBM Corp. -+ * Author(s): Matthew Rosato -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or (at -+ * your option) any later version. See the COPYING file in the top-level -+ * directory. 
-+ */ -+ -+#ifndef HW_S390_PCI_VFIO_H -+#define HW_S390_PCI_VFIO_H -+ -+#ifdef CONFIG_LINUX -+bool s390_pci_update_dma_avail(int fd, unsigned int *avail); -+#else -+static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail) -+{ -+ return false; -+} -+#endif -+ -+#endif --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-pci-Honor-DMA-limits-set-by-vfio.patch b/SOURCES/kvm-s390x-pci-Honor-DMA-limits-set-by-vfio.patch deleted file mode 100644 index 13fd6b7..0000000 --- a/SOURCES/kvm-s390x-pci-Honor-DMA-limits-set-by-vfio.patch +++ /dev/null @@ -1,357 +0,0 @@ -From 7ef9b9c593da98ad32ad20c28d17bb2700a35c29 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 19 Jan 2021 12:50:45 -0500 -Subject: [PATCH 6/7] s390x/pci: Honor DMA limits set by vfio - -RH-Author: Cornelia Huck -Message-id: <20210119125046.472811-7-cohuck@redhat.com> -Patchwork-id: 100680 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 6/7] s390x/pci: Honor DMA limits set by vfio -Bugzilla: 1905391 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Auger Eric -RH-Acked-by: Thomas Huth - -From: Matthew Rosato - -When an s390 guest is using lazy unmapping, it can result in a very -large number of oustanding DMA requests, far beyond the default -limit configured for vfio. Let's track DMA usage similar to vfio -in the host, and trigger the guest to flush their DMA mappings -before vfio runs out. - -Signed-off-by: Matthew Rosato -Reviewed-by: Cornelia Huck -[aw: non-Linux build fixes] -Signed-off-by: Alex Williamson -(cherry picked from commit 37fa32de707340f3a93959ad5a1ebc41ba1520ee) -Signed-off-by: Cornelia Huck - - Conflicts: - hw/s390x/s390-pci-bus.c - --> adapt to missing 981c3dcd9489 ("qdev: Convert to - qdev_unrealize() with Coccinelle") - hw/s390x/s390-pci-inst.c - --> adapt to out of order inclusion of 5039caf3c449 ("memory: - Add IOMMUTLBEvent") - include/hw/s390x/s390-pci-bus.h - --> adapt to missing db1015e92e04 ("Move QOM typedefs and - add missing includes") - -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/s390-pci-bus.c | 16 ++++++++---- - hw/s390x/s390-pci-inst.c | 45 +++++++++++++++++++++++++++----- - hw/s390x/s390-pci-vfio.c | 42 +++++++++++++++++++++++++++++ - include/hw/s390x/s390-pci-bus.h | 9 +++++++ - include/hw/s390x/s390-pci-inst.h | 3 +++ - include/hw/s390x/s390-pci-vfio.h | 12 +++++++++ - 6 files changed, 116 insertions(+), 11 deletions(-) - -diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c -index 6daef2b6d57..a9f6f550472 100644 ---- a/hw/s390x/s390-pci-bus.c -+++ b/hw/s390x/s390-pci-bus.c -@@ -17,6 +17,7 @@ - #include "cpu.h" - #include "hw/s390x/s390-pci-bus.h" - #include "hw/s390x/s390-pci-inst.h" -+#include "hw/s390x/s390-pci-vfio.h" - #include "hw/pci/pci_bus.h" - #include "hw/qdev-properties.h" - #include "hw/pci/pci_bridge.h" -@@ -771,6 +772,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp) - s->bus_no = 0; - QTAILQ_INIT(&s->pending_sei); - QTAILQ_INIT(&s->zpci_devs); -+ QTAILQ_INIT(&s->zpci_dma_limit); - - css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false, - S390_ADAPTER_SUPPRESSIBLE, &local_err); -@@ -951,17 +953,18 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - } - } - -+ pbdev->pdev = pdev; -+ pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn); -+ pbdev->iommu->pbdev = pbdev; -+ pbdev->state = ZPCI_FS_DISABLED; -+ - if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) { - pbdev->fh |= FH_SHM_VFIO; -+ pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev); - } else { - pbdev->fh |= FH_SHM_EMUL; - } - -- pbdev->pdev = pdev; -- pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn); -- pbdev->iommu->pbdev = pbdev; -- pbdev->state = ZPCI_FS_DISABLED; -- - if (s390_pci_msix_init(pbdev)) { - error_setg(errp, "MSI-X support is mandatory " - "in the S390 architecture"); -@@ -1014,6 +1017,9 @@ static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, - pbdev->fid = 0; - QTAILQ_REMOVE(&s->zpci_devs, 
pbdev, link); - g_hash_table_remove(s->zpci_table, &pbdev->idx); -+ if (pbdev->iommu->dma_limit) { -+ s390_pci_end_dma_count(s, pbdev->iommu->dma_limit); -+ } - object_property_set_bool(OBJECT(dev), false, "realized", NULL); - } - } -diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c -index b1885344f18..edbdf727984 100644 ---- a/hw/s390x/s390-pci-inst.c -+++ b/hw/s390x/s390-pci-inst.c -@@ -32,6 +32,20 @@ - } \ - } while (0) - -+static inline void inc_dma_avail(S390PCIIOMMU *iommu) -+{ -+ if (iommu->dma_limit) { -+ iommu->dma_limit->avail++; -+ } -+} -+ -+static inline void dec_dma_avail(S390PCIIOMMU *iommu) -+{ -+ if (iommu->dma_limit) { -+ iommu->dma_limit->avail--; -+ } -+} -+ - static void s390_set_status_code(CPUS390XState *env, - uint8_t r, uint64_t status_code) - { -@@ -572,7 +586,8 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) - return 0; - } - --static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) -+static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, -+ S390IOTLBEntry *entry) - { - S390IOTLBEntry *cache = g_hash_table_lookup(iommu->iotlb, &entry->iova); - IOMMUTLBEvent event = { -@@ -588,14 +603,15 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) - - if (event.type == IOMMU_NOTIFIER_UNMAP) { - if (!cache) { -- return; -+ goto out; - } - g_hash_table_remove(iommu->iotlb, &entry->iova); -+ inc_dma_avail(iommu); - } else { - if (cache) { - if (cache->perm == entry->perm && - cache->translated_addr == entry->translated_addr) { -- return; -+ goto out; - } - - event.type = IOMMU_NOTIFIER_UNMAP; -@@ -611,9 +627,13 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) - cache->len = PAGE_SIZE; - cache->perm = entry->perm; - g_hash_table_replace(iommu->iotlb, &cache->iova, cache); -+ dec_dma_avail(iommu); - } - - memory_region_notify_iommu(&iommu->iommu_mr, 0, event); -+ -+out: -+ return iommu->dma_limit ? 
iommu->dma_limit->avail : 1; - } - - int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) -@@ -625,6 +645,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) - S390PCIIOMMU *iommu; - S390IOTLBEntry entry; - hwaddr start, end; -+ uint32_t dma_avail; - - if (env->psw.mask & PSW_MASK_PSTATE) { - s390_program_interrupt(env, PGM_PRIVILEGED, ra); -@@ -663,6 +684,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) - } - - iommu = pbdev->iommu; -+ if (iommu->dma_limit) { -+ dma_avail = iommu->dma_limit->avail; -+ } else { -+ dma_avail = 1; -+ } - if (!iommu->g_iota) { - error = ERR_EVENT_INVALAS; - goto err; -@@ -680,8 +706,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) - } - - start += entry.len; -- while (entry.iova < start && entry.iova < end) { -- s390_pci_update_iotlb(iommu, &entry); -+ while (entry.iova < start && entry.iova < end && -+ (dma_avail > 0 || entry.perm == IOMMU_NONE)) { -+ dma_avail = s390_pci_update_iotlb(iommu, &entry); - entry.iova += PAGE_SIZE; - entry.translated_addr += PAGE_SIZE; - } -@@ -694,7 +721,13 @@ err: - s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0); - } else { - pbdev->fmb.counter[ZPCI_FMB_CNT_RPCIT]++; -- setcc(cpu, ZPCI_PCI_LS_OK); -+ if (dma_avail > 0) { -+ setcc(cpu, ZPCI_PCI_LS_OK); -+ } else { -+ /* vfio DMA mappings are exhausted, trigger a RPCIT */ -+ setcc(cpu, ZPCI_PCI_LS_ERR); -+ s390_set_status_code(env, r1, ZPCI_RPCIT_ST_INSUFF_RES); -+ } - } - return 0; - } -diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c -index 0eb22ffec4c..01c1e8ac89a 100644 ---- a/hw/s390x/s390-pci-vfio.c -+++ b/hw/s390x/s390-pci-vfio.c -@@ -12,7 +12,9 @@ - #include - - #include "qemu/osdep.h" -+#include "hw/s390x/s390-pci-bus.h" - #include "hw/s390x/s390-pci-vfio.h" -+#include "hw/vfio/pci.h" - #include "hw/vfio/vfio-common.h" - - /* -@@ -52,3 +54,43 @@ retry: - return vfio_get_info_dma_avail(info, 
avail); - } - -+S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, -+ S390PCIBusDevice *pbdev) -+{ -+ S390PCIDMACount *cnt; -+ uint32_t avail; -+ VFIOPCIDevice *vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev); -+ int id; -+ -+ assert(vpdev); -+ -+ id = vpdev->vbasedev.group->container->fd; -+ -+ if (!s390_pci_update_dma_avail(id, &avail)) { -+ return NULL; -+ } -+ -+ QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) { -+ if (cnt->id == id) { -+ cnt->users++; -+ return cnt; -+ } -+ } -+ -+ cnt = g_new0(S390PCIDMACount, 1); -+ cnt->id = id; -+ cnt->users = 1; -+ cnt->avail = avail; -+ QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); -+ return cnt; -+} -+ -+void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt) -+{ -+ assert(cnt); -+ -+ cnt->users--; -+ if (cnt->users == 0) { -+ QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link); -+ } -+} -diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h -index 550f3cc5e92..2f2edbd0bf3 100644 ---- a/include/hw/s390x/s390-pci-bus.h -+++ b/include/hw/s390x/s390-pci-bus.h -@@ -266,6 +266,13 @@ typedef struct S390IOTLBEntry { - } S390IOTLBEntry; - - typedef struct S390PCIBusDevice S390PCIBusDevice; -+typedef struct S390PCIDMACount { -+ int id; -+ int users; -+ uint32_t avail; -+ QTAILQ_ENTRY(S390PCIDMACount) link; -+} S390PCIDMACount; -+ - typedef struct S390PCIIOMMU { - Object parent_obj; - S390PCIBusDevice *pbdev; -@@ -277,6 +284,7 @@ typedef struct S390PCIIOMMU { - uint64_t pba; - uint64_t pal; - GHashTable *iotlb; -+ S390PCIDMACount *dma_limit; - } S390PCIIOMMU; - - typedef struct S390PCIIOMMUTable { -@@ -352,6 +360,7 @@ typedef struct S390pciState { - GHashTable *zpci_table; - QTAILQ_HEAD(, SeiContainer) pending_sei; - QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs; -+ QTAILQ_HEAD(, S390PCIDMACount) zpci_dma_limit; - } S390pciState; - - S390pciState *s390_get_phb(void); -diff --git a/include/hw/s390x/s390-pci-inst.h b/include/hw/s390x/s390-pci-inst.h -index fa3bf8b5aad..8ee3a3c2375 100644 
---- a/include/hw/s390x/s390-pci-inst.h -+++ b/include/hw/s390x/s390-pci-inst.h -@@ -254,6 +254,9 @@ typedef struct ClpReqRspQueryPciGrp { - #define ZPCI_STPCIFC_ST_INVAL_DMAAS 28 - #define ZPCI_STPCIFC_ST_ERROR_RECOVER 40 - -+/* Refresh PCI Translations status codes */ -+#define ZPCI_RPCIT_ST_INSUFF_RES 16 -+ - /* FIB function controls */ - #define ZPCI_FIB_FC_ENABLED 0x80 - #define ZPCI_FIB_FC_ERROR 0x40 -diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h -index 1727292e9b5..539bcf04eb5 100644 ---- a/include/hw/s390x/s390-pci-vfio.h -+++ b/include/hw/s390x/s390-pci-vfio.h -@@ -12,13 +12,25 @@ - #ifndef HW_S390_PCI_VFIO_H - #define HW_S390_PCI_VFIO_H - -+#include "hw/s390x/s390-pci-bus.h" -+ - #ifdef CONFIG_LINUX - bool s390_pci_update_dma_avail(int fd, unsigned int *avail); -+S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, -+ S390PCIBusDevice *pbdev); -+void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt); - #else - static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail) - { - return false; - } -+static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, -+ S390PCIBusDevice *pbdev) -+{ -+ return NULL; -+} -+static inline void s390_pci_end_dma_count(S390pciState *s, -+ S390PCIDMACount *cnt) { } - #endif - - #endif --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-pci-Move-header-files-to-include-hw-s390x.patch b/SOURCES/kvm-s390x-pci-Move-header-files-to-include-hw-s390x.patch deleted file mode 100644 index 27e5fa2..0000000 --- a/SOURCES/kvm-s390x-pci-Move-header-files-to-include-hw-s390x.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 73fb2438518ef2073f2486fcf1dd8cddffb29228 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 19 Jan 2021 12:50:41 -0500 -Subject: [PATCH 2/7] s390x/pci: Move header files to include/hw/s390x - -RH-Author: Cornelia Huck -Message-id: <20210119125046.472811-3-cohuck@redhat.com> -Patchwork-id: 100676 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 2/7] s390x/pci: 
Move header files to include/hw/s390x -Bugzilla: 1905391 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Auger Eric -RH-Acked-by: Thomas Huth - -From: Matthew Rosato - -Seems a more appropriate location for them. - -Signed-off-by: Matthew Rosato -Reviewed-by: Cornelia Huck -Signed-off-by: Alex Williamson -(cherry picked from commit 408b55db8be3e3edae041d46ef8786fabc1476aa) -Signed-off-by: Cornelia Huck - - Conflicts: - hw/s390x/s390-virtio-ccw.c - --> context diff - -Signed-off-by: Danilo C. L. de Paula ---- - MAINTAINERS | 1 + - hw/s390x/s390-pci-bus.c | 4 ++-- - hw/s390x/s390-pci-inst.c | 4 ++-- - hw/s390x/s390-virtio-ccw.c | 2 +- - {hw => include/hw}/s390x/s390-pci-bus.h | 0 - {hw => include/hw}/s390x/s390-pci-inst.h | 0 - 6 files changed, 6 insertions(+), 5 deletions(-) - rename {hw => include/hw}/s390x/s390-pci-bus.h (100%) - rename {hw => include/hw}/s390x/s390-pci-inst.h (100%) - -diff --git a/MAINTAINERS b/MAINTAINERS -index 2742c955754..56ca8193d86 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -1225,6 +1225,7 @@ S390 PCI - M: Matthew Rosato - S: Supported - F: hw/s390x/s390-pci* -+F: include/hw/s390x/s390-pci* - L: qemu-s390x@nongnu.org - - UniCore32 Machines -diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c -index 2d2f4a7c419..6daef2b6d57 100644 ---- a/hw/s390x/s390-pci-bus.c -+++ b/hw/s390x/s390-pci-bus.c -@@ -15,8 +15,8 @@ - #include "qapi/error.h" - #include "qapi/visitor.h" - #include "cpu.h" --#include "s390-pci-bus.h" --#include "s390-pci-inst.h" -+#include "hw/s390x/s390-pci-bus.h" -+#include "hw/s390x/s390-pci-inst.h" - #include "hw/pci/pci_bus.h" - #include "hw/qdev-properties.h" - #include "hw/pci/pci_bridge.h" -diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c -index 27b189e6d75..b1885344f18 100644 ---- a/hw/s390x/s390-pci-inst.c -+++ b/hw/s390x/s390-pci-inst.c -@@ -13,12 +13,12 @@ - - #include "qemu/osdep.h" - #include "cpu.h" --#include "s390-pci-inst.h" --#include "s390-pci-bus.h" - #include "exec/memop.h" - 
#include "exec/memory-internal.h" - #include "qemu/error-report.h" - #include "sysemu/hw_accel.h" -+#include "hw/s390x/s390-pci-inst.h" -+#include "hw/s390x/s390-pci-bus.h" - #include "hw/s390x/tod.h" - - #ifndef DEBUG_S390PCI_INST -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 5b3d07f55c4..101f3b7c6e1 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -27,7 +27,7 @@ - #include "qemu/ctype.h" - #include "qemu/error-report.h" - #include "qemu/option.h" --#include "s390-pci-bus.h" -+#include "hw/s390x/s390-pci-bus.h" - #include "sysemu/reset.h" - #include "hw/s390x/storage-keys.h" - #include "hw/s390x/storage-attributes.h" -diff --git a/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h -similarity index 100% -rename from hw/s390x/s390-pci-bus.h -rename to include/hw/s390x/s390-pci-bus.h -diff --git a/hw/s390x/s390-pci-inst.h b/include/hw/s390x/s390-pci-inst.h -similarity index 100% -rename from hw/s390x/s390-pci-inst.h -rename to include/hw/s390x/s390-pci-inst.h --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-protvirt-Add-migration-blocker.patch b/SOURCES/kvm-s390x-protvirt-Add-migration-blocker.patch deleted file mode 100644 index 056f8d5..0000000 --- a/SOURCES/kvm-s390x-protvirt-Add-migration-blocker.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 0ba8d4ea1cc34230356cc446dfa8d1cb52cbd2f3 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:05 -0400 -Subject: [PATCH 23/42] s390x: protvirt: Add migration blocker - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-24-thuth@redhat.com> -Patchwork-id: 97043 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 23/38] s390x: protvirt: Add migration blocker -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -Migration is not yet supported. 
- -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Reviewed-by: Christian Borntraeger -Reviewed-by: Claudio Imbrenda -Reviewed-by: Cornelia Huck -Message-Id: <20200319131921.2367-5-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 0141e1b47707d90f5bd9d252da064ebdaca698a6) -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/s390-virtio-ccw.c | 18 ++++++++++++++++++ - 1 file changed, 18 insertions(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 82da1d9ab5..dbd5125232 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -44,6 +44,9 @@ - #include "sysemu/sysemu.h" - #include "hw/s390x/pv.h" - #include -+#include "migration/blocker.h" -+ -+static Error *pv_mig_blocker; - - S390CPU *s390_cpu_addr2state(uint16_t cpu_addr) - { -@@ -325,15 +328,30 @@ static void s390_machine_unprotect(S390CcwMachineState *ms) - { - s390_pv_vm_disable(); - ms->pv = false; -+ migrate_del_blocker(pv_mig_blocker); -+ error_free_or_abort(&pv_mig_blocker); - } - - static int s390_machine_protect(S390CcwMachineState *ms) - { -+ Error *local_err = NULL; - int rc; - -+ error_setg(&pv_mig_blocker, -+ "protected VMs are currently not migrateable."); -+ rc = migrate_add_blocker(pv_mig_blocker, &local_err); -+ if (rc) { -+ error_report_err(local_err); -+ error_free_or_abort(&pv_mig_blocker); -+ return rc; -+ } -+ - /* Create SE VM */ - rc = s390_pv_vm_enable(); - if (rc) { -+ error_report_err(local_err); -+ migrate_del_blocker(pv_mig_blocker); -+ error_free_or_abort(&pv_mig_blocker); - return rc; - } - --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-protvirt-Disable-address-checks-for-PV-guest-I.patch b/SOURCES/kvm-s390x-protvirt-Disable-address-checks-for-PV-guest-I.patch deleted file mode 100644 index 0cf75b0..0000000 --- a/SOURCES/kvm-s390x-protvirt-Disable-address-checks-for-PV-guest-I.patch +++ /dev/null @@ -1,135 +0,0 @@ -From 1cfcff169f392179258e4535e60d4ef9cabae3c6 Mon Sep 17 00:00:00 2001 
-From: Thomas Huth -Date: Fri, 29 May 2020 05:54:13 -0400 -Subject: [PATCH 31/42] s390x: protvirt: Disable address checks for PV guest IO - emulation - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-32-thuth@redhat.com> -Patchwork-id: 97044 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 31/38] s390x: protvirt: Disable address checks for PV guest IO emulation -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -IO instruction data is routed through SIDAD for protected guests, so -adresses do not need to be checked, as this is kernel memory which is -always available. - -Also the instruction data always starts at offset 0 of the SIDAD. - -Signed-off-by: Janosch Frank -Reviewed-by: Thomas Huth -Reviewed-by: David Hildenbrand -Reviewed-by: Christian Borntraeger -Reviewed-by: Claudio Imbrenda -Reviewed-by: Cornelia Huck -Message-Id: <20200319131921.2367-13-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit c10b708752e5264a85b5c3afa0a0ccfcf6503ddf) -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/ioinst.c | 35 ++++++++++++++++++++++++++++------- - 1 file changed, 28 insertions(+), 7 deletions(-) - -diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c -index c437a1d8c6..bbcccf6be2 100644 ---- a/target/s390x/ioinst.c -+++ b/target/s390x/ioinst.c -@@ -16,6 +16,25 @@ - #include "hw/s390x/ioinst.h" - #include "trace.h" - #include "hw/s390x/s390-pci-bus.h" -+#include "hw/s390x/pv.h" -+ -+/* All I/O instructions but chsc use the s format */ -+static uint64_t get_address_from_regs(CPUS390XState *env, uint32_t ipb, -+ uint8_t *ar) -+{ -+ /* -+ * Addresses for protected guests are all offsets into the -+ * satellite block which holds the IO control structures. Those -+ * control structures are always starting at offset 0 and are -+ * always aligned and accessible. So we can return 0 here which -+ * will pass the following address checks. 
-+ */ -+ if (s390_is_pv()) { -+ *ar = 0; -+ return 0; -+ } -+ return decode_basedisp_s(env, ipb, ar); -+} - - int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid, - int *schid) -@@ -114,7 +133,7 @@ void ioinst_handle_msch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) - CPUS390XState *env = &cpu->env; - uint8_t ar; - -- addr = decode_basedisp_s(env, ipb, &ar); -+ addr = get_address_from_regs(env, ipb, &ar); - if (addr & 3) { - s390_program_interrupt(env, PGM_SPECIFICATION, ra); - return; -@@ -171,7 +190,7 @@ void ioinst_handle_ssch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) - CPUS390XState *env = &cpu->env; - uint8_t ar; - -- addr = decode_basedisp_s(env, ipb, &ar); -+ addr = get_address_from_regs(env, ipb, &ar); - if (addr & 3) { - s390_program_interrupt(env, PGM_SPECIFICATION, ra); - return; -@@ -203,7 +222,7 @@ void ioinst_handle_stcrw(S390CPU *cpu, uint32_t ipb, uintptr_t ra) - CPUS390XState *env = &cpu->env; - uint8_t ar; - -- addr = decode_basedisp_s(env, ipb, &ar); -+ addr = get_address_from_regs(env, ipb, &ar); - if (addr & 3) { - s390_program_interrupt(env, PGM_SPECIFICATION, ra); - return; -@@ -234,7 +253,7 @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, - CPUS390XState *env = &cpu->env; - uint8_t ar; - -- addr = decode_basedisp_s(env, ipb, &ar); -+ addr = get_address_from_regs(env, ipb, &ar); - if (addr & 3) { - s390_program_interrupt(env, PGM_SPECIFICATION, ra); - return; -@@ -303,7 +322,7 @@ int ioinst_handle_tsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) - return -EIO; - } - trace_ioinst_sch_id("tsch", cssid, ssid, schid); -- addr = decode_basedisp_s(env, ipb, &ar); -+ addr = get_address_from_regs(env, ipb, &ar); - if (addr & 3) { - s390_program_interrupt(env, PGM_SPECIFICATION, ra); - return -EIO; -@@ -601,7 +620,7 @@ void ioinst_handle_chsc(S390CPU *cpu, uint32_t ipb, uintptr_t ra) - { - ChscReq *req; - ChscResp *res; -- uint64_t addr; -+ uint64_t addr = 0; - 
int reg; - uint16_t len; - uint16_t command; -@@ -610,7 +629,9 @@ void ioinst_handle_chsc(S390CPU *cpu, uint32_t ipb, uintptr_t ra) - - trace_ioinst("chsc"); - reg = (ipb >> 20) & 0x00f; -- addr = env->regs[reg]; -+ if (!s390_is_pv()) { -+ addr = env->regs[reg]; -+ } - /* Page boundary? */ - if (addr & 0xfff) { - s390_program_interrupt(env, PGM_SPECIFICATION, ra); --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-protvirt-Fix-stray-error_report_err-in-s390_ma.patch b/SOURCES/kvm-s390x-protvirt-Fix-stray-error_report_err-in-s390_ma.patch deleted file mode 100644 index 9857f28..0000000 --- a/SOURCES/kvm-s390x-protvirt-Fix-stray-error_report_err-in-s390_ma.patch +++ /dev/null @@ -1,55 +0,0 @@ -From b54e5e6df5d5bbe4dc0a206be9f6b6d971ce6f43 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:17 -0400 -Subject: [PATCH 35/42] s390x: protvirt: Fix stray error_report_err in - s390_machine_protect -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-36-thuth@redhat.com> -Patchwork-id: 97042 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 35/38] s390x: protvirt: Fix stray error_report_err in s390_machine_protect -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -In case the protection of the machine fails at s390_pv_vm_enable(), -we'll currently report the local_error variable. Problem is that -there's no migration blocker error that we can report at this point so -the pointer is always NULL which leads to a SEGFAULT. - -Let's remove the error report. 
- -Signed-off-by: Janosch Frank -Reported-by: Marc Hartmayer -Fixes: 0141e1b47707 ("s390x: protvirt: Add migration blocker") -Message-Id: <20200326140505.2432-1-frankja@linux.ibm.com> -Reviewed-by: David Hildenbrand -Signed-off-by: Cornelia Huck -(cherry picked from commit 7152c9ecc6530ea145c122b0a58cc28802f630c6) -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/s390-virtio-ccw.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index b4ebe83766..c08e42bda1 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -360,7 +360,6 @@ static int s390_machine_protect(S390CcwMachineState *ms) - rc = s390_pv_vm_enable(); - if (rc) { - qemu_balloon_inhibit(false); -- error_report_err(local_err); - migrate_del_blocker(pv_mig_blocker); - error_free_or_abort(&pv_mig_blocker); - return rc; --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-protvirt-Handle-SIGP-store-status-correctly.patch b/SOURCES/kvm-s390x-protvirt-Handle-SIGP-store-status-correctly.patch deleted file mode 100644 index 4d6a44b..0000000 --- a/SOURCES/kvm-s390x-protvirt-Handle-SIGP-store-status-correctly.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 680154545d1f9d75fb33615b1900661e7d09be4e Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:15 -0400 -Subject: [PATCH 33/42] s390x: protvirt: Handle SIGP store status correctly - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-34-thuth@redhat.com> -Patchwork-id: 97054 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 33/38] s390x: protvirt: Handle SIGP store status correctly -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -For protected VMs status storing is not done by QEMU anymore. 
- -Signed-off-by: Janosch Frank -Reviewed-by: Thomas Huth -Reviewed-by: David Hildenbrand -Reviewed-by: Christian Borntraeger -Reviewed-by: Claudio Imbrenda -Reviewed-by: Cornelia Huck -Message-Id: <20200319131921.2367-15-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit f2a2d9a2bae8f6fdc5e9a40c1241e9428f15b4df) -[thuth: fixed contextual conflict due to missing commit 44eaccd091a7365fd37) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/helper.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/target/s390x/helper.c b/target/s390x/helper.c -index 6808dfda01..36b6d3d9d1 100644 ---- a/target/s390x/helper.c -+++ b/target/s390x/helper.c -@@ -25,6 +25,7 @@ - #include "qemu/timer.h" - #include "qemu/qemu-print.h" - #include "hw/s390x/ioinst.h" -+#include "hw/s390x/pv.h" - #include "sysemu/hw_accel.h" - #include "sysemu/runstate.h" - #ifndef CONFIG_USER_ONLY -@@ -246,6 +247,11 @@ int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch) - hwaddr len = sizeof(*sa); - int i; - -+ /* For PVMs storing will occur when this cpu enters SIE again */ -+ if (s390_is_pv()) { -+ return 0; -+ } -+ - sa = cpu_physical_memory_map(addr, &len, 1); - if (!sa) { - return -EFAULT; --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-protvirt-Inhibit-balloon-when-switching-to-pro.patch b/SOURCES/kvm-s390x-protvirt-Inhibit-balloon-when-switching-to-pro.patch deleted file mode 100644 index a843d03..0000000 --- a/SOURCES/kvm-s390x-protvirt-Inhibit-balloon-when-switching-to-pro.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 095553f9dd1fec02869bf974e8cc07614d6587e5 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:06 -0400 -Subject: [PATCH 24/42] s390x: protvirt: Inhibit balloon when switching to - protected mode -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-25-thuth@redhat.com> -Patchwork-id: 
97036 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 24/38] s390x: protvirt: Inhibit balloon when switching to protected mode -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -Ballooning in protected VMs can only be done when the guest shares the -pages it gives to the host. If pages are not shared, the integrity -checks will fail once those pages have been altered and are given back -to the guest. - -As we currently do not yet have a solution for this we will continue -like this: - -1. We block ballooning now in QEMU (with this patch). - -2. Later we will provide a change to virtio that removes the blocker -and adds VIRTIO_F_IOMMU_PLATFORM automatically by QEMU when doing the -protvirt switch. This is OK, as the balloon driver in Linux (the only -supported guest) will refuse to work with the IOMMU_PLATFORM feature -bit set. - -3. Later, we can fix the guest balloon driver to accept the IOMMU -feature bit and correctly exercise sharing and unsharing of balloon -pages. - -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Reviewed-by: Christian Borntraeger -Reviewed-by: Claudio Imbrenda -Reviewed-by: Cornelia Huck -Message-Id: <20200319131921.2367-6-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit b1697f63fd8f8201b1447bb55f595830b9cbde31) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/s390-virtio-ccw.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index dbd5125232..b4ebe83766 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -42,6 +42,7 @@ - #include "hw/qdev-properties.h" - #include "hw/s390x/tod.h" - #include "sysemu/sysemu.h" -+#include "sysemu/balloon.h" - #include "hw/s390x/pv.h" - #include - #include "migration/blocker.h" -@@ -330,6 +331,7 @@ static void s390_machine_unprotect(S390CcwMachineState *ms) - ms->pv = false; - migrate_del_blocker(pv_mig_blocker); - error_free_or_abort(&pv_mig_blocker); -+ qemu_balloon_inhibit(false); - } - - static int s390_machine_protect(S390CcwMachineState *ms) -@@ -337,10 +339,18 @@ static int s390_machine_protect(S390CcwMachineState *ms) - Error *local_err = NULL; - int rc; - -+ /* -+ * Ballooning on protected VMs needs support in the guest for -+ * sharing and unsharing balloon pages. Block ballooning for -+ * now, until we have a solution to make at least Linux guests -+ * either support it or fail gracefully. 
-+ */ -+ qemu_balloon_inhibit(true); - error_setg(&pv_mig_blocker, - "protected VMs are currently not migrateable."); - rc = migrate_add_blocker(pv_mig_blocker, &local_err); - if (rc) { -+ qemu_balloon_inhibit(false); - error_report_err(local_err); - error_free_or_abort(&pv_mig_blocker); - return rc; -@@ -349,6 +359,7 @@ static int s390_machine_protect(S390CcwMachineState *ms) - /* Create SE VM */ - rc = s390_pv_vm_enable(); - if (rc) { -+ qemu_balloon_inhibit(false); - error_report_err(local_err); - migrate_del_blocker(pv_mig_blocker); - error_free_or_abort(&pv_mig_blocker); --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-protvirt-KVM-intercept-changes.patch b/SOURCES/kvm-s390x-protvirt-KVM-intercept-changes.patch deleted file mode 100644 index 2ac3d03..0000000 --- a/SOURCES/kvm-s390x-protvirt-KVM-intercept-changes.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 10ed4f6ad687d98f0bfe06d75775e8c541da80a0 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:07 -0400 -Subject: [PATCH 25/42] s390x: protvirt: KVM intercept changes - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-26-thuth@redhat.com> -Patchwork-id: 97035 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 25/38] s390x: protvirt: KVM intercept changes -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -Protected VMs no longer intercept with code 4 for an instruction -interception. Instead they have codes 104 and 108 for protected -instruction interception and protected instruction notification -respectively. - -The 104 mirrors the 4 interception. - -The 108 is a notification interception to let KVM and QEMU know that -something changed and we need to update tracking information or -perform specific tasks. 
It's currently taken for the following -instructions: - -* spx (To inform about the changed prefix location) -* sclp (On incorrect SCCB values, so we can inject a IRQ) -* sigp (All but "stop and store status") -* diag308 (Subcodes 0/1) - -Of these exits only sclp errors, state changing sigps and diag308 will -reach QEMU. QEMU will do its parts of the job, while the ultravisor -has done the instruction part of the job. - -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Reviewed-by: Christian Borntraeger -Reviewed-by: Claudio Imbrenda -Reviewed-by: Cornelia Huck -Message-Id: <20200319131921.2367-7-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 2585e507ffa1da01b57dbea26b1e1fe507d27198) -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/kvm.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 9a0be13959..af50b2c253 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -115,6 +115,8 @@ - #define ICPT_CPU_STOP 0x28 - #define ICPT_OPEREXC 0x2c - #define ICPT_IO 0x40 -+#define ICPT_PV_INSTR 0x68 -+#define ICPT_PV_INSTR_NOTIFICATION 0x6c - - #define NR_LOCAL_IRQS 32 - /* -@@ -1695,6 +1697,8 @@ static int handle_intercept(S390CPU *cpu) - (long)cs->kvm_run->psw_addr); - switch (icpt_code) { - case ICPT_INSTRUCTION: -+ case ICPT_PV_INSTR: -+ case ICPT_PV_INSTR_NOTIFICATION: - r = handle_instruction(cpu, run); - break; - case ICPT_PROGRAM: --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-protvirt-Move-IO-control-structures-over-SIDA.patch b/SOURCES/kvm-s390x-protvirt-Move-IO-control-structures-over-SIDA.patch deleted file mode 100644 index 0609546..0000000 --- a/SOURCES/kvm-s390x-protvirt-Move-IO-control-structures-over-SIDA.patch +++ /dev/null @@ -1,171 +0,0 @@ -From 8345b90f43b14435938fbbe0f3a510a60f5d0ded Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:14 -0400 -Subject: [PATCH 32/42] s390x: protvirt: Move IO control structures over 
SIDA - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-33-thuth@redhat.com> -Patchwork-id: 97040 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 32/38] s390x: protvirt: Move IO control structures over SIDA -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -For protected guests, we need to put the IO emulation results into the -SIDA, so SIE will write them into the guest at the next entry. - -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Reviewed-by: Cornelia Huck -Message-Id: <20200319131921.2367-14-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit fcc10c1470d6e9460ebcf4c30f5bbd37b921a041) -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/ioinst.c | 61 +++++++++++++++++++++++++++++++------------ - 1 file changed, 45 insertions(+), 16 deletions(-) - -diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c -index bbcccf6be2..f40c35c6ff 100644 ---- a/target/s390x/ioinst.c -+++ b/target/s390x/ioinst.c -@@ -138,7 +138,9 @@ void ioinst_handle_msch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) - s390_program_interrupt(env, PGM_SPECIFICATION, ra); - return; - } -- if (s390_cpu_virt_mem_read(cpu, addr, ar, &schib, sizeof(schib))) { -+ if (s390_is_pv()) { -+ s390_cpu_pv_mem_read(cpu, addr, &schib, sizeof(schib)); -+ } else if (s390_cpu_virt_mem_read(cpu, addr, ar, &schib, sizeof(schib))) { - s390_cpu_virt_mem_handle_exc(cpu, ra); - return; - } -@@ -195,7 +197,9 @@ void ioinst_handle_ssch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) - s390_program_interrupt(env, PGM_SPECIFICATION, ra); - return; - } -- if (s390_cpu_virt_mem_read(cpu, addr, ar, &orig_orb, sizeof(orb))) { -+ if (s390_is_pv()) { -+ s390_cpu_pv_mem_read(cpu, addr, &orig_orb, sizeof(orb)); -+ } else if (s390_cpu_virt_mem_read(cpu, addr, ar, &orig_orb, sizeof(orb))) { - s390_cpu_virt_mem_handle_exc(cpu, ra); - return; - } -@@ -231,14 +235,19 @@ 
void ioinst_handle_stcrw(S390CPU *cpu, uint32_t ipb, uintptr_t ra) - cc = css_do_stcrw(&crw); - /* 0 - crw stored, 1 - zeroes stored */ - -- if (s390_cpu_virt_mem_write(cpu, addr, ar, &crw, sizeof(crw)) == 0) { -+ if (s390_is_pv()) { -+ s390_cpu_pv_mem_write(cpu, addr, &crw, sizeof(crw)); - setcc(cpu, cc); - } else { -- if (cc == 0) { -- /* Write failed: requeue CRW since STCRW is suppressing */ -- css_undo_stcrw(&crw); -+ if (s390_cpu_virt_mem_write(cpu, addr, ar, &crw, sizeof(crw)) == 0) { -+ setcc(cpu, cc); -+ } else { -+ if (cc == 0) { -+ /* Write failed: requeue CRW since STCRW is suppressing */ -+ css_undo_stcrw(&crw); -+ } -+ s390_cpu_virt_mem_handle_exc(cpu, ra); - } -- s390_cpu_virt_mem_handle_exc(cpu, ra); - } - } - -@@ -260,6 +269,13 @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, - } - - if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) { -+ /* -+ * The Ultravisor checks schid bit 16 to be one and bits 0-12 -+ * to be 0 and injects a operand exception itself. -+ * -+ * Hence we should never end up here. 
-+ */ -+ g_assert(!s390_is_pv()); - /* - * As operand exceptions have a lower priority than access exceptions, - * we check whether the memory area is writeable (injecting the -@@ -292,14 +308,17 @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, - } - } - if (cc != 3) { -- if (s390_cpu_virt_mem_write(cpu, addr, ar, &schib, -- sizeof(schib)) != 0) { -+ if (s390_is_pv()) { -+ s390_cpu_pv_mem_write(cpu, addr, &schib, sizeof(schib)); -+ } else if (s390_cpu_virt_mem_write(cpu, addr, ar, &schib, -+ sizeof(schib)) != 0) { - s390_cpu_virt_mem_handle_exc(cpu, ra); - return; - } - } else { - /* Access exceptions have a higher priority than cc3 */ -- if (s390_cpu_virt_mem_check_write(cpu, addr, ar, sizeof(schib)) != 0) { -+ if (!s390_is_pv() && -+ s390_cpu_virt_mem_check_write(cpu, addr, ar, sizeof(schib)) != 0) { - s390_cpu_virt_mem_handle_exc(cpu, ra); - return; - } -@@ -336,7 +355,9 @@ int ioinst_handle_tsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) - } - /* 0 - status pending, 1 - not status pending, 3 - not operational */ - if (cc != 3) { -- if (s390_cpu_virt_mem_write(cpu, addr, ar, &irb, irb_len) != 0) { -+ if (s390_is_pv()) { -+ s390_cpu_pv_mem_write(cpu, addr, &irb, irb_len); -+ } else if (s390_cpu_virt_mem_write(cpu, addr, ar, &irb, irb_len) != 0) { - s390_cpu_virt_mem_handle_exc(cpu, ra); - return -EFAULT; - } -@@ -344,7 +365,8 @@ int ioinst_handle_tsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, uintptr_t ra) - } else { - irb_len = sizeof(irb) - sizeof(irb.emw); - /* Access exceptions have a higher priority than cc3 */ -- if (s390_cpu_virt_mem_check_write(cpu, addr, ar, irb_len) != 0) { -+ if (!s390_is_pv() && -+ s390_cpu_virt_mem_check_write(cpu, addr, ar, irb_len) != 0) { - s390_cpu_virt_mem_handle_exc(cpu, ra); - return -EFAULT; - } -@@ -642,7 +664,9 @@ void ioinst_handle_chsc(S390CPU *cpu, uint32_t ipb, uintptr_t ra) - * present CHSC sub-handlers ... if we ever need more, we should take - * care of req->len here first. 
- */ -- if (s390_cpu_virt_mem_read(cpu, addr, reg, buf, sizeof(ChscReq))) { -+ if (s390_is_pv()) { -+ s390_cpu_pv_mem_read(cpu, addr, buf, sizeof(ChscReq)); -+ } else if (s390_cpu_virt_mem_read(cpu, addr, reg, buf, sizeof(ChscReq))) { - s390_cpu_virt_mem_handle_exc(cpu, ra); - return; - } -@@ -675,11 +699,16 @@ void ioinst_handle_chsc(S390CPU *cpu, uint32_t ipb, uintptr_t ra) - break; - } - -- if (!s390_cpu_virt_mem_write(cpu, addr + len, reg, res, -- be16_to_cpu(res->len))) { -+ if (s390_is_pv()) { -+ s390_cpu_pv_mem_write(cpu, addr + len, res, be16_to_cpu(res->len)); - setcc(cpu, 0); /* Command execution complete */ - } else { -- s390_cpu_virt_mem_handle_exc(cpu, ra); -+ if (!s390_cpu_virt_mem_write(cpu, addr + len, reg, res, -+ be16_to_cpu(res->len))) { -+ setcc(cpu, 0); /* Command execution complete */ -+ } else { -+ s390_cpu_virt_mem_handle_exc(cpu, ra); -+ } - } - } - --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-protvirt-Move-STSI-data-over-SIDAD.patch b/SOURCES/kvm-s390x-protvirt-Move-STSI-data-over-SIDAD.patch deleted file mode 100644 index 1d60070..0000000 --- a/SOURCES/kvm-s390x-protvirt-Move-STSI-data-over-SIDAD.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 27f5d8a3af2863e39b7c46a3128009988d772f15 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:09 -0400 -Subject: [PATCH 27/42] s390x: protvirt: Move STSI data over SIDAD - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-28-thuth@redhat.com> -Patchwork-id: 97046 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 27/38] s390x: protvirt: Move STSI data over SIDAD -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -For protected guests, we need to put the STSI emulation results into -the SIDA, so SIE will write them into the guest at the next entry. 
- -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Reviewed-by: Claudio Imbrenda -Reviewed-by: Cornelia Huck -Message-Id: <20200319131921.2367-9-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 7c713b8acb70fb61f9650f8a7702dec546752bb6) -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/kvm.c | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index f67bb5ce2c..6809a5ac40 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -50,6 +50,7 @@ - #include "exec/memattrs.h" - #include "hw/s390x/s390-virtio-ccw.h" - #include "hw/s390x/s390-virtio-hcall.h" -+#include "hw/s390x/pv.h" - - #ifndef DEBUG_KVM - #define DEBUG_KVM 0 -@@ -1803,7 +1804,9 @@ static void insert_stsi_3_2_2(S390CPU *cpu, __u64 addr, uint8_t ar) - SysIB_322 sysib; - int del; - -- if (s390_cpu_virt_mem_read(cpu, addr, ar, &sysib, sizeof(sysib))) { -+ if (s390_is_pv()) { -+ s390_cpu_pv_mem_read(cpu, 0, &sysib, sizeof(sysib)); -+ } else if (s390_cpu_virt_mem_read(cpu, addr, ar, &sysib, sizeof(sysib))) { - return; - } - /* Shift the stack of Extended Names to prepare for our own data */ -@@ -1843,7 +1846,11 @@ static void insert_stsi_3_2_2(S390CPU *cpu, __u64 addr, uint8_t ar) - /* Insert UUID */ - memcpy(sysib.vm[0].uuid, &qemu_uuid, sizeof(sysib.vm[0].uuid)); - -- s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, sizeof(sysib)); -+ if (s390_is_pv()) { -+ s390_cpu_pv_mem_write(cpu, 0, &sysib, sizeof(sysib)); -+ } else { -+ s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, sizeof(sysib)); -+ } - } - - static int handle_stsi(S390CPU *cpu) --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-protvirt-Move-diag-308-data-over-SIDA.patch b/SOURCES/kvm-s390x-protvirt-Move-diag-308-data-over-SIDA.patch deleted file mode 100644 index 1b22719..0000000 --- a/SOURCES/kvm-s390x-protvirt-Move-diag-308-data-over-SIDA.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 33d4e21cfd236aecd9e4dbe8228d058fd1f22400 Mon 
Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:12 -0400 -Subject: [PATCH 30/42] s390x: protvirt: Move diag 308 data over SIDA - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-31-thuth@redhat.com> -Patchwork-id: 97048 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 30/38] s390x: protvirt: Move diag 308 data over SIDA -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -For protected guests the IPIB is written/read to/from the SIDA, so we -need those accesses to go through s390_cpu_pv_mem_read/write(). - -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Reviewed-by: Christian Borntraeger -Reviewed-by: Claudio Imbrenda -Reviewed-by: Cornelia Huck -Message-Id: <20200319131921.2367-12-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 9c61e11238cfa8f70e3eb90aac5d3e5646e5432f) -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/diag.c | 25 ++++++++++++++++++++----- - 1 file changed, 20 insertions(+), 5 deletions(-) - -diff --git a/target/s390x/diag.c b/target/s390x/diag.c -index b2cbefb8cf..1a48429564 100644 ---- a/target/s390x/diag.c -+++ b/target/s390x/diag.c -@@ -75,6 +75,7 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) - { - bool valid; - CPUState *cs = env_cpu(env); -+ S390CPU *cpu = S390_CPU(cs); - uint64_t addr = env->regs[r1]; - uint64_t subcode = env->regs[r3]; - IplParameterBlock *iplb; -@@ -111,13 +112,22 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) - return; - } - iplb = g_new0(IplParameterBlock, 1); -- cpu_physical_memory_read(addr, iplb, sizeof(iplb->len)); -+ if (!s390_is_pv()) { -+ cpu_physical_memory_read(addr, iplb, sizeof(iplb->len)); -+ } else { -+ s390_cpu_pv_mem_read(cpu, 0, iplb, sizeof(iplb->len)); -+ } -+ - if (!iplb_valid_len(iplb)) { - env->regs[r1 + 1] = DIAG_308_RC_INVALID; - goto out; - } - -- 
cpu_physical_memory_read(addr, iplb, be32_to_cpu(iplb->len)); -+ if (!s390_is_pv()) { -+ cpu_physical_memory_read(addr, iplb, be32_to_cpu(iplb->len)); -+ } else { -+ s390_cpu_pv_mem_read(cpu, 0, iplb, be32_to_cpu(iplb->len)); -+ } - - valid = subcode == DIAG308_PV_SET ? iplb_valid_pv(iplb) : iplb_valid(iplb); - if (!valid) { -@@ -140,12 +150,17 @@ out: - } else { - iplb = s390_ipl_get_iplb(); - } -- if (iplb) { -+ if (!iplb) { -+ env->regs[r1 + 1] = DIAG_308_RC_NO_CONF; -+ return; -+ } -+ -+ if (!s390_is_pv()) { - cpu_physical_memory_write(addr, iplb, be32_to_cpu(iplb->len)); -- env->regs[r1 + 1] = DIAG_308_RC_OK; - } else { -- env->regs[r1 + 1] = DIAG_308_RC_NO_CONF; -+ s390_cpu_pv_mem_write(cpu, 0, iplb, be32_to_cpu(iplb->len)); - } -+ env->regs[r1 + 1] = DIAG_308_RC_OK; - return; - case DIAG308_PV_START: - iplb = s390_ipl_get_iplb_pv(); --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-protvirt-SCLP-interpretation.patch b/SOURCES/kvm-s390x-protvirt-SCLP-interpretation.patch deleted file mode 100644 index 10f1930..0000000 --- a/SOURCES/kvm-s390x-protvirt-SCLP-interpretation.patch +++ /dev/null @@ -1,172 +0,0 @@ -From 5a8b40c3fdafeb49072f8643210bea00ce1478c4 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:10 -0400 -Subject: [PATCH 28/42] s390x: protvirt: SCLP interpretation - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-29-thuth@redhat.com> -Patchwork-id: 97053 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 28/38] s390x: protvirt: SCLP interpretation -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -SCLP for a protected guest is done over the SIDAD, so we need to use -the s390_cpu_pv_mem_* functions to access the SIDAD instead of guest -memory when reading/writing SCBs. - -To not confuse the sclp emulation, we set 0x4000 as the SCCB address, -since the function that injects the sclp external interrupt would -reject a zero sccb address. 
- -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Reviewed-by: Claudio Imbrenda -Reviewed-by: Cornelia Huck -Reviewed-by: Christian Borntraeger -Message-Id: <20200319131921.2367-10-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 0f73c5b30b8ba6c0828608be496d2f59a5427539) -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/sclp.c | 56 +++++++++++++++++++++++++++++++++-------- - include/hw/s390x/sclp.h | 2 ++ - target/s390x/kvm.c | 25 ++++++++++++++---- - 3 files changed, 67 insertions(+), 16 deletions(-) - -diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c -index f57ce7b739..1c380a49cc 100644 ---- a/hw/s390x/sclp.c -+++ b/hw/s390x/sclp.c -@@ -33,6 +33,22 @@ static inline SCLPDevice *get_sclp_device(void) - return sclp; - } - -+static inline bool sclp_command_code_valid(uint32_t code) -+{ -+ switch (code & SCLP_CMD_CODE_MASK) { -+ case SCLP_CMDW_READ_SCP_INFO: -+ case SCLP_CMDW_READ_SCP_INFO_FORCED: -+ case SCLP_CMDW_READ_CPU_INFO: -+ case SCLP_CMDW_CONFIGURE_IOA: -+ case SCLP_CMDW_DECONFIGURE_IOA: -+ case SCLP_CMD_READ_EVENT_DATA: -+ case SCLP_CMD_WRITE_EVENT_DATA: -+ case SCLP_CMD_WRITE_EVENT_MASK: -+ return true; -+ } -+ return false; -+} -+ - static void prepare_cpu_entries(SCLPDevice *sclp, CPUEntry *entry, int *count) - { - MachineState *ms = MACHINE(qdev_get_machine()); -@@ -193,6 +209,34 @@ static void sclp_execute(SCLPDevice *sclp, SCCB *sccb, uint32_t code) - } - } - -+/* -+ * We only need the address to have something valid for the -+ * service_interrupt call. 
-+ */ -+#define SCLP_PV_DUMMY_ADDR 0x4000 -+int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, -+ uint32_t code) -+{ -+ SCLPDevice *sclp = get_sclp_device(); -+ SCLPDeviceClass *sclp_c = SCLP_GET_CLASS(sclp); -+ SCCB work_sccb; -+ hwaddr sccb_len = sizeof(SCCB); -+ -+ s390_cpu_pv_mem_read(env_archcpu(env), 0, &work_sccb, sccb_len); -+ -+ if (!sclp_command_code_valid(code)) { -+ work_sccb.h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); -+ goto out_write; -+ } -+ -+ sclp_c->execute(sclp, &work_sccb, code); -+out_write: -+ s390_cpu_pv_mem_write(env_archcpu(env), 0, &work_sccb, -+ be16_to_cpu(work_sccb.h.length)); -+ sclp_c->service_interrupt(sclp, SCLP_PV_DUMMY_ADDR); -+ return 0; -+} -+ - int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) - { - SCLPDevice *sclp = get_sclp_device(); -@@ -230,17 +274,7 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) - goto out; - } - -- switch (code & SCLP_CMD_CODE_MASK) { -- case SCLP_CMDW_READ_SCP_INFO: -- case SCLP_CMDW_READ_SCP_INFO_FORCED: -- case SCLP_CMDW_READ_CPU_INFO: -- case SCLP_CMDW_CONFIGURE_IOA: -- case SCLP_CMDW_DECONFIGURE_IOA: -- case SCLP_CMD_READ_EVENT_DATA: -- case SCLP_CMD_WRITE_EVENT_DATA: -- case SCLP_CMD_WRITE_EVENT_MASK: -- break; -- default: -+ if (!sclp_command_code_valid(code)) { - work_sccb.h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); - goto out_write; - } -diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h -index c54413b78c..c0a3faa37d 100644 ---- a/include/hw/s390x/sclp.h -+++ b/include/hw/s390x/sclp.h -@@ -217,5 +217,7 @@ void s390_sclp_init(void); - void sclp_service_interrupt(uint32_t sccb); - void raise_irq_cpu_hotplug(void); - int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code); -+int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, -+ uint32_t code); - - #endif -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 6809a5ac40..56fe60c49c 100644 ---- 
a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -1230,12 +1230,27 @@ static void kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run, - sccb = env->regs[ipbh0 & 0xf]; - code = env->regs[(ipbh0 & 0xf0) >> 4]; - -- r = sclp_service_call(env, sccb, code); -- if (r < 0) { -- kvm_s390_program_interrupt(cpu, -r); -- return; -+ switch (run->s390_sieic.icptcode) { -+ case ICPT_PV_INSTR_NOTIFICATION: -+ g_assert(s390_is_pv()); -+ /* The notification intercepts are currently handled by KVM */ -+ error_report("unexpected SCLP PV notification"); -+ exit(1); -+ break; -+ case ICPT_PV_INSTR: -+ g_assert(s390_is_pv()); -+ sclp_service_call_protected(env, sccb, code); -+ /* Setting the CC is done by the Ultravisor. */ -+ break; -+ case ICPT_INSTRUCTION: -+ g_assert(!s390_is_pv()); -+ r = sclp_service_call(env, sccb, code); -+ if (r < 0) { -+ kvm_s390_program_interrupt(cpu, -r); -+ return; -+ } -+ setcc(cpu, r); - } -- setcc(cpu, r); - } - - static int handle_b2(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-protvirt-Set-guest-IPL-PSW.patch b/SOURCES/kvm-s390x-protvirt-Set-guest-IPL-PSW.patch deleted file mode 100644 index ef246c7..0000000 --- a/SOURCES/kvm-s390x-protvirt-Set-guest-IPL-PSW.patch +++ /dev/null @@ -1,75 +0,0 @@ -From d738b4336c79be68b6040f73427e089f46957728 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:11 -0400 -Subject: [PATCH 29/42] s390x: protvirt: Set guest IPL PSW - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-30-thuth@redhat.com> -Patchwork-id: 97049 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 29/38] s390x: protvirt: Set guest IPL PSW -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -Handling of CPU reset and setting of the IPL psw from guest storage at -offset 0 is done by a Ultravisor call. Let's only fetch it if -necessary. 
- -Signed-off-by: Janosch Frank -Reviewed-by: Thomas Huth -Reviewed-by: David Hildenbrand -Reviewed-by: Christian Borntraeger -Reviewed-by: Claudio Imbrenda -Reviewed-by: Cornelia Huck -Message-Id: <20200319131921.2367-11-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 59181010a2ff82c3a97e9b5768ee87c38e4815f1) -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/cpu.c | 26 +++++++++++++++++--------- - 1 file changed, 17 insertions(+), 9 deletions(-) - -diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c -index 8f38cd8e6f..371b91b2d7 100644 ---- a/target/s390x/cpu.c -+++ b/target/s390x/cpu.c -@@ -77,16 +77,24 @@ static bool s390_cpu_has_work(CPUState *cs) - static void s390_cpu_load_normal(CPUState *s) - { - S390CPU *cpu = S390_CPU(s); -- uint64_t spsw = ldq_phys(s->as, 0); -- -- cpu->env.psw.mask = spsw & PSW_MASK_SHORT_CTRL; -- /* -- * Invert short psw indication, so SIE will report a specification -- * exception if it was not set. -- */ -- cpu->env.psw.mask ^= PSW_MASK_SHORTPSW; -- cpu->env.psw.addr = spsw & PSW_MASK_SHORT_ADDR; -+ uint64_t spsw; - -+ if (!s390_is_pv()) { -+ spsw = ldq_phys(s->as, 0); -+ cpu->env.psw.mask = spsw & PSW_MASK_SHORT_CTRL; -+ /* -+ * Invert short psw indication, so SIE will report a specification -+ * exception if it was not set. -+ */ -+ cpu->env.psw.mask ^= PSW_MASK_SHORTPSW; -+ cpu->env.psw.addr = spsw & PSW_MASK_SHORT_ADDR; -+ } else { -+ /* -+ * Firmware requires us to set the load state before we set -+ * the cpu to operating on protected guests. 
-+ */ -+ s390_cpu_set_state(S390_CPU_STATE_LOAD, cpu); -+ } - s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); - } - #endif --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-protvirt-Support-unpack-facility.patch b/SOURCES/kvm-s390x-protvirt-Support-unpack-facility.patch deleted file mode 100644 index 204de2a..0000000 --- a/SOURCES/kvm-s390x-protvirt-Support-unpack-facility.patch +++ /dev/null @@ -1,886 +0,0 @@ -From e6474080e3816e82e87c545a3d22db77c55ab053 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:04 -0400 -Subject: [PATCH 22/42] s390x: protvirt: Support unpack facility - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-23-thuth@redhat.com> -Patchwork-id: 97045 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 22/38] s390x: protvirt: Support unpack facility -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -The unpack facility provides the means to setup a protected guest. A -protected guest cannot be introspected by the hypervisor or any -user/administrator of the machine it is running on. - -Protected guests are encrypted at rest and need a special boot -mechanism via diag308 subcode 8 and 10. - -Code 8 sets the PV specific IPLB which is retained separately from -those set via code 5. - -Code 10 is used to unpack the VM into protected memory, verify its -integrity and start it. - -Signed-off-by: Janosch Frank -Co-developed-by: Christian Borntraeger [Changes -to machine] -Reviewed-by: David Hildenbrand -Reviewed-by: Claudio Imbrenda -Reviewed-by: Cornelia Huck -Message-Id: <20200323083606.24520-1-frankja@linux.ibm.com> -[CH: fixed up KVM_PV_VM_ -> KVM_PV_] -Signed-off-by: Cornelia Huck -(cherry picked from commit c3347ed0d2ee42a7dcf7bfe7f9c3884a9596727a) -Signed-off-by: Danilo C. L. 
de Paula ---- - MAINTAINERS | 2 + - hw/s390x/Makefile.objs | 1 + - hw/s390x/ipl.c | 59 +++++++++++++- - hw/s390x/ipl.h | 91 ++++++++++++++++++++- - hw/s390x/pv.c | 98 +++++++++++++++++++++++ - hw/s390x/s390-virtio-ccw.c | 119 +++++++++++++++++++++++++++- - include/hw/s390x/pv.h | 55 +++++++++++++ - include/hw/s390x/s390-virtio-ccw.h | 1 + - target/s390x/cpu.c | 1 + - target/s390x/cpu_features_def.inc.h | 1 + - target/s390x/diag.c | 39 ++++++++- - target/s390x/kvm-stub.c | 5 ++ - target/s390x/kvm.c | 5 ++ - target/s390x/kvm_s390x.h | 1 + - 14 files changed, 468 insertions(+), 10 deletions(-) - create mode 100644 hw/s390x/pv.c - create mode 100644 include/hw/s390x/pv.h - -diff --git a/MAINTAINERS b/MAINTAINERS -index 49d5d44edc..2742c95575 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -385,6 +385,8 @@ F: target/s390x/machine.c - F: target/s390x/sigp.c - F: target/s390x/cpu_features*.[ch] - F: target/s390x/cpu_models.[ch] -+F: hw/s390x/pv.c -+F: include/hw/s390x/pv.h - F: hw/intc/s390_flic.c - F: hw/intc/s390_flic_kvm.c - F: include/hw/s390x/s390_flic.h -diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs -index e02ed80b68..a46a1c7894 100644 ---- a/hw/s390x/Makefile.objs -+++ b/hw/s390x/Makefile.objs -@@ -31,6 +31,7 @@ obj-y += tod-qemu.o - obj-$(CONFIG_KVM) += tod-kvm.o - obj-$(CONFIG_KVM) += s390-skeys-kvm.o - obj-$(CONFIG_KVM) += s390-stattrib-kvm.o -+obj-$(CONFIG_KVM) += pv.o - obj-y += s390-ccw.o - obj-y += ap-device.o - obj-y += ap-bridge.o -diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c -index fa0409dc23..586d95b5b6 100644 ---- a/hw/s390x/ipl.c -+++ b/hw/s390x/ipl.c -@@ -1,10 +1,11 @@ - /* - * bootloader support - * -- * Copyright IBM, Corp. 2012 -+ * Copyright IBM, Corp. 2012, 2020 - * - * Authors: - * Christian Borntraeger -+ * Janosch Frank - * - * This work is licensed under the terms of the GNU GPL, version 2 or (at your - * option) any later version. See the COPYING file in the top-level directory. 
-@@ -27,6 +28,7 @@ - #include "hw/s390x/vfio-ccw.h" - #include "hw/s390x/css.h" - #include "hw/s390x/ebcdic.h" -+#include "hw/s390x/pv.h" - #include "ipl.h" - #include "qemu/error-report.h" - #include "qemu/config-file.h" -@@ -557,12 +559,31 @@ void s390_ipl_update_diag308(IplParameterBlock *iplb) - { - S390IPLState *ipl = get_ipl_device(); - -- ipl->iplb = *iplb; -- ipl->iplb_valid = true; -+ /* -+ * The IPLB set and retrieved by subcodes 8/9 is completely -+ * separate from the one managed via subcodes 5/6. -+ */ -+ if (iplb->pbt == S390_IPL_TYPE_PV) { -+ ipl->iplb_pv = *iplb; -+ ipl->iplb_valid_pv = true; -+ } else { -+ ipl->iplb = *iplb; -+ ipl->iplb_valid = true; -+ } - ipl->netboot = is_virtio_net_device(iplb); - update_machine_ipl_properties(iplb); - } - -+IplParameterBlock *s390_ipl_get_iplb_pv(void) -+{ -+ S390IPLState *ipl = get_ipl_device(); -+ -+ if (!ipl->iplb_valid_pv) { -+ return NULL; -+ } -+ return &ipl->iplb_pv; -+} -+ - IplParameterBlock *s390_ipl_get_iplb(void) - { - S390IPLState *ipl = get_ipl_device(); -@@ -651,6 +672,38 @@ static void s390_ipl_prepare_qipl(S390CPU *cpu) - cpu_physical_memory_unmap(addr, len, 1, len); - } - -+int s390_ipl_prepare_pv_header(void) -+{ -+ IplParameterBlock *ipib = s390_ipl_get_iplb_pv(); -+ IPLBlockPV *ipib_pv = &ipib->pv; -+ void *hdr = g_malloc(ipib_pv->pv_header_len); -+ int rc; -+ -+ cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr, -+ ipib_pv->pv_header_len); -+ rc = s390_pv_set_sec_parms((uintptr_t)hdr, -+ ipib_pv->pv_header_len); -+ g_free(hdr); -+ return rc; -+} -+ -+int s390_ipl_pv_unpack(void) -+{ -+ IplParameterBlock *ipib = s390_ipl_get_iplb_pv(); -+ IPLBlockPV *ipib_pv = &ipib->pv; -+ int i, rc = 0; -+ -+ for (i = 0; i < ipib_pv->num_comp; i++) { -+ rc = s390_pv_unpack(ipib_pv->components[i].addr, -+ TARGET_PAGE_ALIGN(ipib_pv->components[i].size), -+ ipib_pv->components[i].tweak_pref); -+ if (rc) { -+ break; -+ } -+ } -+ return rc; -+} -+ - void s390_ipl_prepare_cpu(S390CPU *cpu) - { - 
S390IPLState *ipl = get_ipl_device(); -diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h -index a5665e6bfd..89b3044d7a 100644 ---- a/hw/s390x/ipl.h -+++ b/hw/s390x/ipl.h -@@ -1,8 +1,9 @@ - /* - * s390 IPL device - * -- * Copyright 2015 IBM Corp. -+ * Copyright 2015, 2020 IBM Corp. - * Author(s): Zhang Fan -+ * Janosch Frank - * - * This work is licensed under the terms of the GNU GPL, version 2 or (at - * your option) any later version. See the COPYING file in the top-level -@@ -15,6 +16,24 @@ - #include "cpu.h" - #include "hw/qdev-core.h" - -+struct IPLBlockPVComp { -+ uint64_t tweak_pref; -+ uint64_t addr; -+ uint64_t size; -+} QEMU_PACKED; -+typedef struct IPLBlockPVComp IPLBlockPVComp; -+ -+struct IPLBlockPV { -+ uint8_t reserved18[87]; /* 0x18 */ -+ uint8_t version; /* 0x6f */ -+ uint32_t reserved70; /* 0x70 */ -+ uint32_t num_comp; /* 0x74 */ -+ uint64_t pv_header_addr; /* 0x78 */ -+ uint64_t pv_header_len; /* 0x80 */ -+ struct IPLBlockPVComp components[]; -+} QEMU_PACKED; -+typedef struct IPLBlockPV IPLBlockPV; -+ - struct IplBlockCcw { - uint8_t reserved0[85]; - uint8_t ssid; -@@ -71,6 +90,7 @@ union IplParameterBlock { - union { - IplBlockCcw ccw; - IplBlockFcp fcp; -+ IPLBlockPV pv; - IplBlockQemuScsi scsi; - }; - } QEMU_PACKED; -@@ -85,8 +105,11 @@ typedef union IplParameterBlock IplParameterBlock; - - int s390_ipl_set_loadparm(uint8_t *loadparm); - void s390_ipl_update_diag308(IplParameterBlock *iplb); -+int s390_ipl_prepare_pv_header(void); -+int s390_ipl_pv_unpack(void); - void s390_ipl_prepare_cpu(S390CPU *cpu); - IplParameterBlock *s390_ipl_get_iplb(void); -+IplParameterBlock *s390_ipl_get_iplb_pv(void); - - enum s390_reset { - /* default is a reset not triggered by a CPU e.g. 
issued by QMP */ -@@ -94,6 +117,7 @@ enum s390_reset { - S390_RESET_REIPL, - S390_RESET_MODIFIED_CLEAR, - S390_RESET_LOAD_NORMAL, -+ S390_RESET_PV, - }; - void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type); - void s390_ipl_get_reset_request(CPUState **cs, enum s390_reset *reset_type); -@@ -133,6 +157,7 @@ struct S390IPLState { - /*< private >*/ - DeviceState parent_obj; - IplParameterBlock iplb; -+ IplParameterBlock iplb_pv; - QemuIplParameters qipl; - uint64_t start_addr; - uint64_t compat_start_addr; -@@ -140,6 +165,7 @@ struct S390IPLState { - uint64_t compat_bios_start_addr; - bool enforce_bios; - bool iplb_valid; -+ bool iplb_valid_pv; - bool netboot; - /* reset related properties don't have to be migrated or reset */ - enum s390_reset reset_type; -@@ -162,6 +188,8 @@ QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, "alignment of iplb wrong"); - #define DIAG_308_RC_OK 0x0001 - #define DIAG_308_RC_NO_CONF 0x0102 - #define DIAG_308_RC_INVALID 0x0402 -+#define DIAG_308_RC_NO_PV_CONF 0x0902 -+#define DIAG_308_RC_INVAL_FOR_PV 0x0a02 - - #define DIAG308_RESET_MOD_CLR 0 - #define DIAG308_RESET_LOAD_NORM 1 -@@ -169,12 +197,17 @@ QEMU_BUILD_BUG_MSG(offsetof(S390IPLState, iplb) & 3, "alignment of iplb wrong"); - #define DIAG308_LOAD_NORMAL_DUMP 4 - #define DIAG308_SET 5 - #define DIAG308_STORE 6 -+#define DIAG308_PV_SET 8 -+#define DIAG308_PV_STORE 9 -+#define DIAG308_PV_START 10 - - #define S390_IPL_TYPE_FCP 0x00 - #define S390_IPL_TYPE_CCW 0x02 -+#define S390_IPL_TYPE_PV 0x05 - #define S390_IPL_TYPE_QEMU_SCSI 0xff - - #define S390_IPLB_HEADER_LEN 8 -+#define S390_IPLB_MIN_PV_LEN 148 - #define S390_IPLB_MIN_CCW_LEN 200 - #define S390_IPLB_MIN_FCP_LEN 384 - #define S390_IPLB_MIN_QEMU_SCSI_LEN 200 -@@ -184,6 +217,62 @@ static inline bool iplb_valid_len(IplParameterBlock *iplb) - return be32_to_cpu(iplb->len) <= sizeof(IplParameterBlock); - } - -+static inline bool ipl_valid_pv_components(IplParameterBlock *iplb) -+{ -+ IPLBlockPV *ipib_pv = 
&iplb->pv; -+ int i; -+ -+ if (ipib_pv->num_comp == 0) { -+ return false; -+ } -+ -+ for (i = 0; i < ipib_pv->num_comp; i++) { -+ /* Addr must be 4k aligned */ -+ if (ipib_pv->components[i].addr & ~TARGET_PAGE_MASK) { -+ return false; -+ } -+ -+ /* Tweak prefix is monotonically increasing with each component */ -+ if (i < ipib_pv->num_comp - 1 && -+ ipib_pv->components[i].tweak_pref >= -+ ipib_pv->components[i + 1].tweak_pref) { -+ return false; -+ } -+ } -+ return true; -+} -+ -+static inline bool ipl_valid_pv_header(IplParameterBlock *iplb) -+{ -+ IPLBlockPV *ipib_pv = &iplb->pv; -+ -+ if (ipib_pv->pv_header_len > 2 * TARGET_PAGE_SIZE) { -+ return false; -+ } -+ -+ if (!address_space_access_valid(&address_space_memory, -+ ipib_pv->pv_header_addr, -+ ipib_pv->pv_header_len, -+ false, -+ MEMTXATTRS_UNSPECIFIED)) { -+ return false; -+ } -+ -+ return true; -+} -+ -+static inline bool iplb_valid_pv(IplParameterBlock *iplb) -+{ -+ if (iplb->pbt != S390_IPL_TYPE_PV || -+ be32_to_cpu(iplb->len) < S390_IPLB_MIN_PV_LEN) { -+ return false; -+ } -+ if (!ipl_valid_pv_header(iplb)) { -+ return false; -+ } -+ return ipl_valid_pv_components(iplb); -+} -+ - static inline bool iplb_valid(IplParameterBlock *iplb) - { - switch (iplb->pbt) { -diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c -new file mode 100644 -index 0000000000..a40a844806 ---- /dev/null -+++ b/hw/s390x/pv.c -@@ -0,0 +1,98 @@ -+/* -+ * Protected Virtualization functions -+ * -+ * Copyright IBM Corp. 2020 -+ * Author(s): -+ * Janosch Frank -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or (at -+ * your option) any later version. See the COPYING file in the top-level -+ * directory. 
-+ */ -+#include "qemu/osdep.h" -+ -+#include -+ -+#include "qemu/error-report.h" -+#include "sysemu/kvm.h" -+#include "hw/s390x/pv.h" -+ -+static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) -+{ -+ struct kvm_pv_cmd pv_cmd = { -+ .cmd = cmd, -+ .data = (uint64_t)data, -+ }; -+ int rc = kvm_vm_ioctl(kvm_state, KVM_S390_PV_COMMAND, &pv_cmd); -+ -+ if (rc) { -+ error_report("KVM PV command %d (%s) failed: header rc %x rrc %x " -+ "IOCTL rc: %d", cmd, cmdname, pv_cmd.rc, pv_cmd.rrc, -+ rc); -+ } -+ return rc; -+} -+ -+/* -+ * This macro lets us pass the command as a string to the function so -+ * we can print it on an error. -+ */ -+#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data); -+#define s390_pv_cmd_exit(cmd, data) \ -+{ \ -+ int rc; \ -+ \ -+ rc = __s390_pv_cmd(cmd, #cmd, data);\ -+ if (rc) { \ -+ exit(1); \ -+ } \ -+} -+ -+int s390_pv_vm_enable(void) -+{ -+ return s390_pv_cmd(KVM_PV_ENABLE, NULL); -+} -+ -+void s390_pv_vm_disable(void) -+{ -+ s390_pv_cmd_exit(KVM_PV_DISABLE, NULL); -+} -+ -+int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) -+{ -+ struct kvm_s390_pv_sec_parm args = { -+ .origin = origin, -+ .length = length, -+ }; -+ -+ return s390_pv_cmd(KVM_PV_SET_SEC_PARMS, &args); -+} -+ -+/* -+ * Called for each component in the SE type IPL parameter block 0. 
-+ */ -+int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) -+{ -+ struct kvm_s390_pv_unp args = { -+ .addr = addr, -+ .size = size, -+ .tweak = tweak, -+ }; -+ -+ return s390_pv_cmd(KVM_PV_UNPACK, &args); -+} -+ -+void s390_pv_perf_clear_reset(void) -+{ -+ s390_pv_cmd_exit(KVM_PV_PREP_RESET, NULL); -+} -+ -+int s390_pv_verify(void) -+{ -+ return s390_pv_cmd(KVM_PV_VERIFY, NULL); -+} -+ -+void s390_pv_unshare(void) -+{ -+ s390_pv_cmd_exit(KVM_PV_UNSHARE_ALL, NULL); -+} -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 4ea01c53c0..82da1d9ab5 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1,9 +1,10 @@ - /* - * virtio ccw machine - * -- * Copyright 2012 IBM Corp. -+ * Copyright 2012, 2020 IBM Corp. - * Copyright (c) 2009 Alexander Graf - * Author(s): Cornelia Huck -+ * Janosch Frank - * - * This work is licensed under the terms of the GNU GPL, version 2 or (at - * your option) any later version. See the COPYING file in the top-level -@@ -41,6 +42,8 @@ - #include "hw/qdev-properties.h" - #include "hw/s390x/tod.h" - #include "sysemu/sysemu.h" -+#include "hw/s390x/pv.h" -+#include - - S390CPU *s390_cpu_addr2state(uint16_t cpu_addr) - { -@@ -318,10 +321,78 @@ static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg) - s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); - } - -+static void s390_machine_unprotect(S390CcwMachineState *ms) -+{ -+ s390_pv_vm_disable(); -+ ms->pv = false; -+} -+ -+static int s390_machine_protect(S390CcwMachineState *ms) -+{ -+ int rc; -+ -+ /* Create SE VM */ -+ rc = s390_pv_vm_enable(); -+ if (rc) { -+ return rc; -+ } -+ -+ ms->pv = true; -+ -+ /* Set SE header and unpack */ -+ rc = s390_ipl_prepare_pv_header(); -+ if (rc) { -+ goto out_err; -+ } -+ -+ /* Decrypt image */ -+ rc = s390_ipl_pv_unpack(); -+ if (rc) { -+ goto out_err; -+ } -+ -+ /* Verify integrity */ -+ rc = s390_pv_verify(); -+ if (rc) { -+ goto out_err; -+ } -+ return rc; -+ -+out_err: -+ 
s390_machine_unprotect(ms); -+ return rc; -+} -+ -+static void s390_machine_inject_pv_error(CPUState *cs) -+{ -+ int r1 = (cs->kvm_run->s390_sieic.ipa & 0x00f0) >> 4; -+ CPUS390XState *env = &S390_CPU(cs)->env; -+ -+ /* Report that we are unable to enter protected mode */ -+ env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; -+} -+ -+static void s390_pv_prepare_reset(S390CcwMachineState *ms) -+{ -+ CPUState *cs; -+ -+ if (!s390_is_pv()) { -+ return; -+ } -+ /* Unsharing requires all cpus to be stopped */ -+ CPU_FOREACH(cs) { -+ s390_cpu_set_state(S390_CPU_STATE_STOPPED, S390_CPU(cs)); -+ } -+ s390_pv_unshare(); -+ s390_pv_perf_clear_reset(); -+} -+ - static void s390_machine_reset(MachineState *machine) - { -+ S390CcwMachineState *ms = S390_CCW_MACHINE(machine); - enum s390_reset reset_type; - CPUState *cs, *t; -+ S390CPU *cpu; - - /* get the reset parameters, reset them once done */ - s390_ipl_get_reset_request(&cs, &reset_type); -@@ -329,9 +400,15 @@ static void s390_machine_reset(MachineState *machine) - /* all CPUs are paused and synchronized at this point */ - s390_cmma_reset(); - -+ cpu = S390_CPU(cs); -+ - switch (reset_type) { - case S390_RESET_EXTERNAL: - case S390_RESET_REIPL: -+ if (s390_is_pv()) { -+ s390_machine_unprotect(ms); -+ } -+ - qemu_devices_reset(); - s390_crypto_reset(); - -@@ -339,22 +416,56 @@ static void s390_machine_reset(MachineState *machine) - run_on_cpu(cs, s390_do_cpu_ipl, RUN_ON_CPU_NULL); - break; - case S390_RESET_MODIFIED_CLEAR: -+ /* -+ * Susbsystem reset needs to be done before we unshare memory -+ * and lose access to VIRTIO structures in guest memory. 
-+ */ -+ subsystem_reset(); -+ s390_crypto_reset(); -+ s390_pv_prepare_reset(ms); - CPU_FOREACH(t) { - run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); - } -- subsystem_reset(); -- s390_crypto_reset(); - run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); - break; - case S390_RESET_LOAD_NORMAL: -+ /* -+ * Susbsystem reset needs to be done before we unshare memory -+ * and lose access to VIRTIO structures in guest memory. -+ */ -+ subsystem_reset(); -+ s390_pv_prepare_reset(ms); - CPU_FOREACH(t) { - if (t == cs) { - continue; - } - run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL); - } -- subsystem_reset(); - run_on_cpu(cs, s390_do_cpu_initial_reset, RUN_ON_CPU_NULL); -+ run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); -+ break; -+ case S390_RESET_PV: /* Subcode 10 */ -+ subsystem_reset(); -+ s390_crypto_reset(); -+ -+ CPU_FOREACH(t) { -+ if (t == cs) { -+ continue; -+ } -+ run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); -+ } -+ run_on_cpu(cs, s390_do_cpu_reset, RUN_ON_CPU_NULL); -+ -+ if (s390_machine_protect(ms)) { -+ s390_machine_inject_pv_error(cs); -+ /* -+ * Continue after the diag308 so the guest knows something -+ * went wrong. -+ */ -+ s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); -+ return; -+ } -+ - run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL); - break; - default: -diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h -new file mode 100644 -index 0000000000..c6cb360f2f ---- /dev/null -+++ b/include/hw/s390x/pv.h -@@ -0,0 +1,55 @@ -+/* -+ * Protected Virtualization header -+ * -+ * Copyright IBM Corp. 2020 -+ * Author(s): -+ * Janosch Frank -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or (at -+ * your option) any later version. See the COPYING file in the top-level -+ * directory. 
-+ */ -+#ifndef HW_S390_PV_H -+#define HW_S390_PV_H -+ -+#ifdef CONFIG_KVM -+#include "hw/s390x/s390-virtio-ccw.h" -+ -+static inline bool s390_is_pv(void) -+{ -+ static S390CcwMachineState *ccw; -+ Object *obj; -+ -+ if (ccw) { -+ return ccw->pv; -+ } -+ -+ /* we have to bail out for the "none" machine */ -+ obj = object_dynamic_cast(qdev_get_machine(), -+ TYPE_S390_CCW_MACHINE); -+ if (!obj) { -+ return false; -+ } -+ ccw = S390_CCW_MACHINE(obj); -+ return ccw->pv; -+} -+ -+int s390_pv_vm_enable(void); -+void s390_pv_vm_disable(void); -+int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); -+int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); -+void s390_pv_perf_clear_reset(void); -+int s390_pv_verify(void); -+void s390_pv_unshare(void); -+#else /* CONFIG_KVM */ -+static inline bool s390_is_pv(void) { return false; } -+static inline int s390_pv_vm_enable(void) { return 0; } -+static inline void s390_pv_vm_disable(void) {} -+static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } -+static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } -+static inline void s390_pv_perf_clear_reset(void) {} -+static inline int s390_pv_verify(void) { return 0; } -+static inline void s390_pv_unshare(void) {} -+#endif /* CONFIG_KVM */ -+ -+#endif /* HW_S390_PV_H */ -diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h -index 8aa27199c9..cd1dccc6e3 100644 ---- a/include/hw/s390x/s390-virtio-ccw.h -+++ b/include/hw/s390x/s390-virtio-ccw.h -@@ -28,6 +28,7 @@ typedef struct S390CcwMachineState { - /*< public >*/ - bool aes_key_wrap; - bool dea_key_wrap; -+ bool pv; - uint8_t loadparm[8]; - } S390CcwMachineState; - -diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c -index c0dd502b84..8f38cd8e6f 100644 ---- a/target/s390x/cpu.c -+++ b/target/s390x/cpu.c -@@ -37,6 +37,7 @@ - #include "sysemu/hw_accel.h" - #include "hw/qdev-properties.h" - #ifndef CONFIG_USER_ONLY 
-+#include "hw/s390x/pv.h" - #include "hw/boards.h" - #include "sysemu/arch_init.h" - #include "sysemu/sysemu.h" -diff --git a/target/s390x/cpu_features_def.inc.h b/target/s390x/cpu_features_def.inc.h -index 31dff0d84e..60db28351d 100644 ---- a/target/s390x/cpu_features_def.inc.h -+++ b/target/s390x/cpu_features_def.inc.h -@@ -107,6 +107,7 @@ DEF_FEAT(DEFLATE_BASE, "deflate-base", STFL, 151, "Deflate-conversion facility ( - DEF_FEAT(VECTOR_PACKED_DECIMAL_ENH, "vxpdeh", STFL, 152, "Vector-Packed-Decimal-Enhancement Facility") - DEF_FEAT(MSA_EXT_9, "msa9-base", STFL, 155, "Message-security-assist-extension-9 facility (excluding subfunctions)") - DEF_FEAT(ETOKEN, "etoken", STFL, 156, "Etoken facility") -+DEF_FEAT(UNPACK, "unpack", STFL, 161, "Unpack facility") - - /* Features exposed via SCLP SCCB Byte 80 - 98 (bit numbers relative to byte-80) */ - DEF_FEAT(SIE_GSLS, "gsls", SCLP_CONF_CHAR, 40, "SIE: Guest-storage-limit-suppression facility") -diff --git a/target/s390x/diag.c b/target/s390x/diag.c -index 8aba6341f9..b2cbefb8cf 100644 ---- a/target/s390x/diag.c -+++ b/target/s390x/diag.c -@@ -20,6 +20,8 @@ - #include "sysemu/cpus.h" - #include "hw/s390x/ipl.h" - #include "hw/s390x/s390-virtio-ccw.h" -+#include "hw/s390x/pv.h" -+#include "kvm_s390x.h" - - int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3) - { -@@ -52,6 +54,10 @@ int handle_diag_288(CPUS390XState *env, uint64_t r1, uint64_t r3) - static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr, - uintptr_t ra, bool write) - { -+ /* Handled by the Ultravisor */ -+ if (s390_is_pv()) { -+ return 0; -+ } - if ((r1 & 1) || (addr & ~TARGET_PAGE_MASK)) { - s390_program_interrupt(env, PGM_SPECIFICATION, ra); - return -1; -@@ -67,6 +73,7 @@ static int diag308_parm_check(CPUS390XState *env, uint64_t r1, uint64_t addr, - - void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) - { -+ bool valid; - CPUState *cs = env_cpu(env); - uint64_t addr = env->regs[r1]; - 
uint64_t subcode = env->regs[r3]; -@@ -82,6 +89,11 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) - return; - } - -+ if (subcode >= DIAG308_PV_SET && !s390_has_feat(S390_FEAT_UNPACK)) { -+ s390_program_interrupt(env, PGM_SPECIFICATION, ra); -+ return; -+ } -+ - switch (subcode) { - case DIAG308_RESET_MOD_CLR: - s390_ipl_reset_request(cs, S390_RESET_MODIFIED_CLEAR); -@@ -94,6 +106,7 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) - s390_ipl_reset_request(cs, S390_RESET_REIPL); - break; - case DIAG308_SET: -+ case DIAG308_PV_SET: - if (diag308_parm_check(env, r1, addr, ra, false)) { - return; - } -@@ -106,7 +119,8 @@ void handle_diag_308(CPUS390XState *env, uint64_t r1, uint64_t r3, uintptr_t ra) - - cpu_physical_memory_read(addr, iplb, be32_to_cpu(iplb->len)); - -- if (!iplb_valid(iplb)) { -+ valid = subcode == DIAG308_PV_SET ? iplb_valid_pv(iplb) : iplb_valid(iplb); -+ if (!valid) { - env->regs[r1 + 1] = DIAG_308_RC_INVALID; - goto out; - } -@@ -117,10 +131,15 @@ out: - g_free(iplb); - return; - case DIAG308_STORE: -+ case DIAG308_PV_STORE: - if (diag308_parm_check(env, r1, addr, ra, true)) { - return; - } -- iplb = s390_ipl_get_iplb(); -+ if (subcode == DIAG308_PV_STORE) { -+ iplb = s390_ipl_get_iplb_pv(); -+ } else { -+ iplb = s390_ipl_get_iplb(); -+ } - if (iplb) { - cpu_physical_memory_write(addr, iplb, be32_to_cpu(iplb->len)); - env->regs[r1 + 1] = DIAG_308_RC_OK; -@@ -128,6 +147,22 @@ out: - env->regs[r1 + 1] = DIAG_308_RC_NO_CONF; - } - return; -+ case DIAG308_PV_START: -+ iplb = s390_ipl_get_iplb_pv(); -+ if (!iplb) { -+ env->regs[r1 + 1] = DIAG_308_RC_NO_PV_CONF; -+ return; -+ } -+ -+ if (kvm_s390_get_hpage_1m()) { -+ error_report("Protected VMs can currently not be backed with " -+ "huge pages"); -+ env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; -+ return; -+ } -+ -+ s390_ipl_reset_request(cs, S390_RESET_PV); -+ break; - default: - s390_program_interrupt(env, PGM_SPECIFICATION, ra); - 
break; -diff --git a/target/s390x/kvm-stub.c b/target/s390x/kvm-stub.c -index c4cd497f85..aa185017a2 100644 ---- a/target/s390x/kvm-stub.c -+++ b/target/s390x/kvm-stub.c -@@ -39,6 +39,11 @@ int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu) - return 0; - } - -+int kvm_s390_get_hpage_1m(void) -+{ -+ return 0; -+} -+ - int kvm_s390_get_ri(void) - { - return 0; -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 75d82af6fc..9a0be13959 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -321,6 +321,11 @@ void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp) - cap_hpage_1m = 1; - } - -+int kvm_s390_get_hpage_1m(void) -+{ -+ return cap_hpage_1m; -+} -+ - static void ccw_machine_class_foreach(ObjectClass *oc, void *opaque) - { - MachineClass *mc = MACHINE_CLASS(oc); -diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h -index 0b21789796..dea813f450 100644 ---- a/target/s390x/kvm_s390x.h -+++ b/target/s390x/kvm_s390x.h -@@ -23,6 +23,7 @@ void kvm_s390_program_interrupt(S390CPU *cpu, uint16_t code); - int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state); - void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu); - int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu); -+int kvm_s390_get_hpage_1m(void); - int kvm_s390_get_ri(void); - int kvm_s390_get_gs(void); - int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock); --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-protvirt-allow-to-IPL-secure-guests-with-no-re.patch b/SOURCES/kvm-s390x-protvirt-allow-to-IPL-secure-guests-with-no-re.patch deleted file mode 100644 index b12b458..0000000 --- a/SOURCES/kvm-s390x-protvirt-allow-to-IPL-secure-guests-with-no-re.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 8b994757136780998e0dd1d41613d2006c0dbcf6 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 4 Aug 2020 10:16:04 -0400 -Subject: [PATCH 4/4] s390x/protvirt: allow to IPL secure guests with - -no-reboot - -RH-Author: Thomas Huth -Message-id: <20200804101604.6259-2-thuth@redhat.com> 
-Patchwork-id: 98126 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/1] s390x/protvirt: allow to IPL secure guests with -no-reboot -Bugzilla: 1863034 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Christian Borntraeger - -Right now, -no-reboot prevents secure guests from running. This is -correct from an implementation point of view, as we have modeled the -transition from non-secure to secure as a program directed IPL. From -a user perspective, this is not the behavior of least surprise. - -We should implement the IPL into protected mode similar to the -functions that we use for kdump/kexec. In other words, we do not stop -here when -no-reboot is specified on the command line. Like function 0 -or function 1, function 10 is not a classic reboot. For example, it -can only be called once. Before calling it a second time, a real -reboot/reset must happen in-between. So function code 10 is more or -less a state transition reset, but not a "standard" reset or reboot. - -Fixes: 4d226deafc44 ("s390x: protvirt: Support unpack facility") -Signed-off-by: Christian Borntraeger -Reviewed-by: Janosch Frank -Reviewed-by: David Hildenbrand -Acked-by: Viktor Mihajlovski -Message-Id: <20200721103202.30610-1-borntraeger@de.ibm.com> -[CH: tweaked description] -Signed-off-by: Cornelia Huck -(cherry picked from commit d1bb69db4ceb6897ef6a17bf263146b53a123632) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/ipl.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c -index 586d95b5b6..5b3ea990af 100644 ---- a/hw/s390x/ipl.c -+++ b/hw/s390x/ipl.c -@@ -624,7 +624,8 @@ void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type) - } - } - if (reset_type == S390_RESET_MODIFIED_CLEAR || -- reset_type == S390_RESET_LOAD_NORMAL) { -+ reset_type == S390_RESET_LOAD_NORMAL || -+ reset_type == S390_RESET_PV) { - /* ignore -no-reboot, send no event */ - qemu_system_reset_request(SHUTDOWN_CAUSE_SUBSYSTEM_RESET); - } else { --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-pv-Fix-KVM_PV_PREP_RESET-command-wrapper-name.patch b/SOURCES/kvm-s390x-pv-Fix-KVM_PV_PREP_RESET-command-wrapper-name.patch deleted file mode 100644 index 764ceb1..0000000 --- a/SOURCES/kvm-s390x-pv-Fix-KVM_PV_PREP_RESET-command-wrapper-name.patch +++ /dev/null @@ -1,92 +0,0 @@ -From f3594f3d84a7442c194b1b9fd288e7414540ec0f Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:20 -0400 -Subject: [PATCH 38/42] s390x: pv: Fix KVM_PV_PREP_RESET command wrapper name -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-39-thuth@redhat.com> -Patchwork-id: 97051 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 38/38] s390x: pv: Fix KVM_PV_PREP_RESET command wrapper name -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -Upstream: Merged in https://github.com/cohuck/qemu/tree/s390-next - -s390_pv_perf_clear_reset() is not a very helpful name since that -function needs to be called for a normal and a clear reset via -diag308. - -Let's instead name it s390_pv_prep_reset() which reflects the purpose -of the function a bit better. 
- -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Message-Id: <20200505124159.24099-1-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit f9628f3f6db341751002dac3be18610fa77c01ad) -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/pv.c | 2 +- - hw/s390x/s390-virtio-ccw.c | 2 +- - include/hw/s390x/pv.h | 4 ++-- - 3 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c -index f11868e865..ab3a2482aa 100644 ---- a/hw/s390x/pv.c -+++ b/hw/s390x/pv.c -@@ -88,7 +88,7 @@ int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) - return s390_pv_cmd(KVM_PV_UNPACK, &args); - } - --void s390_pv_perf_clear_reset(void) -+void s390_pv_prep_reset(void) - { - s390_pv_cmd_exit(KVM_PV_PREP_RESET, NULL); - } -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 07773a12b2..e6ed13b649 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -402,7 +402,7 @@ static void s390_pv_prepare_reset(S390CcwMachineState *ms) - s390_cpu_set_state(S390_CPU_STATE_STOPPED, S390_CPU(cs)); - } - s390_pv_unshare(); -- s390_pv_perf_clear_reset(); -+ s390_pv_prep_reset(); - } - - static void s390_machine_reset(MachineState *machine) -diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h -index 522ca6a04e..aee758bc2d 100644 ---- a/include/hw/s390x/pv.h -+++ b/include/hw/s390x/pv.h -@@ -39,7 +39,7 @@ int s390_pv_vm_enable(void); - void s390_pv_vm_disable(void); - int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); - int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); --void s390_pv_perf_clear_reset(void); -+void s390_pv_prep_reset(void); - int s390_pv_verify(void); - void s390_pv_unshare(void); - void s390_pv_inject_reset_error(CPUState *cs); -@@ -49,7 +49,7 @@ static inline int s390_pv_vm_enable(void) { return 0; } - static inline void s390_pv_vm_disable(void) {} - static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { 
return 0; } - static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } --static inline void s390_pv_perf_clear_reset(void) {} -+static inline void s390_pv_prep_reset(void) {} - static inline int s390_pv_verify(void) { return 0; } - static inline void s390_pv_unshare(void) {} - static inline void s390_pv_inject_reset_error(CPUState *cs) {}; --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-pv-Fix-diag318-PV-fencing.patch b/SOURCES/kvm-s390x-pv-Fix-diag318-PV-fencing.patch deleted file mode 100644 index 4dcb862..0000000 --- a/SOURCES/kvm-s390x-pv-Fix-diag318-PV-fencing.patch +++ /dev/null @@ -1,114 +0,0 @@ -From 722078f9fdb766c2f0990145de6732f0c36a63b7 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 11 Nov 2020 12:03:16 -0500 -Subject: [PATCH 16/18] s390x: pv: Fix diag318 PV fencing - -RH-Author: Thomas Huth -Message-id: <20201111120316.707489-13-thuth@redhat.com> -Patchwork-id: 99509 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 12/12] s390x: pv: Fix diag318 PV fencing -Bugzilla: 1798506 -RH-Acked-by: Jens Freimann -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -Diag318 fencing needs to be determined on the current VM PV state and -not on the state that the VM has when we create the CPU model. - -Fixes: fabdada935 ("s390: guest support for diagnose 0x318") -Reported-by: Marc Hartmayer -Signed-off-by: Janosch Frank -Tested-by: Marc Hartmayer -Reviewed-by: Christian Borntraeger -Reviewed-by: Collin Walling -Acked-by: David Hildenbrand -Message-Id: <20201022103135.126033-3-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 3ded270a2697852a71961b45291519ae044f25e3) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/s390x/cpu_features.c | 5 +++++ - target/s390x/cpu_features.h | 4 ++++ - target/s390x/cpu_models.c | 4 ++++ - target/s390x/kvm.c | 3 +-- - 4 files changed, 14 insertions(+), 2 deletions(-) - -diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c -index 9f817e3cfa7..e5cdf232607 100644 ---- a/target/s390x/cpu_features.c -+++ b/target/s390x/cpu_features.c -@@ -14,6 +14,7 @@ - #include "qemu/osdep.h" - #include "qemu/module.h" - #include "cpu_features.h" -+#include "hw/s390x/pv.h" - - #define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \ - [S390_FEAT_##_FEAT] = { \ -@@ -105,6 +106,10 @@ void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type, - } - feat = find_next_bit(features, S390_FEAT_MAX, feat + 1); - } -+ -+ if (type == S390_FEAT_TYPE_SCLP_FAC134 && s390_is_pv()) { -+ clear_be_bit(s390_feat_def(S390_FEAT_DIAG_318)->bit, data); -+ } - } - - void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type, -diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h -index f74f7fc3a11..d3c685a04c8 100644 ---- a/target/s390x/cpu_features.h -+++ b/target/s390x/cpu_features.h -@@ -81,6 +81,10 @@ const S390FeatGroupDef *s390_feat_group_def(S390FeatGroup group); - - #define BE_BIT_NR(BIT) (BIT ^ (BITS_PER_LONG - 1)) - -+static inline void clear_be_bit(unsigned int bit_nr, uint8_t *array) -+{ -+ array[bit_nr / 8] &= ~(0x80 >> (bit_nr % 8)); -+} - static inline void set_be_bit(unsigned int bit_nr, uint8_t *array) - { - array[bit_nr / 8] |= 0x80 >> (bit_nr % 8); -diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index bf6a3faba9e..d489923cb8a 100644 ---- a/target/s390x/cpu_models.c -+++ b/target/s390x/cpu_models.c -@@ -29,6 +29,7 @@ - #include "hw/pci/pci.h" - #endif - #include "qapi/qapi-commands-machine-target.h" -+#include "hw/s390x/pv.h" - - #define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \ - { \ -@@ -238,6 +239,9 @@ bool s390_has_feat(S390Feat feat) - 
} - return 0; - } -+ if (feat == S390_FEAT_DIAG_318 && s390_is_pv()) { -+ return false; -+ } - return test_bit(feat, cpu->model->features); - } - -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index e5e190d21c9..6edb52f6d25 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -2483,8 +2483,7 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) - */ - set_bit(S390_FEAT_EXTENDED_LENGTH_SCCB, model->features); - -- /* DIAGNOSE 0x318 is not supported under protected virtualization */ -- if (!s390_is_pv() && kvm_check_extension(kvm_state, KVM_CAP_S390_DIAG318)) { -+ if (kvm_check_extension(kvm_state, KVM_CAP_S390_DIAG318)) { - set_bit(S390_FEAT_DIAG_318, model->features); - } - --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-pv-Remove-sclp-boundary-checks.patch b/SOURCES/kvm-s390x-pv-Remove-sclp-boundary-checks.patch deleted file mode 100644 index 51ceb48..0000000 --- a/SOURCES/kvm-s390x-pv-Remove-sclp-boundary-checks.patch +++ /dev/null @@ -1,57 +0,0 @@ -From cf3d958b14e21fde929e67262b6e192592d95359 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 11 Nov 2020 12:03:15 -0500 -Subject: [PATCH 15/18] s390x: pv: Remove sclp boundary checks - -RH-Author: Thomas Huth -Message-id: <20201111120316.707489-12-thuth@redhat.com> -Patchwork-id: 99508 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 11/12] s390x: pv: Remove sclp boundary checks -Bugzilla: 1798506 -RH-Acked-by: Jens Freimann -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -The SCLP boundary cross check is done by the Ultravisor for a -protected guest, hence we don't need to do it. As QEMU doesn't get a -valid SCCB address in protected mode this is even problematic and can -lead to QEMU reporting a false boundary cross error. 
- -Fixes: db13387ca0 ("s390/sclp: rework sclp boundary checks") -Reported-by: Marc Hartmayer -Signed-off-by: Janosch Frank -Tested-by: Marc Hartmayer -Reviewed-by: Christian Borntraeger -Reviewed-by: Thomas Huth -Reviewed-by: Collin Walling -Acked-by: Halil Pasic -Acked-by: David Hildenbrand -Message-Id: <20201022103135.126033-2-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 3df4843d0e612a3c838e8d94c3e9c24520f2e680) -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/sclp.c | 5 ----- - 1 file changed, 5 deletions(-) - -diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c -index 2931046f456..03f847b2c8a 100644 ---- a/hw/s390x/sclp.c -+++ b/hw/s390x/sclp.c -@@ -285,11 +285,6 @@ int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, - goto out_write; - } - -- if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length), code)) { -- work_sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); -- goto out_write; -- } -- - sclp_c->execute(sclp, work_sccb, code); - out_write: - s390_cpu_pv_mem_write(env_archcpu(env), 0, work_sccb, --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-pv-Retry-ioctls-on-EINTR.patch b/SOURCES/kvm-s390x-pv-Retry-ioctls-on-EINTR.patch deleted file mode 100644 index 65208c7..0000000 --- a/SOURCES/kvm-s390x-pv-Retry-ioctls-on-EINTR.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 1678288d945906d83d7adae109b842080aebaf19 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:18 -0400 -Subject: [PATCH 36/42] s390x/pv: Retry ioctls on -EINTR -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-37-thuth@redhat.com> -Patchwork-id: 97055 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 36/38] s390x/pv: Retry ioctls on -EINTR -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Christian 
Borntraeger - -PV_ENABLE (and maybe others) might return -EINTR when a signal is -pending. See the Linux kernel patch "s390/gmap: return proper error code -on ksm unsharing" for details. Let us retry the ioctl in that case. - -Fixes: c3347ed0d2ee ("s390x: protvirt: Support unpack facility") -Reported-by: Marc Hartmayer -Acked-by: Janosch Frank -Tested-by: Marc Hartmayer -Signed-off-by: Christian Borntraeger -Message-Id: <20200327124616.34866-1-borntraeger@de.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit e8d12a55f6d3e577455b02f15907c460578c689b) -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/pv.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c -index a40a844806..cb0dce4a4f 100644 ---- a/hw/s390x/pv.c -+++ b/hw/s390x/pv.c -@@ -23,7 +23,11 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) - .cmd = cmd, - .data = (uint64_t)data, - }; -- int rc = kvm_vm_ioctl(kvm_state, KVM_S390_PV_COMMAND, &pv_cmd); -+ int rc; -+ -+ do { -+ rc = kvm_vm_ioctl(kvm_state, KVM_S390_PV_COMMAND, &pv_cmd); -+ } while (rc == -EINTR); - - if (rc) { - error_report("KVM PV command %d (%s) failed: header rc %x rrc %x " --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-s390-virtio-ccw-Fix-build-on-systems-without-K.patch b/SOURCES/kvm-s390x-s390-virtio-ccw-Fix-build-on-systems-without-K.patch deleted file mode 100644 index e78f4da..0000000 --- a/SOURCES/kvm-s390x-s390-virtio-ccw-Fix-build-on-systems-without-K.patch +++ /dev/null @@ -1,150 +0,0 @@ -From 0db8d909a2f3c53d12b0ae12307965f9a8193dbc Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:19 -0400 -Subject: [PATCH 37/42] s390x/s390-virtio-ccw: Fix build on systems without KVM -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-38-thuth@redhat.com> -Patchwork-id: 97047 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 37/38] 
s390x/s390-virtio-ccw: Fix build on systems without KVM -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Christian Borntraeger - -linux/kvm.h is not available on all platforms. Let us move -s390_machine_inject_pv_error into pv.c as it uses KVM structures. -Also rename the function to s390_pv_inject_reset_error. - -While at it, ipl.h needs an include for "exec/address-spaces.h" -as it uses address_space_memory. - -Fixes: c3347ed0d2ee ("s390x: protvirt: Support unpack facility") -Reported-by: Bruce Rogers -Signed-off-by: Christian Borntraeger -Message-Id: <20200406100158.5940-2-borntraeger@de.ibm.com> -Reviewed-by: David Hildenbrand -Signed-off-by: Cornelia Huck -(cherry picked from commit fbc1384ccd48fa7c0c38f950adf7992a4fb6042e) -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/ipl.h | 1 + - hw/s390x/pv.c | 11 +++++++++++ - hw/s390x/s390-virtio-ccw.c | 12 +----------- - include/hw/s390x/pv.h | 3 +++ - 4 files changed, 16 insertions(+), 11 deletions(-) - -diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h -index 89b3044d7a..53cc9eb5ac 100644 ---- a/hw/s390x/ipl.h -+++ b/hw/s390x/ipl.h -@@ -14,6 +14,7 @@ - #define HW_S390_IPL_H - - #include "cpu.h" -+#include "exec/address-spaces.h" - #include "hw/qdev-core.h" - - struct IPLBlockPVComp { -diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c -index cb0dce4a4f..f11868e865 100644 ---- a/hw/s390x/pv.c -+++ b/hw/s390x/pv.c -@@ -13,8 +13,10 @@ - - #include - -+#include "cpu.h" - #include "qemu/error-report.h" - #include "sysemu/kvm.h" -+#include "hw/s390x/ipl.h" - #include "hw/s390x/pv.h" - - static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) -@@ -100,3 +102,12 @@ void s390_pv_unshare(void) - { - s390_pv_cmd_exit(KVM_PV_UNSHARE_ALL, NULL); - } -+ -+void s390_pv_inject_reset_error(CPUState *cs) -+{ -+ int r1 = (cs->kvm_run->s390_sieic.ipa & 0x00f0) >> 4; -+ CPUS390XState *env = &S390_CPU(cs)->env; -+ -+ /* 
Report that we are unable to enter protected mode */ -+ env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; -+} -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index c08e42bda1..07773a12b2 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -44,7 +44,6 @@ - #include "sysemu/sysemu.h" - #include "sysemu/balloon.h" - #include "hw/s390x/pv.h" --#include - #include "migration/blocker.h" - - static Error *pv_mig_blocker; -@@ -391,15 +390,6 @@ out_err: - return rc; - } - --static void s390_machine_inject_pv_error(CPUState *cs) --{ -- int r1 = (cs->kvm_run->s390_sieic.ipa & 0x00f0) >> 4; -- CPUS390XState *env = &S390_CPU(cs)->env; -- -- /* Report that we are unable to enter protected mode */ -- env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; --} -- - static void s390_pv_prepare_reset(S390CcwMachineState *ms) - { - CPUState *cs; -@@ -485,7 +475,7 @@ static void s390_machine_reset(MachineState *machine) - run_on_cpu(cs, s390_do_cpu_reset, RUN_ON_CPU_NULL); - - if (s390_machine_protect(ms)) { -- s390_machine_inject_pv_error(cs); -+ s390_pv_inject_reset_error(cs); - /* - * Continue after the diag308 so the guest knows something - * went wrong. 
-diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h -index c6cb360f2f..522ca6a04e 100644 ---- a/include/hw/s390x/pv.h -+++ b/include/hw/s390x/pv.h -@@ -13,6 +13,7 @@ - #define HW_S390_PV_H - - #ifdef CONFIG_KVM -+#include "cpu.h" - #include "hw/s390x/s390-virtio-ccw.h" - - static inline bool s390_is_pv(void) -@@ -41,6 +42,7 @@ int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); - void s390_pv_perf_clear_reset(void); - int s390_pv_verify(void); - void s390_pv_unshare(void); -+void s390_pv_inject_reset_error(CPUState *cs); - #else /* CONFIG_KVM */ - static inline bool s390_is_pv(void) { return false; } - static inline int s390_pv_vm_enable(void) { return 0; } -@@ -50,6 +52,7 @@ static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { - static inline void s390_pv_perf_clear_reset(void) {} - static inline int s390_pv_verify(void) { return 0; } - static inline void s390_pv_unshare(void) {} -+static inline void s390_pv_inject_reset_error(CPUState *cs) {}; - #endif /* CONFIG_KVM */ - - #endif /* HW_S390_PV_H */ --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-s390-virtio-ccw-Reset-PCI-devices-during-subsy.patch b/SOURCES/kvm-s390x-s390-virtio-ccw-Reset-PCI-devices-during-subsy.patch deleted file mode 100644 index f90dc30..0000000 --- a/SOURCES/kvm-s390x-s390-virtio-ccw-Reset-PCI-devices-during-subsy.patch +++ /dev/null @@ -1,52 +0,0 @@ -From fa4e13a01ecc316cc43c1f39490330b94c910bc1 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Mon, 14 Dec 2020 18:29:49 -0500 -Subject: [PATCH 04/14] s390x/s390-virtio-ccw: Reset PCI devices during - subsystem reset - -RH-Author: Thomas Huth -Message-id: <20201214182949.35712-2-thuth@redhat.com> -Patchwork-id: 100440 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] s390x/s390-virtio-ccw: Reset PCI devices during subsystem reset -Bugzilla: 1905386 -RH-Acked-by: Danilo de Paula -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -From: Matthew Rosato - -Currently, a subsystem reset event 
leaves PCI devices enabled, causing -issues post-reset in the guest (an example would be after a kexec). These -devices need to be reset during a subsystem reset, allowing them to be -properly re-enabled afterwards. Add the S390 PCI host bridge to the list -of qdevs to be reset during subsystem reset. - -Signed-off-by: Matthew Rosato -Reviewed-by: Eric Farman -Acked-by: Halil Pasic -Acked-by: Christian Borntraeger -Cc: qemu-stable@nongnu.org -Message-Id: <1602767767-32713-1-git-send-email-mjrosato@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit db08244a3a7ec312dfed3fd9b88e114281215458) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/s390-virtio-ccw.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 5905d2b7adc..5b3d07f55c4 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -103,6 +103,7 @@ static const char *const reset_dev_types[] = { - "s390-sclp-event-facility", - "s390-flic", - "diag288", -+ TYPE_S390_PCI_HOST_BRIDGE, - }; - - static void subsystem_reset(void) --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-sclp.c-remove-unneeded-label-in-sclp_service_c.patch b/SOURCES/kvm-s390x-sclp.c-remove-unneeded-label-in-sclp_service_c.patch deleted file mode 100644 index 5a38a88..0000000 --- a/SOURCES/kvm-s390x-sclp.c-remove-unneeded-label-in-sclp_service_c.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 8b06cba98e37b9c50e2a9deb1567d8cf4e1ba2b6 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 11 Nov 2020 12:03:05 -0500 -Subject: [PATCH 05/18] s390x/sclp.c: remove unneeded label in - sclp_service_call() - -RH-Author: Thomas Huth -Message-id: <20201111120316.707489-2-thuth@redhat.com> -Patchwork-id: 99497 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 01/12] s390x/sclp.c: remove unneeded label in sclp_service_call() -Bugzilla: 1798506 -RH-Acked-by: Jens Freimann -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - 
-From: Daniel Henrique Barboza - -'out' label can be replaced by 'return' with the appropriate -value. The 'r' integer, which is used solely to set the -return value for this label, can also be removed. - -CC: Cornelia Huck -CC: Halil Pasic -CC: Christian Borntraeger -Signed-off-by: Daniel Henrique Barboza -Reviewed-by: Thomas Huth -Message-Id: <20200106182425.20312-39-danielhb413@gmail.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit e6de76fca48012348d8c81b1399c861f444bd4a4) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/sclp.c | 16 +++++----------- - 1 file changed, 5 insertions(+), 11 deletions(-) - -diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c -index 1c380a49cc7..d8ae207731f 100644 ---- a/hw/s390x/sclp.c -+++ b/hw/s390x/sclp.c -@@ -241,24 +241,20 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) - { - SCLPDevice *sclp = get_sclp_device(); - SCLPDeviceClass *sclp_c = SCLP_GET_CLASS(sclp); -- int r = 0; - SCCB work_sccb; - - hwaddr sccb_len = sizeof(SCCB); - - /* first some basic checks on program checks */ - if (env->psw.mask & PSW_MASK_PSTATE) { -- r = -PGM_PRIVILEGED; -- goto out; -+ return -PGM_PRIVILEGED; - } - if (cpu_physical_memory_is_io(sccb)) { -- r = -PGM_ADDRESSING; -- goto out; -+ return -PGM_ADDRESSING; - } - if ((sccb & ~0x1fffUL) == 0 || (sccb & ~0x1fffUL) == env->psa - || (sccb & ~0x7ffffff8UL) != 0) { -- r = -PGM_SPECIFICATION; -- goto out; -+ return -PGM_SPECIFICATION; - } - - /* -@@ -270,8 +266,7 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) - - /* Valid sccb sizes */ - if (be16_to_cpu(work_sccb.h.length) < sizeof(SCCBHeader)) { -- r = -PGM_SPECIFICATION; -- goto out; -+ return -PGM_SPECIFICATION; - } - - if (!sclp_command_code_valid(code)) { -@@ -291,8 +286,7 @@ out_write: - - sclp_c->service_interrupt(sclp, sccb); - --out: -- return r; -+ return 0; - } - - static void service_interrupt(SCLPDevice *sclp, uint32_t sccb) --- -2.27.0 - diff 
--git a/SOURCES/kvm-s390x-sigp-Fix-sense-running-reporting.patch b/SOURCES/kvm-s390x-sigp-Fix-sense-running-reporting.patch deleted file mode 100644 index 7143964..0000000 --- a/SOURCES/kvm-s390x-sigp-Fix-sense-running-reporting.patch +++ /dev/null @@ -1,49 +0,0 @@ -From a2befb24c10f58ce6c27d242f3b88afee1f77ec8 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 7 Jul 2020 09:35:31 -0400 -Subject: [PATCH 2/4] s390x: sigp: Fix sense running reporting - -RH-Author: Thomas Huth -Message-id: <20200707093532.22456-2-thuth@redhat.com> -Patchwork-id: 97920 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/2] s390x: sigp: Fix sense running reporting -Bugzilla: 1854092 -RH-Acked-by: Jens Freimann -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Janosch Frank - -The logic was inverted and reported running if the cpu was stopped. -Let's fix that. - -Signed-off-by: Janosch Frank -Fixes: d1b468bc8869 ("s390x/tcg: implement SIGP SENSE RUNNING STATUS") -Reviewed-by: David Hildenbrand -Message-Id: <20200124134818.9981-1-frankja@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 4103500e2fa934a6995e4cedab37423e606715bf) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/s390x/sigp.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/target/s390x/sigp.c b/target/s390x/sigp.c -index 727875bb4a..c604f17710 100644 ---- a/target/s390x/sigp.c -+++ b/target/s390x/sigp.c -@@ -348,9 +348,9 @@ static void sigp_sense_running(S390CPU *dst_cpu, SigpInfo *si) - - /* If halted (which includes also STOPPED), it is not running */ - if (CPU(dst_cpu)->halted) { -- si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; -- } else { - set_sigp_status(si, SIGP_STAT_NOT_RUNNING); -+ } else { -+ si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; - } - } - --- -2.27.0 - diff --git a/SOURCES/kvm-s390x-tcg-clear-local-interrupts-on-reset-normal.patch b/SOURCES/kvm-s390x-tcg-clear-local-interrupts-on-reset-normal.patch deleted file mode 100644 index b6ac314..0000000 --- a/SOURCES/kvm-s390x-tcg-clear-local-interrupts-on-reset-normal.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 0c85e86077b42547034ec6e8330a3e61d79b97ee Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 7 Jul 2020 09:35:32 -0400 -Subject: [PATCH 3/4] s390x/tcg: clear local interrupts on reset normal - -RH-Author: Thomas Huth -Message-id: <20200707093532.22456-3-thuth@redhat.com> -Patchwork-id: 97919 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 2/2] s390x/tcg: clear local interrupts on reset normal -Bugzilla: 1854092 -RH-Acked-by: Jens Freimann -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Cornelia Huck - -We neglected to clean up pending interrupts and emergency signals; -fix that. - -Message-Id: <20191206135404.16051-1-cohuck@redhat.com> -Signed-off-by: Cornelia Huck -Reviewed-by: David Hildenbrand -(cherry picked from commit bcf88d56efec4ffc153bbe98d11b689a5ebe1a91) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/s390x/cpu.h | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h -index edf8391504..a48e655c4d 100644 ---- a/target/s390x/cpu.h -+++ b/target/s390x/cpu.h -@@ -98,10 +98,6 @@ struct CPUS390XState { - - uint64_t cregs[16]; /* control registers */ - -- int pending_int; -- uint16_t external_call_addr; -- DECLARE_BITMAP(emergency_signals, S390_MAX_CPUS); -- - uint64_t ckc; - uint64_t cputm; - uint32_t todpr; -@@ -117,6 +113,10 @@ struct CPUS390XState { - struct {} start_normal_reset_fields; - uint8_t riccb[64]; /* runtime instrumentation control */ - -+ int pending_int; -+ uint16_t external_call_addr; -+ DECLARE_BITMAP(emergency_signals, S390_MAX_CPUS); -+ - /* Fields up to this point are cleared by a CPU reset */ - struct {} end_reset_fields; - --- -2.27.0 - diff --git a/SOURCES/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch b/SOURCES/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch deleted file mode 100644 index 189be7e..0000000 --- a/SOURCES/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 08dc2a4dc481916fae9597220ad0faf3f6ed70c1 Mon Sep 17 00:00:00 2001 -From: Eduardo Otubo -Date: Mon, 16 Nov 2020 15:15:38 -0500 -Subject: [PATCH 1/5] seccomp: fix killing of whole process instead of thread -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eduardo Otubo -Message-id: <20201116151538.22254-1-otubo@redhat.com> -Patchwork-id: 99654 -O-Subject: [RHEL-8.3.0/RHEL-8.4.0 qemu-kvm PATCH] seccomp: fix killing of whole process instead of thread -Bugzilla: 1880546 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Danilo de Paula -RH-Acked-by: Marc-André Lureau - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1890885 -BRANCH: rhel-8.3.0 -UPSTREAM: Merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=1890885 - -BZ: 
https://bugzilla.redhat.com/show_bug.cgi?id=1880546 -BRANCH: rhel-8.4.0 -UPSTREAM: Merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=33125023 - -From: Daniel P. Berrangé - -Back in 2018 we introduced support for killing the whole QEMU process -instead of just one thread, when a seccomp rule is violated: - - commit bda08a5764d470f101fa38635d30b41179a313e1 - Author: Marc-André Lureau - Date: Wed Aug 22 19:02:48 2018 +0200 - - seccomp: prefer SCMP_ACT_KILL_PROCESS if available - -Fast forward a year and we introduced a patch to avoid killing the -process for resource control syscalls tickled by Mesa. - - commit 9a1565a03b79d80b236bc7cc2dbce52a2ef3a1b8 - Author: Daniel P. Berrangé - Date: Wed Mar 13 09:49:03 2019 +0000 - - seccomp: don't kill process for resource control syscalls - -Unfortunately a logic bug effectively reverted the first commit -mentioned so that we go back to only killing the thread, not the whole -process. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Acked-by: Eduardo Otubo -(cherry picked from commit e474e3aacf4276eb0781d11c45e2fab996f9dc56) -Signed-off-by: Eduardo Otubo -Signed-off-by: Danilo C. L. 
de Paula ---- - qemu-seccomp.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/qemu-seccomp.c b/qemu-seccomp.c -index e0a1829b3dd..8325ecb766e 100644 ---- a/qemu-seccomp.c -+++ b/qemu-seccomp.c -@@ -136,8 +136,9 @@ static uint32_t qemu_seccomp_get_action(int set) - - if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0) { - kill_process = 1; -+ } else { -+ kill_process = 0; - } -- kill_process = 0; - } - if (kill_process == 1) { - return SCMP_ACT_KILL_PROCESS; --- -2.27.0 - diff --git a/SOURCES/kvm-slirp-check-pkt_len-before-reading-protocol-header.patch b/SOURCES/kvm-slirp-check-pkt_len-before-reading-protocol-header.patch deleted file mode 100644 index 43c44ea..0000000 --- a/SOURCES/kvm-slirp-check-pkt_len-before-reading-protocol-header.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 2bfa25e55c0a49bc079e5769db2199989eda7745 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Fri, 11 Dec 2020 00:59:26 -0500 -Subject: [PATCH 03/14] slirp: check pkt_len before reading protocol header -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20201211005926.618830-2-jmaloy@redhat.com> -Patchwork-id: 100398 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] slirp: check pkt_len before reading protocol header -Bugzilla: 1902237 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Marc-André Lureau - -While processing ARP/NCSI packets in 'arp_input' or 'ncsi_input' -routines, ensure that pkt_len is large enough to accommodate the -respective protocol headers, lest it should do an OOB access. -Add check to avoid it. 
- -CVE-2020-29129 CVE-2020-29130 - QEMU: slirp: out-of-bounds access while processing ARP/NCSI packets - -> https://www.openwall.com/lists/oss-security/2020/11/27/1 - -Reported-by: Qiuhao Li -Signed-off-by: Prasad J Pandit -Message-Id: <20201126135706.273950-1-ppandit@redhat.com> -Reviewed-by: Marc-André Lureau - -(cherry picked from libslirp commit 2e1dcbc0c2af64fcb17009eaf2ceedd81be2b27f) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - slirp/src/ncsi.c | 4 ++++ - slirp/src/slirp.c | 4 ++++ - 2 files changed, 8 insertions(+) - -diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c -index 6864b735db4..251c0d2bfbb 100644 ---- a/slirp/src/ncsi.c -+++ b/slirp/src/ncsi.c -@@ -147,6 +147,10 @@ void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) - uint32_t checksum; - uint32_t *pchecksum; - -+ if (pkt_len < ETH_HLEN + sizeof(struct ncsi_pkt_hdr)) { -+ return; /* packet too short */ -+ } -+ - memset(ncsi_reply, 0, sizeof(ncsi_reply)); - - memset(reh->h_dest, 0xff, ETH_ALEN); -diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c -index b0194cb32bb..86b0f52d923 100644 ---- a/slirp/src/slirp.c -+++ b/slirp/src/slirp.c -@@ -700,6 +700,10 @@ static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) - return; - } - -+ if (pkt_len < ETH_HLEN + sizeof(struct slirp_arphdr)) { -+ return; /* packet too short */ -+ } -+ - ar_op = ntohs(ah->ar_op); - switch (ar_op) { - case ARPOP_REQUEST: --- -2.27.0 - diff --git a/SOURCES/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch b/SOURCES/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch deleted file mode 100644 index 6d8dfe1..0000000 --- a/SOURCES/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 0f659af4870f151e25a7d2184b9a383bff58e3ba Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Fri, 17 Jan 2020 12:07:57 +0100 -Subject: [PATCH 2/4] slirp: use correct size while emulating IRC commands -MIME-Version: 
1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200117120758.1076549-3-marcandre.lureau@redhat.com> -Patchwork-id: 93400 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 2/3] slirp: use correct size while emulating IRC commands -Bugzilla: 1791568 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -From: Prasad J Pandit - -While emulating IRC DCC commands, tcp_emu() uses 'mbuf' size -'m->m_size' to write DCC commands via snprintf(3). This may -lead to OOB write access, because 'bptr' points somewhere in -the middle of 'mbuf' buffer, not at the start. Use M_FREEROOM(m) -size to avoid OOB access. - -Reported-by: Vishnu Dev TJ -Signed-off-by: Prasad J Pandit -Reviewed-by: Samuel Thibault -Message-Id: <20200109094228.79764-2-ppandit@redhat.com> - -(cherry picked from libslirp commit ce131029d6d4a405cb7d3ac6716d03e58fb4a5d9) -Signed-off-by: Marc-André Lureau - -Signed-off-by: Miroslav Rezanina ---- - slirp/src/tcp_subr.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index cbecd64..cedbfb2 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -778,7 +778,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, m->m_size, "DCC CHAT chat %lu %u%c\n", -+ m->m_len += snprintf(bptr, M_FREEROOM(m), -+ "DCC CHAT chat %lu %u%c\n", - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), 1); - } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, -@@ -788,8 +789,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += -- snprintf(bptr, m->m_size, "DCC SEND %s %lu %u %u%c\n", buff, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), -+ "DCC SEND %s %lu %u 
%u%c\n", buff, - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), n1, 1); - } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, -@@ -799,8 +800,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += -- snprintf(bptr, m->m_size, "DCC MOVE %s %lu %u %u%c\n", buff, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), -+ "DCC MOVE %s %lu %u %u%c\n", buff, - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), n1, 1); - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-slirp-use-correct-size-while-emulating-commands.patch b/SOURCES/kvm-slirp-use-correct-size-while-emulating-commands.patch deleted file mode 100644 index fe42f4f..0000000 --- a/SOURCES/kvm-slirp-use-correct-size-while-emulating-commands.patch +++ /dev/null @@ -1,71 +0,0 @@ -From dfbfcf02738640ab83f7970e636b72b78f166675 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Fri, 17 Jan 2020 12:07:58 +0100 -Subject: [PATCH 3/4] slirp: use correct size while emulating commands -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200117120758.1076549-4-marcandre.lureau@redhat.com> -Patchwork-id: 93401 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 3/3] slirp: use correct size while emulating commands -Bugzilla: 1791568 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -From: Prasad J Pandit - -While emulating services in tcp_emu(), it uses 'mbuf' size -'m->m_size' to write commands via snprintf(3). Use M_FREEROOM(m) -size to avoid possible OOB access. 
- -Signed-off-by: Prasad J Pandit -Signed-off-by: Samuel Thibault -Message-Id: <20200109094228.79764-3-ppandit@redhat.com> - -(cherry picked from commit 82ebe9c370a0e2970fb5695aa19aa5214a6a1c80) -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - slirp/src/tcp_subr.c | 9 ++++----- - 1 file changed, 4 insertions(+), 5 deletions(-) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index cedbfb2..954d1a6 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -696,7 +696,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, m->m_size - m->m_len, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), - "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, - n5, n6, x == 7 ? buff : ""); - return 1; -@@ -731,8 +731,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += -- snprintf(bptr, m->m_size - m->m_len, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), - "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", - n1, n2, n3, n4, n5, n6, x == 7 ? 
buff : ""); - -@@ -758,8 +757,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && - (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, - htons(lport), SS_FACCEPTONCE)) != NULL) -- m->m_len = -- snprintf(m->m_data, m->m_size, "%d", ntohs(so->so_fport)) + 1; -+ m->m_len = snprintf(m->m_data, M_ROOM(m), -+ "%d", ntohs(so->so_fport)) + 1; - return 1; - - case EMU_IRC: --- -1.8.3.1 - diff --git a/SOURCES/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch b/SOURCES/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch new file mode 100644 index 0000000..c6fcf61 --- /dev/null +++ b/SOURCES/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch @@ -0,0 +1,131 @@ +From afe1a63fe0cf863e024889edd82b9a380bfa8230 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Wed, 5 Jan 2022 12:38:47 +0000 +Subject: [PATCH 2/6] softmmu: fix device deletion events with -device JSON + syntax +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 103: Fix hot unplug of devices created with -device JSON syntax +RH-Commit: [1/1] 64cbc78bcb46bdb24d5f589ceb5ad598c388e447 +RH-Bugzilla: 2033279 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth +RH-Acked-by: Jano Tomko +RH-Acked-by: Daniel P. Berrangé + +The -device JSON syntax impl leaks a reference on the created +DeviceState instance. As a result when you hot-unplug the +device, the device_finalize method won't be called and thus +it will fail to emit the required DEVICE_DELETED event. + +A 'json-cli' feature was previously added against the +'device_add' QMP command QAPI schema to indicated to mgmt +apps that -device supported JSON syntax. Given the hotplug +bug that feature flag is not usable for its purpose, so +we add a new 'json-cli-hotplug' feature to indicate the +-device supports JSON without breaking hotplug. 
+ +Fixes: 5dacda5167560b3af8eadbce5814f60ba44b467e +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/802 +Signed-off-by: Daniel P. Berrangé +Message-Id: <20220105123847.4047954-2-berrange@redhat.com> +Reviewed-by: Laurent Vivier +Tested-by: Ján Tomko +Reviewed-by: Thomas Huth +Signed-off-by: Kevin Wolf +(cherry picked from commit 64b4529a432507ee84a924be69a03432639e87ba) +Signed-off-by: Kevin Wolf +--- + qapi/qdev.json | 5 ++++- + softmmu/vl.c | 4 +++- + tests/qtest/device-plug-test.c | 19 +++++++++++++++++++ + 3 files changed, 26 insertions(+), 2 deletions(-) + +diff --git a/qapi/qdev.json b/qapi/qdev.json +index 69656b14df..26cd10106b 100644 +--- a/qapi/qdev.json ++++ b/qapi/qdev.json +@@ -44,6 +44,9 @@ + # @json-cli: If present, the "-device" command line option supports JSON + # syntax with a structure identical to the arguments of this + # command. ++# @json-cli-hotplug: If present, the "-device" command line option supports JSON ++# syntax without the reference counting leak that broke ++# hot-unplug + # + # Notes: + # +@@ -74,7 +77,7 @@ + { 'command': 'device_add', + 'data': {'driver': 'str', '*bus': 'str', '*id': 'str'}, + 'gen': false, # so we can get the additional arguments +- 'features': ['json-cli'] } ++ 'features': ['json-cli', 'json-cli-hotplug'] } + + ## + # @device_del: +diff --git a/softmmu/vl.c b/softmmu/vl.c +index d46b8fb4ab..b3829e2edd 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -2690,6 +2690,7 @@ static void qemu_create_cli_devices(void) + qemu_opts_foreach(qemu_find_opts("device"), + device_init_func, NULL, &error_fatal); + QTAILQ_FOREACH(opt, &device_opts, next) { ++ DeviceState *dev; + loc_push_restore(&opt->loc); + /* + * TODO Eventually we should call qmp_device_add() here to make sure it +@@ -2698,7 +2699,8 @@ static void qemu_create_cli_devices(void) + * from the start, so call qdev_device_add_from_qdict() directly for + * now. 
+ */ +- qdev_device_add_from_qdict(opt->opts, true, &error_fatal); ++ dev = qdev_device_add_from_qdict(opt->opts, true, &error_fatal); ++ object_unref(OBJECT(dev)); + loc_pop(&opt->loc); + } + rom_reset_order_override(); +diff --git a/tests/qtest/device-plug-test.c b/tests/qtest/device-plug-test.c +index 559d47727a..ad79bd4c14 100644 +--- a/tests/qtest/device-plug-test.c ++++ b/tests/qtest/device-plug-test.c +@@ -77,6 +77,23 @@ static void test_pci_unplug_request(void) + qtest_quit(qtest); + } + ++static void test_pci_unplug_json_request(void) ++{ ++ QTestState *qtest = qtest_initf( ++ "-device '{\"driver\": \"virtio-mouse-pci\", \"id\": \"dev0\"}'"); ++ ++ /* ++ * Request device removal. As the guest is not running, the request won't ++ * be processed. However during system reset, the removal will be ++ * handled, removing the device. ++ */ ++ device_del(qtest, "dev0"); ++ system_reset(qtest); ++ wait_device_deleted_event(qtest, "dev0"); ++ ++ qtest_quit(qtest); ++} ++ + static void test_ccw_unplug(void) + { + QTestState *qtest = qtest_initf("-device virtio-balloon-ccw,id=dev0"); +@@ -145,6 +162,8 @@ int main(int argc, char **argv) + */ + qtest_add_func("/device-plug/pci-unplug-request", + test_pci_unplug_request); ++ qtest_add_func("/device-plug/pci-unplug-json-request", ++ test_pci_unplug_json_request); + + if (!strcmp(arch, "s390x")) { + qtest_add_func("/device-plug/ccw-unplug", +-- +2.27.0 + diff --git a/SOURCES/kvm-softmmu-memory-Log-invalid-memory-accesses.patch b/SOURCES/kvm-softmmu-memory-Log-invalid-memory-accesses.patch deleted file mode 100644 index e4e1bc4..0000000 --- a/SOURCES/kvm-softmmu-memory-Log-invalid-memory-accesses.patch +++ /dev/null @@ -1,84 +0,0 @@ -From be0a190e3c5c4ff84f7c53630ed5a55644d18acc Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 21 Apr 2021 22:30:06 -0400 -Subject: [PATCH 7/7] softmmu/memory: Log invalid memory accesses -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - 
-RH-Author: Jon Maloy -Message-id: <20210421223006.19650-7-jmaloy@redhat.com> -Patchwork-id: 101481 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH v2 6/6] softmmu/memory: Log invalid memory accesses -Bugzilla: 1842478 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laszlo Ersek - -From: Philippe Mathieu-Daudé - -Log invalid memory accesses with as GUEST_ERROR. - -This is particularly useful since commit 5d971f9e67 which reverted -("memory: accept mismatching sizes in memory_region_access_valid"). - -Signed-off-by: Philippe Mathieu-Daudé -Reviewed-by: Michael S. Tsirkin -Message-Id: <20201005152725.2143444-1-philmd@redhat.com> -Signed-off-by: Laurent Vivier - -(cherry picked from commit 21786c7e59847b1612406ff394958f22e5b323f8) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - memory.c | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/memory.c b/memory.c -index 0cfcb72a5a..660df8159a 100644 ---- a/memory.c -+++ b/memory.c -@@ -14,6 +14,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/log.h" - #include "qapi/error.h" - #include "cpu.h" - #include "exec/memory.h" -@@ -1353,10 +1354,18 @@ bool memory_region_access_valid(MemoryRegion *mr, - { - if (mr->ops->valid.accepts - && !mr->ops->valid.accepts(mr->opaque, addr, size, is_write, attrs)) { -+ qemu_log_mask(LOG_GUEST_ERROR, "Invalid access at addr " -+ "0x%" HWADDR_PRIX ", size %u, " -+ "region '%s', reason: rejected\n", -+ addr, size, memory_region_name(mr)); - return false; - } - - if (!mr->ops->valid.unaligned && (addr & (size - 1))) { -+ qemu_log_mask(LOG_GUEST_ERROR, "Invalid access at addr " -+ "0x%" HWADDR_PRIX ", size %u, " -+ "region '%s', reason: unaligned\n", -+ addr, size, memory_region_name(mr)); - return false; - } - -@@ -1367,6 +1376,13 @@ bool memory_region_access_valid(MemoryRegion *mr, - - if (size > mr->ops->valid.max_access_size - || size < mr->ops->valid.min_access_size) { -+ qemu_log_mask(LOG_GUEST_ERROR, "Invalid 
access at addr " -+ "0x%" HWADDR_PRIX ", size %u, " -+ "region '%s', reason: invalid size " -+ "(min:%u max:%u)\n", -+ addr, size, memory_region_name(mr), -+ mr->ops->valid.min_access_size, -+ mr->ops->valid.max_access_size); - return false; - } - return true; --- -2.27.0 - diff --git a/SOURCES/kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch b/SOURCES/kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch deleted file mode 100644 index 7aaa982..0000000 --- a/SOURCES/kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch +++ /dev/null @@ -1,205 +0,0 @@ -From dfdf950e893c23e77c9dc0be18fca66ad195d260 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Wed, 10 Feb 2021 15:56:45 +0000 -Subject: [PATCH 2/2] spapr: Adjust firmware path of PCI devices -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Greg Kurz -Message-id: <20210210165645.470195-2-gkurz@redhat.com> -Patchwork-id: 101038 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] spapr: Adjust firmware path of PCI devices -Bugzilla: 1912891 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: David Gibson -RH-Acked-by: Laszlo Ersek - -From: Greg Kurz - -It is currently not possible to perform a strict boot from USB storage: - -$ qemu-system-ppc64 -accel kvm -nodefaults -nographic -serial stdio \ - -boot strict=on \ - -device qemu-xhci \ - -device usb-storage,drive=disk,bootindex=0 \ - -blockdev driver=file,node-name=disk,filename=fedora-ppc64le.qcow2 - -SLOF ********************************************************************** -QEMU Starting - Build Date = Jul 17 2020 11:15:24 - FW Version = git-e18ddad8516ff2cf - Press "s" to enter Open Firmware. - -Populating /vdevice methods -Populating /vdevice/vty@71000000 -Populating /vdevice/nvram@71000001 -Populating /pci@800000020000000 - 00 0000 (D) : 1b36 000d serial bus [ usb-xhci ] -No NVRAM common partition, re-initializing... 
-Scanning USB - XHCI: Initializing - USB Storage - SCSI: Looking for devices - 101000000000000 DISK : "QEMU QEMU HARDDISK 2.5+" -Using default console: /vdevice/vty@71000000 - - Welcome to Open Firmware - - Copyright (c) 2004, 2017 IBM Corporation All rights reserved. - This program and the accompanying materials are made available - under the terms of the BSD License available at - http://www.opensource.org/licenses/bsd-license.php - -Trying to load: from: /pci@800000020000000/usb@0/storage@1/disk@101000000000000 ... -E3405: No such device - -E3407: Load failed - - Type 'boot' and press return to continue booting the system. - Type 'reset-all' and press return to reboot the system. - -Ready! -0 > - -The device tree handed over by QEMU to SLOF indeed contains: - -qemu,boot-list = - "/pci@800000020000000/usb@0/storage@1/disk@101000000000000 HALT"; - -but the device node is named usb-xhci@0, not usb@0. - -This happens because the firmware names of PCI devices returned -by get_boot_devices_list() come from pcibus_get_fw_dev_path(), -while the sPAPR PHB code uses a different naming scheme for -device nodes. This inconsistency has always been there but it was -hidden for a long time because SLOF used to rename USB device -nodes, until this commit, merged in QEMU 4.2.0 : - -commit 85164ad4ed9960cac842fa4cc067c6b6699b0994 -Author: Alexey Kardashevskiy -Date: Wed Sep 11 16:24:32 2019 +1000 - - pseries: Update SLOF firmware image - - This fixes USB host bus adapter name in the device tree to match QEMU's - one. - - Signed-off-by: Alexey Kardashevskiy - Signed-off-by: David Gibson - -Fortunately, sPAPR implements the firmware path provider interface. -This provides a way to override the default firmware paths. - -Just factor out the sPAPR PHB naming logic from spapr_dt_pci_device() -to a helper, and use it in the sPAPR firmware path provider hook. 
- -Fixes: 85164ad4ed99 ("pseries: Update SLOF firmware image") -Signed-off-by: Greg Kurz -Message-Id: <20210122170157.246374-1-groug@kaod.org> -Reviewed-by: Daniel Henrique Barboza -Signed-off-by: David Gibson -(cherry picked from commit 040bdafce12f750816d879442014df2999a995c4) -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr.c | 5 +++++ - hw/ppc/spapr_pci.c | 33 ++++++++++++++++++--------------- - include/hw/pci-host/spapr.h | 2 ++ - 3 files changed, 25 insertions(+), 15 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 00b1ef075e..bee2299199 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -3013,6 +3013,7 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, - SCSIDevice *d = CAST(SCSIDevice, dev, TYPE_SCSI_DEVICE); - SpaprPhbState *phb = CAST(SpaprPhbState, dev, TYPE_SPAPR_PCI_HOST_BRIDGE); - VHostSCSICommon *vsc = CAST(VHostSCSICommon, dev, TYPE_VHOST_SCSI_COMMON); -+ PCIDevice *pcidev = CAST(PCIDevice, dev, TYPE_PCI_DEVICE); - - if (d) { - void *spapr = CAST(void, bus->parent, "spapr-vscsi"); -@@ -3086,6 +3087,10 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, - return g_strdup_printf("pci@%x", PCI_SLOT(pcidev->devfn)); - } - -+ if (pcidev) { -+ return spapr_pci_fw_dev_name(pcidev); -+ } -+ - return NULL; - } - -diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c -index f6fbcf99ed..befa570aa8 100644 ---- a/hw/ppc/spapr_pci.c -+++ b/hw/ppc/spapr_pci.c -@@ -1348,15 +1348,29 @@ static int spapr_dt_pci_bus(SpaprPhbState *sphb, PCIBus *bus, - return offset; - } - -+char *spapr_pci_fw_dev_name(PCIDevice *dev) -+{ -+ const gchar *basename; -+ int slot = PCI_SLOT(dev->devfn); -+ int func = PCI_FUNC(dev->devfn); -+ uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3); -+ -+ basename = dt_name_from_class((ccode >> 16) & 0xff, (ccode >> 8) & 0xff, -+ ccode & 0xff); -+ -+ if (func != 0) { -+ return g_strdup_printf("%s@%x,%x", basename, slot, func); -+ } else 
{ -+ return g_strdup_printf("%s@%x", basename, slot); -+ } -+} -+ - /* create OF node for pci device and required OF DT properties */ - static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev, - void *fdt, int parent_offset) - { - int offset; -- const gchar *basename; -- gchar *nodename; -- int slot = PCI_SLOT(dev->devfn); -- int func = PCI_FUNC(dev->devfn); -+ g_autofree gchar *nodename = spapr_pci_fw_dev_name(dev); - PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); - ResourceProps rp; - SpaprDrc *drc = drc_from_dev(sphb, dev); -@@ -1373,19 +1387,8 @@ static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev, - uint32_t pci_status = pci_default_read_config(dev, PCI_STATUS, 2); - gchar *loc_code; - -- basename = dt_name_from_class((ccode >> 16) & 0xff, (ccode >> 8) & 0xff, -- ccode & 0xff); -- -- if (func != 0) { -- nodename = g_strdup_printf("%s@%x,%x", basename, slot, func); -- } else { -- nodename = g_strdup_printf("%s@%x", basename, slot); -- } -- - _FDT(offset = fdt_add_subnode(fdt, parent_offset, nodename)); - -- g_free(nodename); -- - /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */ - _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", vendor_id)); - _FDT(fdt_setprop_cell(fdt, offset, "device-id", device_id)); -diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h -index 8877ff51fb..9522db9047 100644 ---- a/include/hw/pci-host/spapr.h -+++ b/include/hw/pci-host/spapr.h -@@ -212,4 +212,6 @@ static inline unsigned spapr_phb_windows_supported(SpaprPhbState *sphb) - return sphb->ddw_enabled ? 
SPAPR_PCI_DMA_MAX_WINDOWS : 1; - } - -+char *spapr_pci_fw_dev_name(PCIDevice *dev); -+ - #endif /* PCI_HOST_SPAPR_H */ --- -2.27.0 - diff --git a/SOURCES/kvm-spapr-Allow-memory-unplug-to-always-succeed.patch b/SOURCES/kvm-spapr-Allow-memory-unplug-to-always-succeed.patch deleted file mode 100644 index 2968267..0000000 --- a/SOURCES/kvm-spapr-Allow-memory-unplug-to-always-succeed.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 1fc9b693c54c93736c6f902f3df8b94440e8cc5d Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Tue, 19 Jan 2021 15:09:53 -0500 -Subject: [PATCH 5/9] spapr: Allow memory unplug to always succeed - -RH-Author: Greg Kurz -Message-id: <20210119150954.1017058-6-gkurz@redhat.com> -Patchwork-id: 100686 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 5/6] spapr: Allow memory unplug to always succeed -Bugzilla: 1901837 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Laurent Vivier -RH-Acked-by: David Gibson - -From: Greg Kurz - -It is currently impossible to hot-unplug a memory device between -machine reset and CAS. - -(qemu) device_del dimm1 -Error: Memory hot unplug not supported for this guest - -This limitation was introduced in order to provide an explicit -error path for older guests that didn't support hot-plug event -sources (and thus memory hot-unplug). - -The linux kernel has been supporting these since 4.11. All recent -enough guests are thus capable of handling the removal of a memory -device at all time, including during early boot. - -Lift the limitation for the latest machine type. This means that -trying to unplug memory from a guest that doesn't support it will -likely just do nothing and the memory will only get removed at -next reboot. Such older guests can still get the existing behavior -by using an older machine type. 
- -Signed-off-by: Greg Kurz -Message-Id: <160794035064.23292.17560963281911312439.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit 1e8b5b1aa16b7d73ba8ba52c95d0b52329d5c9d0) -Signed-off-by: Greg Kurz - -Conflicts: - hw/ppc/spapr.c - include/hw/ppc/spapr.h - -Conflicts around the addition of pre_6_0_memory_unplug. Ignore the -change that sets pre_6_0_memory_unplug for older machine types. -This is ok because pre_6_0_memory_unplug is removed in a subsequent -patch anyway. - -Signed-off-by: Jon Maloy ---- - hw/ppc/spapr.c | 3 ++- - hw/ppc/spapr_events.c | 3 ++- - include/hw/ppc/spapr.h | 1 + - 3 files changed, 5 insertions(+), 2 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 992bd08aaa..f8de33e3e5 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4001,7 +4001,8 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, - SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); - - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { -- if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { -+ if (!smc->pre_6_0_memory_unplug || -+ spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { - spapr_memory_unplug_request(hotplug_dev, dev, errp); - } else { - /* NOTE: this means there is a window after guest reset, prior to -diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c -index 15b92b63ad..6e284aa4bc 100644 ---- a/hw/ppc/spapr_events.c -+++ b/hw/ppc/spapr_events.c -@@ -547,7 +547,8 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, - /* we should not be using count_indexed value unless the guest - * supports dedicated hotplug event source - */ -- g_assert(spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)); -+ g_assert(!SPAPR_MACHINE_GET_CLASS(spapr)->pre_6_0_memory_unplug || -+ spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)); - hp->drc_id.count_indexed.count = - cpu_to_be32(drc_id->count_indexed.count); - hp->drc_id.count_indexed.index = -diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h 
-index e5e2a99046..ac6961ed16 100644 ---- a/include/hw/ppc/spapr.h -+++ b/include/hw/ppc/spapr.h -@@ -124,6 +124,7 @@ struct SpaprMachineClass { - bool pre_4_1_migration; /* don't migrate hpt-max-page-size */ - bool linux_pci_probe; - bool smp_threads_vsmt; /* set VSMT to smp_threads by default */ -+ bool pre_6_0_memory_unplug; - - bool has_power9_support; - void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, --- -2.18.2 - diff --git a/SOURCES/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch b/SOURCES/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch deleted file mode 100644 index d934712..0000000 --- a/SOURCES/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch +++ /dev/null @@ -1,113 +0,0 @@ -From f2aeed761d2dad14920fa08c977dc45564886d9b Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Fri, 3 Jan 2020 01:15:12 +0000 -Subject: [PATCH 1/5] spapr: Don't trigger a CAS reboot for XICS/XIVE mode - changeover -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20200103011512.49129-2-dgibson@redhat.com> -Patchwork-id: 93261 -O-Subject: [RHEL-AV-4.2 qemu-kvm PATCH 1/1] spapr: Don't trigger a CAS reboot for XICS/XIVE mode changeover -Bugzilla: 1733893 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé - -From: David Gibson - -PAPR allows the interrupt controller used on a POWER9 machine (XICS or -XIVE) to be selected by the guest operating system, by using the -ibm,client-architecture-support (CAS) feature negotiation call. - -Currently, if the guest selects an interrupt controller different from the -one selected at initial boot, this causes the system to be reset with the -new model and the boot starts again. This means we run through the SLOF -boot process twice, as well as any other bootloader (e.g. grub) in use -before the OS calls CAS. This can be confusing and/or inconvenient for -users. 
- -Thanks to two fairly recent changes, we no longer need this reboot. 1) we -now completely regenerate the device tree when CAS is called (meaning we -don't need special case updates for all the device tree changes caused by -the interrupt controller mode change), 2) we now have explicit code paths -to activate and deactivate the different interrupt controllers, rather than -just implicitly calling those at machine reset time. - -We can therefore eliminate the reboot for changing irq mode, simply by -putting a call to spapr_irq_update_active_intc() before we call -spapr_h_cas_compose_response() (which gives the updated device tree to -the guest firmware and OS). - -Signed-off-by: David Gibson -Reviewed-by: Cedric Le Goater -Reviewed-by: Greg Kurz -(cherry picked from commit 8deb8019d696c75e6ecaee7545026b62aba2f1bb) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1733893 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr_hcall.c | 33 +++++++++++++-------------------- - 1 file changed, 13 insertions(+), 20 deletions(-) - -diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c -index 140f05c..05a7ca2 100644 ---- a/hw/ppc/spapr_hcall.c -+++ b/hw/ppc/spapr_hcall.c -@@ -1767,21 +1767,10 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - } - spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00); - spapr_ovec_cleanup(ov1_guest); -- if (!spapr->cas_reboot) { -- /* If spapr_machine_reset() did not set up a HPT but one is necessary -- * (because the guest isn't going to use radix) then set it up here. */ -- if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { -- /* legacy hash or new hash: */ -- spapr_setup_hpt_and_vrma(spapr); -- } -- spapr->cas_reboot = -- (spapr_h_cas_compose_response(spapr, args[1], args[2], -- ov5_updates) != 0); -- } - - /* -- * Ensure the guest asks for an interrupt mode we support; otherwise -- * terminate the boot. 
-+ * Ensure the guest asks for an interrupt mode we support; -+ * otherwise terminate the boot. - */ - if (guest_xive) { - if (!spapr->irq->xive) { -@@ -1797,14 +1786,18 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - } - } - -- /* -- * Generate a machine reset when we have an update of the -- * interrupt mode. Only required when the machine supports both -- * modes. -- */ -+ spapr_irq_update_active_intc(spapr); -+ - if (!spapr->cas_reboot) { -- spapr->cas_reboot = spapr_ovec_test(ov5_updates, OV5_XIVE_EXPLOIT) -- && spapr->irq->xics && spapr->irq->xive; -+ /* If spapr_machine_reset() did not set up a HPT but one is necessary -+ * (because the guest isn't going to use radix) then set it up here. */ -+ if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { -+ /* legacy hash or new hash: */ -+ spapr_setup_hpt_and_vrma(spapr); -+ } -+ spapr->cas_reboot = -+ (spapr_h_cas_compose_response(spapr, args[1], args[2], -+ ov5_updates) != 0); - } - - spapr_ovec_cleanup(ov5_updates); --- -1.8.3.1 - diff --git a/SOURCES/kvm-spapr-Don-t-use-spapr_drc_needed-in-CAS-code.patch b/SOURCES/kvm-spapr-Don-t-use-spapr_drc_needed-in-CAS-code.patch deleted file mode 100644 index 1462d52..0000000 --- a/SOURCES/kvm-spapr-Don-t-use-spapr_drc_needed-in-CAS-code.patch +++ /dev/null @@ -1,145 +0,0 @@ -From ad7aaf34400b1bbd41bbec182fd5895eaad50932 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Tue, 19 Jan 2021 15:09:51 -0500 -Subject: [PATCH 3/9] spapr: Don't use spapr_drc_needed() in CAS code - -RH-Author: Greg Kurz -Message-id: <20210119150954.1017058-4-gkurz@redhat.com> -Patchwork-id: 100683 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 3/6] spapr: Don't use spapr_drc_needed() in CAS code -Bugzilla: 1901837 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Laurent Vivier -RH-Acked-by: David Gibson - -From: Greg Kurz - -We currently don't support hotplug of devices between boot and CAS. If -this happens a CAS reboot is triggered. 
We detect this during CAS using -the spapr_drc_needed() function which is essentially a VMStateDescription -.needed callback. Even if the condition for CAS reboot happens to be the -same as for DRC migration, it looks wrong to piggyback a migration helper -for this. - -Introduce a helper with slightly more explicit name and use it in both CAS -and DRC migration code. Since a subsequent patch will enhance this helper -to cover the case of hot unplug, let's go for spapr_drc_transient(). While -here convert spapr_hotplugged_dev_before_cas() to the "transient" wording as -well. - -This doesn't change any behaviour. - -Signed-off-by: Greg Kurz -Message-Id: <158169248180.3465937.9531405453362718771.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit 4b63db1289a9e597bc151fa5e4d72f882cb6de1e) -Signed-off-by: Greg Kurz -Signed-off-by: Jon Maloy ---- - hw/ppc/spapr_drc.c | 20 ++++++++++++++------ - hw/ppc/spapr_hcall.c | 14 +++++++++----- - include/hw/ppc/spapr_drc.h | 4 +++- - 3 files changed, 26 insertions(+), 12 deletions(-) - -diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c -index 62f1a42592..9b498d429e 100644 ---- a/hw/ppc/spapr_drc.c -+++ b/hw/ppc/spapr_drc.c -@@ -455,23 +455,31 @@ void spapr_drc_reset(SpaprDrc *drc) - } - } - --bool spapr_drc_needed(void *opaque) -+bool spapr_drc_transient(SpaprDrc *drc) - { -- SpaprDrc *drc = (SpaprDrc *)opaque; - SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - -- /* If no dev is plugged in there is no need to migrate the DRC state */ -+ /* -+ * If no dev is plugged in there is no need to migrate the DRC state -+ * nor to reset the DRC at CAS. 
-+ */ - if (!drc->dev) { - return false; - } - - /* -- * We need to migrate the state if it's not equal to the expected -- * long-term state, which is the same as the coldplugged initial -- * state */ -+ * We need to reset the DRC at CAS or to migrate the DRC state if it's -+ * not equal to the expected long-term state, which is the same as the -+ * coldplugged initial state. -+ */ - return (drc->state != drck->ready_state); - } - -+static bool spapr_drc_needed(void *opaque) -+{ -+ return spapr_drc_transient(opaque); -+} -+ - static const VMStateDescription vmstate_spapr_drc = { - .name = "spapr_drc", - .version_id = 1, -diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c -index 0f19be794c..d70e643752 100644 ---- a/hw/ppc/spapr_hcall.c -+++ b/hw/ppc/spapr_hcall.c -@@ -1640,20 +1640,24 @@ static uint32_t cas_check_pvr(SpaprMachineState *spapr, PowerPCCPU *cpu, - return best_compat; - } - --static bool spapr_hotplugged_dev_before_cas(void) -+static bool spapr_transient_dev_before_cas(void) - { -- Object *drc_container, *obj; -+ Object *drc_container; - ObjectProperty *prop; - ObjectPropertyIterator iter; - - drc_container = container_get(object_get_root(), "/dr-connector"); - object_property_iter_init(&iter, drc_container); - while ((prop = object_property_iter_next(&iter))) { -+ SpaprDrc *drc; -+ - if (!strstart(prop->type, "link<", NULL)) { - continue; - } -- obj = object_property_get_link(drc_container, prop->name, NULL); -- if (spapr_drc_needed(obj)) { -+ drc = SPAPR_DR_CONNECTOR(object_property_get_link(drc_container, -+ prop->name, NULL)); -+ -+ if (spapr_drc_transient(drc)) { - return true; - } - } -@@ -1812,7 +1816,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - - spapr_irq_update_active_intc(spapr); - -- if (spapr_hotplugged_dev_before_cas()) { -+ if (spapr_transient_dev_before_cas()) { - spapr->cas_reboot = true; - } - -diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h -index 83f03cc577..7e09d57114 100644 
---- a/include/hw/ppc/spapr_drc.h -+++ b/include/hw/ppc/spapr_drc.h -@@ -269,7 +269,9 @@ int spapr_dt_drc(void *fdt, int offset, Object *owner, uint32_t drc_type_mask); - - void spapr_drc_attach(SpaprDrc *drc, DeviceState *d, Error **errp); - void spapr_drc_detach(SpaprDrc *drc); --bool spapr_drc_needed(void *opaque); -+ -+/* Returns true if a hot plug/unplug request is pending */ -+bool spapr_drc_transient(SpaprDrc *drc); - - static inline bool spapr_drc_unplug_requested(SpaprDrc *drc) - { --- -2.18.2 - diff --git a/SOURCES/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch b/SOURCES/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch deleted file mode 100644 index 0aa782b..0000000 --- a/SOURCES/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch +++ /dev/null @@ -1,135 +0,0 @@ -From eb121ffa97c1c25d7853d51b4c8209c0bb521deb Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Fri, 7 Feb 2020 00:57:04 +0000 -Subject: [PATCH 1/7] spapr: Enable DD2.3 accelerated count cache flush in - pseries-5.0 machine - -RH-Author: David Gibson -Message-id: <20200207005704.194428-1-dgibson@redhat.com> -Patchwork-id: 93737 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCHv2] spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine -Bugzilla: 1796240 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: David Gibson - -For POWER9 DD2.2 cpus, the best current Spectre v2 indirect branch -mitigation is "count cache disabled", which is configured with: - -machine cap-ibs=fixed-ccd -However, this option isn't available on DD2.3 CPUs with KVM, because they -don't have the count cache disabled. - -For POWER9 DD2.3 cpus, it is "count cache flush with assist", configured -with: - -machine cap-ibs=workaround,cap-ccf-assist=on -However this option isn't available on DD2.2 CPUs with KVM, because they -don't have the special CCF assist instruction this relies on. 
- -On current machine types, we default to "count cache flush w/o assist", -that is: - -machine cap-ibs=workaround,cap-ccf-assist=off -This runs, with mitigation on both DD2.2 and DD2.3 host cpus, but has a -fairly significant performance impact. - -It turns out we can do better. The special instruction that CCF assist -uses to trigger a count cache flush is a no-op on earlier CPUs, rather than -trapping or causing other badness. It doesn't, of itself, implement the -mitigation, but *if* we have count-cache-disabled, then the count cache -flush is unnecessary, and so using the count cache flush mitigation is -harmless. - -Therefore for the new pseries-5.0 machine type, enable cap-ccf-assist by -default. Along with that, suppress throwing an error if cap-ccf-assist -is selected but KVM doesn't support it, as long as KVM *is* giving us -count-cache-disabled. To allow TCG to work out of the box, even though it -doesn't implement the ccf flush assist, downgrade the error in that case to -a warning. This matches several Spectre mitigations where we allow TCG -to operate for debugging, since we don't really make guarantees about TCG -security properties anyway. - -While we're there, make the TCG warning for this case match that for other -mitigations. - -Signed-off-by: David Gibson -Tested-by: Michael Ellerman -(cherry picked from commit 37965dfe4dffa3ac49438337417608e7f346b58a) -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - hw/ppc/spapr.c - -Adjusted machine version compatibility code to the RHEL machine types -rather than the upstream machine types. - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1796240 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=26285002 -Branch: rhel-av-8.2.0 -Upstream: Merged for qemu-5.0 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr.c | 4 +++- - hw/ppc/spapr_caps.c | 21 +++++++++++++++++---- - 2 files changed, 20 insertions(+), 5 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index c12862d..a330f03 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4440,7 +4440,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) - smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ - smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF; - smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; -- smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; -+ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON; - spapr_caps_add_properties(smc, &error_abort); - smc->irq = &spapr_irq_dual; - smc->dr_phb_enabled = true; -@@ -4904,6 +4904,8 @@ static void spapr_machine_rhel810_class_options(MachineClass *mc) - hw_compat_rhel_8_1_len); - compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - -+ /* from pseries-4.2 */ -+ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; - } - - DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); -diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 805f385..6e6fb28 100644 ---- a/hw/ppc/spapr_caps.c -+++ b/hw/ppc/spapr_caps.c -@@ -492,11 +492,24 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val, - uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist(); - - if (tcg_enabled() && val) { -- /* TODO - for now only allow broken for TCG */ -- error_setg(errp, --"Requested count cache flush assist capability level not supported by tcg," -- " try appending -machine cap-ccf-assist=off"); -+ /* TCG doesn't implement anything here, but allow with a warning */ -+ warn_report("TCG doesn't support requested feature, cap-ccf-assist=on"); - } else if (kvm_enabled() && (val > kvm_val)) { -+ uint8_t kvm_ibs = kvmppc_get_cap_safe_indirect_branch(); -+ -+ if (kvm_ibs == SPAPR_CAP_FIXED_CCD) { -+ /* -+ * If we don't have CCF 
assist on the host, the assist -+ * instruction is a harmless no-op. It won't correctly -+ * implement the cache count flush *but* if we have -+ * count-cache-disabled in the host, that flush is -+ * unnnecessary. So, specifically allow this case. This -+ * allows us to have better performance on POWER9 DD2.3, -+ * while still working on POWER9 DD2.2 and POWER8 host -+ * cpus. -+ */ -+ return; -+ } - error_setg(errp, - "Requested count cache flush assist capability level not supported by kvm," - " try appending -machine cap-ccf-assist=off"); --- -1.8.3.1 - diff --git a/SOURCES/kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch b/SOURCES/kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch deleted file mode 100644 index 8d30406..0000000 --- a/SOURCES/kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch +++ /dev/null @@ -1,165 +0,0 @@ -From f9d332b1280cd3f6009b59323719548a36a7c52b Mon Sep 17 00:00:00 2001 -From: Daniel Henrique Barboza -Date: Mon, 21 Jun 2021 14:40:24 -0400 -Subject: [PATCH 2/4] spapr: Fix EEH capability issue on KVM guest for PCI - passthru - -RH-Author: Daniel Henrique Barboza -Message-id: <20210621144024.199732-2-dbarboza@redhat.com> -Patchwork-id: 101740 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/1] spapr: Fix EEH capability issue on KVM guest for PCI passthru -Bugzilla: 1957866 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Greg Kurz -RH-Acked-by: David Gibson - -From: Mahesh Salgaonkar - -With upstream kernel, especially after commit 98ba956f6a389 -("powerpc/pseries/eeh: Rework device EEH PE determination") we see that KVM -guest isn't able to enable EEH option for PCI pass-through devices anymore. - -[root@atest-guest ~]# dmesg | grep EEH -[ 0.032337] EEH: pSeries platform initialized -[ 0.298207] EEH: No capable adapters found: recovery disabled. 
-[root@atest-guest ~]# - -So far the linux kernel was assuming pe_config_addr equal to device's -config_addr and using it to enable EEH on the PE through ibm,set-eeh-option -RTAS call. Which wasn't the correct way as per PAPR. The linux kernel -commit 98ba956f6a389 fixed this flow. With that fixed, linux now uses PE -config address returned by ibm,get-config-addr-info2 RTAS call to enable -EEH option per-PE basis instead of per-device basis. However this has -uncovered a bug in qemu where ibm,set-eeh-option is treating PE config -address as per-device config address. - -Hence in qemu guest with recent kernel the ibm,set-eeh-option RTAS call -fails with -3 return value indicating that there is no PCI device exist for -the specified PE config address. The rtas_ibm_set_eeh_option call uses -pci_find_device() to get the PC device that matches specific bus and devfn -extracted from PE config address passed as argument. Thus it tries to map -the PE config address to a single specific PCI device 'bus->devices[devfn]' -which always results into checking device on slot 0 'bus->devices[0]'. -This succeeds when there is a pass-through device (vfio-pci) present on -slot 0. But in cases where there is no pass-through device present in slot -0, but present in non-zero slots, ibm,set-eeh-option call fails to enable -the EEH capability. - -hw/ppc/spapr_pci_vfio.c: spapr_phb_vfio_eeh_set_option() - case RTAS_EEH_ENABLE: { - PCIHostState *phb; - PCIDevice *pdev; - - /* - * The EEH functionality is enabled on basis of PCI device, - * instead of PE. We need check the validity of the PCI - * device address. 
- */ - phb = PCI_HOST_BRIDGE(sphb); - pdev = pci_find_device(phb->bus, - (addr >> 16) & 0xFF, (addr >> 8) & 0xFF); - if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { - return RTAS_OUT_PARAM_ERROR; - } - -hw/pci/pci.c:pci_find_device() - -PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn) -{ - bus = pci_find_bus_nr(bus, bus_num); - - if (!bus) - return NULL; - - return bus->devices[devfn]; -} - -This patch fixes ibm,set-eeh-option to check for presence of any PCI device -(vfio-pci) under specified bus and enable the EEH if found. The current -code already makes sure that all the devices on that bus are from same -iommu group (within same PE) and fail very early if it does not. - -After this fix guest is able to find EEH capable devices and enable EEH -recovery on it. - -[root@atest-guest ~]# dmesg | grep EEH -[ 0.048139] EEH: pSeries platform initialized -[ 0.405115] EEH: Capable adapter found: recovery enabled. -[root@atest-guest ~]# - -Reviewed-by: Daniel Henrique Barboza -Signed-off-by: Mahesh Salgaonkar -Message-Id: <162158429107.145117.5843504911924013125.stgit@jupiter> -Signed-off-by: David Gibson -(cherry picked from commit ac9ef668321ebb6eb871a0c4dd380fa7d7891b4e) -Signed-off-by: Daniel Henrique Barboza -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr_pci_vfio.c | 40 +++++++++++++++++++++++++++++++++------- - 1 file changed, 33 insertions(+), 7 deletions(-) - -diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c -index ecb34aaade..a411b08d60 100644 ---- a/hw/ppc/spapr_pci_vfio.c -+++ b/hw/ppc/spapr_pci_vfio.c -@@ -48,6 +48,16 @@ void spapr_phb_vfio_reset(DeviceState *qdev) - spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev)); - } - -+static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev, -+ void *opaque) -+{ -+ bool *found = opaque; -+ -+ if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { -+ *found = true; -+ } -+} -+ - int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, - unsigned int addr, int option) - { -@@ -60,17 +70,33 @@ int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, - break; - case RTAS_EEH_ENABLE: { - PCIHostState *phb; -- PCIDevice *pdev; -+ bool found = false; - - /* -- * The EEH functionality is enabled on basis of PCI device, -- * instead of PE. We need check the validity of the PCI -- * device address. -+ * The EEH functionality is enabled per sphb level instead of -+ * per PCI device. We have already identified this specific sphb -+ * based on buid passed as argument to ibm,set-eeh-option rtas -+ * call. Now we just need to check the validity of the PCI -+ * pass-through devices (vfio-pci) under this sphb bus. -+ * We have already validated that all the devices under this sphb -+ * are from same iommu group (within same PE) before comming here. -+ * -+ * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh: -+ * Rework device EEH PE determination") kernel would call -+ * eeh-set-option for each device in the PE using the device's -+ * config_address as the argument rather than the PE address. -+ * Hence if we check validity of supplied config_addr whether -+ * it matches to this PHB will cause issues with older kernel -+ * versions v5.9 and older. 
If we return an error from -+ * eeh-set-option when the argument isn't a valid PE address -+ * then older kernels (v5.9 and older) will interpret that as -+ * EEH not being supported. - */ - phb = PCI_HOST_BRIDGE(sphb); -- pdev = pci_find_device(phb->bus, -- (addr >> 16) & 0xFF, (addr >> 8) & 0xFF); -- if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { -+ pci_for_each_device(phb->bus, (addr >> 16) & 0xFF, -+ spapr_eeh_pci_find_device, &found); -+ -+ if (!found) { - return RTAS_OUT_PARAM_ERROR; - } - --- -2.27.0 - diff --git a/SOURCES/kvm-spapr-Fix-handling-of-unplugged-devices-during-CAS-a.patch b/SOURCES/kvm-spapr-Fix-handling-of-unplugged-devices-during-CAS-a.patch deleted file mode 100644 index c14aa7d..0000000 --- a/SOURCES/kvm-spapr-Fix-handling-of-unplugged-devices-during-CAS-a.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 9ebed8090b88282f9b7432258df9182b9d3944ee Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Tue, 19 Jan 2021 15:09:52 -0500 -Subject: [PATCH 4/9] spapr: Fix handling of unplugged devices during CAS and - migration - -RH-Author: Greg Kurz -Message-id: <20210119150954.1017058-5-gkurz@redhat.com> -Patchwork-id: 100685 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 4/6] spapr: Fix handling of unplugged devices during CAS and migration -Bugzilla: 1901837 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Laurent Vivier -RH-Acked-by: David Gibson - -From: Greg Kurz - -We already detect if a device is being hot plugged before CAS to trigger -a CAS reboot and during migration to migrate the state of the associated -DRC. But hot unplugging a device is also an asynchronous operation that -requires the guest to take action. This means that if the guest is migrated -after the hot unplug event was sent but before it could release the device -with RTAS, the destination QEMU doesn't know about the pending unplug -operation and doesn't actually remove the device when the guest finally -releases it. 
- -Similarly, if the unplug request is fired before CAS, the guest isn't -notified of the change, just like with hotplug. It ends up booting with -the device still present in the DT and configures it, just like it was -never removed. Even weirder, since the event is still queued, it will -be eventually processed when some other unrelated event is posted to -the guest. - -Enhance spapr_drc_transient() to also return true if an unplug request is -pending. This fixes the issue at CAS with a CAS reboot request and -causes the DRC state to be migrated. Some extra care is still needed to -inform the destination that an unplug request is pending : migrate the -unplug_requested field of the DRC in an optional subsection. This might -break backwards migration, but this is still better than ending with -an inconsistent guest. - -Signed-off-by: Greg Kurz -Message-Id: <158169248798.3465937.1108351365840514270.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit ab8584349c476f9818dc6403359c85f9ab0ad5eb) -Signed-off-by: Greg Kurz -Signed-off-by: Jon Maloy ---- - hw/ppc/spapr_drc.c | 25 +++++++++++++++++++++++-- - 1 file changed, 23 insertions(+), 2 deletions(-) - -diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c -index 9b498d429e..897bb7aae0 100644 ---- a/hw/ppc/spapr_drc.c -+++ b/hw/ppc/spapr_drc.c -@@ -455,6 +455,22 @@ void spapr_drc_reset(SpaprDrc *drc) - } - } - -+static bool spapr_drc_unplug_requested_needed(void *opaque) -+{ -+ return spapr_drc_unplug_requested(opaque); -+} -+ -+static const VMStateDescription vmstate_spapr_drc_unplug_requested = { -+ .name = "spapr_drc/unplug_requested", -+ .version_id = 1, -+ .minimum_version_id = 1, -+ .needed = spapr_drc_unplug_requested_needed, -+ .fields = (VMStateField []) { -+ VMSTATE_BOOL(unplug_requested, SpaprDrc), -+ VMSTATE_END_OF_LIST() -+ } -+}; -+ - bool spapr_drc_transient(SpaprDrc *drc) - { - SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); -@@ -470,9 +486,10 @@ bool 
spapr_drc_transient(SpaprDrc *drc) - /* - * We need to reset the DRC at CAS or to migrate the DRC state if it's - * not equal to the expected long-term state, which is the same as the -- * coldplugged initial state. -+ * coldplugged initial state, or if an unplug request is pending. - */ -- return (drc->state != drck->ready_state); -+ return drc->state != drck->ready_state || -+ spapr_drc_unplug_requested(drc); - } - - static bool spapr_drc_needed(void *opaque) -@@ -488,6 +505,10 @@ static const VMStateDescription vmstate_spapr_drc = { - .fields = (VMStateField []) { - VMSTATE_UINT32(state, SpaprDrc), - VMSTATE_END_OF_LIST() -+ }, -+ .subsections = (const VMStateDescription * []) { -+ &vmstate_spapr_drc_unplug_requested, -+ NULL - } - }; - --- -2.18.2 - diff --git a/SOURCES/kvm-spapr-Fold-h_cas_compose_response-into-h_client_arch.patch b/SOURCES/kvm-spapr-Fold-h_cas_compose_response-into-h_client_arch.patch deleted file mode 100644 index b0ca288..0000000 --- a/SOURCES/kvm-spapr-Fold-h_cas_compose_response-into-h_client_arch.patch +++ /dev/null @@ -1,246 +0,0 @@ -From cb9d5380b1376b2a44d91d84eaf09f948ef1e165 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Tue, 19 Jan 2021 15:09:50 -0500 -Subject: [PATCH 2/9] spapr: Fold h_cas_compose_response() into - h_client_architecture_support() - -RH-Author: Greg Kurz -Message-id: <20210119150954.1017058-3-gkurz@redhat.com> -Patchwork-id: 100687 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 2/6] spapr: Fold h_cas_compose_response() into h_client_architecture_support() -Bugzilla: 1901837 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Laurent Vivier -RH-Acked-by: David Gibson - -From: David Gibson - -spapr_h_cas_compose_response() handles the last piece of the PAPR feature -negotiation process invoked via the ibm,client-architecture-support OF -call. Its only caller is h_client_architecture_support() which handles -most of the rest of that process. 
- -I believe it was placed in a separate file originally to handle some -fiddly dependencies between functions, but mostly it's just confusing -to have the CAS process split into two pieces like this. Now that -compose response is simplified (by just generating the whole device -tree anew), it's cleaner to just fold it into -h_client_architecture_support(). - -Signed-off-by: David Gibson -Reviewed-by: Cedric Le Goater -Reviewed-by: Greg Kurz -(cherry picked from commit 0c21e073541cc093b4cb8744640e24f130e6f8ba) -Signed-off-by: Greg Kurz -Signed-off-by: Jon Maloy ---- - hw/ppc/spapr.c | 61 +----------------------------------------- - hw/ppc/spapr_hcall.c | 55 ++++++++++++++++++++++++++++++++++--- - include/hw/ppc/spapr.h | 4 +-- - 3 files changed, 54 insertions(+), 66 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 92f63ad035..992bd08aaa 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -76,7 +76,6 @@ - #include "hw/nmi.h" - #include "hw/intc/intc.h" - --#include "qemu/cutils.h" - #include "hw/ppc/spapr_cpu_core.h" - #include "hw/mem/memory-device.h" - #include "hw/ppc/spapr_tpm_proxy.h" -@@ -898,63 +897,6 @@ out: - return ret; - } - --static bool spapr_hotplugged_dev_before_cas(void) --{ -- Object *drc_container, *obj; -- ObjectProperty *prop; -- ObjectPropertyIterator iter; -- -- drc_container = container_get(object_get_root(), "/dr-connector"); -- object_property_iter_init(&iter, drc_container); -- while ((prop = object_property_iter_next(&iter))) { -- if (!strstart(prop->type, "link<", NULL)) { -- continue; -- } -- obj = object_property_get_link(drc_container, prop->name, NULL); -- if (spapr_drc_needed(obj)) { -- return true; -- } -- } -- return false; --} -- --static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, -- size_t space); -- --int spapr_h_cas_compose_response(SpaprMachineState *spapr, -- target_ulong addr, target_ulong size, -- SpaprOptionVector *ov5_updates) --{ -- void *fdt; -- SpaprDeviceTreeUpdateHeader hdr = { 
.version_id = 1 }; -- -- if (spapr_hotplugged_dev_before_cas()) { -- return 1; -- } -- -- if (size < sizeof(hdr)) { -- error_report("SLOF provided insufficient CAS buffer " -- TARGET_FMT_lu " (min: %zu)", size, sizeof(hdr)); -- exit(EXIT_FAILURE); -- } -- -- size -= sizeof(hdr); -- -- fdt = spapr_build_fdt(spapr, false, size); -- _FDT((fdt_pack(fdt))); -- -- cpu_physical_memory_write(addr, &hdr, sizeof(hdr)); -- cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt)); -- trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr)); -- -- g_free(spapr->fdt_blob); -- spapr->fdt_size = fdt_totalsize(fdt); -- spapr->fdt_initial_size = spapr->fdt_size; -- spapr->fdt_blob = fdt; -- -- return 0; --} -- - static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) - { - MachineState *ms = MACHINE(spapr); -@@ -1192,8 +1134,7 @@ static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt) - } - } - --static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, -- size_t space) -+void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space) - { - MachineState *machine = MACHINE(spapr); - MachineClass *mc = MACHINE_GET_CLASS(machine); -diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c -index 05a7ca275b..0f19be794c 100644 ---- a/hw/ppc/spapr_hcall.c -+++ b/hw/ppc/spapr_hcall.c -@@ -1,4 +1,5 @@ - #include "qemu/osdep.h" -+#include "qemu/cutils.h" - #include "qapi/error.h" - #include "sysemu/hw_accel.h" - #include "sysemu/runstate.h" -@@ -15,6 +16,7 @@ - #include "cpu-models.h" - #include "trace.h" - #include "kvm_ppc.h" -+#include "hw/ppc/fdt.h" - #include "hw/ppc/spapr_ovec.h" - #include "mmu-book3s-v3.h" - #include "hw/mem/memory-device.h" -@@ -1638,6 +1640,26 @@ static uint32_t cas_check_pvr(SpaprMachineState *spapr, PowerPCCPU *cpu, - return best_compat; - } - -+static bool spapr_hotplugged_dev_before_cas(void) -+{ -+ Object *drc_container, *obj; -+ ObjectProperty *prop; -+ ObjectPropertyIterator iter; -+ -+ drc_container = 
container_get(object_get_root(), "/dr-connector"); -+ object_property_iter_init(&iter, drc_container); -+ while ((prop = object_property_iter_next(&iter))) { -+ if (!strstart(prop->type, "link<", NULL)) { -+ continue; -+ } -+ obj = object_property_get_link(drc_container, prop->name, NULL); -+ if (spapr_drc_needed(obj)) { -+ return true; -+ } -+ } -+ return false; -+} -+ - static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - SpaprMachineState *spapr, - target_ulong opcode, -@@ -1645,6 +1667,8 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - { - /* Working address in data buffer */ - target_ulong addr = ppc64_phys_to_real(args[0]); -+ target_ulong fdt_buf = args[1]; -+ target_ulong fdt_bufsize = args[2]; - target_ulong ov_table; - uint32_t cas_pvr; - SpaprOptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates; -@@ -1788,16 +1812,41 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - - spapr_irq_update_active_intc(spapr); - -+ if (spapr_hotplugged_dev_before_cas()) { -+ spapr->cas_reboot = true; -+ } -+ - if (!spapr->cas_reboot) { -+ void *fdt; -+ SpaprDeviceTreeUpdateHeader hdr = { .version_id = 1 }; -+ - /* If spapr_machine_reset() did not set up a HPT but one is necessary - * (because the guest isn't going to use radix) then set it up here. 
*/ - if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { - /* legacy hash or new hash: */ - spapr_setup_hpt_and_vrma(spapr); - } -- spapr->cas_reboot = -- (spapr_h_cas_compose_response(spapr, args[1], args[2], -- ov5_updates) != 0); -+ -+ if (fdt_bufsize < sizeof(hdr)) { -+ error_report("SLOF provided insufficient CAS buffer " -+ TARGET_FMT_lu " (min: %zu)", fdt_bufsize, sizeof(hdr)); -+ exit(EXIT_FAILURE); -+ } -+ -+ fdt_bufsize -= sizeof(hdr); -+ -+ fdt = spapr_build_fdt(spapr, false, fdt_bufsize); -+ _FDT((fdt_pack(fdt))); -+ -+ cpu_physical_memory_write(fdt_buf, &hdr, sizeof(hdr)); -+ cpu_physical_memory_write(fdt_buf + sizeof(hdr), fdt, -+ fdt_totalsize(fdt)); -+ trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr)); -+ -+ g_free(spapr->fdt_blob); -+ spapr->fdt_size = fdt_totalsize(fdt); -+ spapr->fdt_initial_size = spapr->fdt_size; -+ spapr->fdt_blob = fdt; - } - - spapr_ovec_cleanup(ov5_updates); -diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index e047dabf30..e5e2a99046 100644 ---- a/include/hw/ppc/spapr.h -+++ b/include/hw/ppc/spapr.h -@@ -767,11 +767,9 @@ struct SpaprEventLogEntry { - QTAILQ_ENTRY(SpaprEventLogEntry) next; - }; - -+void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space); - void spapr_events_init(SpaprMachineState *sm); - void spapr_dt_events(SpaprMachineState *sm, void *fdt); --int spapr_h_cas_compose_response(SpaprMachineState *sm, -- target_ulong addr, target_ulong size, -- SpaprOptionVector *ov5_updates); - void close_htab_fd(SpaprMachineState *spapr); - void spapr_setup_hpt_and_vrma(SpaprMachineState *spapr); - void spapr_free_hpt(SpaprMachineState *spapr); --- -2.18.2 - diff --git a/SOURCES/kvm-spapr-Improve-handling-of-fdt-buffer-size.patch b/SOURCES/kvm-spapr-Improve-handling-of-fdt-buffer-size.patch deleted file mode 100644 index 2f57cde..0000000 --- a/SOURCES/kvm-spapr-Improve-handling-of-fdt-buffer-size.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 04f7fe2423a4de8d2fea7068b3fb316e15e76eaa 
Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Tue, 19 Jan 2021 15:09:49 -0500 -Subject: [PATCH 1/9] spapr: Improve handling of fdt buffer size - -RH-Author: Greg Kurz -Message-id: <20210119150954.1017058-2-gkurz@redhat.com> -Patchwork-id: 100682 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 1/6] spapr: Improve handling of fdt buffer size -Bugzilla: 1901837 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Laurent Vivier -RH-Acked-by: David Gibson - -From: David Gibson - -Previously, spapr_build_fdt() constructed the device tree in a fixed -buffer of size FDT_MAX_SIZE. This is a bit inflexible, but more -importantly it's awkward for the case where we use it during CAS. In -that case the guest firmware supplies a buffer and we have to -awkwardly check that what we generated fits into it afterwards, after -doing a lot of size checks during spapr_build_fdt(). - -Simplify this by having spapr_build_fdt() take a 'space' parameter. -For the CAS case, we pass in the buffer size provided by SLOF, for the -machine init case, we continue to pass FDT_MAX_SIZE. 
- -Signed-off-by: David Gibson -Reviewed-by: Cedric Le Goater -Reviewed-by: Greg Kurz -(cherry picked from commit 97b32a6afa78ae68fb16344b9a144b6f433f42a2) -Signed-off-by: Greg Kurz -Signed-off-by: Jon Maloy ---- - hw/ppc/spapr.c | 33 +++++++++++---------------------- - 1 file changed, 11 insertions(+), 22 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index c74079702d..92f63ad035 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -918,7 +918,8 @@ static bool spapr_hotplugged_dev_before_cas(void) - return false; - } - --static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset); -+static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, -+ size_t space); - - int spapr_h_cas_compose_response(SpaprMachineState *spapr, - target_ulong addr, target_ulong size, -@@ -931,24 +932,17 @@ int spapr_h_cas_compose_response(SpaprMachineState *spapr, - return 1; - } - -- if (size < sizeof(hdr) || size > FW_MAX_SIZE) { -- error_report("SLOF provided an unexpected CAS buffer size " -- TARGET_FMT_lu " (min: %zu, max: %u)", -- size, sizeof(hdr), FW_MAX_SIZE); -+ if (size < sizeof(hdr)) { -+ error_report("SLOF provided insufficient CAS buffer " -+ TARGET_FMT_lu " (min: %zu)", size, sizeof(hdr)); - exit(EXIT_FAILURE); - } - - size -= sizeof(hdr); - -- fdt = spapr_build_fdt(spapr, false); -+ fdt = spapr_build_fdt(spapr, false, size); - _FDT((fdt_pack(fdt))); - -- if (fdt_totalsize(fdt) + sizeof(hdr) > size) { -- g_free(fdt); -- trace_spapr_cas_failed(size); -- return -1; -- } -- - cpu_physical_memory_write(addr, &hdr, sizeof(hdr)); - cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt)); - trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr)); -@@ -1198,7 +1192,8 @@ static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt) - } - } - --static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset) -+static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, -+ size_t space) - { - MachineState *machine = 
MACHINE(spapr); - MachineClass *mc = MACHINE_GET_CLASS(machine); -@@ -1208,8 +1203,8 @@ static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset) - SpaprPhbState *phb; - char *buf; - -- fdt = g_malloc0(FDT_MAX_SIZE); -- _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE))); -+ fdt = g_malloc0(space); -+ _FDT((fdt_create_empty_tree(fdt, space))); - - /* Root node */ - _FDT(fdt_setprop_string(fdt, 0, "device_type", "chrp")); -@@ -1724,19 +1719,13 @@ static void spapr_machine_reset(MachineState *machine) - */ - fdt_addr = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FDT_MAX_SIZE; - -- fdt = spapr_build_fdt(spapr, true); -+ fdt = spapr_build_fdt(spapr, true, FDT_MAX_SIZE); - - rc = fdt_pack(fdt); - - /* Should only fail if we've built a corrupted tree */ - assert(rc == 0); - -- if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { -- error_report("FDT too big ! 0x%x bytes (max is 0x%x)", -- fdt_totalsize(fdt), FDT_MAX_SIZE); -- exit(1); -- } -- - /* Load the fdt */ - qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); - cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); --- -2.18.2 - diff --git a/SOURCES/kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch b/SOURCES/kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch deleted file mode 100644 index b4b2b5f..0000000 --- a/SOURCES/kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch +++ /dev/null @@ -1,170 +0,0 @@ -From f94b3a4eb9d709f1f6a14ad9ad6ebcc1b67b6923 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Tue, 19 Jan 2021 15:09:54 -0500 -Subject: [PATCH 6/9] spapr: Improve handling of memory unplug with old guests - -RH-Author: Greg Kurz -Message-id: <20210119150954.1017058-7-gkurz@redhat.com> -Patchwork-id: 100684 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 6/6] spapr: Improve handling of memory unplug with old guests -Bugzilla: 1901837 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Laurent Vivier -RH-Acked-by: David Gibson - -From: Greg Kurz - -Since commit 1e8b5b1aa16b ("spapr: Allow memory unplug to 
always succeed") -trying to unplug memory from a guest that doesn't support it (eg. rhel6) -no longer generates an error like it used to. Instead, it leaves the -memory around : only a subsequent reboot or manual use of drmgr within -the guest can complete the hot-unplug sequence. A flag was added to -SpaprMachineClass so that this new behavior only applies to the default -machine type. - -We can do better. CAS processes all pending hot-unplug requests. This -means that we don't really care about what the guest supports if -the hot-unplug request happens before CAS. - -All guests that we care for, even old ones, set enough bits in OV5 -that lead to a non-empty bitmap in spapr->ov5_cas. Use that as a -heuristic to decide if CAS has already occured or not. - -Always accept unplug requests that happen before CAS since CAS will -process them. Restore the previous behavior of rejecting them after -CAS when we know that the guest doesn't support memory hot-unplug. - -This behavior is suitable for all machine types : this allows to -drop the pre_6_0_memory_unplug flag. - -Fixes: 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed") -Signed-off-by: Greg Kurz -Message-Id: <161012708715.801107.11418801796987916516.stgit@bahia.lan> -Reviewed-by: Daniel Henrique Barboza -Signed-off-by: David Gibson -(cherry picked from commit 73598c75df0585e039825e642adede21912dabc7) -Signed-off-by: Greg Kurz - -Conflicts: - hw/ppc/spapr.c - include/hw/ppc/spapr.h - -Contextual conflicts around the removal of pre_6_0_memory_unplug, -which was only partially backported from upstream 1e8b5b1aa16b, and -the addition of spapr_memory_hot_unplug_supported(). 
- -Signed-off-by: Jon Maloy ---- - hw/ppc/spapr.c | 21 +++++++++++++-------- - hw/ppc/spapr_events.c | 3 +-- - hw/ppc/spapr_ovec.c | 7 +++++++ - include/hw/ppc/spapr.h | 2 +- - include/hw/ppc/spapr_ovec.h | 1 + - 5 files changed, 23 insertions(+), 11 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index f8de33e3e5..00b1ef075e 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -3993,6 +3993,18 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, - } - } - -+bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr) -+{ -+ return spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT) || -+ /* -+ * CAS will process all pending unplug requests. -+ * -+ * HACK: a guest could theoretically have cleared all bits in OV5, -+ * but none of the guests we care for do. -+ */ -+ spapr_ovec_empty(spapr->ov5_cas); -+} -+ - static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) - { -@@ -4001,16 +4013,9 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, - SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); - - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { -- if (!smc->pre_6_0_memory_unplug || -- spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { -+ if (spapr_memory_hot_unplug_supported(sms)) { - spapr_memory_unplug_request(hotplug_dev, dev, errp); - } else { -- /* NOTE: this means there is a window after guest reset, prior to -- * CAS negotiation, where unplug requests will fail due to the -- * capability not being detected yet. 
This is a bit different than -- * the case with PCI unplug, where the events will be queued and -- * eventually handled by the guest after boot -- */ - error_setg(errp, "Memory hot unplug not supported for this guest"); - } - } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { -diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c -index 6e284aa4bc..08168acd65 100644 ---- a/hw/ppc/spapr_events.c -+++ b/hw/ppc/spapr_events.c -@@ -547,8 +547,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, - /* we should not be using count_indexed value unless the guest - * supports dedicated hotplug event source - */ -- g_assert(!SPAPR_MACHINE_GET_CLASS(spapr)->pre_6_0_memory_unplug || -- spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)); -+ g_assert(spapr_memory_hot_unplug_supported(spapr)); - hp->drc_id.count_indexed.count = - cpu_to_be32(drc_id->count_indexed.count); - hp->drc_id.count_indexed.index = -diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c -index 811fadf143..f858afc7d5 100644 ---- a/hw/ppc/spapr_ovec.c -+++ b/hw/ppc/spapr_ovec.c -@@ -135,6 +135,13 @@ bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr) - return test_bit(bitnr, ov->bitmap) ? 
true : false; - } - -+bool spapr_ovec_empty(SpaprOptionVector *ov) -+{ -+ g_assert(ov); -+ -+ return bitmap_empty(ov->bitmap, OV_MAXBITS); -+} -+ - static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap, - long bitmap_offset) - { -diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index ac6961ed16..7aaf5d9996 100644 ---- a/include/hw/ppc/spapr.h -+++ b/include/hw/ppc/spapr.h -@@ -124,7 +124,6 @@ struct SpaprMachineClass { - bool pre_4_1_migration; /* don't migrate hpt-max-page-size */ - bool linux_pci_probe; - bool smp_threads_vsmt; /* set VSMT to smp_threads by default */ -- bool pre_6_0_memory_unplug; - - bool has_power9_support; - void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, -@@ -894,4 +893,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, - #define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on the platform */ - - void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); -+bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr); - #endif /* HW_SPAPR_H */ -diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h -index 7891e9caac..98c73bf601 100644 ---- a/include/hw/ppc/spapr_ovec.h -+++ b/include/hw/ppc/spapr_ovec.h -@@ -73,6 +73,7 @@ void spapr_ovec_cleanup(SpaprOptionVector *ov); - void spapr_ovec_set(SpaprOptionVector *ov, long bitnr); - void spapr_ovec_clear(SpaprOptionVector *ov, long bitnr); - bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr); -+bool spapr_ovec_empty(SpaprOptionVector *ov); - SpaprOptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector); - int spapr_ovec_populate_dt(void *fdt, int fdt_offset, - SpaprOptionVector *ov, const char *name); --- -2.18.2 - diff --git a/SOURCES/kvm-spapr-Pass-the-maximum-number-of-vCPUs-to-the-KVM-in.patch b/SOURCES/kvm-spapr-Pass-the-maximum-number-of-vCPUs-to-the-KVM-in.patch deleted file mode 100644 index 7c48718..0000000 --- 
a/SOURCES/kvm-spapr-Pass-the-maximum-number-of-vCPUs-to-the-KVM-in.patch +++ /dev/null @@ -1,213 +0,0 @@ -From 5aea41b56f07f586e0f56a5c8b3e8443e485cd77 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 5 Jun 2020 07:41:09 -0400 -Subject: [PATCH 39/42] spapr: Pass the maximum number of vCPUs to the KVM - interrupt controller -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200605074111.2185-2-thuth@redhat.com> -Patchwork-id: 97368 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/3] spapr: Pass the maximum number of vCPUs to the KVM interrupt controller -Bugzilla: 1756946 -RH-Acked-by: Greg Kurz -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Greg Kurz - -The XIVE and XICS-on-XIVE KVM devices on POWER9 hosts can greatly reduce -their consumption of some scarce HW resources, namely Virtual Presenter -identifiers, if they know the maximum number of vCPUs that may run in the -VM. - -Prepare ground for this by passing the value down to xics_kvm_connect() -and kvmppc_xive_connect(). This is purely mechanical, no functional -change. - -Signed-off-by: Greg Kurz -Message-Id: <157478678301.67101.2717368060417156338.stgit@bahia.tlslab.ibm.com> -Reviewed-by: Cédric Le Goater -Signed-off-by: David Gibson -(cherry picked from commit 4ffb7496881ec361deaf1f51c41a933bde3cbf7b) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/intc/spapr_xive.c | 6 ++++-- - hw/intc/spapr_xive_kvm.c | 3 ++- - hw/intc/xics_kvm.c | 3 ++- - hw/intc/xics_spapr.c | 5 +++-- - hw/ppc/spapr_irq.c | 8 +++++--- - include/hw/ppc/spapr_irq.h | 10 ++++++++-- - include/hw/ppc/spapr_xive.h | 3 ++- - include/hw/ppc/xics_spapr.h | 3 ++- - 8 files changed, 28 insertions(+), 13 deletions(-) - -diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c -index 9cb8d38a3b..a570e6e90a 100644 ---- a/hw/intc/spapr_xive.c -+++ b/hw/intc/spapr_xive.c -@@ -651,12 +651,14 @@ static void spapr_xive_dt(SpaprInterruptController *intc, uint32_t nr_servers, - plat_res_int_priorities, sizeof(plat_res_int_priorities))); - } - --static int spapr_xive_activate(SpaprInterruptController *intc, Error **errp) -+static int spapr_xive_activate(SpaprInterruptController *intc, -+ uint32_t nr_servers, Error **errp) - { - SpaprXive *xive = SPAPR_XIVE(intc); - - if (kvm_enabled()) { -- int rc = spapr_irq_init_kvm(kvmppc_xive_connect, intc, errp); -+ int rc = spapr_irq_init_kvm(kvmppc_xive_connect, intc, nr_servers, -+ errp); - if (rc < 0) { - return rc; - } -diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c -index 08012ac7cd..c1c837a764 100644 ---- a/hw/intc/spapr_xive_kvm.c -+++ b/hw/intc/spapr_xive_kvm.c -@@ -740,7 +740,8 @@ static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len, - * All the XIVE memory regions are now backed by mappings from the KVM - * XIVE device. 
- */ --int kvmppc_xive_connect(SpaprInterruptController *intc, Error **errp) -+int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, -+ Error **errp) - { - SpaprXive *xive = SPAPR_XIVE(intc); - XiveSource *xsrc = &xive->source; -diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c -index 954c424b36..a1f1b7b0d3 100644 ---- a/hw/intc/xics_kvm.c -+++ b/hw/intc/xics_kvm.c -@@ -342,7 +342,8 @@ void ics_kvm_set_irq(ICSState *ics, int srcno, int val) - } - } - --int xics_kvm_connect(SpaprInterruptController *intc, Error **errp) -+int xics_kvm_connect(SpaprInterruptController *intc, uint32_t nr_servers, -+ Error **errp) - { - ICSState *ics = ICS_SPAPR(intc); - int rc; -diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c -index b3705dab0e..8ae4f41459 100644 ---- a/hw/intc/xics_spapr.c -+++ b/hw/intc/xics_spapr.c -@@ -422,10 +422,11 @@ static int xics_spapr_post_load(SpaprInterruptController *intc, int version_id) - return 0; - } - --static int xics_spapr_activate(SpaprInterruptController *intc, Error **errp) -+static int xics_spapr_activate(SpaprInterruptController *intc, -+ uint32_t nr_servers, Error **errp) - { - if (kvm_enabled()) { -- return spapr_irq_init_kvm(xics_kvm_connect, intc, errp); -+ return spapr_irq_init_kvm(xics_kvm_connect, intc, nr_servers, errp); - } - return 0; - } -diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c -index d6bb7fd2d6..9da423658a 100644 ---- a/hw/ppc/spapr_irq.c -+++ b/hw/ppc/spapr_irq.c -@@ -70,15 +70,16 @@ void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num) - bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num); - } - --int spapr_irq_init_kvm(int (*fn)(SpaprInterruptController *, Error **), -+int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn, - SpaprInterruptController *intc, -+ uint32_t nr_servers, - Error **errp) - { - MachineState *machine = MACHINE(qdev_get_machine()); - Error *local_err = NULL; - - if (kvm_enabled() && machine_kernel_irqchip_allowed(machine)) { -- if 
(fn(intc, &local_err) < 0) { -+ if (fn(intc, nr_servers, &local_err) < 0) { - if (machine_kernel_irqchip_required(machine)) { - error_prepend(&local_err, - "kernel_irqchip requested but unavailable: "); -@@ -495,6 +496,7 @@ static void set_active_intc(SpaprMachineState *spapr, - SpaprInterruptController *new_intc) - { - SpaprInterruptControllerClass *sicc; -+ uint32_t nr_servers = spapr_max_server_number(spapr); - - assert(new_intc); - -@@ -512,7 +514,7 @@ static void set_active_intc(SpaprMachineState *spapr, - - sicc = SPAPR_INTC_GET_CLASS(new_intc); - if (sicc->activate) { -- sicc->activate(new_intc, &error_fatal); -+ sicc->activate(new_intc, nr_servers, &error_fatal); - } - - spapr->active_intc = new_intc; -diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h -index ff814d13de..ca8cb44213 100644 ---- a/include/hw/ppc/spapr_irq.h -+++ b/include/hw/ppc/spapr_irq.h -@@ -43,7 +43,8 @@ typedef struct SpaprInterruptController SpaprInterruptController; - typedef struct SpaprInterruptControllerClass { - InterfaceClass parent; - -- int (*activate)(SpaprInterruptController *intc, Error **errp); -+ int (*activate)(SpaprInterruptController *intc, uint32_t nr_servers, -+ Error **errp); - void (*deactivate)(SpaprInterruptController *intc); - - /* -@@ -98,8 +99,13 @@ qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq); - int spapr_irq_post_load(SpaprMachineState *spapr, int version_id); - void spapr_irq_reset(SpaprMachineState *spapr, Error **errp); - int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp); --int spapr_irq_init_kvm(int (*fn)(SpaprInterruptController *, Error **), -+ -+typedef int (*SpaprInterruptControllerInitKvm)(SpaprInterruptController *, -+ uint32_t, Error **); -+ -+int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn, - SpaprInterruptController *intc, -+ uint32_t nr_servers, - Error **errp); - - /* -diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h -index 742b7e834f..3a103c224d 100644 ---- 
a/include/hw/ppc/spapr_xive.h -+++ b/include/hw/ppc/spapr_xive.h -@@ -66,7 +66,8 @@ int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx, - /* - * KVM XIVE device helpers - */ --int kvmppc_xive_connect(SpaprInterruptController *intc, Error **errp); -+int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, -+ Error **errp); - void kvmppc_xive_disconnect(SpaprInterruptController *intc); - void kvmppc_xive_reset(SpaprXive *xive, Error **errp); - void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, -diff --git a/include/hw/ppc/xics_spapr.h b/include/hw/ppc/xics_spapr.h -index 28b87038c8..1c65c96e3c 100644 ---- a/include/hw/ppc/xics_spapr.h -+++ b/include/hw/ppc/xics_spapr.h -@@ -32,7 +32,8 @@ - #define TYPE_ICS_SPAPR "ics-spapr" - #define ICS_SPAPR(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS_SPAPR) - --int xics_kvm_connect(SpaprInterruptController *intc, Error **errp); -+int xics_kvm_connect(SpaprInterruptController *intc, uint32_t nr_servers, -+ Error **errp); - void xics_kvm_disconnect(SpaprInterruptController *intc); - bool xics_kvm_has_broken_disconnect(SpaprMachineState *spapr); - --- -2.27.0 - diff --git a/SOURCES/kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch b/SOURCES/kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch deleted file mode 100644 index 4f15509..0000000 --- a/SOURCES/kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch +++ /dev/null @@ -1,50 +0,0 @@ -From b46fdf56b1a7938468565838bdadf260870e4f9b Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Wed, 9 Jun 2021 10:05:00 -0400 -Subject: [PATCH 3/4] spapr: Remove stale comment about power-saving LPCR bits -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -Message-id: <20210609100501.427096-2-lvivier@redhat.com> -Patchwork-id: 101682 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/2] spapr: Remove stale comment about power-saving LPCR bits 
-Bugzilla: 1969768 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: David Gibson -RH-Acked-by: Greg Kurz - -From: Nicholas Piggin - -Commit 47a9b551547 ("spapr: Clean up handling of LPCR power-saving exit -bits") moved this logic but did not remove the comment from the -previous location. - -Signed-off-by: Nicholas Piggin -Message-Id: <20210526091626.3388262-2-npiggin@gmail.com> -Reviewed-by: Cédric Le Goater -Reviewed-by: Greg Kurz -Signed-off-by: David Gibson -(cherry picked from commit 7be3bf6c8429969f97728bb712d9a99997835607) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr_rtas.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c -index 8d8d8cdfcb..295eac986e 100644 ---- a/hw/ppc/spapr_rtas.c -+++ b/hw/ppc/spapr_rtas.c -@@ -163,7 +163,6 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, - - env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME); - -- /* Enable Power-saving mode Exit Cause exceptions for the new CPU */ - lpcr = env->spr[SPR_LPCR]; - if (!pcc->interrupts_big_endian(callcpu)) { - lpcr |= LPCR_ILE; --- -2.27.0 - diff --git a/SOURCES/kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch b/SOURCES/kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch deleted file mode 100644 index 84abc74..0000000 --- a/SOURCES/kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 28794dca79a94d01c8732b84fe6ac6ba2986ce45 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Wed, 9 Jun 2021 10:05:01 -0400 -Subject: [PATCH 4/4] spapr: Set LPCR to current AIL mode when starting a new - CPU -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -Message-id: <20210609100501.427096-3-lvivier@redhat.com> -Patchwork-id: 101683 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 2/2] spapr: Set LPCR to current AIL mode when starting a new CPU -Bugzilla: 
1969768 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: David Gibson -RH-Acked-by: Greg Kurz - -From: Nicholas Piggin - -TCG does not keep track of AIL mode in a central place, it's based on -the current LPCR[AIL] bits. Synchronize the new CPU's LPCR to the -current LPCR in rtas_start_cpu(), similarly to the way the ILE bit is -synchronized. - -Open-code the ILE setting as well now that the caller's LPCR is -available directly, there is no need for the indirection. - -Without this, under both TCG and KVM, adding a POWER8/9/10 class CPU -with a new core ID after a modern Linux has booted results in the new -CPU's LPCR missing the LPCR[AIL]=0b11 setting that the other CPUs have. -This can cause crashes and unexpected behaviour. - -Signed-off-by: Nicholas Piggin -Message-Id: <20210526091626.3388262-3-npiggin@gmail.com> -Reviewed-by: Cédric Le Goater -Reviewed-by: Greg Kurz -Signed-off-by: David Gibson -(cherry picked from commit ac559ecbea2649819e7b3fdd09f4e0243e0128db) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr_rtas.c | 14 +++++++++----- - 1 file changed, 9 insertions(+), 5 deletions(-) - -diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c -index 295eac986e..5acb7c1f10 100644 ---- a/hw/ppc/spapr_rtas.c -+++ b/hw/ppc/spapr_rtas.c -@@ -132,8 +132,8 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, - target_ulong id, start, r3; - PowerPCCPU *newcpu; - CPUPPCState *env; -- PowerPCCPUClass *pcc; - target_ulong lpcr; -+ target_ulong caller_lpcr; - - if (nargs != 3 || nret != 1) { - rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); -@@ -152,7 +152,6 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, - } - - env = &newcpu->env; -- pcc = POWERPC_CPU_GET_CLASS(newcpu); - - if (!CPU(newcpu)->halted) { - rtas_st(rets, 0, RTAS_OUT_HW_ERROR); -@@ -163,10 +162,15 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, - - env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME); - -+ caller_lpcr = callcpu->env.spr[SPR_LPCR]; - lpcr = env->spr[SPR_LPCR]; -- if (!pcc->interrupts_big_endian(callcpu)) { -- lpcr |= LPCR_ILE; -- } -+ -+ /* Set ILE the same way */ -+ lpcr = (lpcr & ~LPCR_ILE) | (caller_lpcr & LPCR_ILE); -+ -+ /* Set AIL the same way */ -+ lpcr = (lpcr & ~LPCR_AIL) | (caller_lpcr & LPCR_AIL); -+ - if (env->mmu_model == POWERPC_MMU_3_00) { - /* - * New cpus are expected to start in the same radix/hash mode --- -2.27.0 - diff --git a/SOURCES/kvm-sungem-switch-to-use-qemu_receive_packet-for-loopbac.patch b/SOURCES/kvm-sungem-switch-to-use-qemu_receive_packet-for-loopbac.patch deleted file mode 100644 index e8c9f8b..0000000 --- a/SOURCES/kvm-sungem-switch-to-use-qemu_receive_packet-for-loopbac.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 07df0f52c26a3819bc02b4f2970b6735bcf15c5b Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 29 Jun 2021 03:42:42 -0400 -Subject: [PATCH 4/9] sungem: switch to use qemu_receive_packet() for loopback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210629034247.3286477-5-jmaloy@redhat.com> -Patchwork-id: 101786 -O-Subject: [RHEL-8.4.0.z qemu-kvm PATCH v2 4/9] sungem: switch to use qemu_receive_packet() for loopback -Bugzilla: 1932917 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Jason Wang - -This patch switches to use qemu_receive_packet() which can detect -reentrancy and return early. - -This is intended to address CVE-2021-3416. - -Cc: Prasad J Pandit -Cc: qemu-stable@nongnu.org -Reviewed-by: Mark Cave-Ayland -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Alistair Francis -Signed-off-by: Jason Wang - -(cherry picked from commit 8c92060d3c0248bd4d515719a35922cd2391b9b4) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/sungem.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/net/sungem.c b/hw/net/sungem.c -index f31d41ac5b..8b202b5c15 100644 ---- a/hw/net/sungem.c -+++ b/hw/net/sungem.c -@@ -305,7 +305,7 @@ static void sungem_send_packet(SunGEMState *s, const uint8_t *buf, - NetClientState *nc = qemu_get_queue(s->nic); - - if (s->macregs[MAC_XIFCFG >> 2] & MAC_XIFCFG_LBCK) { -- nc->info->receive(nc, buf, size); -+ qemu_receive_packet(nc, buf, size); - } else { - qemu_send_packet(nc, buf, size); - } --- -2.27.0 - diff --git a/SOURCES/kvm-target-arm-Fix-PAuth-sbox-functions.patch b/SOURCES/kvm-target-arm-Fix-PAuth-sbox-functions.patch deleted file mode 100644 index 0e08184..0000000 --- a/SOURCES/kvm-target-arm-Fix-PAuth-sbox-functions.patch +++ /dev/null @@ -1,65 +0,0 @@ -From b8c8288a65146952cdfe7d5f0cd96734c9de8ee1 Mon Sep 17 00:00:00 2001 -From: jmaloy -Date: Thu, 7 May 2020 17:57:08 +0100 -Subject: [PATCH 1/7] target/arm: Fix PAuth sbox functions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: jmaloy -Message-id: <20200507175708.1165177-2-jmaloy@redhat.com> 
-Patchwork-id: 96341 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/1] target/arm: Fix PAuth sbox functions -Bugzilla: 1813940 -RH-Acked-by: Andrew Jones -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella - -From: Vincent Dehors - -In the PAC computation, sbox was applied over wrong bits. -As this is a 4-bit sbox, bit index should be incremented by 4 instead of 16. - -Test vector from QARMA paper (https://eprint.iacr.org/2016/444.pdf) was -used to verify one computation of the pauth_computepac() function which -uses sbox2. - -Launchpad: https://bugs.launchpad.net/bugs/1859713 -Reviewed-by: Richard Henderson -Signed-off-by: Vincent DEHORS -Signed-off-by: Adrien GRASSEIN -Message-id: 20200116230809.19078-2-richard.henderson@linaro.org -Reviewed-by: Peter Maydell -Signed-off-by: Peter Maydell -(cherry picked from commit de0b1bae6461f67243282555475f88b2384a1eb9) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - target/arm/pauth_helper.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/target/arm/pauth_helper.c b/target/arm/pauth_helper.c -index d3194f2..0a5f41e 100644 ---- a/target/arm/pauth_helper.c -+++ b/target/arm/pauth_helper.c -@@ -89,7 +89,7 @@ static uint64_t pac_sub(uint64_t i) - uint64_t o = 0; - int b; - -- for (b = 0; b < 64; b += 16) { -+ for (b = 0; b < 64; b += 4) { - o |= (uint64_t)sub[(i >> b) & 0xf] << b; - } - return o; -@@ -104,7 +104,7 @@ static uint64_t pac_inv_sub(uint64_t i) - uint64_t o = 0; - int b; - -- for (b = 0; b < 64; b += 16) { -+ for (b = 0; b < 64; b += 4) { - o |= (uint64_t)inv_sub[(i >> b) & 0xf] << b; - } - return o; --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-arm-arch_dump-Add-SVE-notes.patch b/SOURCES/kvm-target-arm-arch_dump-Add-SVE-notes.patch deleted file mode 100644 index febea10..0000000 --- a/SOURCES/kvm-target-arm-arch_dump-Add-SVE-notes.patch +++ /dev/null @@ -1,298 +0,0 @@ -From d8871ae2842531130c9b333e7c06a6a5d1561286 Mon Sep 17 00:00:00 2001 -From: 
Andrew Jones -Date: Fri, 24 Jan 2020 09:14:34 +0100 -Subject: [PATCH 001/116] target/arm/arch_dump: Add SVE notes - -RH-Author: Andrew Jones -Message-id: <20200124091434.15021-2-drjones@redhat.com> -Patchwork-id: 93443 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] target/arm/arch_dump: Add SVE notes -Bugzilla: 1725084 -RH-Acked-by: Auger Eric -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1725084 - -Author: Andrew Jones -Date: Thu, 23 Jan 2020 15:22:40 +0000 - - target/arm/arch_dump: Add SVE notes - - When dumping a guest with dump-guest-memory also dump the SVE - registers if they are in use. - - Signed-off-by: Andrew Jones - Reviewed-by: Richard Henderson - Message-id: 20200120101832.18781-1-drjones@redhat.com - [PMM: fixed checkpatch nits] - Signed-off-by: Peter Maydell - -(cherry picked from commit 538baab245ca881e6a6ff720b5133f3ad1fcaafc) -Signed-off-by: Miroslav Rezanina ---- - include/elf.h | 1 + - target/arm/arch_dump.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++- - target/arm/cpu.h | 25 ++++++++++ - target/arm/kvm64.c | 24 ---------- - 4 files changed, 148 insertions(+), 26 deletions(-) - -diff --git a/include/elf.h b/include/elf.h -index 3501e0c..8fbfe60 100644 ---- a/include/elf.h -+++ b/include/elf.h -@@ -1650,6 +1650,7 @@ typedef struct elf64_shdr { - #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ - #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ - #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ -+#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension regs */ - - /* - * Physical entry point into the kernel. 
-diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c -index 26a2c09..2345dec 100644 ---- a/target/arm/arch_dump.c -+++ b/target/arm/arch_dump.c -@@ -62,12 +62,23 @@ struct aarch64_user_vfp_state { - - QEMU_BUILD_BUG_ON(sizeof(struct aarch64_user_vfp_state) != 528); - -+/* struct user_sve_header from arch/arm64/include/uapi/asm/ptrace.h */ -+struct aarch64_user_sve_header { -+ uint32_t size; -+ uint32_t max_size; -+ uint16_t vl; -+ uint16_t max_vl; -+ uint16_t flags; -+ uint16_t reserved; -+} QEMU_PACKED; -+ - struct aarch64_note { - Elf64_Nhdr hdr; - char name[8]; /* align_up(sizeof("CORE"), 4) */ - union { - struct aarch64_elf_prstatus prstatus; - struct aarch64_user_vfp_state vfp; -+ struct aarch64_user_sve_header sve; - }; - } QEMU_PACKED; - -@@ -76,6 +87,8 @@ struct aarch64_note { - (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_elf_prstatus)) - #define AARCH64_PRFPREG_NOTE_SIZE \ - (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_user_vfp_state)) -+#define AARCH64_SVE_NOTE_SIZE(env) \ -+ (AARCH64_NOTE_HEADER_SIZE + sve_size(env)) - - static void aarch64_note_init(struct aarch64_note *note, DumpState *s, - const char *name, Elf64_Word namesz, -@@ -128,11 +141,102 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f, - return 0; - } - -+#ifdef TARGET_AARCH64 -+static off_t sve_zreg_offset(uint32_t vq, int n) -+{ -+ off_t off = sizeof(struct aarch64_user_sve_header); -+ return ROUND_UP(off, 16) + vq * 16 * n; -+} -+ -+static off_t sve_preg_offset(uint32_t vq, int n) -+{ -+ return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n; -+} -+ -+static off_t sve_fpsr_offset(uint32_t vq) -+{ -+ off_t off = sve_preg_offset(vq, 17); -+ return ROUND_UP(off, 16); -+} -+ -+static off_t sve_fpcr_offset(uint32_t vq) -+{ -+ return sve_fpsr_offset(vq) + sizeof(uint32_t); -+} -+ -+static uint32_t sve_current_vq(CPUARMState *env) -+{ -+ return sve_zcr_len_for_el(env, arm_current_el(env)) + 1; -+} -+ -+static size_t sve_size_vq(uint32_t vq) -+{ -+ off_t off = 
sve_fpcr_offset(vq) + sizeof(uint32_t); -+ return ROUND_UP(off, 16); -+} -+ -+static size_t sve_size(CPUARMState *env) -+{ -+ return sve_size_vq(sve_current_vq(env)); -+} -+ -+static int aarch64_write_elf64_sve(WriteCoreDumpFunction f, -+ CPUARMState *env, int cpuid, -+ DumpState *s) -+{ -+ struct aarch64_note *note; -+ ARMCPU *cpu = env_archcpu(env); -+ uint32_t vq = sve_current_vq(env); -+ uint64_t tmp[ARM_MAX_VQ * 2], *r; -+ uint32_t fpr; -+ uint8_t *buf; -+ int ret, i; -+ -+ note = g_malloc0(AARCH64_SVE_NOTE_SIZE(env)); -+ buf = (uint8_t *)&note->sve; -+ -+ aarch64_note_init(note, s, "LINUX", 6, NT_ARM_SVE, sve_size_vq(vq)); -+ -+ note->sve.size = cpu_to_dump32(s, sve_size_vq(vq)); -+ note->sve.max_size = cpu_to_dump32(s, sve_size_vq(cpu->sve_max_vq)); -+ note->sve.vl = cpu_to_dump16(s, vq * 16); -+ note->sve.max_vl = cpu_to_dump16(s, cpu->sve_max_vq * 16); -+ note->sve.flags = cpu_to_dump16(s, 1); -+ -+ for (i = 0; i < 32; ++i) { -+ r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2); -+ memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16); -+ } -+ -+ for (i = 0; i < 17; ++i) { -+ r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0], -+ DIV_ROUND_UP(vq * 2, 8)); -+ memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8); -+ } -+ -+ fpr = cpu_to_dump32(s, vfp_get_fpsr(env)); -+ memcpy(&buf[sve_fpsr_offset(vq)], &fpr, sizeof(uint32_t)); -+ -+ fpr = cpu_to_dump32(s, vfp_get_fpcr(env)); -+ memcpy(&buf[sve_fpcr_offset(vq)], &fpr, sizeof(uint32_t)); -+ -+ ret = f(note, AARCH64_SVE_NOTE_SIZE(env), s); -+ g_free(note); -+ -+ if (ret < 0) { -+ return -1; -+ } -+ -+ return 0; -+} -+#endif -+ - int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque) - { - struct aarch64_note note; -- CPUARMState *env = &ARM_CPU(cs)->env; -+ ARMCPU *cpu = ARM_CPU(cs); -+ CPUARMState *env = &cpu->env; - DumpState *s = opaque; - uint64_t pstate, sp; - int ret, i; -@@ -163,7 +267,18 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - return -1; 
- } - -- return aarch64_write_elf64_prfpreg(f, env, cpuid, s); -+ ret = aarch64_write_elf64_prfpreg(f, env, cpuid, s); -+ if (ret) { -+ return ret; -+ } -+ -+#ifdef TARGET_AARCH64 -+ if (cpu_isar_feature(aa64_sve, cpu)) { -+ ret = aarch64_write_elf64_sve(f, env, cpuid, s); -+ } -+#endif -+ -+ return ret; - } - - /* struct pt_regs from arch/arm/include/asm/ptrace.h */ -@@ -335,6 +450,11 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) - if (class == ELFCLASS64) { - note_size = AARCH64_PRSTATUS_NOTE_SIZE; - note_size += AARCH64_PRFPREG_NOTE_SIZE; -+#ifdef TARGET_AARCH64 -+ if (cpu_isar_feature(aa64_sve, cpu)) { -+ note_size += AARCH64_SVE_NOTE_SIZE(env); -+ } -+#endif - } else { - note_size = ARM_PRSTATUS_NOTE_SIZE; - if (arm_feature(env, ARM_FEATURE_VFP)) { -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 83a809d..82dd3cc 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -975,6 +975,31 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq); - void aarch64_sve_change_el(CPUARMState *env, int old_el, - int new_el, bool el0_a64); - void aarch64_add_sve_properties(Object *obj); -+ -+/* -+ * SVE registers are encoded in KVM's memory in an endianness-invariant format. -+ * The byte at offset i from the start of the in-memory representation contains -+ * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the -+ * lowest offsets are stored in the lowest memory addresses, then that nearly -+ * matches QEMU's representation, which is to use an array of host-endian -+ * uint64_t's, where the lower offsets are at the lower indices. To complete -+ * the translation we just need to byte swap the uint64_t's on big-endian hosts. 
-+ */ -+static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) -+{ -+#ifdef HOST_WORDS_BIGENDIAN -+ int i; -+ -+ for (i = 0; i < nr; ++i) { -+ dst[i] = bswap64(src[i]); -+ } -+ -+ return dst; -+#else -+ return src; -+#endif -+} -+ - #else - static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { } - static inline void aarch64_sve_change_el(CPUARMState *env, int o, -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index 876184b..e2da756 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -877,30 +877,6 @@ static int kvm_arch_put_fpsimd(CPUState *cs) - } - - /* -- * SVE registers are encoded in KVM's memory in an endianness-invariant format. -- * The byte at offset i from the start of the in-memory representation contains -- * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the -- * lowest offsets are stored in the lowest memory addresses, then that nearly -- * matches QEMU's representation, which is to use an array of host-endian -- * uint64_t's, where the lower offsets are at the lower indices. To complete -- * the translation we just need to byte swap the uint64_t's on big-endian hosts. -- */ --static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) --{ --#ifdef HOST_WORDS_BIGENDIAN -- int i; -- -- for (i = 0; i < nr; ++i) { -- dst[i] = bswap64(src[i]); -- } -- -- return dst; --#else -- return src; --#endif --} -- --/* - * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits - * and PREGS and the FFR have a slice size of 256 bits. 
However we simply hard - * code the slice index to zero for now as it's unlikely we'll need more than --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch b/SOURCES/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch deleted file mode 100644 index 601b8c4..0000000 --- a/SOURCES/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch +++ /dev/null @@ -1,281 +0,0 @@ -From 730f72105b478553c4f22555c29b0f64224ff914 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:14 +0000 -Subject: [PATCH 12/15] target/arm/cpu: Add the kvm-no-adjvtime CPU property -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-6-drjones@redhat.com> -Patchwork-id: 93623 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/5] target/arm/cpu: Add the kvm-no-adjvtime CPU property -Bugzilla: 1647366 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:06 +0000 - - target/arm/cpu: Add the kvm-no-adjvtime CPU property - - kvm-no-adjvtime is a KVM specific CPU property and a first of its - kind. To accommodate it we also add kvm_arm_add_vcpu_properties() - and a KVM specific CPU properties description to the CPU features - document. - - Signed-off-by: Andrew Jones - Message-id: 20200120101023.16030-7-drjones@redhat.com - Reviewed-by: Peter Maydell - Signed-off-by: Peter Maydell - -(cherry picked from commit dea101a1ae9968c9fec6ab0291489dad7c49f36f) -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - Dropped the second hunk of the hw/arm/virt.c changes - as they would patch dead code. - -Signed-off-by: Danilo C. L. 
de Paula ---- - docs/arm-cpu-features.rst | 37 ++++++++++++++++++++++++++++++++++++- - hw/arm/virt.c | 5 +++++ - include/hw/arm/virt.h | 1 + - target/arm/cpu.c | 2 ++ - target/arm/cpu64.c | 1 + - target/arm/kvm.c | 28 ++++++++++++++++++++++++++++ - target/arm/kvm_arm.h | 11 +++++++++++ - target/arm/monitor.c | 1 + - tests/arm-cpu-features.c | 4 ++++ - 9 files changed, 89 insertions(+), 1 deletion(-) - -diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst -index 1b367e2..45d1eb6 100644 ---- a/docs/arm-cpu-features.rst -+++ b/docs/arm-cpu-features.rst -@@ -31,7 +31,9 @@ supporting the feature or only supporting the feature under certain - configurations. For example, the `aarch64` CPU feature, which, when - disabled, enables the optional AArch32 CPU feature, is only supported - when using the KVM accelerator and when running on a host CPU type that --supports the feature. -+supports the feature. While `aarch64` currently only works with KVM, -+it could work with TCG. CPU features that are specific to KVM are -+prefixed with "kvm-" and are described in "KVM VCPU Features". - - CPU Feature Probing - =================== -@@ -171,6 +173,39 @@ disabling many SVE vector lengths would be quite verbose, the `sve` CPU - properties have special semantics (see "SVE CPU Property Parsing - Semantics"). - -+KVM VCPU Features -+================= -+ -+KVM VCPU features are CPU features that are specific to KVM, such as -+paravirt features or features that enable CPU virtualization extensions. -+The features' CPU properties are only available when KVM is enabled and -+are named with the prefix "kvm-". KVM VCPU features may be probed, -+enabled, and disabled in the same way as other CPU features. Below is -+the list of KVM VCPU features and their descriptions. -+ -+ kvm-no-adjvtime By default kvm-no-adjvtime is disabled. This -+ means that by default the virtual time -+ adjustment is enabled (vtime is *not not* -+ adjusted). 
-+ -+ When virtual time adjustment is enabled each -+ time the VM transitions back to running state -+ the VCPU's virtual counter is updated to ensure -+ stopped time is not counted. This avoids time -+ jumps surprising guest OSes and applications, -+ as long as they use the virtual counter for -+ timekeeping. However it has the side effect of -+ the virtual and physical counters diverging. -+ All timekeeping based on the virtual counter -+ will appear to lag behind any timekeeping that -+ does not subtract VM stopped time. The guest -+ may resynchronize its virtual counter with -+ other time sources as needed. -+ -+ Enable kvm-no-adjvtime to disable virtual time -+ adjustment, also restoring the legacy (pre-5.0) -+ behavior. -+ - SVE CPU Properties - ================== - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index e108391..d30d38c 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1707,6 +1707,11 @@ static void machvirt_init(MachineState *machine) - } - } - -+ if (vmc->kvm_no_adjvtime && -+ object_property_find(cpuobj, "kvm-no-adjvtime", NULL)) { -+ object_property_set_bool(cpuobj, true, "kvm-no-adjvtime", NULL); -+ } -+ - if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) { - object_property_set_bool(cpuobj, false, "pmu", NULL); - } -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 53fdf16..77828ce 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -109,6 +109,7 @@ typedef struct { - bool smbios_old_sys_ver; - bool no_highmem_ecam; - bool no_ged; /* Machines < 4.2 has no support for ACPI GED device */ -+ bool kvm_no_adjvtime; - } VirtMachineClass; - - typedef struct { -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 3788fc3..e46efe9 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2482,6 +2482,7 @@ static void arm_max_initfn(Object *obj) - - if (kvm_enabled()) { - kvm_arm_set_cpu_features_from_host(cpu); -+ kvm_arm_add_vcpu_properties(obj); - } else { - cortex_a15_initfn(obj); - -@@ 
-2673,6 +2674,7 @@ static void arm_host_initfn(Object *obj) - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - aarch64_add_sve_properties(obj); - } -+ kvm_arm_add_vcpu_properties(obj); - arm_cpu_post_init(obj); - } - -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index a39d6fc..3cd416d 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -605,6 +605,7 @@ static void aarch64_max_initfn(Object *obj) - - if (kvm_enabled()) { - kvm_arm_set_cpu_features_from_host(cpu); -+ kvm_arm_add_vcpu_properties(obj); - } else { - uint64_t t; - uint32_t u; -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index 26d7f8b..4be9497 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -17,6 +17,8 @@ - #include "qemu/timer.h" - #include "qemu/error-report.h" - #include "qemu/main-loop.h" -+#include "qom/object.h" -+#include "qapi/error.h" - #include "sysemu/sysemu.h" - #include "sysemu/kvm.h" - #include "sysemu/kvm_int.h" -@@ -179,6 +181,32 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) - env->features = arm_host_cpu_features.features; - } - -+static bool kvm_no_adjvtime_get(Object *obj, Error **errp) -+{ -+ return !ARM_CPU(obj)->kvm_adjvtime; -+} -+ -+static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp) -+{ -+ ARM_CPU(obj)->kvm_adjvtime = !value; -+} -+ -+/* KVM VCPU properties should be prefixed with "kvm-". */ -+void kvm_arm_add_vcpu_properties(Object *obj) -+{ -+ if (!kvm_enabled()) { -+ return; -+ } -+ -+ ARM_CPU(obj)->kvm_adjvtime = true; -+ object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get, -+ kvm_no_adjvtime_set, &error_abort); -+ object_property_set_description(obj, "kvm-no-adjvtime", -+ "Set on to disable the adjustment of " -+ "the virtual counter. 
VM stopped time " -+ "will be counted.", &error_abort); -+} -+ - bool kvm_arm_pmu_supported(CPUState *cpu) - { - KVMState *s = KVM_STATE(current_machine->accelerator); -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index 01a9a18..ae9e075 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h -@@ -256,6 +256,15 @@ void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map); - void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); - - /** -+ * kvm_arm_add_vcpu_properties: -+ * @obj: The CPU object to add the properties to -+ * -+ * Add all KVM specific CPU properties to the CPU object. These -+ * are the CPU properties with "kvm-" prefixed names. -+ */ -+void kvm_arm_add_vcpu_properties(Object *obj); -+ -+/** - * kvm_arm_aarch32_supported: - * @cs: CPUState - * -@@ -345,6 +354,8 @@ static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) - cpu->host_cpu_probe_failed = true; - } - -+static inline void kvm_arm_add_vcpu_properties(Object *obj) {} -+ - static inline bool kvm_arm_aarch32_supported(CPUState *cs) - { - return false; -diff --git a/target/arm/monitor.c b/target/arm/monitor.c -index fa054f8..9725dff 100644 ---- a/target/arm/monitor.c -+++ b/target/arm/monitor.c -@@ -103,6 +103,7 @@ static const char *cpu_model_advertised_features[] = { - "sve128", "sve256", "sve384", "sve512", - "sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280", - "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048", -+ "kvm-no-adjvtime", - NULL - }; - -diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c -index 89285ca..ba1a6fe 100644 ---- a/tests/arm-cpu-features.c -+++ b/tests/arm-cpu-features.c -@@ -428,6 +428,8 @@ static void test_query_cpu_model_expansion(const void *data) - assert_has_feature_enabled(qts, "cortex-a15", "pmu"); - assert_has_not_feature(qts, "cortex-a15", "aarch64"); - -+ assert_has_not_feature(qts, "max", "kvm-no-adjvtime"); -+ - if (g_str_equal(qtest_get_arch(), "aarch64")) { - 
assert_has_feature_enabled(qts, "max", "aarch64"); - assert_has_feature_enabled(qts, "max", "sve"); -@@ -462,6 +464,8 @@ static void test_query_cpu_model_expansion_kvm(const void *data) - return; - } - -+ assert_has_feature_disabled(qts, "host", "kvm-no-adjvtime"); -+ - if (g_str_equal(qtest_get_arch(), "aarch64")) { - bool kvm_supports_sve; - char max_name[8], name[8]; --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch b/SOURCES/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch deleted file mode 100644 index 3396a32..0000000 --- a/SOURCES/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch +++ /dev/null @@ -1,330 +0,0 @@ -From 5388ea3fc0737d1a659256ff3663057bef484c19 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:13 +0000 -Subject: [PATCH 11/15] target/arm/kvm: Implement virtual time adjustment -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-5-drjones@redhat.com> -Patchwork-id: 93622 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/5] target/arm/kvm: Implement virtual time adjustment -Bugzilla: 1647366 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:06 +0000 - - target/arm/kvm: Implement virtual time adjustment - - When a VM is stopped (such as when it's paused) guest virtual time - should stop counting. Otherwise, when the VM is resumed it will - experience time jumps and its kernel may report soft lockups. Not - counting virtual time while the VM is stopped has the side effect - of making the guest's time appear to lag when compared with real - time, and even with time derived from the physical counter. 
For - this reason, this change, which is enabled by default, comes with - a KVM CPU feature allowing it to be disabled, restoring legacy - behavior. - - This patch only provides the implementation of the virtual time - adjustment. A subsequent patch will provide the CPU property - allowing the change to be enabled and disabled. - - Reported-by: Bijan Mottahedeh - Signed-off-by: Andrew Jones - Message-id: 20200120101023.16030-6-drjones@redhat.com - Reviewed-by: Peter Maydell - Signed-off-by: Peter Maydell - -(cherry picked from commit e5ac4200b4cddf44df9adbef677af0d1f1c579c6) -Signed-off-by: Danilo C. L. de Paula ---- - target/arm/cpu.h | 7 ++++ - target/arm/kvm.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++ - target/arm/kvm32.c | 3 ++ - target/arm/kvm64.c | 3 ++ - target/arm/kvm_arm.h | 38 ++++++++++++++++++++++ - target/arm/machine.c | 7 ++++ - 6 files changed, 150 insertions(+) - -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 82dd3cc..fbd8ea0 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -821,6 +821,13 @@ struct ARMCPU { - /* KVM init features for this CPU */ - uint32_t kvm_init_features[7]; - -+ /* KVM CPU state */ -+ -+ /* KVM virtual time adjustment */ -+ bool kvm_adjvtime; -+ bool kvm_vtime_dirty; -+ uint64_t kvm_vtime; -+ - /* Uniprocessor system with MP extensions */ - bool mp_is_up; - -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index 5b82cef..26d7f8b 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -359,6 +359,22 @@ static int compare_u64(const void *a, const void *b) - return 0; - } - -+/* -+ * cpreg_values are sorted in ascending order by KVM register ID -+ * (see kvm_arm_init_cpreg_list). This allows us to cheaply find -+ * the storage for a KVM register by ID with a binary search. 
-+ */ -+static uint64_t *kvm_arm_get_cpreg_ptr(ARMCPU *cpu, uint64_t regidx) -+{ -+ uint64_t *res; -+ -+ res = bsearch(&regidx, cpu->cpreg_indexes, cpu->cpreg_array_len, -+ sizeof(uint64_t), compare_u64); -+ assert(res); -+ -+ return &cpu->cpreg_values[res - cpu->cpreg_indexes]; -+} -+ - /* Initialize the ARMCPU cpreg list according to the kernel's - * definition of what CPU registers it knows about (and throw away - * the previous TCG-created cpreg list). -@@ -512,6 +528,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) - return ok; - } - -+void kvm_arm_cpu_pre_save(ARMCPU *cpu) -+{ -+ /* KVM virtual time adjustment */ -+ if (cpu->kvm_vtime_dirty) { -+ *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT) = cpu->kvm_vtime; -+ } -+} -+ -+void kvm_arm_cpu_post_load(ARMCPU *cpu) -+{ -+ /* KVM virtual time adjustment */ -+ if (cpu->kvm_adjvtime) { -+ cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT); -+ cpu->kvm_vtime_dirty = true; -+ } -+} -+ - void kvm_arm_reset_vcpu(ARMCPU *cpu) - { - int ret; -@@ -579,6 +612,50 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) - return 0; - } - -+void kvm_arm_get_virtual_time(CPUState *cs) -+{ -+ ARMCPU *cpu = ARM_CPU(cs); -+ struct kvm_one_reg reg = { -+ .id = KVM_REG_ARM_TIMER_CNT, -+ .addr = (uintptr_t)&cpu->kvm_vtime, -+ }; -+ int ret; -+ -+ if (cpu->kvm_vtime_dirty) { -+ return; -+ } -+ -+ ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); -+ if (ret) { -+ error_report("Failed to get KVM_REG_ARM_TIMER_CNT"); -+ abort(); -+ } -+ -+ cpu->kvm_vtime_dirty = true; -+} -+ -+void kvm_arm_put_virtual_time(CPUState *cs) -+{ -+ ARMCPU *cpu = ARM_CPU(cs); -+ struct kvm_one_reg reg = { -+ .id = KVM_REG_ARM_TIMER_CNT, -+ .addr = (uintptr_t)&cpu->kvm_vtime, -+ }; -+ int ret; -+ -+ if (!cpu->kvm_vtime_dirty) { -+ return; -+ } -+ -+ ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); -+ if (ret) { -+ error_report("Failed to set KVM_REG_ARM_TIMER_CNT"); -+ abort(); -+ } -+ -+ cpu->kvm_vtime_dirty = false; -+} -+ - int
kvm_put_vcpu_events(ARMCPU *cpu) - { - CPUARMState *env = &cpu->env; -@@ -690,6 +767,21 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) - return MEMTXATTRS_UNSPECIFIED; - } - -+void kvm_arm_vm_state_change(void *opaque, int running, RunState state) -+{ -+ CPUState *cs = opaque; -+ ARMCPU *cpu = ARM_CPU(cs); -+ -+ if (running) { -+ if (cpu->kvm_adjvtime) { -+ kvm_arm_put_virtual_time(cs); -+ } -+ } else { -+ if (cpu->kvm_adjvtime) { -+ kvm_arm_get_virtual_time(cs); -+ } -+ } -+} - - int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) - { -diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c -index 32bf8d6..3a8b437 100644 ---- a/target/arm/kvm32.c -+++ b/target/arm/kvm32.c -@@ -16,6 +16,7 @@ - #include "qemu-common.h" - #include "cpu.h" - #include "qemu/timer.h" -+#include "sysemu/runstate.h" - #include "sysemu/kvm.h" - #include "kvm_arm.h" - #include "internals.h" -@@ -198,6 +199,8 @@ int kvm_arch_init_vcpu(CPUState *cs) - return -EINVAL; - } - -+ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); -+ - /* Determine init features for this CPU */ - memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); - if (cpu->start_powered_off) { -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index 666a81a..d368189 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -23,6 +23,7 @@ - #include "qemu/host-utils.h" - #include "qemu/main-loop.h" - #include "exec/gdbstub.h" -+#include "sysemu/runstate.h" - #include "sysemu/kvm.h" - #include "sysemu/kvm_int.h" - #include "kvm_arm.h" -@@ -735,6 +736,8 @@ int kvm_arch_init_vcpu(CPUState *cs) - return -EINVAL; - } - -+ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); -+ - /* Determine init features for this CPU */ - memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); - if (cpu->start_powered_off) { -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index b48a9c9..01a9a18 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h 
-@@ -128,6 +128,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level); - bool write_kvmstate_to_list(ARMCPU *cpu); - - /** -+ * kvm_arm_cpu_pre_save: -+ * @cpu: ARMCPU -+ * -+ * Called after write_kvmstate_to_list() from cpu_pre_save() to update -+ * the cpreg list with KVM CPU state. -+ */ -+void kvm_arm_cpu_pre_save(ARMCPU *cpu); -+ -+/** -+ * kvm_arm_cpu_post_load: -+ * @cpu: ARMCPU -+ * -+ * Called from cpu_post_load() to update KVM CPU state from the cpreg list. -+ */ -+void kvm_arm_cpu_post_load(ARMCPU *cpu); -+ -+/** - * kvm_arm_reset_vcpu: - * @cpu: ARMCPU - * -@@ -292,6 +309,24 @@ int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); - */ - int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); - -+/** -+ * kvm_arm_get_virtual_time: -+ * @cs: CPUState -+ * -+ * Gets the VCPU's virtual counter and stores it in the KVM CPU state. -+ */ -+void kvm_arm_get_virtual_time(CPUState *cs); -+ -+/** -+ * kvm_arm_put_virtual_time: -+ * @cs: CPUState -+ * -+ * Sets the VCPU's virtual counter to the value stored in the KVM CPU state. 
-+ */ -+void kvm_arm_put_virtual_time(CPUState *cs); -+ -+void kvm_arm_vm_state_change(void *opaque, int running, RunState state); -+ - int kvm_arm_vgic_probe(void); - - void kvm_arm_pmu_set_irq(CPUState *cs, int irq); -@@ -339,6 +374,9 @@ static inline void kvm_arm_pmu_set_irq(CPUState *cs, int irq) {} - static inline void kvm_arm_pmu_init(CPUState *cs) {} - - static inline void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map) {} -+ -+static inline void kvm_arm_get_virtual_time(CPUState *cs) {} -+static inline void kvm_arm_put_virtual_time(CPUState *cs) {} - #endif - - static inline const char *gic_class_name(void) -diff --git a/target/arm/machine.c b/target/arm/machine.c -index eb28b23..241890a 100644 ---- a/target/arm/machine.c -+++ b/target/arm/machine.c -@@ -642,6 +642,12 @@ static int cpu_pre_save(void *opaque) - /* This should never fail */ - abort(); - } -+ -+ /* -+ * kvm_arm_cpu_pre_save() must be called after -+ * write_kvmstate_to_list() -+ */ -+ kvm_arm_cpu_pre_save(cpu); - } else { - if (!write_cpustate_to_list(cpu, false)) { - /* This should never fail. */ -@@ -744,6 +750,7 @@ static int cpu_post_load(void *opaque, int version_id) - * we're using it. 
- */ - write_list_to_cpustate(cpu); -+ kvm_arm_cpu_post_load(cpu); - } else { - if (!write_list_to_cpustate(cpu)) { - return -1; --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch b/SOURCES/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch deleted file mode 100644 index 8cdc867..0000000 --- a/SOURCES/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch +++ /dev/null @@ -1,197 +0,0 @@ -From 11cb9cb7b1b56d5c9723e9c50bc2903281893bcc Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:10 +0000 -Subject: [PATCH 08/15] target/arm/kvm: trivial: Clean up header documentation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-2-drjones@redhat.com> -Patchwork-id: 93625 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/5] target/arm/kvm: trivial: Clean up header documentation -Bugzilla: 1647366 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:05 +0000 - - target/arm/kvm: trivial: Clean up header documentation - - Signed-off-by: Andrew Jones - Message-id: 20200120101023.16030-2-drjones@redhat.com - Reviewed-by: Peter Maydell - Signed-off-by: Peter Maydell - -(cherry picked from commit d1ebbc9d16297b54b153ee33abe05eb4f1df0c66) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/arm/kvm_arm.h | 46 +++++++++++++++++++++++++++------------------- - 1 file changed, 27 insertions(+), 19 deletions(-) - -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index 8e14d40..b48a9c9 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h -@@ -28,9 +28,9 @@ - int kvm_arm_vcpu_init(CPUState *cs); - - /** -- * kvm_arm_vcpu_finalize -+ * kvm_arm_vcpu_finalize: - * @cs: CPUState -- * @feature: int -+ * @feature: feature to finalize - * - * Finalizes the configuration of the specified VCPU feature by - * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring -@@ -75,8 +75,8 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, - int kvm_arm_init_cpreg_list(ARMCPU *cpu); - - /** -- * kvm_arm_reg_syncs_via_cpreg_list -- * regidx: KVM register index -+ * kvm_arm_reg_syncs_via_cpreg_list: -+ * @regidx: KVM register index - * - * Return true if this KVM register should be synchronized via the - * cpreg list of arbitrary system registers, false if it is synchronized -@@ -85,8 +85,8 @@ int kvm_arm_init_cpreg_list(ARMCPU *cpu); - bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx); - - /** -- * kvm_arm_cpreg_level -- * regidx: KVM register index -+ * kvm_arm_cpreg_level: -+ * @regidx: KVM register index - * - * Return the level of this coprocessor/system register. Return value is - * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE. -@@ -148,6 +148,8 @@ void kvm_arm_init_serror_injection(CPUState *cs); - * @cpu: ARMCPU - * - * Get VCPU related state from kvm. -+ * -+ * Returns: 0 if success else < 0 error code - */ - int kvm_get_vcpu_events(ARMCPU *cpu); - -@@ -156,6 +158,8 @@ int kvm_get_vcpu_events(ARMCPU *cpu); - * @cpu: ARMCPU - * - * Put VCPU related state to kvm. 
-+ * -+ * Returns: 0 if success else < 0 error code - */ - int kvm_put_vcpu_events(ARMCPU *cpu); - -@@ -205,10 +209,12 @@ typedef struct ARMHostCPUFeatures { - - /** - * kvm_arm_get_host_cpu_features: -- * @ahcc: ARMHostCPUClass to fill in -+ * @ahcf: ARMHostCPUClass to fill in - * - * Probe the capabilities of the host kernel's preferred CPU and fill - * in the ARMHostCPUClass struct accordingly. -+ * -+ * Returns true on success and false otherwise. - */ - bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf); - -@@ -242,7 +248,7 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); - bool kvm_arm_aarch32_supported(CPUState *cs); - - /** -- * bool kvm_arm_pmu_supported: -+ * kvm_arm_pmu_supported: - * @cs: CPUState - * - * Returns: true if the KVM VCPU can enable its PMU -@@ -251,7 +257,7 @@ bool kvm_arm_aarch32_supported(CPUState *cs); - bool kvm_arm_pmu_supported(CPUState *cs); - - /** -- * bool kvm_arm_sve_supported: -+ * kvm_arm_sve_supported: - * @cs: CPUState - * - * Returns true if the KVM VCPU can enable SVE and false otherwise. -@@ -259,26 +265,30 @@ bool kvm_arm_pmu_supported(CPUState *cs); - bool kvm_arm_sve_supported(CPUState *cs); - - /** -- * kvm_arm_get_max_vm_ipa_size - Returns the number of bits in the -- * IPA address space supported by KVM -- * -+ * kvm_arm_get_max_vm_ipa_size: - * @ms: Machine state handle -+ * -+ * Returns the number of bits in the IPA address space supported by KVM - */ - int kvm_arm_get_max_vm_ipa_size(MachineState *ms); - - /** -- * kvm_arm_sync_mpstate_to_kvm -+ * kvm_arm_sync_mpstate_to_kvm: - * @cpu: ARMCPU - * - * If supported set the KVM MP_STATE based on QEMU's model. -+ * -+ * Returns 0 on success and -1 on failure. - */ - int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); - - /** -- * kvm_arm_sync_mpstate_to_qemu -+ * kvm_arm_sync_mpstate_to_qemu: - * @cpu: ARMCPU - * - * If supported get the MP_STATE from KVM and store in QEMU's model. -+ * -+ * Returns 0 on success and aborts on failure. 
- */ - int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); - -@@ -292,7 +302,8 @@ int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); - - static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) - { -- /* This should never actually be called in the "not KVM" case, -+ /* -+ * This should never actually be called in the "not KVM" case, - * but set up the fields to indicate an error anyway. - */ - cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE; -@@ -377,23 +388,20 @@ bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit); - * - * Return: TRUE if any hardware breakpoints in use. - */ -- - bool kvm_arm_hw_debug_active(CPUState *cs); - - /** - * kvm_arm_copy_hw_debug_data: -- * - * @ptr: kvm_guest_debug_arch structure - * - * Copy the architecture specific debug registers into the - * kvm_guest_debug ioctl structure. - */ - struct kvm_guest_debug_arch; -- - void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr); - - /** -- * its_class_name -+ * its_class_name: - * - * Return the ITS class name to use depending on whether KVM acceleration - * and KVM CAP_SIGNAL_MSI are supported --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch b/SOURCES/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch deleted file mode 100644 index 36c0f1a..0000000 --- a/SOURCES/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 2740a84fe798ade5c1ce725d65cdaffb255da47c Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:11 +0000 -Subject: [PATCH 09/15] target/arm/kvm64: kvm64 cpus have timer registers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-3-drjones@redhat.com> -Patchwork-id: 93621 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/5] target/arm/kvm64: kvm64 cpus have timer registers -Bugzilla: 1647366 -RH-Acked-by: Philippe 
Mathieu-Daudé -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:06 +0000 - - target/arm/kvm64: kvm64 cpus have timer registers - - Add the missing GENERIC_TIMER feature to kvm64 cpus. - - We don't currently use these registers when KVM is enabled, but it's - probably best we add the feature flag for consistency and potential - future use. There's also precedent, as we add the PMU feature flag to - KVM enabled guests, even though we don't use those registers either. - - This change was originally posted as a hunk of a different, never - merged patch from Bijan Mottahedeh. - - Signed-off-by: Andrew Jones - Reviewed-by: Richard Henderson - Message-id: 20200120101023.16030-4-drjones@redhat.com - Signed-off-by: Peter Maydell - -(cherry picked from commit 65caa415487f4a6e265105446c6ef8f56bb0aa70) -Signed-off-by: Danilo C. L. de Paula ---- - target/arm/kvm64.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index e2da756..666a81a 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -605,6 +605,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - set_feature(&features, ARM_FEATURE_NEON); - set_feature(&features, ARM_FEATURE_AARCH64); - set_feature(&features, ARM_FEATURE_PMU); -+ set_feature(&features, ARM_FEATURE_GENERIC_TIMER); - - ahcf->features = features; - --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch b/SOURCES/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch deleted file mode 100644 index 55f328d..0000000 --- a/SOURCES/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch +++ /dev/null @@ -1,81 +0,0 @@ -From c82cf5c08617c947b34eb490d1714729103e3379 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Mon, 10 Feb 2020 17:33:57 +0000 -Subject: [PATCH 17/18] target/arm/monitor: query-cpu-model-expansion 
crashed - qemu when using machine type none -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200210173358.16896-2-drjones@redhat.com> -Patchwork-id: 93773 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] target/arm/monitor: query-cpu-model-expansion crashed qemu when using machine type none -Bugzilla: 1801320 -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan -RH-Acked-by: Philippe Mathieu-Daudé - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1801320 - -Author: Liang Yan -Date: Fri, 07 Feb 2020 14:04:21 +0000 - - target/arm/monitor: query-cpu-model-expansion crashed qemu when using machine type none - - Commit e19afd566781 mentioned that target-arm only supports queryable - cpu models 'max', 'host', and the current type when KVM is in use. - The logic works well until using machine type none. - - For machine type none, cpu_type will be null if cpu option is not - set by command line, strlen(cpu_type) will terminate process. - So We add a check above it. - - This won't affect i386 and s390x since they do not use current_cpu. - - Signed-off-by: Liang Yan - Message-id: 20200203134251.12986-1-lyan@suse.com - Reviewed-by: Andrew Jones - Tested-by: Andrew Jones - Signed-off-by: Peter Maydell - -(cherry picked from commit 0999a4ba8718aa96105b978d3567fc7e90244c7e) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/arm/monitor.c | 15 +++++++++------ - 1 file changed, 9 insertions(+), 6 deletions(-) - -diff --git a/target/arm/monitor.c b/target/arm/monitor.c -index 9725dff..c2dc790 100644 ---- a/target/arm/monitor.c -+++ b/target/arm/monitor.c -@@ -137,17 +137,20 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, - } - - if (kvm_enabled()) { -- const char *cpu_type = current_machine->cpu_type; -- int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); - bool supported = false; - - if (!strcmp(model->name, "host") || !strcmp(model->name, "max")) { - /* These are kvmarm's recommended cpu types */ - supported = true; -- } else if (strlen(model->name) == len && -- !strncmp(model->name, cpu_type, len)) { -- /* KVM is enabled and we're using this type, so it works. */ -- supported = true; -+ } else if (current_machine->cpu_type) { -+ const char *cpu_type = current_machine->cpu_type; -+ int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); -+ -+ if (strlen(model->name) == len && -+ !strncmp(model->name, cpu_type, len)) { -+ /* KVM is enabled and we're using this type, so it works. 
*/ -+ supported = true; -+ } - } - if (!supported) { - error_setg(errp, "We cannot guarantee the CPU type '%s' works " --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-i386-Add-ARCH_CAPABILITIES-related-bits-into-.patch b/SOURCES/kvm-target-i386-Add-ARCH_CAPABILITIES-related-bits-into-.patch deleted file mode 100644 index ffb6ab7..0000000 --- a/SOURCES/kvm-target-i386-Add-ARCH_CAPABILITIES-related-bits-into-.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 4c9201a83e3ff48d2a55e45a34eb27966a1e4ab0 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Fri, 5 Jun 2020 18:37:33 -0400 -Subject: [PATCH 3/3] target/i386: Add ARCH_CAPABILITIES related bits into - Icelake-Server CPU model -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: plai@redhat.com -Message-id: <20200605183733.8269-1-plai@redhat.com> -Patchwork-id: 97380 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH] target/i386: Add ARCH_CAPABILITIES related bits into Icelake-Server CPU model -Bugzilla: 1840342 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Bandan Das -RH-Acked-by: Danilo de Paula -RH-Acked-by: Eduardo Habkost - -From: Xiaoyao Li - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1840342 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=28983822 -Branch: rhel-av-8.2.1 - -Tested on HOST: intel-whitley-09.khw1.lab.eng.bos.redhat.com - -1. qemu-kvm -cpu host … - VM guest does have arch_capabilities in cpuinfo/flags. - [Expected success] - -2. qemu-kvm -cpu Icelake-Server … - VM guest does NOT have arch_capabilities in cpuinfo/flags. - [Expected failure] - -3. qemu-kvm -cpu Icelake-Server-v3 … - VM guest does have arch_capabilities in cpuinfo/flags. - [Expected success] - ---- - -Current Icelake-Server CPU model lacks all the features enumerated by -MSR_IA32_ARCH_CAPABILITIES. - -Add them, so that guest of "Icelake-Server" can see all of them. 
- -Signed-off-by: Xiaoyao Li -Message-Id: <20200316095605.12318-1-xiaoyao.li@intel.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit d965dc35592d24c0c1519f1c566223c6277cb80e) -Signed-off-by: Paul Lai -Signed-off-by: Eduardo Lima (Etrunko) ---- - target/i386/cpu.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index b763adcdc5..7d7b016bb7 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -3496,6 +3496,19 @@ static X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - }, - }, -+ { -+ .version = 3, -+ .props = (PropValue[]) { -+ { "arch-capabilities", "on" }, -+ { "rdctl-no", "on" }, -+ { "ibrs-all", "on" }, -+ { "skip-l1dfl-vmentry", "on" }, -+ { "mds-no", "on" }, -+ { "pschange-mc-no", "on" }, -+ { "taa-no", "on" }, -+ { /* end of list */ } -+ }, -+ }, - { /* end of list */ } - } - }, --- -2.27.0 - diff --git a/SOURCES/kvm-target-i386-Add-missed-features-to-Cooperlake-CPU-mo.patch b/SOURCES/kvm-target-i386-Add-missed-features-to-Cooperlake-CPU-mo.patch deleted file mode 100644 index ef95ccf..0000000 --- a/SOURCES/kvm-target-i386-Add-missed-features-to-Cooperlake-CPU-mo.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 1ffeb321151b3878bcbb2229639456c0677305f5 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Fri, 15 May 2020 18:02:43 +0100 -Subject: [PATCH 17/17] target/i386: Add missed features to Cooperlake CPU - model - -RH-Author: plai@redhat.com -Message-id: <20200515180243.17488-5-plai@redhat.com> -Patchwork-id: 96611 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 4/4] target/i386: Add missed features to Cooperlake CPU model -Bugzilla: 1769912 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -From: Xiaoyao Li - -It lacks VMX features and two security feature bits (disclosed recently) in -MSR_IA32_ARCH_CAPABILITIES in current Cooperlake CPU model, so add them. 
- -Fixes: 22a866b6166d ("i386: Add new CPU model Cooperlake") -Signed-off-by: Xiaoyao Li -Message-Id: <20191225063018.20038-3-xiaoyao.li@intel.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 2dea9d9ca4ea7e9afe83d0b4153b21a16987e866) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 50 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 996a74f..b763adc 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -3202,7 +3202,8 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EDX_SPEC_CTRL_SSBD | CPUID_7_0_EDX_ARCH_CAPABILITIES, - .features[FEAT_ARCH_CAPABILITIES] = - MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | -- MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO, -+ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | -+ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO, - .features[FEAT_7_1_EAX] = - CPUID_7_1_EAX_AVX512_BF16, - /* -@@ -3217,6 +3218,54 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_XSAVE_XGETBV1, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, -+ /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ -+ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | -+ MSR_VMX_BASIC_TRUE_CTLS, -+ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | -+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | -+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, -+ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | -+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | -+ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | -+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | -+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, -+ 
.features[FEAT_VMX_EXIT_CTLS] = -+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | -+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | -+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | -+ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | -+ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, -+ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | -+ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, -+ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | -+ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | -+ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, -+ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | -+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | -+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | -+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | -+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | -+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | -+ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | -+ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | -+ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | -+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | -+ VMX_CPU_BASED_MONITOR_TRAP_FLAG | -+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, -+ .features[FEAT_VMX_SECONDARY_CTLS] = -+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | -+ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | -+ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | -+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | -+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | -+ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | -+ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | -+ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | -+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | -+ VMX_SECONDARY_EXEC_RDSEED_EXITING | 
VMX_SECONDARY_EXEC_ENABLE_PML, -+ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, - .xlevel = 0x80000008, - .model_id = "Intel Xeon Processor (Cooperlake)", - }, --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch b/SOURCES/kvm-target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch deleted file mode 100644 index ad2dd77..0000000 --- a/SOURCES/kvm-target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 6f0630299a3edbb8f5e5ac41eb9e1f1c363f1e3e Mon Sep 17 00:00:00 2001 -From: Danilo de Paula -Date: Tue, 9 Jun 2020 18:46:51 +0100 -Subject: [PATCH 15/17] target/i386: Add new bit definitions of - MSR_IA32_ARCH_CAPABILITIES - -RH-Author: Danilo de Paula -Message-id: <20200609184651.1328372-1-ddepaula@redhat.com> -Patchwork-id: 97489 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 5/4] target/i386: Add new bit definitions of MSR_IA32_ARCH_CAPABILITIES -Bugzilla: 1769912 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Eduardo Habkost - -From: Danilo de Paula - -redhat: builds with that series were failing. It complains about a undefined -MSR_ARCH_CAP_TAA_NO. - -The bit 6, 7 and 8 of MSR_IA32_ARCH_CAPABILITIES are recently disclosed -for some security issues. Add the definitions for them to be used by named -CPU models. - -Signed-off-by: Xiaoyao Li -Message-Id: <20191225063018.20038-2-xiaoyao.li@intel.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 6c997b4adb300788d61d72e2b8bc67c03a584956) - -Signed-off-by: Paolo Bonzini -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.h | 13 ++++++++----- - 1 file changed, 8 insertions(+), 5 deletions(-) - -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index e77d101..7bfbf2a 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -836,12 +836,15 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; - #define CPUID_TOPOLOGY_LEVEL_DIE (5U << 8) - - /* MSR Feature Bits */ --#define MSR_ARCH_CAP_RDCL_NO (1U << 0) --#define MSR_ARCH_CAP_IBRS_ALL (1U << 1) --#define MSR_ARCH_CAP_RSBA (1U << 2) -+#define MSR_ARCH_CAP_RDCL_NO (1U << 0) -+#define MSR_ARCH_CAP_IBRS_ALL (1U << 1) -+#define MSR_ARCH_CAP_RSBA (1U << 2) - #define MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY (1U << 3) --#define MSR_ARCH_CAP_SSB_NO (1U << 4) --#define MSR_ARCH_CAP_MDS_NO (1U << 5) -+#define MSR_ARCH_CAP_SSB_NO (1U << 4) -+#define MSR_ARCH_CAP_MDS_NO (1U << 5) -+#define MSR_ARCH_CAP_PSCHANGE_MC_NO (1U << 6) -+#define MSR_ARCH_CAP_TSX_CTRL_MSR (1U << 7) -+#define MSR_ARCH_CAP_TAA_NO (1U << 8) - - #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) - --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-i386-add-a-ucode-rev-property.patch b/SOURCES/kvm-target-i386-add-a-ucode-rev-property.patch deleted file mode 100644 index 5c3c770..0000000 --- a/SOURCES/kvm-target-i386-add-a-ucode-rev-property.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 4009f0bcc8004ce481015d088fe335a16b8d7ce1 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:12 +0000 -Subject: [PATCH 2/9] target/i386: add a ucode-rev property - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-3-pbonzini@redhat.com> -Patchwork-id: 93909 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/6] target/i386: add a ucode-rev property -Bugzilla: 1791648 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -Add the property and plumb it in TCG and HVF (the latter of which -tried to support returning a constant value but used the wrong MSR). 
- -Signed-off-by: Paolo Bonzini -Message-Id: <1579544504-3616-3-git-send-email-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 4e45aff398cd1542c2a384a2a3b8600f23337d86) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 10 ++++++++++ - target/i386/cpu.h | 3 +++ - target/i386/hvf/x86_emu.c | 4 +--- - target/i386/misc_helper.c | 4 ++++ - 4 files changed, 18 insertions(+), 3 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 863192c..e505d3e 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6325,6 +6325,15 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - } - } - -+ if (cpu->ucode_rev == 0) { -+ /* The default is the same as KVM's. */ -+ if (IS_AMD_CPU(env)) { -+ cpu->ucode_rev = 0x01000065; -+ } else { -+ cpu->ucode_rev = 0x100000000ULL; -+ } -+ } -+ - /* mwait extended info: needed for Core compatibility */ - /* We always wake on interrupt even if host does not have the capability */ - cpu->mwait.ecx |= CPUID_MWAIT_EMX | CPUID_MWAIT_IBE; -@@ -7008,6 +7017,7 @@ static Property x86_cpu_properties[] = { - DEFINE_PROP_UINT32("min-level", X86CPU, env.cpuid_min_level, 0), - DEFINE_PROP_UINT32("min-xlevel", X86CPU, env.cpuid_min_xlevel, 0), - DEFINE_PROP_UINT32("min-xlevel2", X86CPU, env.cpuid_min_xlevel2, 0), -+ DEFINE_PROP_UINT64("ucode-rev", X86CPU, ucode_rev, 0), - DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), - DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor_id), - DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index cde2a16..4441061 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -348,6 +348,7 @@ typedef enum X86Seg { - #define MSR_IA32_SPEC_CTRL 0x48 - #define MSR_VIRT_SSBD 0xc001011f - #define MSR_IA32_PRED_CMD 0x49 -+#define MSR_IA32_UCODE_REV 0x8b - #define MSR_IA32_CORE_CAPABILITY 0xcf - - #define MSR_IA32_ARCH_CAPABILITIES 0x10a -@@ -1621,6 
+1622,8 @@ struct X86CPU { - CPUNegativeOffsetState neg; - CPUX86State env; - -+ uint64_t ucode_rev; -+ - uint32_t hyperv_spinlock_attempts; - char *hyperv_vendor_id; - bool hyperv_synic_kvm_only; -diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c -index 3df7672..92ab815 100644 ---- a/target/i386/hvf/x86_emu.c -+++ b/target/i386/hvf/x86_emu.c -@@ -664,8 +664,6 @@ static void exec_lods(struct CPUX86State *env, struct x86_decode *decode) - RIP(env) += decode->len; - } - --#define MSR_IA32_UCODE_REV 0x00000017 -- - void simulate_rdmsr(struct CPUState *cpu) - { - X86CPU *x86_cpu = X86_CPU(cpu); -@@ -681,7 +679,7 @@ void simulate_rdmsr(struct CPUState *cpu) - val = cpu_get_apic_base(X86_CPU(cpu)->apic_state); - break; - case MSR_IA32_UCODE_REV: -- val = (0x100000000ULL << 32) | 0x100000000ULL; -+ val = x86_cpu->ucode_rev; - break; - case MSR_EFER: - val = rvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER); -diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c -index 3eff688..aed16fe 100644 ---- a/target/i386/misc_helper.c -+++ b/target/i386/misc_helper.c -@@ -229,6 +229,7 @@ void helper_rdmsr(CPUX86State *env) - #else - void helper_wrmsr(CPUX86State *env) - { -+ X86CPU *x86_cpu = env_archcpu(env); - uint64_t val; - - cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); -@@ -371,6 +372,9 @@ void helper_wrmsr(CPUX86State *env) - env->msr_bndcfgs = val; - cpu_sync_bndcs_hflags(env); - break; -+ case MSR_IA32_UCODE_REV: -+ val = x86_cpu->ucode_rev; -+ break; - default: - if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL - && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-i386-add-fast-short-REP-MOV-support.patch b/SOURCES/kvm-target-i386-add-fast-short-REP-MOV-support.patch deleted file mode 100644 index 51af7e7..0000000 --- a/SOURCES/kvm-target-i386-add-fast-short-REP-MOV-support.patch +++ /dev/null @@ -1,59 +0,0 @@ -From f33880c5f7a4e2cad25c22112da073273c6e2cfb Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Wed, 24 Feb 2021 11:30:35 -0500 -Subject: [PATCH 2/4] target/i386: add fast short REP MOV support - -RH-Author: Dr. David Alan Gilbert -Message-id: <20210224113037.15599-3-dgilbert@redhat.com> -Patchwork-id: 101201 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 2/4] target/i386: add fast short REP MOV support -Bugzilla: 1790620 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Peter Xu - -From: Chenyi Qiang - -For CPUs support fast short REP MOV[CPUID.(EAX=7,ECX=0):EDX(bit4)], e.g -Icelake and Tigerlake, expose it to the guest VM. - -Reviewed-by: Eduardo Habkost -Signed-off-by: Chenyi Qiang -Message-Id: <20200714084148.26690-2-chenyi.qiang@intel.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 5cb287d2bd578dfe4897458793b4fce35bc4f744) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 2 +- - target/i386/cpu.h | 2 ++ - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 67dab94aa5..f6a9ed84b3 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1077,7 +1077,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .type = CPUID_FEATURE_WORD, - .feat_names = { - NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", -- NULL, NULL, NULL, NULL, -+ "fsrm", NULL, NULL, NULL, - "avx512-vp2intersect", NULL, "md-clear", NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL /* pconfig */, NULL, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 8e2e52ed31..f5a4efcec6 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -770,6 +770,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; - #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) - /* AVX512 Multiply Accumulation Single Precision */ - #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) -+/* Fast Short Rep Mov */ -+#define CPUID_7_0_EDX_FSRM (1U << 4) - /* AVX512 Vector Pair Intersection to a Pair of Mask Registers */ - #define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8) - /* 
Speculation Control */ --- -2.27.0 - diff --git a/SOURCES/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch b/SOURCES/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch deleted file mode 100644 index a80c9d3..0000000 --- a/SOURCES/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 27d7b085f2f568050d638b694ed2f51495db718c Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:15 +0000 -Subject: [PATCH 5/9] target/i386: check for availability of MSR_IA32_UCODE_REV - as an emulated MSR -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-6-pbonzini@redhat.com> -Patchwork-id: 93898 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/6] target/i386: check for availability of MSR_IA32_UCODE_REV as an emulated MSR -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -Even though MSR_IA32_UCODE_REV has been available long before Linux 5.6, -which added it to the emulated MSR list, a bug caused the microcode -version to revert to 0x100000000 on INIT. As a result, processors other -than the bootstrap processor would not see the host microcode revision; -some Windows version complain loudly about this and crash with a -fairly explicit MICROCODE REVISION MISMATCH error. - -[If running 5.6 prereleases, the kernel fix "KVM: x86: do not reset - microcode version on INIT or RESET" should also be applied.] - -Reported-by: Alex Williamson -Message-id: <20200211175516.10716-1-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 6702514814c7e7b4cbf179624539b5f38c72740b) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/kvm.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 6c61aef..99840ca 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -105,6 +105,7 @@ static bool has_msr_smi_count; - static bool has_msr_arch_capabs; - static bool has_msr_core_capabs; - static bool has_msr_vmx_vmfunc; -+static bool has_msr_ucode_rev; - - static uint32_t has_architectural_pmu_version; - static uint32_t num_architectural_pmu_gp_counters; -@@ -2056,6 +2057,9 @@ static int kvm_get_supported_msrs(KVMState *s) - case MSR_IA32_VMX_VMFUNC: - has_msr_vmx_vmfunc = true; - break; -+ case MSR_IA32_UCODE_REV: -+ has_msr_ucode_rev = true; -+ break; - } - } - } -@@ -2696,8 +2700,7 @@ static void kvm_init_msrs(X86CPU *cpu) - env->features[FEAT_CORE_CAPABILITY]); - } - -- if (kvm_arch_get_supported_msr_feature(kvm_state, -- MSR_IA32_UCODE_REV)) { -+ if (has_msr_ucode_rev) { - kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch b/SOURCES/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch deleted file mode 100644 index 4c2362d..0000000 --- a/SOURCES/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 77cdcccc49ba988e3b5bcb66decdee2e99fdcd72 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Tue, 14 Apr 2020 15:00:36 +0100 -Subject: [PATCH] target/i386: do not set unsupported VMX secondary execution - controls - -RH-Author: Vitaly Kuznetsov -Message-id: <20200414150036.625732-2-vkuznets@redhat.com> -Patchwork-id: 94674 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] target/i386: do not set unsupported VMX secondary execution controls -Bugzilla: 1822682 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Paolo Bonzini - -Commit 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for -secondary execution controls") added a workaround 
for KVM pre-dating -commit 6defc591846d ("KVM: nVMX: include conditional controls in /dev/kvm -KVM_GET_MSRS") which wasn't setting certain available controls. The -workaround uses generic CPUID feature bits to set missing VMX controls. - -It was found that in some cases it is possible to observe hosts which -have certain CPUID features but lack the corresponding VMX control. - -In particular, it was reported that Azure VMs have RDSEED but lack -VMX_SECONDARY_EXEC_RDSEED_EXITING; attempts to enable this feature -bit result in QEMU abort. - -Resolve the issue but not applying the workaround when we don't have -to. As there is no good way to find out if KVM has the fix itself, use -95c5c7c77c ("KVM: nVMX: list VMX MSRs in KVM_GET_MSR_INDEX_LIST") instead -as these [are supposed to] come together. - -Fixes: 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for secondary execution controls") -Suggested-by: Paolo Bonzini -Signed-off-by: Vitaly Kuznetsov -Message-Id: <20200331162752.1209928-1-vkuznets@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 4a910e1f6ab4155ec8b24c49b2585cc486916985) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/kvm.c | 41 ++++++++++++++++++++++++++--------------- - 1 file changed, 26 insertions(+), 15 deletions(-) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 99840ca..fcc8f7d 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -106,6 +106,7 @@ static bool has_msr_arch_capabs; - static bool has_msr_core_capabs; - static bool has_msr_vmx_vmfunc; - static bool has_msr_ucode_rev; -+static bool has_msr_vmx_procbased_ctls2; - - static uint32_t has_architectural_pmu_version; - static uint32_t num_architectural_pmu_gp_counters; -@@ -490,21 +491,28 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) - value = msr_data.entries[0].data; - switch (index) { - case MSR_IA32_VMX_PROCBASED_CTLS2: -- /* KVM forgot to add these bits for some time, do this ourselves. 
*/ -- if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; -+ if (!has_msr_vmx_procbased_ctls2) { -+ /* KVM forgot to add these bits for some time, do this ourselves. */ -+ if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & -+ CPUID_XSAVE_XSAVES) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & -+ CPUID_EXT_RDRAND) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & -+ CPUID_7_0_EBX_INVPCID) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & -+ CPUID_7_0_EBX_RDSEED) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & -+ CPUID_EXT2_RDTSCP) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; -+ } - } - /* fall through */ - case MSR_IA32_VMX_TRUE_PINBASED_CTLS: -@@ -2060,6 +2068,9 @@ static int kvm_get_supported_msrs(KVMState *s) - case MSR_IA32_UCODE_REV: - has_msr_ucode_rev = true; - break; -+ case MSR_IA32_VMX_PROCBASED_CTLS2: -+ has_msr_vmx_procbased_ctls2 = true; -+ break; - } - } - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch 
b/SOURCES/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch deleted file mode 100644 index 47438a3..0000000 --- a/SOURCES/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 7b71a7011437ebfa3bc7df9297e892b82293ec98 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:16 +0000 -Subject: [PATCH 6/9] target/i386: enable monitor and ucode revision with -cpu - max -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-7-pbonzini@redhat.com> -Patchwork-id: 93910 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/6] target/i386: enable monitor and ucode revision with -cpu max -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -These two features were incorrectly tied to host_cpuid_required rather than -cpu->max_features. As a result, -cpu max was not enabling either MONITOR -features or ucode revision. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit be02cda3afde60d219786e23c3f8edb53aec8e17) - -[RHEL7: context, upstream uses g_autofree] - -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 5ac843d..1685a8c 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6317,7 +6317,9 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - g_free(name); - goto out; - } -+ } - -+ if (cpu->max_features && accel_uses_host_cpuid()) { - if (enable_cpu_pm) { - host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx, - &cpu->mwait.ecx, &cpu->mwait.edx); --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-i386-fix-TCG-UCODE_REV-access.patch b/SOURCES/kvm-target-i386-fix-TCG-UCODE_REV-access.patch deleted file mode 100644 index c7ced8a..0000000 --- a/SOURCES/kvm-target-i386-fix-TCG-UCODE_REV-access.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 3d16f05359e6277da1f970f71aa9f76337d655dc Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:14 +0000 -Subject: [PATCH 4/9] target/i386: fix TCG UCODE_REV access -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-5-pbonzini@redhat.com> -Patchwork-id: 93904 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/6] target/i386: fix TCG UCODE_REV access -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -This was a very interesting semantic conflict that caused git to move -the MSR_IA32_UCODE_REV read to helper_wrmsr. Not a big deal, but -still should be fixed... - -Fixes: 4e45aff398 ("target/i386: add a ucode-rev property", 2020-01-24) -Message-id: <20200206171022.9289-1-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 9028c75c9d08be303ccc425bfe3d3b23d8f4cac7) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/misc_helper.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c -index aed16fe..7d61221 100644 ---- a/target/i386/misc_helper.c -+++ b/target/i386/misc_helper.c -@@ -229,7 +229,6 @@ void helper_rdmsr(CPUX86State *env) - #else - void helper_wrmsr(CPUX86State *env) - { -- X86CPU *x86_cpu = env_archcpu(env); - uint64_t val; - - cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); -@@ -372,9 +371,6 @@ void helper_wrmsr(CPUX86State *env) - env->msr_bndcfgs = val; - cpu_sync_bndcs_hflags(env); - break; -- case MSR_IA32_UCODE_REV: -- val = x86_cpu->ucode_rev; -- break; - default: - if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL - && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + -@@ -393,6 +389,7 @@ void helper_wrmsr(CPUX86State *env) - - void helper_rdmsr(CPUX86State *env) - { -+ X86CPU *x86_cpu = env_archcpu(env); - uint64_t val; - - cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 0, GETPC()); -@@ -526,6 +523,9 @@ void helper_rdmsr(CPUX86State *env) - case MSR_IA32_BNDCFGS: - val = env->msr_bndcfgs; - break; -+ case MSR_IA32_UCODE_REV: -+ val = x86_cpu->ucode_rev; -+ break; - default: - if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL - && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch b/SOURCES/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch deleted file mode 100644 index 5118aed..0000000 --- a/SOURCES/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch +++ /dev/null @@ -1,178 +0,0 @@ -From eb0fc0ae2750a0462698d6d21ebb56a4249539f9 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:11 +0000 -Subject: [PATCH 1/9] target/i386: kvm: initialize feature MSRs very early -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: 
<20200217162316.2464-2-pbonzini@redhat.com> -Patchwork-id: 93899 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] target/i386: kvm: initialize feature MSRs very early -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -Some read-only MSRs affect the behavior of ioctls such as -KVM_SET_NESTED_STATE. We can initialize them once and for all -right after the CPU is realized, since they will never be modified -by the guest. - -Reported-by: Qingua Cheng -Cc: qemu-stable@nongnu.org -Signed-off-by: Paolo Bonzini -Message-Id: <1579544504-3616-2-git-send-email-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 420ae1fc51c99abfd03b1c590f55617edd2a2bed) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/kvm.c | 81 ++++++++++++++++++++++++++++++-------------------- - target/i386/kvm_i386.h | 1 + - 2 files changed, 49 insertions(+), 33 deletions(-) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 86d9a1f..f41605b 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -67,6 +67,8 @@ - * 255 kvm_msr_entry structs */ - #define MSR_BUF_SIZE 4096 - -+static void kvm_init_msrs(X86CPU *cpu); -+ - const KVMCapabilityInfo kvm_arch_required_capabilities[] = { - KVM_CAP_INFO(SET_TSS_ADDR), - KVM_CAP_INFO(EXT_CPUID), -@@ -1842,6 +1844,8 @@ int kvm_arch_init_vcpu(CPUState *cs) - has_msr_tsc_aux = false; - } - -+ kvm_init_msrs(cpu); -+ - r = hyperv_init_vcpu(cpu); - if (r) { - goto fail; -@@ -2660,11 +2664,53 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) - VMCS12_MAX_FIELD_INDEX << 1); - } - -+static int kvm_buf_set_msrs(X86CPU *cpu) -+{ -+ int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ if (ret < cpu->kvm_msr_buf->nmsrs) { -+ struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; -+ error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, -+ (uint32_t)e->index, 
(uint64_t)e->data); -+ } -+ -+ assert(ret == cpu->kvm_msr_buf->nmsrs); -+ return 0; -+} -+ -+static void kvm_init_msrs(X86CPU *cpu) -+{ -+ CPUX86State *env = &cpu->env; -+ -+ kvm_msr_buf_reset(cpu); -+ if (has_msr_arch_capabs) { -+ kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, -+ env->features[FEAT_ARCH_CAPABILITIES]); -+ } -+ -+ if (has_msr_core_capabs) { -+ kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, -+ env->features[FEAT_CORE_CAPABILITY]); -+ } -+ -+ /* -+ * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but -+ * all kernels with MSR features should have them. -+ */ -+ if (kvm_feature_msrs && cpu_has_vmx(env)) { -+ kvm_msr_entry_add_vmx(cpu, env->features); -+ } -+ -+ assert(kvm_buf_set_msrs(cpu) == 0); -+} -+ - static int kvm_put_msrs(X86CPU *cpu, int level) - { - CPUX86State *env = &cpu->env; - int i; -- int ret; - - kvm_msr_buf_reset(cpu); - -@@ -2722,17 +2768,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) - } - #endif - -- /* If host supports feature MSR, write down. */ -- if (has_msr_arch_capabs) { -- kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, -- env->features[FEAT_ARCH_CAPABILITIES]); -- } -- -- if (has_msr_core_capabs) { -- kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, -- env->features[FEAT_CORE_CAPABILITY]); -- } -- - /* - * The following MSRs have side effects on the guest or are too heavy - * for normal writeback. Limit them to reset or full state updates. -@@ -2910,14 +2945,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) - - /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see - * kvm_put_msr_feature_control. */ -- -- /* -- * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but -- * all kernels with MSR features should have them. 
-- */ -- if (kvm_feature_msrs && cpu_has_vmx(env)) { -- kvm_msr_entry_add_vmx(cpu, env->features); -- } - } - - if (env->mcg_cap) { -@@ -2933,19 +2960,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) - } - } - -- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); -- if (ret < 0) { -- return ret; -- } -- -- if (ret < cpu->kvm_msr_buf->nmsrs) { -- struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; -- error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, -- (uint32_t)e->index, (uint64_t)e->data); -- } -- -- assert(ret == cpu->kvm_msr_buf->nmsrs); -- return 0; -+ return kvm_buf_set_msrs(cpu); - } - - -diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h -index 06fe06b..d98c6f6 100644 ---- a/target/i386/kvm_i386.h -+++ b/target/i386/kvm_i386.h -@@ -66,4 +66,5 @@ bool kvm_enable_x2apic(void); - bool kvm_has_x2apic_api(void); - - bool kvm_hv_vpindex_settable(void); -+ - #endif --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch b/SOURCES/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch deleted file mode 100644 index 99b18fc..0000000 --- a/SOURCES/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 8f39b0c9523630efeb451e2298cf64b88cd2ac81 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:13 +0000 -Subject: [PATCH 3/9] target/i386: kvm: initialize microcode revision from KVM -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-4-pbonzini@redhat.com> -Patchwork-id: 93897 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/6] target/i386: kvm: initialize microcode revision from KVM -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -KVM can return the host microcode revision as a feature MSR. 
-Use it as the default value for -cpu host. - -Signed-off-by: Paolo Bonzini -Message-Id: <1579544504-3616-4-git-send-email-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 32c87d70ff55b96741f08c35108935cac6f40fe4) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 4 ++++ - target/i386/kvm.c | 5 +++++ - 2 files changed, 9 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index e505d3e..5ac843d 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6323,6 +6323,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - &cpu->mwait.ecx, &cpu->mwait.edx); - env->features[FEAT_1_ECX] |= CPUID_EXT_MONITOR; - } -+ if (kvm_enabled() && cpu->ucode_rev == 0) { -+ cpu->ucode_rev = kvm_arch_get_supported_msr_feature(kvm_state, -+ MSR_IA32_UCODE_REV); -+ } - } - - if (cpu->ucode_rev == 0) { -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index f41605b..6c61aef 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -2696,6 +2696,11 @@ static void kvm_init_msrs(X86CPU *cpu) - env->features[FEAT_CORE_CAPABILITY]); - } - -+ if (kvm_arch_get_supported_msr_feature(kvm_state, -+ MSR_IA32_UCODE_REV)) { -+ kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); -+ } -+ - /* - * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but - * all kernels with MSR features should have them. 
--- -1.8.3.1 - diff --git a/SOURCES/kvm-target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch b/SOURCES/kvm-target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch deleted file mode 100644 index 49e54ba..0000000 --- a/SOURCES/kvm-target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 72a1827006be22791017ff2b671eac1c96be5d12 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 7 May 2020 22:09:23 +0100 -Subject: [PATCH 01/26] target/i386: set the CPUID level to 0x14 on old - machine-type - -RH-Author: plai@redhat.com -Message-id: <20200507220923.13723-1-plai@redhat.com> -Patchwork-id: 96347 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH RESEND] target/i386: set the CPUID level to 0x14 on old machine-type -Bugzilla: 1513681 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Igor Mammedov -RH-Acked-by: Danilo de Paula - -From: Luwei Kang - -BZ https://bugzilla.redhat.com/show_bug.cgi?id=1513681 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=28146304 -Branch: rhel-av-8.2.1 - -Tested on intel-icelake-y-01.ml3.eng.bos.redhat.com. - -The CPUID level need to be set to 0x14 manually on old -machine-type if Intel PT is enabled in guest. E.g. the -CPUID[0].EAX(level)=7 and CPUID[7].EBX[25](intel-pt)=1 when the -Qemu with "-machine pc-i440fx-3.1 -cpu qemu64,+intel-pt" parameter. - -Some Intel PT capabilities are exposed by leaf 0x14 and the -missing capabilities will cause some MSRs access failed. -This patch add a warning message to inform the user to extend -the CPUID level. - -Suggested-by: Eduardo Habkost -Signed-off-by: Luwei Kang -Message-Id: <1584031686-16444-1-git-send-email-luwei.kang@intel.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit ddc2fc9e4e42ebce48b088963dc7fbd1c08d5f33) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 1685a8c..0f0a2db 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6206,9 +6206,14 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) - x86_cpu_adjust_feat_level(cpu, FEAT_XSAVE); - - /* Intel Processor Trace requires CPUID[0x14] */ -- if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) && -- kvm_enabled() && cpu->intel_pt_auto_level) { -- x86_cpu_adjust_level(cpu, &cpu->env.cpuid_min_level, 0x14); -+ if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT)) { -+ if (cpu->intel_pt_auto_level) { -+ x86_cpu_adjust_level(cpu, &cpu->env.cpuid_min_level, 0x14); -+ } else if (cpu->env.cpuid_min_level < 0x14) { -+ mark_unavailable_features(cpu, FEAT_7_0_EBX, -+ CPUID_7_0_EBX_INTEL_PT, -+ "Intel PT need CPUID leaf 0x14, please set by \"-cpu ...,+intel-pt,level=0x14\""); -+ } - } - - /* CPU topology with multi-dies support requires CPUID[0x1F] */ --- -1.8.3.1 - diff --git a/SOURCES/kvm-target-i386-sev-fail-query-sev-capabilities-if-QEMU-.patch b/SOURCES/kvm-target-i386-sev-fail-query-sev-capabilities-if-QEMU-.patch deleted file mode 100644 index 60abc1b..0000000 --- a/SOURCES/kvm-target-i386-sev-fail-query-sev-capabilities-if-QEMU-.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 9adf5e57df32df464e7465b1df72c993d0ed4ed4 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 31 Jul 2020 18:08:35 -0400 -Subject: [PATCH 3/4] target/i386: sev: fail query-sev-capabilities if QEMU - cannot use SEV -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200731180835.86786-3-pbonzini@redhat.com> -Patchwork-id: 98124 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 2/2] target/i386: sev: fail query-sev-capabilities if QEMU cannot use SEV -Bugzilla: 1689341 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Philippe Mathieu-Daudé 
-RH-Acked-by: Dr. David Alan Gilbert - -In some cases, such as if the kvm-amd "sev" module parameter is set -to 0, SEV will be unavailable but query-sev-capabilities will still -return all the information. This tricks libvirt into erroneously -reporting that SEV is available. Check the actual usability of the -feature and return the appropriate error if QEMU cannot use KVM -or KVM cannot use SEV. - -Reviewed-by: Eric Blake -Signed-off-by: Paolo Bonzini -cherry picked from commit 1b38750c40281dd0d068f8536b2ea95d7b9bd585 -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/sev.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/target/i386/sev.c b/target/i386/sev.c -index 054f2d846a..a47f0d3880 100644 ---- a/target/i386/sev.c -+++ b/target/i386/sev.c -@@ -504,6 +504,15 @@ sev_get_capabilities(Error **errp) - uint32_t ebx; - int fd; - -+ if (!kvm_enabled()) { -+ error_setg(errp, "KVM not enabled"); -+ return NULL; -+ } -+ if (kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, NULL) < 0) { -+ error_setg(errp, "SEV is not enabled in KVM"); -+ return NULL; -+ } -+ - fd = open(DEFAULT_SEV_DEVICE, O_RDWR); - if (fd < 0) { - error_setg_errno(errp, errno, "Failed to open %s", --- -2.27.0 - diff --git a/SOURCES/kvm-target-i386-sev-provide-proper-error-reporting-for-q.patch b/SOURCES/kvm-target-i386-sev-provide-proper-error-reporting-for-q.patch deleted file mode 100644 index e5f3459..0000000 --- a/SOURCES/kvm-target-i386-sev-provide-proper-error-reporting-for-q.patch +++ /dev/null @@ -1,142 +0,0 @@ -From 8789f2662c6ddacc5472a803d253b94d93c6e9f0 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 31 Jul 2020 18:08:34 -0400 -Subject: [PATCH 2/4] target/i386: sev: provide proper error reporting for - query-sev-capabilities -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200731180835.86786-2-pbonzini@redhat.com> -Patchwork-id: 98123 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 
1/2] target/i386: sev: provide proper error reporting for query-sev-capabilities -Bugzilla: 1689341 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Dr. David Alan Gilbert - -The query-sev-capabilities was reporting errors through error_report; -change it to use Error** so that the cause of the failure is clearer. - -Reviewed-by: Eric Blake -Signed-off-by: Paolo Bonzini -Cherry picked from commit e4f6278557148151e77260b872b41bcd7ceb4737 -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/monitor.c | 10 +--------- - target/i386/sev-stub.c | 3 ++- - target/i386/sev.c | 18 +++++++++--------- - target/i386/sev_i386.h | 2 +- - 4 files changed, 13 insertions(+), 20 deletions(-) - -diff --git a/target/i386/monitor.c b/target/i386/monitor.c -index 9fb4d641d5..cfd8075e4f 100644 ---- a/target/i386/monitor.c -+++ b/target/i386/monitor.c -@@ -727,13 +727,5 @@ SevLaunchMeasureInfo *qmp_query_sev_launch_measure(Error **errp) - - SevCapability *qmp_query_sev_capabilities(Error **errp) - { -- SevCapability *data; -- -- data = sev_get_capabilities(); -- if (!data) { -- error_setg(errp, "SEV feature is not available"); -- return NULL; -- } -- -- return data; -+ return sev_get_capabilities(errp); - } -diff --git a/target/i386/sev-stub.c b/target/i386/sev-stub.c -index e5ee13309c..88e3f39a1e 100644 ---- a/target/i386/sev-stub.c -+++ b/target/i386/sev-stub.c -@@ -44,7 +44,8 @@ char *sev_get_launch_measurement(void) - return NULL; - } - --SevCapability *sev_get_capabilities(void) -+SevCapability *sev_get_capabilities(Error **errp) - { -+ error_setg(errp, "SEV is not available in this QEMU"); - return NULL; - } -diff --git a/target/i386/sev.c b/target/i386/sev.c -index 024bb24e51..054f2d846a 100644 ---- a/target/i386/sev.c -+++ b/target/i386/sev.c -@@ -453,7 +453,7 @@ sev_get_info(void) - - static int - sev_get_pdh_info(int fd, guchar **pdh, size_t *pdh_len, guchar **cert_chain, -- size_t *cert_chain_len) -+ size_t *cert_chain_len, Error **errp) - { - 
guchar *pdh_data = NULL; - guchar *cert_chain_data = NULL; -@@ -464,8 +464,8 @@ sev_get_pdh_info(int fd, guchar **pdh, size_t *pdh_len, guchar **cert_chain, - r = sev_platform_ioctl(fd, SEV_PDH_CERT_EXPORT, &export, &err); - if (r < 0) { - if (err != SEV_RET_INVALID_LEN) { -- error_report("failed to export PDH cert ret=%d fw_err=%d (%s)", -- r, err, fw_error_to_str(err)); -+ error_setg(errp, "failed to export PDH cert ret=%d fw_err=%d (%s)", -+ r, err, fw_error_to_str(err)); - return 1; - } - } -@@ -477,8 +477,8 @@ sev_get_pdh_info(int fd, guchar **pdh, size_t *pdh_len, guchar **cert_chain, - - r = sev_platform_ioctl(fd, SEV_PDH_CERT_EXPORT, &export, &err); - if (r < 0) { -- error_report("failed to export PDH cert ret=%d fw_err=%d (%s)", -- r, err, fw_error_to_str(err)); -+ error_setg(errp, "failed to export PDH cert ret=%d fw_err=%d (%s)", -+ r, err, fw_error_to_str(err)); - goto e_free; - } - -@@ -495,7 +495,7 @@ e_free: - } - - SevCapability * --sev_get_capabilities(void) -+sev_get_capabilities(Error **errp) - { - SevCapability *cap = NULL; - guchar *pdh_data = NULL; -@@ -506,13 +506,13 @@ sev_get_capabilities(void) - - fd = open(DEFAULT_SEV_DEVICE, O_RDWR); - if (fd < 0) { -- error_report("%s: Failed to open %s '%s'", __func__, -- DEFAULT_SEV_DEVICE, strerror(errno)); -+ error_setg_errno(errp, errno, "Failed to open %s", -+ DEFAULT_SEV_DEVICE); - return NULL; - } - - if (sev_get_pdh_info(fd, &pdh_data, &pdh_len, -- &cert_chain_data, &cert_chain_len)) { -+ &cert_chain_data, &cert_chain_len, errp)) { - goto out; - } - -diff --git a/target/i386/sev_i386.h b/target/i386/sev_i386.h -index 8ada9d385d..1e073342ba 100644 ---- a/target/i386/sev_i386.h -+++ b/target/i386/sev_i386.h -@@ -38,7 +38,7 @@ extern SevInfo *sev_get_info(void); - extern uint32_t sev_get_cbit_position(void); - extern uint32_t sev_get_reduced_phys_bits(void); - extern char *sev_get_launch_measurement(void); --extern SevCapability *sev_get_capabilities(void); -+extern SevCapability 
*sev_get_capabilities(Error **errp); - - typedef struct QSevGuestInfo QSevGuestInfo; - typedef struct QSevGuestInfoClass QSevGuestInfoClass; --- -2.27.0 - diff --git a/SOURCES/kvm-target-s390x-kvm-Enable-adapter-interruption-suppres.patch b/SOURCES/kvm-target-s390x-kvm-Enable-adapter-interruption-suppres.patch deleted file mode 100644 index 38e5637..0000000 --- a/SOURCES/kvm-target-s390x-kvm-Enable-adapter-interruption-suppres.patch +++ /dev/null @@ -1,60 +0,0 @@ -From c4fe37ae6d75ed72e6a3bde01fea053eb508274c Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 5 Jun 2020 07:41:11 -0400 -Subject: [PATCH 41/42] target/s390x/kvm: Enable adapter interruption - suppression again -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20200605074111.2185-4-thuth@redhat.com> -Patchwork-id: 97370 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 3/3] target/s390x/kvm: Enable adapter interruption suppression again -Bugzilla: 1756946 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -The AIS feature has been disabled late in the v2.10 development cycle since -there were some issues with migration (see commit 3f2d07b3b01ea61126b - -"s390x/ais: for 2.10 stable: disable ais facility"). We originally wanted -to enable it again for newer machine types, but apparently we forgot to do -this so far. Let's do it now for the machines that support proper CPU models. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1756946 -Signed-off-by: Thomas Huth -Message-Id: <20200122101437.5069-1-thuth@redhat.com> -Reviewed-by: David Hildenbrand -Tested-by: Matthew Rosato -Signed-off-by: Cornelia Huck -(cherry picked from commit a5c8617af6919515b84256978452edf07401c45e) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/s390x/kvm.c | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index c589ef9034..0bbf8f81b0 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -377,10 +377,13 @@ int kvm_arch_init(MachineState *ms, KVMState *s) - /* - * The migration interface for ais was introduced with kernel 4.13 - * but the capability itself had been active since 4.12. As migration -- * support is considered necessary let's disable ais in the 2.10 -- * machine. -+ * support is considered necessary, we only try to enable this for -+ * newer machine types if KVM_CAP_S390_AIS_MIGRATION is available. - */ -- /* kvm_vm_enable_cap(s, KVM_CAP_S390_AIS, 0); */ -+ if (cpu_model_allowed() && kvm_kernel_irqchip_allowed() && -+ kvm_check_extension(s, KVM_CAP_S390_AIS_MIGRATION)) { -+ kvm_vm_enable_cap(s, KVM_CAP_S390_AIS, 0); -+ } - - kvm_set_max_memslot_size(KVM_SLOT_MAX_BYTES); - return 0; --- -2.27.0 - diff --git a/SOURCES/kvm-tcp_emu-Fix-oob-access.patch b/SOURCES/kvm-tcp_emu-Fix-oob-access.patch deleted file mode 100644 index e532877..0000000 --- a/SOURCES/kvm-tcp_emu-Fix-oob-access.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 5c2c5496083fa549e1dff903413bb6136fc19d8d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Fri, 17 Jan 2020 12:07:56 +0100 -Subject: [PATCH 1/4] tcp_emu: Fix oob access -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200117120758.1076549-2-marcandre.lureau@redhat.com> -Patchwork-id: 93399 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 1/3] tcp_emu: Fix oob access -Bugzilla: 1791568 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -From: Samuel Thibault - -The main loop only checks for one available byte, while we sometimes -need two bytes. 
- -[ MA - minor conflict, CHANGELOG.md absent ] -(cherry picked from libslirp commit 2655fffed7a9e765bcb4701dd876e9dab975f289) -Signed-off-by: Marc-André Lureau - -Signed-off-by: Miroslav Rezanina ---- - slirp/src/tcp_subr.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index d6dd133..cbecd64 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -886,6 +886,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) - break; - - case 5: -+ if (bptr == m->m_data + m->m_len - 1) -+ return 1; /* We need two bytes */ -+ - /* - * The difference between versions 1.0 and - * 2.0 is here. For future versions of -@@ -901,6 +904,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) - /* This is the field containing the port - * number that RA-player is listening to. - */ -+ -+ if (bptr == m->m_data + m->m_len - 1) -+ return 1; /* We need two bytes */ -+ - lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1]; - if (lport < 6970) - lport += 256; /* don't know why */ --- -1.8.3.1 - diff --git a/SOURCES/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch b/SOURCES/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch deleted file mode 100644 index 846da73..0000000 --- a/SOURCES/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch +++ /dev/null @@ -1,149 +0,0 @@ -From 9a7810c257711ce02627916d886fc1029f7a8190 Mon Sep 17 00:00:00 2001 -From: jmaloy -Date: Thu, 13 Feb 2020 15:50:49 +0000 -Subject: [PATCH 3/7] tcp_emu: fix unsafe snprintf() usages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: jmaloy -Message-id: <20200213155049.3936-3-jmaloy@redhat.com> -Patchwork-id: 93826 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] tcp_emu: fix unsafe snprintf() usages -Bugzilla: 1798994 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi - -From: Marc-André Lureau - -Various calls to snprintf() assume that snprintf() returns "only" the -number 
of bytes written (excluding terminating NUL). - -https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 - -"Upon successful completion, the snprintf() function shall return the -number of bytes that would be written to s had n been sufficiently -large excluding the terminating null byte." - -Before patch ce131029, if there isn't enough room in "m_data" for the -"DCC ..." message, we overflow "m_data". - -After the patch, if there isn't enough room for the same, we don't -overflow "m_data", but we set "m_len" out-of-bounds. The next time an -access is bounded by "m_len", we'll have a buffer overflow then. - -Use slirp_fmt*() to fix potential OOB memory access. - -Reported-by: Laszlo Ersek -Signed-off-by: Marc-André Lureau -Reviewed-by: Samuel Thibault -Message-Id: <20200127092414.169796-7-marcandre.lureau@redhat.com> -(cherry picked from libslirp commit 68ccb8021a838066f0951d4b2817eb6b6f10a843) -Signed-off-by: Jon Maloy - -Signed-off-by: Danilo C. L. de Paula ---- - slirp/src/tcp_subr.c | 44 +++++++++++++++++++++----------------------- - 1 file changed, 21 insertions(+), 23 deletions(-) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index 954d1a6..26d4ead 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -655,8 +655,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - NTOHS(n1); - NTOHS(n2); - m_inc(m, snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); -- m->m_len = snprintf(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); -- assert(m->m_len < M_ROOM(m)); -+ m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); - } else { - *eol = '\r'; - } -@@ -696,9 +695,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, -- n5, n6, x == 7 ? 
buff : ""); -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "ORT %d,%d,%d,%d,%d,%d\r\n%s", -+ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); - return 1; - } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) { - /* -@@ -731,10 +730,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", -- n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); -- -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", -+ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); - return 1; - } - -@@ -757,8 +755,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && - (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, - htons(lport), SS_FACCEPTONCE)) != NULL) -- m->m_len = snprintf(m->m_data, M_ROOM(m), -- "%d", ntohs(so->so_fport)) + 1; -+ m->m_len = slirp_fmt0(m->m_data, M_ROOM(m), -+ "%d", ntohs(so->so_fport)); - return 1; - - case EMU_IRC: -@@ -777,10 +775,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "DCC CHAT chat %lu %u%c\n", -- (unsigned long)ntohl(so->so_faddr.s_addr), -- ntohs(so->so_fport), 1); -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "DCC CHAT chat %lu %u%c\n", -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ ntohs(so->so_fport), 1); - } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, - &n1) == 4) { - if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), -@@ -788,10 +786,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "DCC SEND %s %lu %u %u%c\n", buff, -- (unsigned long)ntohl(so->so_faddr.s_addr), -- ntohs(so->so_fport), n1, 1); 
-+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "DCC SEND %s %lu %u %u%c\n", buff, -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ ntohs(so->so_fport), n1, 1); - } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, - &n1) == 4) { - if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), -@@ -799,10 +797,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "DCC MOVE %s %lu %u %u%c\n", buff, -- (unsigned long)ntohl(so->so_faddr.s_addr), -- ntohs(so->so_fport), n1, 1); -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "DCC MOVE %s %lu %u %u%c\n", buff, -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ ntohs(so->so_fport), n1, 1); - } - return 1; - --- -1.8.3.1 - diff --git a/SOURCES/kvm-tests-arm-cpu-features-Check-feature-default-values.patch b/SOURCES/kvm-tests-arm-cpu-features-Check-feature-default-values.patch deleted file mode 100644 index e8a48bf..0000000 --- a/SOURCES/kvm-tests-arm-cpu-features-Check-feature-default-values.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 323889aa2182bf39df10f1caf43f22daea2d7d37 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:12 +0000 -Subject: [PATCH 10/15] tests/arm-cpu-features: Check feature default values -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-4-drjones@redhat.com> -Patchwork-id: 93626 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/5] tests/arm-cpu-features: Check feature default values -Bugzilla: 1647366 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:06 +0000 - - tests/arm-cpu-features: Check feature default values - - If we know what the default value should be then we can test for - that as well as 
the feature existence. - - Signed-off-by: Andrew Jones - Reviewed-by: Richard Henderson - Message-id: 20200120101023.16030-5-drjones@redhat.com - Signed-off-by: Peter Maydell - -(cherry picked from commit 789a35efb583464f9fcd5d871a7fd6164318bb91) -Signed-off-by: Danilo C. L. de Paula ---- - tests/arm-cpu-features.c | 37 ++++++++++++++++++++++++++++--------- - 1 file changed, 28 insertions(+), 9 deletions(-) - -diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c -index 6e99aa9..89285ca 100644 ---- a/tests/arm-cpu-features.c -+++ b/tests/arm-cpu-features.c -@@ -159,6 +159,25 @@ static bool resp_get_feature(QDict *resp, const char *feature) - qobject_unref(_resp); \ - }) - -+#define assert_feature(qts, cpu_type, feature, expected_value) \ -+({ \ -+ QDict *_resp, *_props; \ -+ \ -+ _resp = do_query_no_props(qts, cpu_type); \ -+ g_assert(_resp); \ -+ g_assert(resp_has_props(_resp)); \ -+ _props = resp_get_props(_resp); \ -+ g_assert(qdict_get(_props, feature)); \ -+ g_assert(qdict_get_bool(_props, feature) == (expected_value)); \ -+ qobject_unref(_resp); \ -+}) -+ -+#define assert_has_feature_enabled(qts, cpu_type, feature) \ -+ assert_feature(qts, cpu_type, feature, true) -+ -+#define assert_has_feature_disabled(qts, cpu_type, feature) \ -+ assert_feature(qts, cpu_type, feature, false) -+ - static void assert_type_full(QTestState *qts) - { - const char *error; -@@ -405,16 +424,16 @@ static void test_query_cpu_model_expansion(const void *data) - assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); - - /* Test expected feature presence/absence for some cpu types */ -- assert_has_feature(qts, "max", "pmu"); -- assert_has_feature(qts, "cortex-a15", "pmu"); -+ assert_has_feature_enabled(qts, "max", "pmu"); -+ assert_has_feature_enabled(qts, "cortex-a15", "pmu"); - assert_has_not_feature(qts, "cortex-a15", "aarch64"); - - if (g_str_equal(qtest_get_arch(), "aarch64")) { -- assert_has_feature(qts, "max", "aarch64"); -- assert_has_feature(qts, 
"max", "sve"); -- assert_has_feature(qts, "max", "sve128"); -- assert_has_feature(qts, "cortex-a57", "pmu"); -- assert_has_feature(qts, "cortex-a57", "aarch64"); -+ assert_has_feature_enabled(qts, "max", "aarch64"); -+ assert_has_feature_enabled(qts, "max", "sve"); -+ assert_has_feature_enabled(qts, "max", "sve128"); -+ assert_has_feature_enabled(qts, "cortex-a57", "pmu"); -+ assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); - - sve_tests_default(qts, "max"); - -@@ -451,8 +470,8 @@ static void test_query_cpu_model_expansion_kvm(const void *data) - QDict *resp; - char *error; - -- assert_has_feature(qts, "host", "aarch64"); -- assert_has_feature(qts, "host", "pmu"); -+ assert_has_feature_enabled(qts, "host", "aarch64"); -+ assert_has_feature_enabled(qts, "host", "pmu"); - - assert_error(qts, "cortex-a15", - "We cannot guarantee the CPU type 'cortex-a15' works " --- -1.8.3.1 - diff --git a/SOURCES/kvm-tests-bios-tables-test-add-test-cases-for-ACPI-HMAT.patch b/SOURCES/kvm-tests-bios-tables-test-add-test-cases-for-ACPI-HMAT.patch deleted file mode 100644 index 12df637..0000000 --- a/SOURCES/kvm-tests-bios-tables-test-add-test-cases-for-ACPI-HMAT.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 6d549629becb69f315dd4213f730122d19c9c566 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 21 May 2020 23:56:54 +0100 -Subject: [PATCH 11/12] tests/bios-tables-test: add test cases for ACPI HMAT - -RH-Author: plai@redhat.com -Message-id: <20200521235655.27141-11-plai@redhat.com> -Patchwork-id: 96739 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 10/11] tests/bios-tables-test: add test cases for ACPI HMAT -Bugzilla: 1600217 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -From: Tao Xu - -ACPI table HMAT has been introduced, QEMU now builds HMAT tables for -Heterogeneous Memory with boot option '-numa node'. - -Add test cases on PC and Q35 machines with 2 numa nodes. 
-Because HMAT is generated when system enable numa, the -following tables need to be added for this test: - tests/data/acpi/pc/APIC.acpihmat - tests/data/acpi/pc/SRAT.acpihmat - tests/data/acpi/pc/HMAT.acpihmat - tests/data/acpi/pc/DSDT.acpihmat - tests/data/acpi/q35/APIC.acpihmat - tests/data/acpi/q35/SRAT.acpihmat - tests/data/acpi/q35/HMAT.acpihmat - tests/data/acpi/q35/DSDT.acpihmat - -Acked-by: Markus Armbruster -Reviewed-by: Igor Mammedov -Reviewed-by: Daniel Black -Reviewed-by: Jingqi Liu -Suggested-by: Igor Mammedov -Signed-off-by: Tao Xu -Message-Id: <20191213011929.2520-9-tao3.xu@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 1c8f85d93d261dc555a0aad6f54f2b5e8009d859) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. de Paula ---- - tests/bios-tables-test-allowed-diff.h | 8 +++++++ - tests/bios-tables-test.c | 44 +++++++++++++++++++++++++++++++++++ - 2 files changed, 52 insertions(+) - -diff --git a/tests/bios-tables-test-allowed-diff.h b/tests/bios-tables-test-allowed-diff.h -index dfb8523..3c9e0c9 100644 ---- a/tests/bios-tables-test-allowed-diff.h -+++ b/tests/bios-tables-test-allowed-diff.h -@@ -1 +1,9 @@ - /* List of comma-separated changed AML files to ignore */ -+"tests/data/acpi/pc/APIC.acpihmat", -+"tests/data/acpi/pc/SRAT.acpihmat", -+"tests/data/acpi/pc/HMAT.acpihmat", -+"tests/data/acpi/pc/DSDT.acpihmat", -+"tests/data/acpi/q35/APIC.acpihmat", -+"tests/data/acpi/q35/SRAT.acpihmat", -+"tests/data/acpi/q35/HMAT.acpihmat", -+"tests/data/acpi/q35/DSDT.acpihmat", -diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c -index 79f5da0..9823820 100644 ---- a/tests/bios-tables-test.c -+++ b/tests/bios-tables-test.c -@@ -947,6 +947,48 @@ static void test_acpi_virt_tcg_numamem(void) - - } - -+static void test_acpi_tcg_acpi_hmat(const char *machine) -+{ -+ test_data data; -+ -+ memset(&data, 0, sizeof(data)); -+ data.machine = machine; -+ data.variant = ".acpihmat"; -+ 
test_acpi_one(" -machine hmat=on" -+ " -smp 2,sockets=2" -+ " -m 128M,slots=2,maxmem=1G" -+ " -object memory-backend-ram,size=64M,id=m0" -+ " -object memory-backend-ram,size=64M,id=m1" -+ " -numa node,nodeid=0,memdev=m0" -+ " -numa node,nodeid=1,memdev=m1,initiator=0" -+ " -numa cpu,node-id=0,socket-id=0" -+ " -numa cpu,node-id=0,socket-id=1" -+ " -numa hmat-lb,initiator=0,target=0,hierarchy=memory," -+ "data-type=access-latency,latency=1" -+ " -numa hmat-lb,initiator=0,target=0,hierarchy=memory," -+ "data-type=access-bandwidth,bandwidth=65534M" -+ " -numa hmat-lb,initiator=0,target=1,hierarchy=memory," -+ "data-type=access-latency,latency=65534" -+ " -numa hmat-lb,initiator=0,target=1,hierarchy=memory," -+ "data-type=access-bandwidth,bandwidth=32767M" -+ " -numa hmat-cache,node-id=0,size=10K,level=1," -+ "associativity=direct,policy=write-back,line=8" -+ " -numa hmat-cache,node-id=1,size=10K,level=1," -+ "associativity=direct,policy=write-back,line=8", -+ &data); -+ free_test_data(&data); -+} -+ -+static void test_acpi_q35_tcg_acpi_hmat(void) -+{ -+ test_acpi_tcg_acpi_hmat(MACHINE_Q35); -+} -+ -+static void test_acpi_piix4_tcg_acpi_hmat(void) -+{ -+ test_acpi_tcg_acpi_hmat(MACHINE_PC); -+} -+ - static void test_acpi_virt_tcg(void) - { - test_data data = { -@@ -991,6 +1033,8 @@ int main(int argc, char *argv[]) - qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem); - qtest_add_func("acpi/piix4/dimmpxm", test_acpi_piix4_tcg_dimm_pxm); - qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm); -+ qtest_add_func("acpi/piix4/acpihmat", test_acpi_piix4_tcg_acpi_hmat); -+ qtest_add_func("acpi/q35/acpihmat", test_acpi_q35_tcg_acpi_hmat); - } else if (strcmp(arch, "aarch64") == 0) { - qtest_add_func("acpi/virt", test_acpi_virt_tcg); - qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem); --- -1.8.3.1 - diff --git a/SOURCES/kvm-tests-boot-sector-Fix-the-bad-s390x-assembler-code.patch 
b/SOURCES/kvm-tests-boot-sector-Fix-the-bad-s390x-assembler-code.patch deleted file mode 100644 index 240c408..0000000 --- a/SOURCES/kvm-tests-boot-sector-Fix-the-bad-s390x-assembler-code.patch +++ /dev/null @@ -1,60 +0,0 @@ -From f73b18e03c6758500bf367b1575205772d1f878f Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:53:52 -0400 -Subject: [PATCH 10/42] tests/boot-sector: Fix the bad s390x assembler code - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-11-thuth@redhat.com> -Patchwork-id: 97031 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 10/38] tests/boot-sector: Fix the bad s390x assembler code -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -There are currently two bugs in s390x_code[]: First, the initial jump -uses the wrong offset, so it was jumping to 0x10014 instead of 0x10010. -Second, LHI only loads the lower 32-bit of the register. - -Everything worked fine as long as the s390-ccw bios code was jumping -here with r3 containing zeroes in the uppermost 48 bit - which just -happened to be the case so far by accident. But we can not rely on this -fact, and indeed one of the recent suggested patches to jump2ipl.c cause -the newer GCCs to put different values into r3. In that case the code -from s390x_code[] crashes very ungracefully. - -Thus let's make sure to jump to the right instruction, and use LGHI -instead of LHI to make sure that we always zero out the upper bits -of the register. - -Signed-off-by: Thomas Huth -Message-Id: <20191217150642.27946-1-thuth@redhat.com> -Reviewed-by: Christian Borntraeger -Signed-off-by: Cornelia Huck -(cherry picked from commit 5afec76fbe2c07d03fd8c9ac525140059499637a) -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/boot-sector.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/tests/boot-sector.c b/tests/boot-sector.c -index 7824286b9a..9e66c6d013 100644 ---- a/tests/boot-sector.c -+++ b/tests/boot-sector.c -@@ -75,11 +75,11 @@ static const uint8_t s390x_psw_and_magic[] = { - 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 /* in the s390-ccw bios */ - }; - static const uint8_t s390x_code[] = { -- 0xa7, 0xf4, 0x00, 0x0a, /* j 0x10010 */ -+ 0xa7, 0xf4, 0x00, 0x08, /* j 0x10010 */ - 0x00, 0x00, 0x00, 0x00, - 'S', '3', '9', '0', - 'E', 'P', 0x00, 0x01, -- 0xa7, 0x38, HIGH(SIGNATURE_ADDR), LOW(SIGNATURE_ADDR), /* lhi r3,0x7c10 */ -+ 0xa7, 0x39, HIGH(SIGNATURE_ADDR), LOW(SIGNATURE_ADDR), /* lghi r3,0x7c10 */ - 0xa7, 0x48, LOW(SIGNATURE), HIGH(SIGNATURE), /* lhi r4,0xadde */ - 0x40, 0x40, 0x30, 0x00, /* sth r4,0(r3) */ - 0xa7, 0xf4, 0xff, 0xfa /* j 0x10010 */ --- -2.27.0 - diff --git a/SOURCES/kvm-tests-numa-Add-case-for-QMP-build-HMAT.patch b/SOURCES/kvm-tests-numa-Add-case-for-QMP-build-HMAT.patch deleted file mode 100644 index 41ee71c..0000000 --- a/SOURCES/kvm-tests-numa-Add-case-for-QMP-build-HMAT.patch +++ /dev/null @@ -1,266 +0,0 @@ -From 0f11aae02dcabd3a5ee0b5946aec39da6dddea52 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 21 May 2020 23:56:53 +0100 -Subject: [PATCH 10/12] tests/numa: Add case for QMP build HMAT - -RH-Author: plai@redhat.com -Message-id: <20200521235655.27141-10-plai@redhat.com> -Patchwork-id: 96735 -O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH 09/11] tests/numa: Add case for QMP build HMAT -Bugzilla: 1600217 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -From: Tao Xu - -Check configuring HMAT usecase - -Acked-by: Markus Armbruster -Suggested-by: Igor Mammedov -Signed-off-by: Tao Xu -Message-Id: <20191213011929.2520-8-tao3.xu@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Reviewed-by: Igor Mammedov -(cherry picked from commit d00817c944ed15fbe4a61d44fe7f9fe166c7df88) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. de Paula ---- - tests/numa-test.c | 213 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 213 insertions(+) - -diff --git a/tests/numa-test.c b/tests/numa-test.c -index 8de8581..17dd807 100644 ---- a/tests/numa-test.c -+++ b/tests/numa-test.c -@@ -327,6 +327,216 @@ static void pc_dynamic_cpu_cfg(const void *data) - qtest_quit(qs); - } - -+static void pc_hmat_build_cfg(const void *data) -+{ -+ QTestState *qs = qtest_initf("%s -nodefaults --preconfig -machine hmat=on " -+ "-smp 2,sockets=2 " -+ "-m 128M,slots=2,maxmem=1G " -+ "-object memory-backend-ram,size=64M,id=m0 " -+ "-object memory-backend-ram,size=64M,id=m1 " -+ "-numa node,nodeid=0,memdev=m0 " -+ "-numa node,nodeid=1,memdev=m1,initiator=0 " -+ "-numa cpu,node-id=0,socket-id=0 " -+ "-numa cpu,node-id=0,socket-id=1", -+ data ? (char *)data : ""); -+ -+ /* Fail: Initiator should be less than the number of nodes */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 2, 'target': 0," -+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }"))); -+ -+ /* Fail: Target should be less than the number of nodes */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 2," -+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }"))); -+ -+ /* Fail: Initiator should contain cpu */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 1, 'target': 0," -+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\" } }"))); -+ -+ /* Fail: Data-type mismatch */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 
'target': 0," -+ " 'hierarchy': \"memory\", 'data-type': \"write-latency\"," -+ " 'bandwidth': 524288000 } }"))); -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," -+ " 'hierarchy': \"memory\", 'data-type': \"read-bandwidth\"," -+ " 'latency': 5 } }"))); -+ -+ /* Fail: Bandwidth should be 1MB (1048576) aligned */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," -+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\"," -+ " 'bandwidth': 1048575 } }"))); -+ -+ /* Configuring HMAT bandwidth and latency details */ -+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," -+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\"," -+ " 'latency': 1 } }"))); /* 1 ns */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," -+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\"," -+ " 'latency': 5 } }"))); /* Fail: Duplicate configuration */ -+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," -+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\"," -+ " 'bandwidth': 68717379584 } }"))); /* 65534 MB/s */ -+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1," -+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\"," -+ " 'latency': 65534 } }"))); /* 65534 ns */ -+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1," -+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\"," -+ " 
'bandwidth': 34358689792 } }"))); /* 32767 MB/s */ -+ -+ /* Fail: node_id should be less than the number of nodes */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 2, 'size': 10240," -+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\"," -+ " 'line': 8 } }"))); -+ -+ /* Fail: level should be less than HMAT_LB_LEVELS (4) */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," -+ " 'level': 4, 'associativity': \"direct\", 'policy': \"write-back\"," -+ " 'line': 8 } }"))); -+ -+ /* Fail: associativity option should be 'none', if level is 0 */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," -+ " 'level': 0, 'associativity': \"direct\", 'policy': \"none\"," -+ " 'line': 0 } }"))); -+ /* Fail: policy option should be 'none', if level is 0 */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," -+ " 'level': 0, 'associativity': \"none\", 'policy': \"write-back\"," -+ " 'line': 0 } }"))); -+ /* Fail: line option should be 0, if level is 0 */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," -+ " 'level': 0, 'associativity': \"none\", 'policy': \"none\"," -+ " 'line': 8 } }"))); -+ -+ /* Configuring HMAT memory side cache attributes */ -+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," -+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\"," -+ " 'line': 8 } }"))); -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 
'hmat-cache', 'node-id': 0, 'size': 10240," -+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\"," -+ " 'line': 8 } }"))); /* Fail: Duplicate configuration */ -+ /* Fail: The size of level 2 size should be small than level 1 */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," -+ " 'level': 2, 'associativity': \"direct\", 'policy': \"write-back\"," -+ " 'line': 8 } }"))); -+ /* Fail: The size of level 0 size should be larger than level 1 */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," -+ " 'level': 0, 'associativity': \"direct\", 'policy': \"write-back\"," -+ " 'line': 8 } }"))); -+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 1, 'size': 10240," -+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\"," -+ " 'line': 8 } }"))); -+ -+ /* let machine initialization to complete and run */ -+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, -+ "{ 'execute': 'x-exit-preconfig' }"))); -+ qtest_qmp_eventwait(qs, "RESUME"); -+ -+ qtest_quit(qs); -+} -+ -+static void pc_hmat_off_cfg(const void *data) -+{ -+ QTestState *qs = qtest_initf("%s -nodefaults --preconfig " -+ "-smp 2,sockets=2 " -+ "-m 128M,slots=2,maxmem=1G " -+ "-object memory-backend-ram,size=64M,id=m0 " -+ "-object memory-backend-ram,size=64M,id=m1 " -+ "-numa node,nodeid=0,memdev=m0", -+ data ? 
(char *)data : ""); -+ -+ /* -+ * Fail: Enable HMAT with -machine hmat=on -+ * before using any of hmat specific options -+ */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'node', 'nodeid': 1, 'memdev': \"m1\"," -+ " 'initiator': 0 } }"))); -+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'node', 'nodeid': 1, 'memdev': \"m1\" } }"))); -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," -+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\"," -+ " 'latency': 1 } }"))); -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," -+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\"," -+ " 'line': 8 } }"))); -+ -+ /* let machine initialization to complete and run */ -+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, -+ "{ 'execute': 'x-exit-preconfig' }"))); -+ qtest_qmp_eventwait(qs, "RESUME"); -+ -+ qtest_quit(qs); -+} -+ -+static void pc_hmat_erange_cfg(const void *data) -+{ -+ QTestState *qs = qtest_initf("%s -nodefaults --preconfig -machine hmat=on " -+ "-smp 2,sockets=2 " -+ "-m 128M,slots=2,maxmem=1G " -+ "-object memory-backend-ram,size=64M,id=m0 " -+ "-object memory-backend-ram,size=64M,id=m1 " -+ "-numa node,nodeid=0,memdev=m0 " -+ "-numa node,nodeid=1,memdev=m1,initiator=0 " -+ "-numa cpu,node-id=0,socket-id=0 " -+ "-numa cpu,node-id=0,socket-id=1", -+ data ? 
(char *)data : ""); -+ -+ /* Can't store the compressed latency */ -+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," -+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\"," -+ " 'latency': 1 } }"))); /* 1 ns */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1," -+ " 'hierarchy': \"memory\", 'data-type': \"access-latency\"," -+ " 'latency': 65535 } }"))); /* 65535 ns */ -+ -+ /* Test the 0 input (bandwidth not provided) */ -+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 0," -+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\"," -+ " 'bandwidth': 0 } }"))); /* 0 MB/s */ -+ /* Fail: bandwidth should be provided before memory side cache attributes */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-cache', 'node-id': 0, 'size': 10240," -+ " 'level': 1, 'associativity': \"direct\", 'policy': \"write-back\"," -+ " 'line': 8 } }"))); -+ -+ /* Can't store the compressed bandwidth */ -+ g_assert_true(qmp_rsp_is_err(qtest_qmp(qs, "{ 'execute': 'set-numa-node'," -+ " 'arguments': { 'type': 'hmat-lb', 'initiator': 0, 'target': 1," -+ " 'hierarchy': \"memory\", 'data-type': \"access-bandwidth\"," -+ " 'bandwidth': 68718428160 } }"))); /* 65535 MB/s */ -+ -+ /* let machine initialization to complete and run */ -+ g_assert_false(qmp_rsp_is_err(qtest_qmp(qs, -+ "{ 'execute': 'x-exit-preconfig' }"))); -+ qtest_qmp_eventwait(qs, "RESUME"); -+ -+ qtest_quit(qs); -+} -+ - int main(int argc, char **argv) - { - const char *args = NULL; -@@ -346,6 +556,9 @@ int main(int argc, char **argv) - if (!strcmp(arch, "i386") || !strcmp(arch, "x86_64")) { - qtest_add_data_func("/numa/pc/cpu/explicit", args, pc_numa_cpu); - 
qtest_add_data_func("/numa/pc/dynamic/cpu", args, pc_dynamic_cpu_cfg); -+ qtest_add_data_func("/numa/pc/hmat/build", args, pc_hmat_build_cfg); -+ qtest_add_data_func("/numa/pc/hmat/off", args, pc_hmat_off_cfg); -+ qtest_add_data_func("/numa/pc/hmat/erange", args, pc_hmat_erange_cfg); - } - - if (!strcmp(arch, "ppc64")) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-tftp-check-tftp_input-buffer-size.patch b/SOURCES/kvm-tftp-check-tftp_input-buffer-size.patch deleted file mode 100644 index 85ed811..0000000 --- a/SOURCES/kvm-tftp-check-tftp_input-buffer-size.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 6bd4d80f9274f76eb402ce85aa60729150b39980 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 29 Jul 2021 04:56:34 -0400 -Subject: [PATCH 09/14] tftp: check tftp_input buffer size -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20210708082537.1550263-6-marcandre.lureau@redhat.com> -Patchwork-id: 101823 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 5/8] tftp: check tftp_input buffer size -Bugzilla: 1970843 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -From: Marc-André Lureau - -Fixes: CVE-2021-3595 -Fixes: https://gitlab.freedesktop.org/slirp/libslirp/-/issues/46 - -Signed-off-by: Marc-André Lureau - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1970843 - -(cherry picked from commit 3f17948137155f025f7809fdc38576d5d2451c3d) -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - slirp/src/tftp.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c -index 093c2e06a3..07e8f3cb2f 100644 ---- a/slirp/src/tftp.c -+++ b/slirp/src/tftp.c -@@ -444,7 +444,11 @@ static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, - - void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m) - { -- struct tftp_t *tp = (struct tftp_t 
*)m->m_data; -+ struct tftp_t *tp = mtod_check(m, offsetof(struct tftp_t, x.tp_buf)); -+ -+ if (tp == NULL) { -+ return; -+ } - - switch (ntohs(tp->tp_op)) { - case TFTP_RRQ: --- -2.27.0 - diff --git a/SOURCES/kvm-tftp-introduce-a-header-structure.patch b/SOURCES/kvm-tftp-introduce-a-header-structure.patch deleted file mode 100644 index d8c8ddb..0000000 --- a/SOURCES/kvm-tftp-introduce-a-header-structure.patch +++ /dev/null @@ -1,263 +0,0 @@ -From af72e344459614fcf2746739f05494ef7e691a78 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 29 Jul 2021 04:56:36 -0400 -Subject: [PATCH 10/14] tftp: introduce a header structure -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20210708082537.1550263-7-marcandre.lureau@redhat.com> -Patchwork-id: 101825 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 6/8] tftp: introduce a header structure -Bugzilla: 1970819 1970835 1970843 1970853 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -From: Marc-André Lureau - -Instead of using a composed structure and potentially reading past the -incoming buffer, use a different structure for the header. 
- -Signed-off-by: Marc-André Lureau - -(cherry picked from commit 990163cf3ac86b7875559f49602c4d76f46f6f30) -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - slirp/src/tftp.c | 58 +++++++++++++++++++++++++----------------------- - slirp/src/tftp.h | 6 ++++- - 2 files changed, 35 insertions(+), 29 deletions(-) - -diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c -index 07e8f3cb2f..53e04d0aeb 100644 ---- a/slirp/src/tftp.c -+++ b/slirp/src/tftp.c -@@ -50,7 +50,7 @@ static void tftp_session_terminate(struct tftp_session *spt) - } - - static int tftp_session_allocate(Slirp *slirp, struct sockaddr_storage *srcsas, -- struct tftp_t *tp) -+ struct tftphdr *hdr) - { - struct tftp_session *spt; - int k; -@@ -75,7 +75,7 @@ found: - memcpy(&spt->client_addr, srcsas, sockaddr_size(srcsas)); - spt->fd = -1; - spt->block_size = 512; -- spt->client_port = tp->udp.uh_sport; -+ spt->client_port = hdr->udp.uh_sport; - spt->slirp = slirp; - - tftp_session_update(spt); -@@ -84,7 +84,7 @@ found: - } - - static int tftp_session_find(Slirp *slirp, struct sockaddr_storage *srcsas, -- struct tftp_t *tp) -+ struct tftphdr *hdr) - { - struct tftp_session *spt; - int k; -@@ -94,7 +94,7 @@ static int tftp_session_find(Slirp *slirp, struct sockaddr_storage *srcsas, - - if (tftp_session_in_use(spt)) { - if (sockaddr_equal(&spt->client_addr, srcsas)) { -- if (spt->client_port == tp->udp.uh_sport) { -+ if (spt->client_port == hdr->udp.uh_sport) { - return k; - } - } -@@ -146,13 +146,13 @@ static struct tftp_t *tftp_prep_mbuf_data(struct tftp_session *spt, - } - - static void tftp_udp_output(struct tftp_session *spt, struct mbuf *m, -- struct tftp_t *recv_tp) -+ struct tftphdr *hdr) - { - if (spt->client_addr.ss_family == AF_INET6) { - struct sockaddr_in6 sa6, da6; - - sa6.sin6_addr = spt->slirp->vhost_addr6; -- sa6.sin6_port = recv_tp->udp.uh_dport; -+ sa6.sin6_port = hdr->udp.uh_dport; - da6.sin6_addr = ((struct sockaddr_in6 *)&spt->client_addr)->sin6_addr; - da6.sin6_port 
= spt->client_port; - -@@ -161,7 +161,7 @@ static void tftp_udp_output(struct tftp_session *spt, struct mbuf *m, - struct sockaddr_in sa4, da4; - - sa4.sin_addr = spt->slirp->vhost_addr; -- sa4.sin_port = recv_tp->udp.uh_dport; -+ sa4.sin_port = hdr->udp.uh_dport; - da4.sin_addr = ((struct sockaddr_in *)&spt->client_addr)->sin_addr; - da4.sin_port = spt->client_port; - -@@ -183,7 +183,7 @@ static int tftp_send_oack(struct tftp_session *spt, const char *keys[], - - tp = tftp_prep_mbuf_data(spt, m); - -- tp->tp_op = htons(TFTP_OACK); -+ tp->hdr.tp_op = htons(TFTP_OACK); - for (i = 0; i < nb; i++) { - n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", - keys[i]) + -@@ -195,7 +195,7 @@ static int tftp_send_oack(struct tftp_session *spt, const char *keys[], - - m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + n - - sizeof(struct udphdr); -- tftp_udp_output(spt, m, recv_tp); -+ tftp_udp_output(spt, m, &recv_tp->hdr); - - return 0; - } -@@ -216,21 +216,21 @@ static void tftp_send_error(struct tftp_session *spt, uint16_t errorcode, - - tp = tftp_prep_mbuf_data(spt, m); - -- tp->tp_op = htons(TFTP_ERROR); -+ tp->hdr.tp_op = htons(TFTP_ERROR); - tp->x.tp_error.tp_error_code = htons(errorcode); - slirp_pstrcpy((char *)tp->x.tp_error.tp_msg, sizeof(tp->x.tp_error.tp_msg), - msg); - - m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + 3 + - strlen(msg) - sizeof(struct udphdr); -- tftp_udp_output(spt, m, recv_tp); -+ tftp_udp_output(spt, m, &recv_tp->hdr); - - out: - tftp_session_terminate(spt); - } - - static void tftp_send_next_block(struct tftp_session *spt, -- struct tftp_t *recv_tp) -+ struct tftphdr *hdr) - { - struct mbuf *m; - struct tftp_t *tp; -@@ -244,7 +244,7 @@ static void tftp_send_next_block(struct tftp_session *spt, - - tp = tftp_prep_mbuf_data(spt, m); - -- tp->tp_op = htons(TFTP_DATA); -+ tp->hdr.tp_op = htons(TFTP_DATA); - tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff); - - nobytes = tftp_read_data(spt, 
spt->block_nr, tp->x.tp_data.tp_buf, -@@ -262,7 +262,7 @@ static void tftp_send_next_block(struct tftp_session *spt, - - m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX - nobytes) - - sizeof(struct udphdr); -- tftp_udp_output(spt, m, recv_tp); -+ tftp_udp_output(spt, m, hdr); - - if (nobytes == spt->block_size) { - tftp_session_update(spt); -@@ -285,12 +285,12 @@ static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, - int nb_options = 0; - - /* check if a session already exists and if so terminate it */ -- s = tftp_session_find(slirp, srcsas, tp); -+ s = tftp_session_find(slirp, srcsas, &tp->hdr); - if (s >= 0) { - tftp_session_terminate(&slirp->tftp_sessions[s]); - } - -- s = tftp_session_allocate(slirp, srcsas, tp); -+ s = tftp_session_allocate(slirp, srcsas, &tp->hdr); - - if (s < 0) { - return; -@@ -411,29 +411,29 @@ static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, - } - - spt->block_nr = 0; -- tftp_send_next_block(spt, tp); -+ tftp_send_next_block(spt, &tp->hdr); - } - - static void tftp_handle_ack(Slirp *slirp, struct sockaddr_storage *srcsas, -- struct tftp_t *tp, int pktlen) -+ struct tftphdr *hdr) - { - int s; - -- s = tftp_session_find(slirp, srcsas, tp); -+ s = tftp_session_find(slirp, srcsas, hdr); - - if (s < 0) { - return; - } - -- tftp_send_next_block(&slirp->tftp_sessions[s], tp); -+ tftp_send_next_block(&slirp->tftp_sessions[s], hdr); - } - - static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, -- struct tftp_t *tp, int pktlen) -+ struct tftphdr *hdr) - { - int s; - -- s = tftp_session_find(slirp, srcsas, tp); -+ s = tftp_session_find(slirp, srcsas, hdr); - - if (s < 0) { - return; -@@ -444,23 +444,25 @@ static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, - - void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m) - { -- struct tftp_t *tp = mtod_check(m, offsetof(struct tftp_t, x.tp_buf)); -+ struct tftphdr *hdr = mtod_check(m, 
sizeof(struct tftphdr)); - -- if (tp == NULL) { -+ if (hdr == NULL) { - return; - } - -- switch (ntohs(tp->tp_op)) { -+ switch (ntohs(hdr->tp_op)) { - case TFTP_RRQ: -- tftp_handle_rrq(m->slirp, srcsas, tp, m->m_len); -+ tftp_handle_rrq(m->slirp, srcsas, -+ mtod(m, struct tftp_t *), -+ m->m_len); - break; - - case TFTP_ACK: -- tftp_handle_ack(m->slirp, srcsas, tp, m->m_len); -+ tftp_handle_ack(m->slirp, srcsas, hdr); - break; - - case TFTP_ERROR: -- tftp_handle_error(m->slirp, srcsas, tp, m->m_len); -+ tftp_handle_error(m->slirp, srcsas, hdr); - break; - } - } -diff --git a/slirp/src/tftp.h b/slirp/src/tftp.h -index c47bb43c7d..021f6cf109 100644 ---- a/slirp/src/tftp.h -+++ b/slirp/src/tftp.h -@@ -18,9 +18,13 @@ - #define TFTP_FILENAME_MAX 512 - #define TFTP_BLOCKSIZE_MAX 1428 - --struct tftp_t { -+struct tftphdr { - struct udphdr udp; - uint16_t tp_op; -+} SLIRP_PACKED; -+ -+struct tftp_t { -+ struct tftphdr hdr; - union { - struct { - uint16_t tp_block_nr; --- -2.27.0 - diff --git a/SOURCES/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch b/SOURCES/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch deleted file mode 100644 index 3efef47..0000000 --- a/SOURCES/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch +++ /dev/null @@ -1,55 +0,0 @@ -From e483eea891139ee38138381ba6715b3a2be050cc Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:12 +0000 -Subject: [PATCH 16/18] tools/virtiofsd/fuse_lowlevel: Fix - fuse_out_header::error value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200303184314.155564-6-dgilbert@redhat.com> -Patchwork-id: 94128 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/7] tools/virtiofsd/fuse_lowlevel: Fix fuse_out_header::error value -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: Philippe Mathieu-Daudé - -Fix warning reported by Clang static code analyzer: - - CC tools/virtiofsd/fuse_lowlevel.o - tools/virtiofsd/fuse_lowlevel.c:195:9: warning: Value stored to 'error' is never read - error = -ERANGE; - ^ ~~~~~~~ - -Fixes: 3db2876 -Reported-by: Clang Static Analyzer -Reviewed-by: Ján Tomko -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 09c086b2a144324199f99a7d4de78c3276a486c1) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/fuse_lowlevel.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 704c036..2dd36ec 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -192,7 +192,7 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, - - if (error <= -1000 || error > 0) { - fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); -- error = -ERANGE; -+ out.error = -ERANGE; - } - - iov[0].iov_base = &out; --- -1.8.3.1 - diff --git a/SOURCES/kvm-tools-virtiofsd-passthrough_ll-Fix-double-close.patch b/SOURCES/kvm-tools-virtiofsd-passthrough_ll-Fix-double-close.patch deleted file mode 100644 index 6af549a..0000000 --- a/SOURCES/kvm-tools-virtiofsd-passthrough_ll-Fix-double-close.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 8ce8ccc2a22798a89bac06a37427c3a3cea91a62 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Tue, 5 May 2020 16:35:54 +0100 -Subject: [PATCH 3/9] tools/virtiofsd/passthrough_ll: Fix double close() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200505163600.22956-2-dgilbert@redhat.com> -Patchwork-id: 96269 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/7] tools/virtiofsd/passthrough_ll: Fix double close() -Bugzilla: 1817445 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Michael S. Tsirkin - -From: Philippe Mathieu-Daudé - -On success, the fdopendir() call closes fd. Later on the error -path we try to close an already-closed fd. This can lead to -use-after-free. Fix by only closing the fd if the fdopendir() -call failed. - -Cc: qemu-stable@nongnu.org -Fixes: b39bce121b (add dirp_map to hide lo_dirp pointers) -Reported-by: Coverity (CID 1421933 USE_AFTER_FREE) -Suggested-by: Peter Maydell -Signed-off-by: Philippe Mathieu-Daudé -Message-Id: <20200321120654.7985-1-philmd@redhat.com> -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e1cd92d95cd4f97b3464c4e08cd5b22bf5ca05cb) -Signed-off-by: Danilo C. L. 
de Paula ---- - tools/virtiofsd/passthrough_ll.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9cba3f1..50ff672 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1524,8 +1524,7 @@ out_err: - if (d) { - if (d->dp) { - closedir(d->dp); -- } -- if (fd != -1) { -+ } else if (fd != -1) { - close(fd); - } - free(d); --- -1.8.3.1 - diff --git a/SOURCES/kvm-tpm-ppi-page-align-PPI-RAM.patch b/SOURCES/kvm-tpm-ppi-page-align-PPI-RAM.patch deleted file mode 100644 index 32c971d..0000000 --- a/SOURCES/kvm-tpm-ppi-page-align-PPI-RAM.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 7cb1c5e1416de9a09180f0930d2a216c77e8cdbd Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 30 Jan 2020 16:01:10 +0000 -Subject: [PATCH 07/15] tpm-ppi: page-align PPI RAM -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200130160110.126086-1-marcandre.lureau@redhat.com> -Patchwork-id: 93600 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] tpm-ppi: page-align PPI RAM -Bugzilla: 1787444 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Philippe Mathieu-Daudé - -post-copy migration fails on destination with error such as: -2019-12-26T10:22:44.714644Z qemu-kvm: ram_block_discard_range: -Unaligned start address: 0x559d2afae9a0 - -Use qemu_memalign() to constrain the PPI RAM memory alignment. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Marc-André Lureau -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Dr. 
David Alan Gilbert -Reviewed-by: Stefan Berger -Signed-off-by: Stefan Berger -Message-id: 20200103074000.1006389-3-marcandre.lureau@redhat.com - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1787444 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=26122940 - -(cherry picked from commit 71e415c8a75c130875f14d6b2136825789feb297) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. de Paula ---- - hw/tpm/tpm_ppi.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c -index ff31459..6d9c1a3 100644 ---- a/hw/tpm/tpm_ppi.c -+++ b/hw/tpm/tpm_ppi.c -@@ -43,7 +43,8 @@ void tpm_ppi_reset(TPMPPI *tpmppi) - void tpm_ppi_init(TPMPPI *tpmppi, struct MemoryRegion *m, - hwaddr addr, Object *obj) - { -- tpmppi->buf = g_malloc0(HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); -+ tpmppi->buf = qemu_memalign(qemu_real_host_page_size, -+ HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); - memory_region_init_ram_device_ptr(&tpmppi->ram, obj, "tpm-ppi", - TPM_PPI_ADDR_SIZE, tpmppi->buf); - vmstate_register_ram(&tpmppi->ram, DEVICE(obj)); --- -1.8.3.1 - diff --git a/SOURCES/kvm-trace-update-qemu-trace-stap-to-Python-3.patch b/SOURCES/kvm-trace-update-qemu-trace-stap-to-Python-3.patch deleted file mode 100644 index c49aecd..0000000 --- a/SOURCES/kvm-trace-update-qemu-trace-stap-to-Python-3.patch +++ /dev/null @@ -1,82 +0,0 @@ -From e7cdcd1e39c4c030a32c9e8ef79316eae8555bc8 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 16 Jan 2020 17:52:48 +0000 -Subject: [PATCH 04/15] trace: update qemu-trace-stap to Python 3 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -Message-id: <20200116175248.286556-2-stefanha@redhat.com> -Patchwork-id: 93365 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] trace: update qemu-trace-stap to Python 3 -Bugzilla: 1787395 -RH-Acked-by: John Snow -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Dr. 
David Alan Gilbert - -qemu-trace-stap does not support Python 3 yet: - - $ scripts/qemu-trace-stap list path/to/qemu-system-x86_64 - Traceback (most recent call last): - File "scripts/qemu-trace-stap", line 175, in - main() - File "scripts/qemu-trace-stap", line 171, in main - args.func(args) - File "scripts/qemu-trace-stap", line 118, in cmd_list - print_probes(args.verbose, "*") - File "scripts/qemu-trace-stap", line 114, in print_probes - if line.startswith(prefix): - TypeError: startswith first arg must be bytes or a tuple of bytes, not str - -Now that QEMU requires Python 3.5 or later we can switch to pure Python -3. Use Popen()'s universal_newlines=True argument to treat stdout as -text instead of binary. - -Fixes: 62dd1048c0bd ("trace: add ability to do simple printf logging via systemtap") -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1787395 -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Message-id: 20200107112438.383958-1-stefanha@redhat.com -Message-Id: <20200107112438.383958-1-stefanha@redhat.com> -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 3f0097169bb60268cc5dda0c5ea47c31ab57b22f) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. de Paula ---- - scripts/qemu-trace-stap | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/scripts/qemu-trace-stap b/scripts/qemu-trace-stap -index 91d1051..90527eb 100755 ---- a/scripts/qemu-trace-stap -+++ b/scripts/qemu-trace-stap -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/env python3 - # -*- python -*- - # - # Copyright (C) 2019 Red Hat, Inc -@@ -18,8 +18,6 @@ - # You should have received a copy of the GNU General Public License - # along with this program; if not, see . 
- --from __future__ import print_function -- - import argparse - import copy - import os.path -@@ -104,7 +102,9 @@ def cmd_list(args): - if verbose: - print("Listing probes with name '%s'" % script) - proc = subprocess.Popen(["stap", "-l", script], -- stdout=subprocess.PIPE, env=tapset_env(tapsets)) -+ stdout=subprocess.PIPE, -+ universal_newlines=True, -+ env=tapset_env(tapsets)) - out, err = proc.communicate() - if proc.returncode != 0: - print("No probes found, are the tapsets installed in %s" % tapset_dir(args.binary)) --- -1.8.3.1 - diff --git a/SOURCES/kvm-trace-use-STAP_SDT_V2-to-work-around-symbol-visibili.patch b/SOURCES/kvm-trace-use-STAP_SDT_V2-to-work-around-symbol-visibili.patch deleted file mode 100644 index 059445b..0000000 --- a/SOURCES/kvm-trace-use-STAP_SDT_V2-to-work-around-symbol-visibili.patch +++ /dev/null @@ -1,116 +0,0 @@ -From ba3068eb1a349ec4ed8b7ccdae76450f0c315be9 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 19 Nov 2020 17:23:11 -0500 -Subject: [PATCH 18/18] trace: use STAP_SDT_V2 to work around symbol visibility -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -Message-id: <20201119172311.942629-2-stefanha@redhat.com> -Patchwork-id: 99779 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] trace: use STAP_SDT_V2 to work around symbol visibility -Bugzilla: 1898700 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Philippe Mathieu-Daudé - -QEMU binaries no longer launch successfully with recent SystemTap -releases. This is because modular QEMU builds link the sdt semaphores -into the main binary instead of into the shared objects where they are -used. The symbol visibility of semaphores is 'hidden' and the dynamic -linker prints an error during module loading: - - $ ./configure --enable-trace-backends=dtrace --enable-modules ... - ... 
- Failed to open module: /builddir/build/BUILD/qemu-4.2.0/s390x-softmmu/../block-curl.so: undefined symbol: qemu_curl_close_semaphore - -The long-term solution is to generate per-module dtrace .o files and -link them into the module instead of the main binary. - -In the short term we can define STAP_SDT_V2 so dtrace(1) produces a .o -file with 'default' symbol visibility instead of 'hidden'. This -workaround is small and easier to merge for QEMU 5.2 and downstream -backports. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1898700 -Cc: wcohen@redhat.com -Cc: fche@redhat.com -Cc: kraxel@redhat.com -Cc: rjones@redhat.com -Cc: ddepaula@redhat.com -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Miroslav Rezanina - -(cherry picked from commit 4b265c79a85bb35abe19aacea6954c1616521639) -Signed-off-by: Stefan Hajnoczi - -Conflicts: - trace/meson.build - Downstream uses makefiles, so move the dtrace invocation changes to - rules.mak and Makefile. -Signed-off-by: Danilo C. L. 
de Paula ---- - Makefile | 4 ++-- - configure | 7 +++++++ - rules.mak | 2 +- - 3 files changed, 10 insertions(+), 3 deletions(-) - -diff --git a/Makefile b/Makefile -index ff05c309497..29b01a13ee3 100644 ---- a/Makefile -+++ b/Makefile -@@ -198,7 +198,7 @@ tracetool-y += $(shell find $(SRC_PATH)/scripts/tracetool -name "*.py") - $< > $@,"GEN","$(@:%-timestamp=%)") - - %/trace-dtrace.h: %/trace-dtrace.dtrace $(tracetool-y) -- $(call quiet-command,dtrace -o $@ -h -s $<, "GEN","$@") -+ $(call quiet-command,dtrace -o $@ -DSTAP_SDT_V2 -h -s $<, "GEN","$@") - - %/trace-dtrace.o: %/trace-dtrace.dtrace $(tracetool-y) - -@@ -258,7 +258,7 @@ trace-dtrace-root.dtrace-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config - $< > $@,"GEN","$(@:%-timestamp=%)") - - trace-dtrace-root.h: trace-dtrace-root.dtrace -- $(call quiet-command,dtrace -o $@ -h -s $<, "GEN","$@") -+ $(call quiet-command,dtrace -o $@ -DSTAP_SDT_V2 -h -s $<, "GEN","$@") - - trace-dtrace-root.o: trace-dtrace-root.dtrace - -diff --git a/configure b/configure -index 5120c1409a7..c62b61403f6 100755 ---- a/configure -+++ b/configure -@@ -5275,6 +5275,13 @@ if have_backend "dtrace"; then - trace_backend_stap="no" - if has 'stap' ; then - trace_backend_stap="yes" -+ -+ # Workaround to avoid dtrace(1) producing a file with 'hidden' symbol -+ # visibility. Define STAP_SDT_V2 to produce 'default' symbol visibility -+ # instead. QEMU --enable-modules depends on this because the SystemTap -+ # semaphores are linked into the main binary and not the module's shared -+ # object. 
-+ QEMU_CFLAGS="$QEMU_CFLAGS -DSTAP_SDT_V2" - fi - fi - -diff --git a/rules.mak b/rules.mak -index 967295dd2b6..bdfc223a5a1 100644 ---- a/rules.mak -+++ b/rules.mak -@@ -101,7 +101,7 @@ LINK = $(call quiet-command, $(LINKPROG) $(QEMU_LDFLAGS) $(QEMU_CFLAGS) $(CFLAGS - -c -o $@ $<,"OBJC","$(TARGET_DIR)$@") - - %.o: %.dtrace -- $(call quiet-command,dtrace -o $@ -G -s $<,"GEN","$(TARGET_DIR)$@") -+ $(call quiet-command,dtrace -o $@ -DSTAP_SDT_V2 -G -s $<,"GEN","$(TARGET_DIR)$@") - - DSO_OBJ_CFLAGS := -fPIC -DBUILD_DSO - module-common.o: CFLAGS += $(DSO_OBJ_CFLAGS) --- -2.27.0 - diff --git a/SOURCES/kvm-tx_pkt-switch-to-use-qemu_receive_packet_iov-for-loo.patch b/SOURCES/kvm-tx_pkt-switch-to-use-qemu_receive_packet_iov-for-loo.patch deleted file mode 100644 index 4da71cc..0000000 --- a/SOURCES/kvm-tx_pkt-switch-to-use-qemu_receive_packet_iov-for-loo.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 87cacc268f37758553ad93fefa8b312ed0bd2520 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 29 Jun 2021 03:42:43 -0400 -Subject: [PATCH 5/9] tx_pkt: switch to use qemu_receive_packet_iov() for - loopback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210629034247.3286477-6-jmaloy@redhat.com> -Patchwork-id: 101788 -O-Subject: [RHEL-8.4.0.z qemu-kvm PATCH v2 5/9] tx_pkt: switch to use qemu_receive_packet_iov() for loopback -Bugzilla: 1932917 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Jason Wang - -This patch switches to use qemu_receive_receive_iov() which can detect -reentrancy and return early. - -This is intended to address CVE-2021-3416. - -Cc: Prasad J Pandit -Cc: qemu-stable@nongnu.org -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Jason Wang - -(cherry picked from commit 8c552542b81e56ff532dd27ec6e5328954bdda73) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/net_tx_pkt.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c -index 54d4c3bbd0..646cdfaf4d 100644 ---- a/hw/net/net_tx_pkt.c -+++ b/hw/net/net_tx_pkt.c -@@ -544,7 +544,7 @@ static inline void net_tx_pkt_sendv(struct NetTxPkt *pkt, - NetClientState *nc, const struct iovec *iov, int iov_cnt) - { - if (pkt->is_loopback) { -- nc->info->receive_iov(nc, iov, iov_cnt); -+ qemu_receive_packet_iov(nc, iov, iov_cnt); - } else { - qemu_sendv_packet(nc, iov, iov_cnt); - } --- -2.27.0 - diff --git a/SOURCES/kvm-udp-check-upd_input-buffer-size.patch b/SOURCES/kvm-udp-check-upd_input-buffer-size.patch deleted file mode 100644 index 0f3c6f3..0000000 --- a/SOURCES/kvm-udp-check-upd_input-buffer-size.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 1b8aa33b218a8ff3e8aa2f1b6875df40fd70f0ed Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 29 Jul 2021 04:56:40 -0400 -Subject: [PATCH 11/14] udp: check upd_input buffer size -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20210708082537.1550263-8-marcandre.lureau@redhat.com> -Patchwork-id: 101826 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 7/8] udp: check upd_input buffer size -Bugzilla: 1970853 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -From: Marc-André Lureau - -Fixes: CVE-2021-3594 -Fixes: https://gitlab.freedesktop.org/slirp/libslirp/-/issues/47 - -Signed-off-by: Marc-André Lureau - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1970853 - -(cherry picked from commit 74572be49247c8c5feae7c6e0b50c4f569ca9824) -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - slirp/src/udp.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/slirp/src/udp.c b/slirp/src/udp.c -index ae23ba4b2a..86142bba14 100644 ---- a/slirp/src/udp.c -+++ b/slirp/src/udp.c -@@ 
-90,7 +90,10 @@ void udp_input(register struct mbuf *m, int iphlen) - /* - * Get IP and UDP header together in first mbuf. - */ -- ip = mtod(m, struct ip *); -+ ip = mtod_check(m, iphlen + sizeof(struct udphdr)); -+ if (ip == NULL) { -+ goto bad; -+ } - uh = (struct udphdr *)((char *)ip + iphlen); - - /* --- -2.27.0 - diff --git a/SOURCES/kvm-upd6-check-udp6_input-buffer-size.patch b/SOURCES/kvm-upd6-check-udp6_input-buffer-size.patch deleted file mode 100644 index 2aa3a24..0000000 --- a/SOURCES/kvm-upd6-check-udp6_input-buffer-size.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 6808086932ddc83fd748c46fea495e7004299b55 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 29 Jul 2021 04:56:31 -0400 -Subject: [PATCH 08/14] upd6: check udp6_input buffer size -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20210708082537.1550263-5-marcandre.lureau@redhat.com> -Patchwork-id: 101822 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 4/8] upd6: check udp6_input buffer size -Bugzilla: 1970835 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -From: Marc-André Lureau - -Fixes: CVE-2021-3593 -Fixes: https://gitlab.freedesktop.org/slirp/libslirp/-/issues/45 - -Signed-off-by: Marc-André Lureau - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1970835 - -(cherry picked from commit de71c15de66ba9350bf62c45b05f8fbff166517b) -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - slirp/src/udp6.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c -index 6f9486bbca..8c490e4d10 100644 ---- a/slirp/src/udp6.c -+++ b/slirp/src/udp6.c -@@ -28,7 +28,10 @@ void udp6_input(struct mbuf *m) - ip = mtod(m, struct ip6 *); - m->m_len -= iphlen; - m->m_data += iphlen; -- uh = mtod(m, struct udphdr *); -+ uh = mtod_check(m, sizeof(struct udphdr)); -+ if (uh == NULL) { -+ goto 
bad; -+ } - m->m_len += iphlen; - m->m_data -= iphlen; - --- -2.27.0 - diff --git a/SOURCES/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch b/SOURCES/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch deleted file mode 100644 index 5e63299..0000000 --- a/SOURCES/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch +++ /dev/null @@ -1,102 +0,0 @@ -From feb16ff29a13a4286389bb8b9d4f541aab9b84f1 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Thu, 3 Sep 2020 15:27:13 -0400 -Subject: [PATCH] usb: fix setup_len init (CVE-2020-14364) - -RH-Author: Jon Maloy -Message-id: <20200903152713.1420531-2-jmaloy@redhat.com> -Patchwork-id: 98271 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/1] usb: fix setup_len init (CVE-2020-14364) -Bugzilla: 1869710 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Thomas Huth -RH-Acked-by: Gerd Hoffmann - -From: Gerd Hoffmann - -Store calculated setup_len in a local variable, verify it, and only -write it to the struct (USBDevice->setup_len) in case it passed the -sanity checks. - -This prevents other code (do_token_{in,out} functions specifically) -from working with invalid USBDevice->setup_len values and overrunning -the USBDevice->setup_buf[] buffer. - -Fixes: CVE-2020-14364 -Signed-off-by: Gerd Hoffmann -Tested-by: Gonglei -Reviewed-by: Li Qiang -Message-id: 20200825053636.29648-1-kraxel@redhat.com -(cherry picked from commit b946434f2659a182afc17e155be6791ebfb302eb) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/usb/core.c | 16 ++++++++++------ - 1 file changed, 10 insertions(+), 6 deletions(-) - -diff --git a/hw/usb/core.c b/hw/usb/core.c -index 5abd128b6b..5234dcc73f 100644 ---- a/hw/usb/core.c -+++ b/hw/usb/core.c -@@ -129,6 +129,7 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream) - static void do_token_setup(USBDevice *s, USBPacket *p) - { - int request, value, index; -+ unsigned int setup_len; - - if (p->iov.size != 8) { - p->status = USB_RET_STALL; -@@ -138,14 +139,15 @@ static void do_token_setup(USBDevice *s, USBPacket *p) - usb_packet_copy(p, s->setup_buf, p->iov.size); - s->setup_index = 0; - p->actual_length = 0; -- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; -- if (s->setup_len > sizeof(s->data_buf)) { -+ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; -+ if (setup_len > sizeof(s->data_buf)) { - fprintf(stderr, - "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", -- s->setup_len, sizeof(s->data_buf)); -+ setup_len, sizeof(s->data_buf)); - p->status = USB_RET_STALL; - return; - } -+ s->setup_len = setup_len; - - request = (s->setup_buf[0] << 8) | s->setup_buf[1]; - value = (s->setup_buf[3] << 8) | s->setup_buf[2]; -@@ -259,26 +261,28 @@ static void do_token_out(USBDevice *s, USBPacket *p) - static void do_parameter(USBDevice *s, USBPacket *p) - { - int i, request, value, index; -+ unsigned int setup_len; - - for (i = 0; i < 8; i++) { - s->setup_buf[i] = p->parameter >> (i*8); - } - - s->setup_state = SETUP_STATE_PARAM; -- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; - s->setup_index = 0; - - request = (s->setup_buf[0] << 8) | s->setup_buf[1]; - value = (s->setup_buf[3] << 8) | s->setup_buf[2]; - index = (s->setup_buf[5] << 8) | s->setup_buf[4]; - -- if (s->setup_len > sizeof(s->data_buf)) { -+ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; -+ if (setup_len > sizeof(s->data_buf)) { - fprintf(stderr, - "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", -- s->setup_len, 
sizeof(s->data_buf)); -+ setup_len, sizeof(s->data_buf)); - p->status = USB_RET_STALL; - return; - } -+ s->setup_len = setup_len; - - if (p->pid == USB_TOKEN_OUT) { - usb_packet_copy(p, s->data_buf, s->setup_len); --- -2.27.0 - diff --git a/SOURCES/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch b/SOURCES/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch deleted file mode 100644 index 8f08256..0000000 --- a/SOURCES/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 8f6311159977b8ee4b78172caa411d3cee4d2ae5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 14 Jan 2020 20:23:30 +0000 -Subject: [PATCH 4/5] usbredir: Prevent recursion in usbredir_write -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200114202331.51831-2-dgilbert@redhat.com> -Patchwork-id: 93344 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] usbredir: Prevent recursion in usbredir_write -Bugzilla: 1790844 -RH-Acked-by: Peter Xu -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Gerd Hoffmann - -From: "Dr. David Alan Gilbert" - -I've got a case where usbredir_write manages to call back into itself -via spice; this patch causes the recursion to fail (0 bytes) the write; -this seems to avoid the deadlock I was previously seeing. - -I can't say I fully understand the interaction of usbredir and spice; -but there are a few similar guards in spice and usbredir -to catch other cases especially onces also related to spice_server_char_device_wakeup - -This case seems to be triggered by repeated migration+repeated -reconnection of the viewer; but my debugging suggests the migration -finished before this hits. 
- -The backtrace of the hang looks like: - reds_handle_ticket - reds_handle_other_links - reds_channel_do_link - red_channel_connect - spicevmc_connect - usbredir_create_parser - usbredirparser_do_write - usbredir_write - qemu_chr_fe_write - qemu_chr_write - qemu_chr_write_buffer - spice_chr_write - spice_server_char_device_wakeup - red_char_device_wakeup - red_char_device_write_to_device - vmc_write - usbredirparser_do_write - usbredir_write - qemu_chr_fe_write - qemu_chr_write - qemu_chr_write_buffer - qemu_mutex_lock_impl - -and we fail as we land through qemu_chr_write_buffer's lock -twice. - -Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1752320 - -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20191218113012.13331-1-dgilbert@redhat.com> -Signed-off-by: Gerd Hoffmann -(cherry picked from commit 394642a8d3742c885e397d5bb5ee0ec40743cdc6) -Signed-off-by: Danilo C. L. de Paula ---- - hw/usb/redirect.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c -index e0f5ca6..97f2c3a 100644 ---- a/hw/usb/redirect.c -+++ b/hw/usb/redirect.c -@@ -113,6 +113,7 @@ struct USBRedirDevice { - /* Properties */ - CharBackend cs; - bool enable_streams; -+ bool in_write; - uint8_t debug; - int32_t bootindex; - char *filter_str; -@@ -290,6 +291,13 @@ static int usbredir_write(void *priv, uint8_t *data, int count) - return 0; - } - -+ /* Recursion check */ -+ if (dev->in_write) { -+ DPRINTF("usbredir_write recursion\n"); -+ return 0; -+ } -+ dev->in_write = true; -+ - r = qemu_chr_fe_write(&dev->cs, data, count); - if (r < count) { - if (!dev->watch) { -@@ -300,6 +308,7 @@ static int usbredir_write(void *priv, uint8_t *data, int count) - r = 0; - } - } -+ dev->in_write = false; - return r; - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-util-Introduce-qemu_get_host_name.patch b/SOURCES/kvm-util-Introduce-qemu_get_host_name.patch deleted file mode 100644 index da21888..0000000 --- 
a/SOURCES/kvm-util-Introduce-qemu_get_host_name.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 41510fba34cda98cb85a8d04e46dcfdd9a91aa61 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 24 Dec 2020 12:53:03 -0500 -Subject: [PATCH 3/5] util: Introduce qemu_get_host_name() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20201224125304.62697-3-marcandre.lureau@redhat.com> -Patchwork-id: 100499 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 2/3] util: Introduce qemu_get_host_name() -Bugzilla: 1910326 -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Philippe Mathieu-Daudé - -From: Michal Privoznik - -This function offers operating system agnostic way to fetch host -name. It is implemented for both POSIX-like and Windows systems. - -Signed-off-by: Michal Privoznik -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Daniel P. Berrangé -Cc: qemu-stable@nongnu.org -Signed-off-by: Michael Roth - -(cherry picked from commit e47f4765afcab2b78dfa5b0115abf64d1d49a5d3) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. de Paula ---- - include/qemu/osdep.h | 10 ++++++++++ - util/oslib-posix.c | 35 +++++++++++++++++++++++++++++++++++ - util/oslib-win32.c | 13 +++++++++++++ - 3 files changed, 58 insertions(+) - -diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h -index 0f97d68586a..d427e81a427 100644 ---- a/include/qemu/osdep.h -+++ b/include/qemu/osdep.h -@@ -620,4 +620,14 @@ static inline void qemu_reset_optind(void) - #endif - } - -+/** -+ * qemu_get_host_name: -+ * @errp: Error object -+ * -+ * Operating system agnostic way of querying host name. -+ * -+ * Returns allocated hostname (caller should free), NULL on failure. 
-+ */ -+char *qemu_get_host_name(Error **errp); -+ - #endif -diff --git a/util/oslib-posix.c b/util/oslib-posix.c -index 5a291cc9820..8f88e4dbe10 100644 ---- a/util/oslib-posix.c -+++ b/util/oslib-posix.c -@@ -726,3 +726,38 @@ void sigaction_invoke(struct sigaction *action, - } - action->sa_sigaction(info->ssi_signo, &si, NULL); - } -+ -+#ifndef HOST_NAME_MAX -+# ifdef _POSIX_HOST_NAME_MAX -+# define HOST_NAME_MAX _POSIX_HOST_NAME_MAX -+# else -+# define HOST_NAME_MAX 255 -+# endif -+#endif -+ -+char *qemu_get_host_name(Error **errp) -+{ -+ long len = -1; -+ g_autofree char *hostname = NULL; -+ -+#ifdef _SC_HOST_NAME_MAX -+ len = sysconf(_SC_HOST_NAME_MAX); -+#endif /* _SC_HOST_NAME_MAX */ -+ -+ if (len < 0) { -+ len = HOST_NAME_MAX; -+ } -+ -+ /* Unfortunately, gethostname() below does not guarantee a -+ * NULL terminated string. Therefore, allocate one byte more -+ * to be sure. */ -+ hostname = g_new0(char, len + 1); -+ -+ if (gethostname(hostname, len) < 0) { -+ error_setg_errno(errp, errno, -+ "cannot get hostname"); -+ return NULL; -+ } -+ -+ return g_steal_pointer(&hostname); -+} -diff --git a/util/oslib-win32.c b/util/oslib-win32.c -index e9b14ab1784..3b49d272972 100644 ---- a/util/oslib-win32.c -+++ b/util/oslib-win32.c -@@ -808,3 +808,16 @@ bool qemu_write_pidfile(const char *filename, Error **errp) - } - return true; - } -+ -+char *qemu_get_host_name(Error **errp) -+{ -+ wchar_t tmp[MAX_COMPUTERNAME_LENGTH + 1]; -+ DWORD size = G_N_ELEMENTS(tmp); -+ -+ if (GetComputerNameW(tmp, &size) == 0) { -+ error_setg_win32(errp, GetLastError(), "failed close handle"); -+ return NULL; -+ } -+ -+ return g_utf16_to_utf8(tmp, size, NULL, NULL, NULL); -+} --- -2.27.0 - diff --git a/SOURCES/kvm-util-add-slirp_fmt-helpers.patch b/SOURCES/kvm-util-add-slirp_fmt-helpers.patch deleted file mode 100644 index 31af599..0000000 --- a/SOURCES/kvm-util-add-slirp_fmt-helpers.patch +++ /dev/null @@ -1,140 +0,0 @@ -From 5dc50c6bca059a9cda6677b1fd0187df1de78ed7 Mon Sep 17 00:00:00 
2001 -From: jmaloy -Date: Thu, 13 Feb 2020 15:50:48 +0000 -Subject: [PATCH 2/7] util: add slirp_fmt() helpers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: jmaloy -Message-id: <20200213155049.3936-2-jmaloy@redhat.com> -Patchwork-id: 93824 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] util: add slirp_fmt() helpers -Bugzilla: 1798994 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi - -From: Marc-André Lureau - -Various calls to snprintf() in libslirp assume that snprintf() returns -"only" the number of bytes written (excluding terminating NUL). - -https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 - -"Upon successful completion, the snprintf() function shall return the -number of bytes that would be written to s had n been sufficiently -large excluding the terminating null byte." - -Introduce slirp_fmt() that handles several pathological cases the -way libslirp usually expect: - -- treat error as fatal (instead of silently returning -1) - -- fmt0() will always \0 end - -- return the number of bytes actually written (instead of what would -have been written, which would usually result in OOB later), including -the ending \0 for fmt0() - -- warn if truncation happened (instead of ignoring) - -Other less common cases can still be handled with strcpy/snprintf() etc. - -Signed-off-by: Marc-André Lureau -Reviewed-by: Samuel Thibault -Message-Id: <20200127092414.169796-2-marcandre.lureau@redhat.com> -(cherry picked from libslirp commit 30648c03b27fb8d9611b723184216cd3174b6775) -Signed-off-by: Jon Maloy - -Signed-off-by: Danilo C. L. 
de Paula ---- - slirp/src/util.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - slirp/src/util.h | 3 +++ - 2 files changed, 65 insertions(+) - -diff --git a/slirp/src/util.c b/slirp/src/util.c -index e596087..e3b6257 100644 ---- a/slirp/src/util.c -+++ b/slirp/src/util.c -@@ -364,3 +364,65 @@ void slirp_pstrcpy(char *buf, int buf_size, const char *str) - } - *q = '\0'; - } -+ -+static int slirp_vsnprintf(char *str, size_t size, -+ const char *format, va_list args) -+{ -+ int rv = vsnprintf(str, size, format, args); -+ -+ if (rv < 0) { -+ g_error("vsnprintf() failed: %s", g_strerror(errno)); -+ } -+ -+ return rv; -+} -+ -+/* -+ * A snprintf()-like function that: -+ * - returns the number of bytes written (excluding optional \0-ending) -+ * - dies on error -+ * - warn on truncation -+ */ -+int slirp_fmt(char *str, size_t size, const char *format, ...) -+{ -+ va_list args; -+ int rv; -+ -+ va_start(args, format); -+ rv = slirp_vsnprintf(str, size, format, args); -+ va_end(args); -+ -+ if (rv > size) { -+ g_critical("vsnprintf() truncation"); -+ } -+ -+ return MIN(rv, size); -+} -+ -+/* -+ * A snprintf()-like function that: -+ * - always \0-end (unless size == 0) -+ * - returns the number of bytes actually written, including \0 ending -+ * - dies on error -+ * - warn on truncation -+ */ -+int slirp_fmt0(char *str, size_t size, const char *format, ...) 
-+{ -+ va_list args; -+ int rv; -+ -+ va_start(args, format); -+ rv = slirp_vsnprintf(str, size, format, args); -+ va_end(args); -+ -+ if (rv >= size) { -+ g_critical("vsnprintf() truncation"); -+ if (size > 0) -+ str[size - 1] = '\0'; -+ rv = size; -+ } else { -+ rv += 1; /* include \0 */ -+ } -+ -+ return rv; -+} -diff --git a/slirp/src/util.h b/slirp/src/util.h -index 3c6223c..0558dfc 100644 ---- a/slirp/src/util.h -+++ b/slirp/src/util.h -@@ -177,4 +177,7 @@ static inline int slirp_socket_set_fast_reuse(int fd) - - void slirp_pstrcpy(char *buf, int buf_size, const char *str); - -+int slirp_fmt(char *str, size_t size, const char *format, ...); -+int slirp_fmt0(char *str, size_t size, const char *format, ...); -+ - #endif --- -1.8.3.1 - diff --git a/SOURCES/kvm-vfio-Create-shared-routine-for-scanning-info-capabil.patch b/SOURCES/kvm-vfio-Create-shared-routine-for-scanning-info-capabil.patch deleted file mode 100644 index 8e58473..0000000 --- a/SOURCES/kvm-vfio-Create-shared-routine-for-scanning-info-capabil.patch +++ /dev/null @@ -1,79 +0,0 @@ -From f53c2c68db7780353a915072f8c953a74149b1f7 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 19 Jan 2021 12:50:42 -0500 -Subject: [PATCH 3/7] vfio: Create shared routine for scanning info - capabilities -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cornelia Huck -Message-id: <20210119125046.472811-4-cohuck@redhat.com> -Patchwork-id: 100678 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 3/7] vfio: Create shared routine for scanning info capabilities -Bugzilla: 1905391 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Auger Eric -RH-Acked-by: Thomas Huth - -From: Matthew Rosato - -Rather than duplicating the same loop in multiple locations, -create a static function to do the work. 
- -Signed-off-by: Matthew Rosato -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Cornelia Huck -Signed-off-by: Alex Williamson -(cherry picked from commit 3ab7a0b40d4be5ade3b61d4afd1518193b199423) -Signed-off-by: Cornelia Huck -Signed-off-by: Danilo C. L. de Paula ---- - hw/vfio/common.c | 21 +++++++++++++-------- - 1 file changed, 13 insertions(+), 8 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 5ca11488d67..77d62d2dcdf 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -826,17 +826,12 @@ static void vfio_listener_release(VFIOContainer *container) - } - } - --struct vfio_info_cap_header * --vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) -+static struct vfio_info_cap_header * -+vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id) - { - struct vfio_info_cap_header *hdr; -- void *ptr = info; -- -- if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) { -- return NULL; -- } - -- for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { -+ for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) { - if (hdr->id == id) { - return hdr; - } -@@ -845,6 +840,16 @@ vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) - return NULL; - } - -+struct vfio_info_cap_header * -+vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) -+{ -+ if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) { -+ return NULL; -+ } -+ -+ return vfio_get_cap((void *)info, info->cap_offset, id); -+} -+ - static int vfio_setup_region_sparse_mmaps(VFIORegion *region, - struct vfio_region_info *info) - { --- -2.27.0 - diff --git a/SOURCES/kvm-vfio-Find-DMA-available-capability.patch b/SOURCES/kvm-vfio-Find-DMA-available-capability.patch deleted file mode 100644 index b81bcc4..0000000 --- a/SOURCES/kvm-vfio-Find-DMA-available-capability.patch +++ /dev/null @@ -1,91 +0,0 @@ -From e6147c5a23a75361b1374bfb4b96403d243b5c38 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 19 Jan 2021 12:50:43 -0500 
-Subject: [PATCH 4/7] vfio: Find DMA available capability - -RH-Author: Cornelia Huck -Message-id: <20210119125046.472811-5-cohuck@redhat.com> -Patchwork-id: 100677 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 4/7] vfio: Find DMA available capability -Bugzilla: 1905391 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Auger Eric -RH-Acked-by: Thomas Huth - -From: Matthew Rosato - -The underlying host may be limiting the number of outstanding DMA -requests for type 1 IOMMU. Add helper functions to check for the -DMA available capability and retrieve the current number of DMA -mappings allowed. - -Signed-off-by: Matthew Rosato -Reviewed-by: Cornelia Huck -[aw: vfio_get_info_dma_avail moved inside CONFIG_LINUX] -Signed-off-by: Alex Williamson -(cherry picked from commit 7486a62845b1e12011dd99973e4739f69d57cd38) -Signed-off-by: Cornelia Huck -Signed-off-by: Danilo C. L. de Paula ---- - hw/vfio/common.c | 31 +++++++++++++++++++++++++++++++ - include/hw/vfio/vfio-common.h | 2 ++ - 2 files changed, 33 insertions(+) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 77d62d2dcdf..23efdfadebd 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -850,6 +850,37 @@ vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) - return vfio_get_cap((void *)info, info->cap_offset, id); - } - -+static struct vfio_info_cap_header * -+vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) -+{ -+ if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { -+ return NULL; -+ } -+ -+ return vfio_get_cap((void *)info, info->cap_offset, id); -+} -+ -+bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, -+ unsigned int *avail) -+{ -+ struct vfio_info_cap_header *hdr; -+ struct vfio_iommu_type1_info_dma_avail *cap; -+ -+ /* If the capability cannot be found, assume no DMA limiting */ -+ hdr = vfio_get_iommu_type1_info_cap(info, -+ VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL); -+ if (hdr == NULL) { -+ return false; -+ } -+ -+ if (avail != NULL) { -+ cap = (void *) hdr; 
-+ *avail = cap->avail; -+ } -+ -+ return true; -+} -+ - static int vfio_setup_region_sparse_mmaps(VFIORegion *region, - struct vfio_region_info *info) - { -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index fd564209ac7..aa6cbe4a998 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -191,6 +191,8 @@ int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type, - bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type); - struct vfio_info_cap_header * - vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id); -+bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, -+ unsigned int *avail); - #endif - extern const MemoryListener vfio_prereg_listener; - --- -2.27.0 - diff --git a/SOURCES/kvm-vfio-ccw-Add-support-for-the-CRW-region-and-IRQ.patch b/SOURCES/kvm-vfio-ccw-Add-support-for-the-CRW-region-and-IRQ.patch deleted file mode 100644 index c515676..0000000 --- a/SOURCES/kvm-vfio-ccw-Add-support-for-the-CRW-region-and-IRQ.patch +++ /dev/null @@ -1,175 +0,0 @@ -From 58edd0fba4d9e98edfeb16139467d6035a1f4e61 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 23 Jun 2020 09:25:42 -0400 -Subject: [PATCH 08/12] vfio-ccw: Add support for the CRW region and IRQ - -RH-Author: Cornelia Huck -Message-id: <20200623092543.358315-9-cohuck@redhat.com> -Patchwork-id: 97698 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 8/9] vfio-ccw: Add support for the CRW region and IRQ -Bugzilla: 1660916 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth - -From: Farhan Ali - -The crw region can be used to obtain information about -Channel Report Words (CRW) from vfio-ccw driver. - -Currently only channel-path related CRWs are passed to -QEMU from vfio-ccw driver. 
- -Signed-off-by: Farhan Ali -Signed-off-by: Eric Farman -Reviewed-by: Cornelia Huck -Message-Id: <20200505125757.98209-7-farman@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit f030532f2ad6eeb200034915e9c6357cce81b538) -Signed-off-by: Cornelia Huck -Signed-off-by: Danilo C. L. de Paula ---- - hw/vfio/ccw.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 73 insertions(+) - -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index 94a0d9840d..b72a505893 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -44,7 +44,11 @@ struct VFIOCCWDevice { - uint64_t schib_region_size; - uint64_t schib_region_offset; - struct ccw_schib_region *schib_region; -+ uint64_t crw_region_size; -+ uint64_t crw_region_offset; -+ struct ccw_crw_region *crw_region; - EventNotifier io_notifier; -+ EventNotifier crw_notifier; - bool force_orb_pfch; - bool warned_orb_pfch; - }; -@@ -254,6 +258,44 @@ static void vfio_ccw_reset(DeviceState *dev) - ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET); - } - -+static void vfio_ccw_crw_read(VFIOCCWDevice *vcdev) -+{ -+ struct ccw_crw_region *region = vcdev->crw_region; -+ CRW crw; -+ int size; -+ -+ /* Keep reading CRWs as long as data is returned */ -+ do { -+ memset(region, 0, sizeof(*region)); -+ size = pread(vcdev->vdev.fd, region, vcdev->crw_region_size, -+ vcdev->crw_region_offset); -+ -+ if (size == -1) { -+ error_report("vfio-ccw: Read crw region failed with errno=%d", -+ errno); -+ break; -+ } -+ -+ if (region->crw == 0) { -+ /* No more CRWs to queue */ -+ break; -+ } -+ -+ memcpy(&crw, ®ion->crw, sizeof(CRW)); -+ -+ css_crw_add_to_queue(crw); -+ } while (1); -+} -+ -+static void vfio_ccw_crw_notifier_handler(void *opaque) -+{ -+ VFIOCCWDevice *vcdev = opaque; -+ -+ while (event_notifier_test_and_clear(&vcdev->crw_notifier)) { -+ vfio_ccw_crw_read(vcdev); -+ } -+} -+ - static void vfio_ccw_io_notifier_handler(void *opaque) - { - VFIOCCWDevice *vcdev = opaque; -@@ -340,6 +382,10 @@ static void 
vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, - notifier = &vcdev->io_notifier; - fd_read = vfio_ccw_io_notifier_handler; - break; -+ case VFIO_CCW_CRW_IRQ_INDEX: -+ notifier = &vcdev->crw_notifier; -+ fd_read = vfio_ccw_crw_notifier_handler; -+ break; - default: - error_setg(errp, "vfio: Unsupported device irq(%d)", irq); - return; -@@ -391,6 +437,9 @@ static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev, - case VFIO_CCW_IO_IRQ_INDEX: - notifier = &vcdev->io_notifier; - break; -+ case VFIO_CCW_CRW_IRQ_INDEX: -+ notifier = &vcdev->crw_notifier; -+ break; - default: - error_report("vfio: Unsupported device irq(%d)", irq); - return; -@@ -468,10 +517,24 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) - vcdev->schib_region = g_malloc(info->size); - } - -+ ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW, -+ VFIO_REGION_SUBTYPE_CCW_CRW, &info); -+ -+ if (!ret) { -+ vcdev->crw_region_size = info->size; -+ if (sizeof(*vcdev->crw_region) != vcdev->crw_region_size) { -+ error_setg(errp, "vfio: Unexpected size of the CRW region"); -+ goto out_err; -+ } -+ vcdev->crw_region_offset = info->offset; -+ vcdev->crw_region = g_malloc(info->size); -+ } -+ - g_free(info); - return; - - out_err: -+ g_free(vcdev->crw_region); - g_free(vcdev->schib_region); - g_free(vcdev->async_cmd_region); - g_free(vcdev->io_region); -@@ -481,6 +544,7 @@ out_err: - - static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) - { -+ g_free(vcdev->crw_region); - g_free(vcdev->schib_region); - g_free(vcdev->async_cmd_region); - g_free(vcdev->io_region); -@@ -596,6 +660,14 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) - goto out_notifier_err; - } - -+ if (vcdev->crw_region) { -+ vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX, &err); -+ if (err) { -+ vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); -+ goto out_notifier_err; -+ } -+ } -+ - return; - - out_notifier_err: -@@ -620,6 +692,7 @@ static void 
vfio_ccw_unrealize(DeviceState *dev, Error **errp) - S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); - VFIOGroup *group = vcdev->vdev.group; - -+ vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX); - vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); - vfio_ccw_put_region(vcdev); - vfio_ccw_put_device(vcdev); --- -2.27.0 - diff --git a/SOURCES/kvm-vfio-ccw-Add-support-for-the-schib-region.patch b/SOURCES/kvm-vfio-ccw-Add-support-for-the-schib-region.patch deleted file mode 100644 index 667e5cf..0000000 --- a/SOURCES/kvm-vfio-ccw-Add-support-for-the-schib-region.patch +++ /dev/null @@ -1,254 +0,0 @@ -From b73e3e52f76db823d7bffe3f705f575ca413863b Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 23 Jun 2020 09:25:39 -0400 -Subject: [PATCH 05/12] vfio-ccw: Add support for the schib region - -RH-Author: Cornelia Huck -Message-id: <20200623092543.358315-6-cohuck@redhat.com> -Patchwork-id: 97697 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 5/9] vfio-ccw: Add support for the schib region -Bugzilla: 1660916 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth - -From: Farhan Ali - -The schib region can be used to obtain the latest SCHIB from the host -passthrough subchannel. Since the guest SCHIB is virtualized, -we currently only update the path related information so that the -guest is aware of any path related changes when it issues the -'stsch' instruction. - -Signed-off-by: Farhan Ali -Signed-off-by: Eric Farman -Reviewed-by: Cornelia Huck -Message-Id: <20200505125757.98209-4-farman@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 46ea3841edaff2a7657b8f6c7f474e5e3850cd62) -Signed-off-by: Cornelia Huck -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/css.c | 13 ++++++-- - hw/s390x/s390-ccw.c | 21 +++++++++++++ - hw/vfio/ccw.c | 63 +++++++++++++++++++++++++++++++++++++ - include/hw/s390x/css.h | 3 +- - include/hw/s390x/s390-ccw.h | 1 + - target/s390x/ioinst.c | 3 +- - 6 files changed, 99 insertions(+), 5 deletions(-) - -diff --git a/hw/s390x/css.c b/hw/s390x/css.c -index 844caab408..71fd3f9a00 100644 ---- a/hw/s390x/css.c -+++ b/hw/s390x/css.c -@@ -1335,11 +1335,20 @@ static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src) - } - } - --int css_do_stsch(SubchDev *sch, SCHIB *schib) -+IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib) - { -+ int ret; -+ -+ /* -+ * For some subchannels, we may want to update parts of -+ * the schib (e.g., update path masks from the host device -+ * for passthrough subchannels). -+ */ -+ ret = s390_ccw_store(sch); -+ - /* Use current status. */ - copy_schib_to_guest(schib, &sch->curr_status); -- return 0; -+ return ret; - } - - static void copy_pmcw_from_guest(PMCW *dest, const PMCW *src) -diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c -index 0c5a5b60bd..75b788c95e 100644 ---- a/hw/s390x/s390-ccw.c -+++ b/hw/s390x/s390-ccw.c -@@ -51,6 +51,27 @@ int s390_ccw_clear(SubchDev *sch) - return cdc->handle_clear(sch); - } - -+IOInstEnding s390_ccw_store(SubchDev *sch) -+{ -+ S390CCWDeviceClass *cdc = NULL; -+ int ret = IOINST_CC_EXPECTED; -+ -+ /* -+ * This code is called for both virtual and passthrough devices, -+ * but only applies to to the latter. This ugly check makes that -+ * distinction for us. 
-+ */ -+ if (object_dynamic_cast(OBJECT(sch->driver_data), TYPE_S390_CCW)) { -+ cdc = S390_CCW_DEVICE_GET_CLASS(sch->driver_data); -+ } -+ -+ if (cdc && cdc->handle_store) { -+ ret = cdc->handle_store(sch); -+ } -+ -+ return ret; -+} -+ - static void s390_ccw_get_dev_info(S390CCWDevice *cdev, - char *sysfsdev, - Error **errp) -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index 17eb4c4048..859ad646f1 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -41,6 +41,9 @@ struct VFIOCCWDevice { - uint64_t async_cmd_region_size; - uint64_t async_cmd_region_offset; - struct ccw_cmd_region *async_cmd_region; -+ uint64_t schib_region_size; -+ uint64_t schib_region_offset; -+ struct ccw_schib_region *schib_region; - EventNotifier io_notifier; - bool force_orb_pfch; - bool warned_orb_pfch; -@@ -116,6 +119,51 @@ again: - } - } - -+static IOInstEnding vfio_ccw_handle_store(SubchDev *sch) -+{ -+ S390CCWDevice *cdev = sch->driver_data; -+ VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); -+ SCHIB *schib = &sch->curr_status; -+ struct ccw_schib_region *region = vcdev->schib_region; -+ SCHIB *s; -+ int ret; -+ -+ /* schib region not available so nothing else to do */ -+ if (!region) { -+ return IOINST_CC_EXPECTED; -+ } -+ -+ memset(region, 0, sizeof(*region)); -+ ret = pread(vcdev->vdev.fd, region, vcdev->schib_region_size, -+ vcdev->schib_region_offset); -+ -+ if (ret == -1) { -+ /* -+ * Device is probably damaged, but store subchannel does not -+ * have a nonzero cc defined for this scenario. Log an error, -+ * and presume things are otherwise fine. -+ */ -+ error_report("vfio-ccw: store region read failed with errno=%d", errno); -+ return IOINST_CC_EXPECTED; -+ } -+ -+ /* -+ * Selectively copy path-related bits of the SCHIB, -+ * rather than copying the entire struct. 
-+ */ -+ s = (SCHIB *)region->schib_area; -+ schib->pmcw.pnom = s->pmcw.pnom; -+ schib->pmcw.lpum = s->pmcw.lpum; -+ schib->pmcw.pam = s->pmcw.pam; -+ schib->pmcw.pom = s->pmcw.pom; -+ -+ if (s->scsw.flags & SCSW_FLAGS_MASK_PNO) { -+ schib->scsw.flags |= SCSW_FLAGS_MASK_PNO; -+ } -+ -+ return IOINST_CC_EXPECTED; -+} -+ - static int vfio_ccw_handle_clear(SubchDev *sch) - { - S390CCWDevice *cdev = sch->driver_data; -@@ -382,10 +430,23 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) - vcdev->async_cmd_region = g_malloc0(info->size); - } - -+ ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW, -+ VFIO_REGION_SUBTYPE_CCW_SCHIB, &info); -+ if (!ret) { -+ vcdev->schib_region_size = info->size; -+ if (sizeof(*vcdev->schib_region) != vcdev->schib_region_size) { -+ error_setg(errp, "vfio: Unexpected size of the schib region"); -+ goto out_err; -+ } -+ vcdev->schib_region_offset = info->offset; -+ vcdev->schib_region = g_malloc(info->size); -+ } -+ - g_free(info); - return; - - out_err: -+ g_free(vcdev->schib_region); - g_free(vcdev->async_cmd_region); - g_free(vcdev->io_region); - g_free(info); -@@ -394,6 +455,7 @@ out_err: - - static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) - { -+ g_free(vcdev->schib_region); - g_free(vcdev->async_cmd_region); - g_free(vcdev->io_region); - } -@@ -569,6 +631,7 @@ static void vfio_ccw_class_init(ObjectClass *klass, void *data) - cdc->handle_request = vfio_ccw_handle_request; - cdc->handle_halt = vfio_ccw_handle_halt; - cdc->handle_clear = vfio_ccw_handle_clear; -+ cdc->handle_store = vfio_ccw_handle_store; - } - - static const TypeInfo vfio_ccw_info = { -diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h -index f46bcafb16..7e3a5e7433 100644 ---- a/include/hw/s390x/css.h -+++ b/include/hw/s390x/css.h -@@ -218,6 +218,7 @@ IOInstEnding do_subchannel_work_passthrough(SubchDev *sub); - - int s390_ccw_halt(SubchDev *sch); - int s390_ccw_clear(SubchDev *sch); -+IOInstEnding s390_ccw_store(SubchDev *sch); 
- - typedef enum { - CSS_IO_ADAPTER_VIRTIO = 0, -@@ -242,7 +243,7 @@ SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, - uint16_t schid); - bool css_subch_visible(SubchDev *sch); - void css_conditional_io_interrupt(SubchDev *sch); --int css_do_stsch(SubchDev *sch, SCHIB *schib); -+IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib); - bool css_schid_final(int m, uint8_t cssid, uint8_t ssid, uint16_t schid); - IOInstEnding css_do_msch(SubchDev *sch, const SCHIB *schib); - IOInstEnding css_do_xsch(SubchDev *sch); -diff --git a/include/hw/s390x/s390-ccw.h b/include/hw/s390x/s390-ccw.h -index fffb54562f..4a43803ef2 100644 ---- a/include/hw/s390x/s390-ccw.h -+++ b/include/hw/s390x/s390-ccw.h -@@ -37,6 +37,7 @@ typedef struct S390CCWDeviceClass { - IOInstEnding (*handle_request) (SubchDev *sch); - int (*handle_halt) (SubchDev *sch); - int (*handle_clear) (SubchDev *sch); -+ IOInstEnding (*handle_store) (SubchDev *sch); - } S390CCWDeviceClass; - - #endif -diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c -index f40c35c6ff..b6be300cc4 100644 ---- a/target/s390x/ioinst.c -+++ b/target/s390x/ioinst.c -@@ -292,8 +292,7 @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, - sch = css_find_subch(m, cssid, ssid, schid); - if (sch) { - if (css_subch_visible(sch)) { -- css_do_stsch(sch, &schib); -- cc = 0; -+ cc = css_do_stsch(sch, &schib); - } else { - /* Indicate no more subchannels in this css/ss */ - cc = 3; --- -2.27.0 - diff --git a/SOURCES/kvm-vfio-ccw-Connect-the-device-request-notifier.patch b/SOURCES/kvm-vfio-ccw-Connect-the-device-request-notifier.patch deleted file mode 100644 index 298fb29..0000000 --- a/SOURCES/kvm-vfio-ccw-Connect-the-device-request-notifier.patch +++ /dev/null @@ -1,128 +0,0 @@ -From db6a782f8b9ba062f195ff504b4d2f93e471fecc Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 11 May 2021 11:24:05 -0400 -Subject: [PATCH 2/5] vfio-ccw: Connect the device request notifier - -RH-Author: Thomas Huth 
-Message-id: <20210511112405.297037-3-thuth@redhat.com> -Patchwork-id: 101536 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 2/2] vfio-ccw: Connect the device request notifier -Bugzilla: 1940450 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -Now that the vfio-ccw code has a notifier interface to request that -a device be unplugged, let's wire that together. - -Signed-off-by: Eric Farman -Reviewed-by: Cornelia Huck -Message-Id: <20210104202057.48048-4-farman@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit b2f96f9e4f5fbc8f2770a436191cb328da4d5350) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1940450 -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. de Paula ---- - hw/vfio/ccw.c | 40 ++++++++++++++++++++++++++++++++++++---- - 1 file changed, 36 insertions(+), 4 deletions(-) - -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index b72a505893..3d450fe1c9 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -49,6 +49,7 @@ struct VFIOCCWDevice { - struct ccw_crw_region *crw_region; - EventNotifier io_notifier; - EventNotifier crw_notifier; -+ EventNotifier req_notifier; - bool force_orb_pfch; - bool warned_orb_pfch; - }; -@@ -287,6 +288,21 @@ static void vfio_ccw_crw_read(VFIOCCWDevice *vcdev) - } while (1); - } - -+static void vfio_ccw_req_notifier_handler(void *opaque) -+{ -+ VFIOCCWDevice *vcdev = opaque; -+ Error *err = NULL; -+ -+ if (!event_notifier_test_and_clear(&vcdev->req_notifier)) { -+ return; -+ } -+ -+ qdev_unplug(DEVICE(vcdev), &err); -+ if (err) { -+ warn_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name); -+ } -+} -+ - static void vfio_ccw_crw_notifier_handler(void *opaque) - { - VFIOCCWDevice *vcdev = opaque; -@@ -386,6 +402,10 @@ static void vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, - notifier = &vcdev->crw_notifier; - fd_read = vfio_ccw_crw_notifier_handler; - break; -+ case VFIO_CCW_REQ_IRQ_INDEX: -+ notifier = &vcdev->req_notifier; -+ fd_read = 
vfio_ccw_req_notifier_handler; -+ break; - default: - error_setg(errp, "vfio: Unsupported device irq(%d)", irq); - return; -@@ -440,6 +460,9 @@ static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev, - case VFIO_CCW_CRW_IRQ_INDEX: - notifier = &vcdev->crw_notifier; - break; -+ case VFIO_CCW_REQ_IRQ_INDEX: -+ notifier = &vcdev->req_notifier; -+ break; - default: - error_report("vfio: Unsupported device irq(%d)", irq); - return; -@@ -657,20 +680,28 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) - - vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, &err); - if (err) { -- goto out_notifier_err; -+ goto out_io_notifier_err; - } - - if (vcdev->crw_region) { - vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX, &err); - if (err) { -- vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); -- goto out_notifier_err; -+ goto out_crw_notifier_err; - } - } - -+ vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_REQ_IRQ_INDEX, &err); -+ if (err) { -+ goto out_req_notifier_err; -+ } -+ - return; - --out_notifier_err: -+out_req_notifier_err: -+ vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX); -+out_crw_notifier_err: -+ vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); -+out_io_notifier_err: - vfio_ccw_put_region(vcdev); - out_region_err: - vfio_ccw_put_device(vcdev); -@@ -692,6 +723,7 @@ static void vfio_ccw_unrealize(DeviceState *dev, Error **errp) - S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); - VFIOGroup *group = vcdev->vdev.group; - -+ vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_REQ_IRQ_INDEX); - vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX); - vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); - vfio_ccw_put_region(vcdev); --- -2.27.0 - diff --git a/SOURCES/kvm-vfio-ccw-Fix-error-message.patch b/SOURCES/kvm-vfio-ccw-Fix-error-message.patch deleted file mode 100644 index 86d2fdf..0000000 --- a/SOURCES/kvm-vfio-ccw-Fix-error-message.patch +++ 
/dev/null @@ -1,48 +0,0 @@ -From 7258b1fabcd152c2ad9b61485b869a41d1bc64e2 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 23 Jun 2020 09:25:35 -0400 -Subject: [PATCH 01/12] vfio-ccw: Fix error message -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cornelia Huck -Message-id: <20200623092543.358315-2-cohuck@redhat.com> -Patchwork-id: 97693 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/9] vfio-ccw: Fix error message -Bugzilla: 1660916 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé - -From: Boris Fiuczynski - -Signed-off-by: Boris Fiuczynski -Reviewed-by: Eric Farman -Message-Id: <20191128143015.5231-1-fiuczy@linux.ibm.com> -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Cornelia Huck -(cherry picked from commit 91f751dc111b270b1e81d80ac92cf479e7620fa4) -Signed-off-by: Cornelia Huck -Signed-off-by: Danilo C. L. de Paula ---- - hw/vfio/ccw.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index 6863f6c69f..3b5520ae75 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -102,7 +102,7 @@ again: - if (errno == EAGAIN) { - goto again; - } -- error_report("vfio-ccw: wirte I/O region failed with errno=%d", errno); -+ error_report("vfio-ccw: write I/O region failed with errno=%d", errno); - ret = -errno; - } else { - ret = region->ret_code; --- -2.27.0 - diff --git a/SOURCES/kvm-vfio-ccw-Refactor-ccw-irq-handler.patch b/SOURCES/kvm-vfio-ccw-Refactor-ccw-irq-handler.patch deleted file mode 100644 index 8a3514d..0000000 --- a/SOURCES/kvm-vfio-ccw-Refactor-ccw-irq-handler.patch +++ /dev/null @@ -1,155 +0,0 @@ -From ee9b03e774641fba8baaf85256706fcc5e8d8efa Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 23 Jun 2020 09:25:40 -0400 -Subject: [PATCH 06/12] vfio-ccw: Refactor ccw irq handler - -RH-Author: Cornelia Huck -Message-id: <20200623092543.358315-7-cohuck@redhat.com> 
-Patchwork-id: 97695 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 6/9] vfio-ccw: Refactor ccw irq handler -Bugzilla: 1660916 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth - -From: Eric Farman - -Make it easier to add new ones in the future. - -Signed-off-by: Eric Farman -Reviewed-by: Cornelia Huck -Message-Id: <20200505125757.98209-5-farman@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 690e29b91102ac69810b35fe72cd90bc9fa1fff7) -Signed-off-by: Cornelia Huck -Signed-off-by: Danilo C. L. de Paula ---- - hw/vfio/ccw.c | 58 +++++++++++++++++++++++++++++++++++++-------------- - 1 file changed, 42 insertions(+), 16 deletions(-) - -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index 859ad646f1..94a0d9840d 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -324,22 +324,36 @@ read_err: - css_inject_io_interrupt(sch); - } - --static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp) -+static void vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, -+ unsigned int irq, -+ Error **errp) - { - VFIODevice *vdev = &vcdev->vdev; - struct vfio_irq_info *irq_info; - size_t argsz; - int fd; -+ EventNotifier *notifier; -+ IOHandler *fd_read; -+ -+ switch (irq) { -+ case VFIO_CCW_IO_IRQ_INDEX: -+ notifier = &vcdev->io_notifier; -+ fd_read = vfio_ccw_io_notifier_handler; -+ break; -+ default: -+ error_setg(errp, "vfio: Unsupported device irq(%d)", irq); -+ return; -+ } - -- if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) { -- error_setg(errp, "vfio: unexpected number of io irqs %u", -+ if (vdev->num_irqs < irq + 1) { -+ error_setg(errp, "vfio: unexpected number of irqs %u", - vdev->num_irqs); - return; - } - - argsz = sizeof(*irq_info); - irq_info = g_malloc0(argsz); -- irq_info->index = VFIO_CCW_IO_IRQ_INDEX; -+ irq_info->index = irq; - irq_info->argsz = argsz; - if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, - irq_info) < 0 || irq_info->count < 1) { -@@ -347,37 +361,49 @@ static void 
vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp) - goto out_free_info; - } - -- if (event_notifier_init(&vcdev->io_notifier, 0)) { -+ if (event_notifier_init(notifier, 0)) { - error_setg_errno(errp, errno, -- "vfio: Unable to init event notifier for IO"); -+ "vfio: Unable to init event notifier for irq (%d)", -+ irq); - goto out_free_info; - } - -- fd = event_notifier_get_fd(&vcdev->io_notifier); -- qemu_set_fd_handler(fd, vfio_ccw_io_notifier_handler, NULL, vcdev); -+ fd = event_notifier_get_fd(notifier); -+ qemu_set_fd_handler(fd, fd_read, NULL, vcdev); - -- if (vfio_set_irq_signaling(vdev, VFIO_CCW_IO_IRQ_INDEX, 0, -+ if (vfio_set_irq_signaling(vdev, irq, 0, - VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) { - qemu_set_fd_handler(fd, NULL, NULL, vcdev); -- event_notifier_cleanup(&vcdev->io_notifier); -+ event_notifier_cleanup(notifier); - } - - out_free_info: - g_free(irq_info); - } - --static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev) -+static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev, -+ unsigned int irq) - { - Error *err = NULL; -+ EventNotifier *notifier; -+ -+ switch (irq) { -+ case VFIO_CCW_IO_IRQ_INDEX: -+ notifier = &vcdev->io_notifier; -+ break; -+ default: -+ error_report("vfio: Unsupported device irq(%d)", irq); -+ return; -+ } - -- if (vfio_set_irq_signaling(&vcdev->vdev, VFIO_CCW_IO_IRQ_INDEX, 0, -+ if (vfio_set_irq_signaling(&vcdev->vdev, irq, 0, - VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { - error_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name); - } - -- qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier), -+ qemu_set_fd_handler(event_notifier_get_fd(notifier), - NULL, NULL, vcdev); -- event_notifier_cleanup(&vcdev->io_notifier); -+ event_notifier_cleanup(notifier); - } - - static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) -@@ -565,7 +591,7 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) - goto out_region_err; - } - -- vfio_ccw_register_io_notifier(vcdev, 
&err); -+ vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, &err); - if (err) { - goto out_notifier_err; - } -@@ -594,7 +620,7 @@ static void vfio_ccw_unrealize(DeviceState *dev, Error **errp) - S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); - VFIOGroup *group = vcdev->vdev.group; - -- vfio_ccw_unregister_io_notifier(vcdev); -+ vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); - vfio_ccw_put_region(vcdev); - vfio_ccw_put_device(vcdev); - vfio_put_group(group); --- -2.27.0 - diff --git a/SOURCES/kvm-vfio-ccw-Refactor-cleanup-of-regions.patch b/SOURCES/kvm-vfio-ccw-Refactor-cleanup-of-regions.patch deleted file mode 100644 index 1741f4b..0000000 --- a/SOURCES/kvm-vfio-ccw-Refactor-cleanup-of-regions.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 30906c9c78af2710a2b86c096cc7b18bbc4b4e69 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 23 Jun 2020 09:25:38 -0400 -Subject: [PATCH 04/12] vfio-ccw: Refactor cleanup of regions - -RH-Author: Cornelia Huck -Message-id: <20200623092543.358315-5-cohuck@redhat.com> -Patchwork-id: 97694 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 4/9] vfio-ccw: Refactor cleanup of regions -Bugzilla: 1660916 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth - -From: Eric Farman - -While we're at it, add a g_free() for the async_cmd_region that -is the last thing currently created. g_free() knows how to handle -NULL pointers, so this makes it easier to remember what cleanups -need to be performed when new regions are added. - -Signed-off-by: Eric Farman -Reviewed-by: Cornelia Huck -Message-Id: <20200505125757.98209-3-farman@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 2a3b9cbaa7b25a4db4cdcfe1c65279c5464f2923) -Signed-off-by: Cornelia Huck -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/vfio/ccw.c | 14 +++++++++----- - 1 file changed, 9 insertions(+), 5 deletions(-) - -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index 6bc612b5b7..17eb4c4048 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -363,8 +363,7 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) - vcdev->io_region_size = info->size; - if (sizeof(*vcdev->io_region) != vcdev->io_region_size) { - error_setg(errp, "vfio: Unexpected size of the I/O region"); -- g_free(info); -- return; -+ goto out_err; - } - - vcdev->io_region_offset = info->offset; -@@ -377,15 +376,20 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) - vcdev->async_cmd_region_size = info->size; - if (sizeof(*vcdev->async_cmd_region) != vcdev->async_cmd_region_size) { - error_setg(errp, "vfio: Unexpected size of the async cmd region"); -- g_free(vcdev->io_region); -- g_free(info); -- return; -+ goto out_err; - } - vcdev->async_cmd_region_offset = info->offset; - vcdev->async_cmd_region = g_malloc0(info->size); - } - - g_free(info); -+ return; -+ -+out_err: -+ g_free(vcdev->async_cmd_region); -+ g_free(vcdev->io_region); -+ g_free(info); -+ return; - } - - static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) --- -2.27.0 - diff --git a/SOURCES/kvm-vfio-ccw-allow-non-prefetch-ORBs.patch b/SOURCES/kvm-vfio-ccw-allow-non-prefetch-ORBs.patch deleted file mode 100644 index da2fc5c..0000000 --- a/SOURCES/kvm-vfio-ccw-allow-non-prefetch-ORBs.patch +++ /dev/null @@ -1,61 +0,0 @@ -From d5f5a307f3396064d29ef0d300c7377756dd165b Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 23 Jun 2020 09:25:36 -0400 -Subject: [PATCH 02/12] vfio-ccw: allow non-prefetch ORBs - -RH-Author: Cornelia Huck -Message-id: <20200623092543.358315-3-cohuck@redhat.com> -Patchwork-id: 97692 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 2/9] vfio-ccw: allow non-prefetch ORBs -Bugzilla: 1660916 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth - -From: Jared Rossi - 
-Remove the explicit prefetch check when using vfio-ccw devices. -This check does not trigger in practice as all Linux channel programs -are intended to use prefetch. - -Newer Linux kernel versions do not require to force the PFCH flag with -vfio-ccw devices anymore. - -Signed-off-by: Jared Rossi -Reviewed-by: Eric Farman -Message-Id: <20200512181535.18630-2-jrossi@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 24e58a7b1d411627e326144030a20dcf0093fed0) -Signed-off-by: Cornelia Huck -Signed-off-by: Danilo C. L. de Paula ---- - hw/vfio/ccw.c | 13 +++---------- - 1 file changed, 3 insertions(+), 10 deletions(-) - -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index 3b5520ae75..6bc612b5b7 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -74,16 +74,9 @@ static IOInstEnding vfio_ccw_handle_request(SubchDev *sch) - struct ccw_io_region *region = vcdev->io_region; - int ret; - -- if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH)) { -- if (!(vcdev->force_orb_pfch)) { -- warn_once_pfch(vcdev, sch, "requires PFCH flag set"); -- sch_gen_unit_exception(sch); -- css_inject_io_interrupt(sch); -- return IOINST_CC_EXPECTED; -- } else { -- sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH; -- warn_once_pfch(vcdev, sch, "PFCH flag forced"); -- } -+ if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH) && vcdev->force_orb_pfch) { -+ sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH; -+ warn_once_pfch(vcdev, sch, "PFCH flag forced"); - } - - QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB)); --- -2.27.0 - diff --git a/SOURCES/kvm-vfio-nvlink-Remove-exec-permission-to-avoid-SELinux-.patch b/SOURCES/kvm-vfio-nvlink-Remove-exec-permission-to-avoid-SELinux-.patch deleted file mode 100644 index 81cf80e..0000000 --- a/SOURCES/kvm-vfio-nvlink-Remove-exec-permission-to-avoid-SELinux-.patch +++ /dev/null @@ -1,75 +0,0 @@ -From f01098bb86c12f485895f38f7a24170ec84b60b6 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Mon, 8 Jun 2020 16:25:21 -0400 -Subject: [PATCH 42/42] vfio/nvlink: Remove exec 
permission to avoid SELinux - AVCs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Greg Kurz -Message-id: <20200608162521.382858-2-gkurz@redhat.com> -Patchwork-id: 97459 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH 1/1] vfio/nvlink: Remove exec permission to avoid SELinux AVCs -Bugzilla: 1823275 -RH-Acked-by: David Gibson -RH-Acked-by: Laurent Vivier -RH-Acked-by: Philippe Mathieu-Daudé - -From: Leonardo Bras - -If SELinux is setup without 'execmem' permission for qemu, all mmap -with (PROT_WRITE | PROT_EXEC) will fail and print a warning in -SELinux log. - -If "nvlink2-mr" memory allocation fails (fist diff), it will cause -guest NUMA nodes to not be correctly configured (V100 memory will -not be visible for guest, nor its NUMA nodes). - -Not having 'execmem' permission is intesting for virtual machines to -avoid buffer-overflow based attacks, and it's adopted in distros -like RHEL. - -So, removing the PROT_EXEC flag seems the right thing to do. - -Browsing some other code that mmaps memory for usage with -memory_region_init_ram_device_ptr, I could notice it's usual to -not have PROT_EXEC (only PROT_READ | PROT_WRITE), so it should be -no problem around this. - -Signed-off-by: Leonardo Bras -Message-Id: <20200501055448.286518-1-leobras.c@gmail.com> -Acked-by: Alex Williamson -Signed-off-by: David Gibson -(cherry picked from commit 9c7c0407028355ca83349b8a60fddfad46f2ebd8) -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/vfio/pci-quirks.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index 4505ffe48a..1c5fe014cf 100644 ---- a/hw/vfio/pci-quirks.c -+++ b/hw/vfio/pci-quirks.c -@@ -2237,7 +2237,7 @@ int vfio_pci_nvidia_v100_ram_init(VFIOPCIDevice *vdev, Error **errp) - } - cap = (void *) hdr; - -- p = mmap(NULL, nv2reg->size, PROT_READ | PROT_WRITE | PROT_EXEC, -+ p = mmap(NULL, nv2reg->size, PROT_READ | PROT_WRITE, - MAP_SHARED, vdev->vbasedev.fd, nv2reg->offset); - if (p == MAP_FAILED) { - ret = -errno; -@@ -2297,7 +2297,7 @@ int vfio_pci_nvlink2_init(VFIOPCIDevice *vdev, Error **errp) - - /* Some NVLink bridges may not have assigned ATSD */ - if (atsdreg->size) { -- p = mmap(NULL, atsdreg->size, PROT_READ | PROT_WRITE | PROT_EXEC, -+ p = mmap(NULL, atsdreg->size, PROT_READ | PROT_WRITE, - MAP_SHARED, vdev->vbasedev.fd, atsdreg->offset); - if (p == MAP_FAILED) { - ret = -errno; --- -2.27.0 - diff --git a/SOURCES/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch b/SOURCES/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch deleted file mode 100644 index d416e0f..0000000 --- a/SOURCES/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch +++ /dev/null @@ -1,58 +0,0 @@ -From e4631c00d8e9ee3608ef3196cbe8bec4841ee988 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 8 Jan 2020 15:04:57 +0000 -Subject: [PATCH 2/5] vfio/pci: Don't remove irqchip notifier if not registered - -RH-Author: Peter Xu -Message-id: <20200108150457.12324-2-peterx@redhat.com> -Patchwork-id: 93291 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] vfio/pci: Don't remove irqchip notifier if not registered -Bugzilla: 1782678 -RH-Acked-by: Alex Williamson -RH-Acked-by: Cornelia Huck -RH-Acked-by: Auger Eric -RH-Acked-by: Jens Freimann - -The kvm irqchip notifier is only registered if the device supports -INTx, however it's unconditionally removed. 
If the assigned device -does not support INTx, this will cause QEMU to crash when unplugging -the device from the system. Change it to conditionally remove the -notifier only if the notify hook is setup. - -CC: Eduardo Habkost -CC: David Gibson -CC: Alex Williamson -Cc: qemu-stable@nongnu.org # v4.2 -Reported-by: yanghliu@redhat.com -Debugged-by: Eduardo Habkost -Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1782678 -Signed-off-by: Peter Xu -Reviewed-by: David Gibson -Reviewed-by: Greg Kurz -Signed-off-by: Alex Williamson -(cherry picked from commit 0446f8121723b134ca1d1ed0b73e96d4a0a8689d) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. de Paula ---- - hw/vfio/pci.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 309535f..d717520 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3100,7 +3100,9 @@ static void vfio_exitfn(PCIDevice *pdev) - vfio_unregister_req_notifier(vdev); - vfio_unregister_err_notifier(vdev); - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); -- kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); -+ if (vdev->irqchip_change_notifier.notify) { -+ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); -+ } - vfio_disable_interrupts(vdev); - if (vdev->intx.mmap_timer) { - timer_free(vdev->intx.mmap_timer); --- -1.8.3.1 - diff --git a/SOURCES/kvm-vhost-Add-names-to-section-rounded-warning.patch b/SOURCES/kvm-vhost-Add-names-to-section-rounded-warning.patch deleted file mode 100644 index c41a14c..0000000 --- a/SOURCES/kvm-vhost-Add-names-to-section-rounded-warning.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 0d545c5850caf76ad3e8dd9bb0fbc9f86b08e220 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Fri, 24 Jan 2020 19:46:11 +0100 -Subject: [PATCH 002/116] vhost: Add names to section rounded warning -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200124194613.41119-2-dgilbert@redhat.com> -Patchwork-id: 93450 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 1/3] vhost: Add names to section rounded warning -Bugzilla: 1779041 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Philippe Mathieu-Daudé - -From: "Dr. David Alan Gilbert" - -Add the memory region names to section rounding/alignment -warnings. - -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20200116202414.157959-2-dgilbert@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit ff4776147e960b128ee68f94c728659f662f4378) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 4da0d5a..774d87d 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -590,9 +590,10 @@ static void vhost_region_add_section(struct vhost_dev *dev, - * match up in the same RAMBlock if they do. 
- */ - if (mrs_gpa < prev_gpa_start) { -- error_report("%s:Section rounded to %"PRIx64 -- " prior to previous %"PRIx64, -- __func__, mrs_gpa, prev_gpa_start); -+ error_report("%s:Section '%s' rounded to %"PRIx64 -+ " prior to previous '%s' %"PRIx64, -+ __func__, section->mr->name, mrs_gpa, -+ prev_sec->mr->name, prev_gpa_start); - /* A way to cleanly fail here would be better */ - return; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-vhost-Only-align-sections-for-vhost-user.patch b/SOURCES/kvm-vhost-Only-align-sections-for-vhost-user.patch deleted file mode 100644 index e082ce8..0000000 --- a/SOURCES/kvm-vhost-Only-align-sections-for-vhost-user.patch +++ /dev/null @@ -1,97 +0,0 @@ -From c35466c168e5219bf585aa65ac31fc9bdc7cbf36 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 24 Jan 2020 19:46:12 +0100 -Subject: [PATCH 003/116] vhost: Only align sections for vhost-user -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200124194613.41119-3-dgilbert@redhat.com> -Patchwork-id: 93452 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 2/3] vhost: Only align sections for vhost-user -Bugzilla: 1779041 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Philippe Mathieu-Daudé - -From: "Dr. David Alan Gilbert" - -I added hugepage alignment code in c1ece84e7c9 to deal with -vhost-user + postcopy which needs aligned pages when using userfault. -However, on x86 the lower 2MB of address space tends to be shotgun'd -with small fragments around the 512-640k range - e.g. video RAM, and -with HyperV synic pages tend to sit around there - again splitting -it up. The alignment code complains with a 'Section rounded to ...' -error and gives up. - -Since vhost-user already filters out devices without an fd -(see vhost-user.c vhost_user_mem_section_filter) it shouldn't be -affected by those overlaps. 
- -Turn the alignment off on vhost-kernel so that it doesn't try -and align, and thus won't hit the rounding issues. - -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20200116202414.157959-3-dgilbert@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Paolo Bonzini -(cherry picked from commit 76525114736e8f669766e69b715fa59ce8648aae) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost.c | 34 ++++++++++++++++++---------------- - 1 file changed, 18 insertions(+), 16 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 774d87d..25fd469 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -547,26 +547,28 @@ static void vhost_region_add_section(struct vhost_dev *dev, - uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + - section->offset_within_region; - RAMBlock *mrs_rb = section->mr->ram_block; -- size_t mrs_page = qemu_ram_pagesize(mrs_rb); - - trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, - mrs_host); - -- /* Round the section to it's page size */ -- /* First align the start down to a page boundary */ -- uint64_t alignage = mrs_host & (mrs_page - 1); -- if (alignage) { -- mrs_host -= alignage; -- mrs_size += alignage; -- mrs_gpa -= alignage; -- } -- /* Now align the size up to a page boundary */ -- alignage = mrs_size & (mrs_page - 1); -- if (alignage) { -- mrs_size += mrs_page - alignage; -- } -- trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, -- mrs_host); -+ if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { -+ /* Round the section to it's page size */ -+ /* First align the start down to a page boundary */ -+ size_t mrs_page = qemu_ram_pagesize(mrs_rb); -+ uint64_t alignage = mrs_host & (mrs_page - 1); -+ if (alignage) { -+ mrs_host -= alignage; -+ mrs_size += alignage; -+ mrs_gpa -= alignage; -+ } -+ /* Now align the size up to a page boundary */ -+ alignage = mrs_size & (mrs_page - 1); -+ if 
(alignage) { -+ mrs_size += mrs_page - alignage; -+ } -+ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, -+ mrs_host); -+ } - - if (dev->n_tmp_sections) { - /* Since we already have at least one section, lets see if --- -1.8.3.1 - diff --git a/SOURCES/kvm-vhost-coding-style-fix.patch b/SOURCES/kvm-vhost-coding-style-fix.patch deleted file mode 100644 index 4546130..0000000 --- a/SOURCES/kvm-vhost-coding-style-fix.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 624d96c456536e1471968a59fbeea206309cc33b Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 24 Jan 2020 19:46:13 +0100 -Subject: [PATCH 004/116] vhost: coding style fix -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200124194613.41119-4-dgilbert@redhat.com> -Patchwork-id: 93453 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 3/3] vhost: coding style fix -Bugzilla: 1779041 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Philippe Mathieu-Daudé - -From: "Michael S. Tsirkin" - -Drop a trailing whitespace. Make line shorter. - -Fixes: 76525114736e8 ("vhost: Only align sections for vhost-user") -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 8347505640238d3b80f9bb7510fdc1bb574bad19) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 25fd469..9edfadc 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -551,7 +551,7 @@ static void vhost_region_add_section(struct vhost_dev *dev, - trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, - mrs_host); - -- if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { -+ if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { - /* Round the section to it's page size */ - /* First align the start down to a page boundary */ - size_t mrs_page = qemu_ram_pagesize(mrs_rb); -@@ -566,8 +566,8 @@ static void vhost_region_add_section(struct vhost_dev *dev, - if (alignage) { - mrs_size += mrs_page - alignage; - } -- trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, -- mrs_host); -+ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, -+ mrs_size, mrs_host); - } - - if (dev->n_tmp_sections) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-vhost-correctly-turn-on-VIRTIO_F_IOMMU_PLATFORM.patch b/SOURCES/kvm-vhost-correctly-turn-on-VIRTIO_F_IOMMU_PLATFORM.patch deleted file mode 100644 index 7e1353c..0000000 --- a/SOURCES/kvm-vhost-correctly-turn-on-VIRTIO_F_IOMMU_PLATFORM.patch +++ /dev/null @@ -1,69 +0,0 @@ -From e06655cfe0fa9473b1e8b311571f36d787472834 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 May 2020 05:54:02 -0400 -Subject: [PATCH 20/42] vhost: correctly turn on VIRTIO_F_IOMMU_PLATFORM - -RH-Author: Thomas Huth -Message-id: <20200529055420.16855-21-thuth@redhat.com> -Patchwork-id: 97041 -O-Subject: [RHEL-8.3.0 qemu-kvm PATCH v2 20/38] vhost: correctly turn on VIRTIO_F_IOMMU_PLATFORM -Bugzilla: 1828317 -RH-Acked-by: Claudio Imbrenda -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -From: Jason Wang - -We turn on 
device IOTLB via VIRTIO_F_IOMMU_PLATFORM unconditionally on -platform without IOMMU support. This can lead unnecessary IOTLB -transactions which will damage the performance. - -Fixing this by check whether the device is backed by IOMMU and disable -device IOTLB. - -Reported-by: Halil Pasic -Tested-by: Halil Pasic -Reviewed-by: Halil Pasic -Signed-off-by: Jason Wang -Message-Id: <20200302042454.24814-1-jasowang@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit f7ef7e6e3ba6e994e070cc609eb154339d1c4a11) -Signed-off-by: Danilo C. L. de Paula ---- - hw/virtio/vhost.c | 12 +++++++++++- - 1 file changed, 11 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 9edfadc81d..9182a00495 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -290,7 +290,14 @@ static int vhost_dev_has_iommu(struct vhost_dev *dev) - { - VirtIODevice *vdev = dev->vdev; - -- return virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); -+ /* -+ * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support -+ * incremental memory mapping API via IOTLB API. For platform that -+ * does not have IOMMU, there's no need to enable this feature -+ * which may cause unnecessary IOTLB miss/update trnasactions. 
-+ */ -+ return vdev->dma_as != &address_space_memory && -+ virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); - } - - static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, -@@ -765,6 +772,9 @@ static int vhost_dev_set_features(struct vhost_dev *dev, - if (enable_log) { - features |= 0x1ULL << VHOST_F_LOG_ALL; - } -+ if (!vhost_dev_has_iommu(dev)) { -+ features &= ~(0x1ULL << VIRTIO_F_IOMMU_PLATFORM); -+ } - r = dev->vhost_ops->vhost_set_features(dev, features); - if (r < 0) { - VHOST_OPS_DEBUG("vhost_set_features failed"); --- -2.27.0 - diff --git a/SOURCES/kvm-vhost-user-Print-unexpected-slave-message-types.patch b/SOURCES/kvm-vhost-user-Print-unexpected-slave-message-types.patch deleted file mode 100644 index e5776e7..0000000 --- a/SOURCES/kvm-vhost-user-Print-unexpected-slave-message-types.patch +++ /dev/null @@ -1,48 +0,0 @@ -From d6abbdaeb2c35efe6793a599c98116e250b1f179 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:43 +0100 -Subject: [PATCH 072/116] vhost-user: Print unexpected slave message types -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-69-dgilbert@redhat.com> -Patchwork-id: 93519 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 068/112] vhost-user: Print unexpected slave message types -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -When we receive an unexpected message type on the slave fd, print -the type. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 0fdc465d7d5aafeae127eba488f247ac6f58df4c) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost-user.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index 02a9b25..e4f46ec 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -1055,7 +1055,7 @@ static void slave_read(void *opaque) - fd[0]); - break; - default: -- error_report("Received unexpected msg type."); -+ error_report("Received unexpected msg type: %d.", hdr.request); - ret = -EINVAL; - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-vhost-user-fs-remove-vhostfd-property.patch b/SOURCES/kvm-vhost-user-fs-remove-vhostfd-property.patch deleted file mode 100644 index 5904e82..0000000 --- a/SOURCES/kvm-vhost-user-fs-remove-vhostfd-property.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 912af6f7c270e2939a91c9b3f62b6ba1202edc43 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:37 +0100 -Subject: [PATCH 006/116] vhost-user-fs: remove "vhostfd" property -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-3-dgilbert@redhat.com> -Patchwork-id: 93458 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 002/112] vhost-user-fs: remove "vhostfd" property -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Marc-André Lureau - -The property doesn't make much sense for a vhost-user device. - -Signed-off-by: Marc-André Lureau -Message-Id: <20191116112016.14872-1-marcandre.lureau@redhat.com> -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 703857348724319735d9be7b5b996e6445c6e6b9) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost-user-fs.c | 1 - - include/hw/virtio/vhost-user-fs.h | 1 - - 2 files changed, 2 deletions(-) - -diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c -index f0df7f4..ca0b7fc 100644 ---- a/hw/virtio/vhost-user-fs.c -+++ b/hw/virtio/vhost-user-fs.c -@@ -263,7 +263,6 @@ static Property vuf_properties[] = { - DEFINE_PROP_UINT16("num-request-queues", VHostUserFS, - conf.num_request_queues, 1), - DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128), -- DEFINE_PROP_STRING("vhostfd", VHostUserFS, conf.vhostfd), - DEFINE_PROP_END_OF_LIST(), - }; - -diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h -index 539885b..9ff1bdb 100644 ---- a/include/hw/virtio/vhost-user-fs.h -+++ b/include/hw/virtio/vhost-user-fs.h -@@ -28,7 +28,6 @@ typedef struct { - char *tag; - uint16_t num_request_queues; - uint16_t queue_size; -- char *vhostfd; - } VHostUserFSConf; - - typedef struct { --- -1.8.3.1 - diff --git a/SOURCES/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch b/SOURCES/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch deleted file mode 100644 index 3a50632..0000000 --- a/SOURCES/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 044feb40e3041759ee77d08136f334cf3ad67c1e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?J=C3=A1n=20Tomko?= -Date: Fri, 21 Feb 2020 09:49:23 +0000 -Subject: [PATCH] vhost-user-gpu: Drop trailing json comma -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Ján Tomko -Message-id: <07fed9a38495938a7180819e27f590d80cd6668d.1582278173.git.jtomko@redhat.com> -Patchwork-id: 94019 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] vhost-user-gpu: Drop trailing json comma -Bugzilla: 1805334 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Laszlo 
Ersek -RH-Acked-by: Stefan Hajnoczi - -From: Cole Robinson - -Trailing comma is not valid json: - -$ cat contrib/vhost-user-gpu/50-qemu-gpu.json.in | jq -parse error: Expected another key-value pair at line 5, column 1 - -Signed-off-by: Cole Robinson -Reviewed-by: Marc-André Lureau -Reviewed-by: Li Qiang -Reviewed-by: Philippe Mathieu-Daudé -Message-id: 7f5dd2ac9f3504e2699f23e69bc3d8051b729832.1568925097.git.crobinso@redhat.com -Signed-off-by: Gerd Hoffmann -(cherry picked from commit ca26b032e5a0e8a190c763ce828a8740d24b9b65) -Signed-off-by: Ján Tomko -Signed-off-by: Danilo C. L. de Paula ---- - contrib/vhost-user-gpu/50-qemu-gpu.json.in | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/contrib/vhost-user-gpu/50-qemu-gpu.json.in b/contrib/vhost-user-gpu/50-qemu-gpu.json.in -index 658b545..f5edd09 100644 ---- a/contrib/vhost-user-gpu/50-qemu-gpu.json.in -+++ b/contrib/vhost-user-gpu/50-qemu-gpu.json.in -@@ -1,5 +1,5 @@ - { - "description": "QEMU vhost-user-gpu", - "type": "gpu", -- "binary": "@libexecdir@/vhost-user-gpu", -+ "binary": "@libexecdir@/vhost-user-gpu" - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch b/SOURCES/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch deleted file mode 100644 index ed10701..0000000 --- a/SOURCES/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch +++ /dev/null @@ -1,80 +0,0 @@ -From b395ad369278d0923a590975fabbb99ec7716c6b Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Wed, 19 Feb 2020 21:34:28 +0000 -Subject: [PATCH 4/7] virtio: add ability to delete vq through a pointer -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Julia Suvorova -Message-id: <20200219213431.11913-2-jusual@redhat.com> -Patchwork-id: 93980 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/4] virtio: add ability to delete vq through a pointer -Bugzilla: 1791590 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Philippe 
Mathieu-Daudé -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Michael S. Tsirkin - -From: "Michael S. Tsirkin" - -Devices tend to maintain vq pointers, allow deleting them trough a vq pointer. - -Signed-off-by: Michael S. Tsirkin -Reviewed-by: David Hildenbrand -Reviewed-by: David Hildenbrand -(cherry picked from commit 722f8c51d8af223751dfb1d02de40043e8ba067e) -Signed-off-by: Danilo C. L. de Paula ---- - hw/virtio/virtio.c | 15 ++++++++++----- - include/hw/virtio/virtio.h | 2 ++ - 2 files changed, 12 insertions(+), 5 deletions(-) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 3211135..d63a369 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -2335,17 +2335,22 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - return &vdev->vq[i]; - } - -+void virtio_delete_queue(VirtQueue *vq) -+{ -+ vq->vring.num = 0; -+ vq->vring.num_default = 0; -+ vq->handle_output = NULL; -+ vq->handle_aio_output = NULL; -+ g_free(vq->used_elems); -+} -+ - void virtio_del_queue(VirtIODevice *vdev, int n) - { - if (n < 0 || n >= VIRTIO_QUEUE_MAX) { - abort(); - } - -- vdev->vq[n].vring.num = 0; -- vdev->vq[n].vring.num_default = 0; -- vdev->vq[n].handle_output = NULL; -- vdev->vq[n].handle_aio_output = NULL; -- g_free(vdev->vq[n].used_elems); -+ virtio_delete_queue(&vdev->vq[n]); - } - - static void virtio_set_isr(VirtIODevice *vdev, int value) -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index 6a20442..91167f6 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -183,6 +183,8 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - - void virtio_del_queue(VirtIODevice *vdev, int n); - -+void virtio_delete_queue(VirtQueue *vq); -+ - void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, - unsigned int len); - void virtqueue_flush(VirtQueue *vq, unsigned int count); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtio-add-vhost-user-fs-ccw-device.patch 
b/SOURCES/kvm-virtio-add-vhost-user-fs-ccw-device.patch deleted file mode 100644 index d7d41af..0000000 --- a/SOURCES/kvm-virtio-add-vhost-user-fs-ccw-device.patch +++ /dev/null @@ -1,136 +0,0 @@ -From fc5d5887462da813d91a3a0649214313d580d7af Mon Sep 17 00:00:00 2001 -From: Claudio Imbrenda -Date: Tue, 27 Oct 2020 12:02:16 -0400 -Subject: [PATCH 03/18] virtio: add vhost-user-fs-ccw device - -RH-Author: Claudio Imbrenda -Message-id: <20201027120217.2997314-3-cimbrend@redhat.com> -Patchwork-id: 98720 -O-Subject: [RHEL8.4 qemu-kvm PATCH 2/3] virtio: add vhost-user-fs-ccw device -Bugzilla: 1857733 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Cornelia Huck - -From: Halil Pasic - -upstream bd0bbb9aba2afbc2ea24b0475be04f795468b381 - -fixed for the backport: -* makefile logic instead of meson -* old style qdev initialization -* old style device class properties - --- - -Wire up the CCW device for vhost-user-fs. - -Reviewed-by: Cornelia Huck -Signed-off-by: Halil Pasic -Message-id: 20200901150019.29229-2-mhartmay@linux.ibm.com -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/Makefile.objs | 1 + - hw/s390x/vhost-user-fs-ccw.c | 76 ++++++++++++++++++++++++++++++++++++ - 2 files changed, 77 insertions(+) - create mode 100644 hw/s390x/vhost-user-fs-ccw.c - -diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs -index a46a1c7894e..c4086ec3171 100644 ---- a/hw/s390x/Makefile.objs -+++ b/hw/s390x/Makefile.objs -@@ -20,6 +20,7 @@ obj-$(CONFIG_VIRTIO_NET) += virtio-ccw-net.o - obj-$(CONFIG_VIRTIO_BLK) += virtio-ccw-blk.o - obj-$(call land,$(CONFIG_VIRTIO_9P),$(CONFIG_VIRTFS)) += virtio-ccw-9p.o - obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock-ccw.o -+obj-$(CONFIG_VHOST_USER_FS) += vhost-user-fs-ccw.o - endif - obj-y += css-bridge.o - obj-y += ccw-device.o -diff --git a/hw/s390x/vhost-user-fs-ccw.c b/hw/s390x/vhost-user-fs-ccw.c -new file mode 100644 -index 00000000000..e7b165d5f61 ---- /dev/null -+++ b/hw/s390x/vhost-user-fs-ccw.c -@@ -0,0 +1,76 @@ -+/* -+ * virtio ccw vhost-user-fs implementation -+ * -+ * Copyright 2020 IBM Corp. -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or (at -+ * your option) any later version. See the COPYING file in the top-level -+ * directory. 
-+ */ -+#include "qemu/osdep.h" -+#include "hw/qdev-properties.h" -+#include "qapi/error.h" -+#include "hw/virtio/vhost-user-fs.h" -+#include "virtio-ccw.h" -+ -+typedef struct VHostUserFSCcw { -+ VirtioCcwDevice parent_obj; -+ VHostUserFS vdev; -+} VHostUserFSCcw; -+ -+#define TYPE_VHOST_USER_FS_CCW "vhost-user-fs-ccw" -+#define VHOST_USER_FS_CCW(obj) \ -+ OBJECT_CHECK(VHostUserFSCcw, (obj), TYPE_VHOST_USER_FS_CCW) -+ -+ -+static Property vhost_user_fs_ccw_properties[] = { -+ DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, -+ VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), -+ DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, -+ VIRTIO_CCW_MAX_REV), -+ DEFINE_PROP_END_OF_LIST(), -+}; -+ -+static void vhost_user_fs_ccw_realize(VirtioCcwDevice *ccw_dev, Error **errp) -+{ -+ VHostUserFSCcw *dev = VHOST_USER_FS_CCW(ccw_dev); -+ DeviceState *vdev = DEVICE(&dev->vdev); -+ -+ qdev_set_parent_bus(vdev, BUS(&ccw_dev->bus)); -+ object_property_set_bool(OBJECT(vdev), true, "realized", errp); -+} -+ -+static void vhost_user_fs_ccw_instance_init(Object *obj) -+{ -+ VHostUserFSCcw *dev = VHOST_USER_FS_CCW(obj); -+ VirtioCcwDevice *ccw_dev = VIRTIO_CCW_DEVICE(obj); -+ -+ ccw_dev->force_revision_1 = true; -+ virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), -+ TYPE_VHOST_USER_FS); -+} -+ -+static void vhost_user_fs_ccw_class_init(ObjectClass *klass, void *data) -+{ -+ DeviceClass *dc = DEVICE_CLASS(klass); -+ VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); -+ -+ k->realize = vhost_user_fs_ccw_realize; -+ dc->props = vhost_user_fs_ccw_properties; -+ set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); -+} -+ -+static const TypeInfo vhost_user_fs_ccw = { -+ .name = TYPE_VHOST_USER_FS_CCW, -+ .parent = TYPE_VIRTIO_CCW_DEVICE, -+ .instance_size = sizeof(VHostUserFSCcw), -+ .instance_init = vhost_user_fs_ccw_instance_init, -+ .class_init = vhost_user_fs_ccw_class_init, -+}; -+ -+static void vhost_user_fs_ccw_register(void) -+{ -+ 
type_register_static(&vhost_user_fs_ccw); -+} -+ -+type_init(vhost_user_fs_ccw_register) --- -2.27.0 - diff --git a/SOURCES/kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch b/SOURCES/kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch deleted file mode 100644 index 9e46be1..0000000 --- a/SOURCES/kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch +++ /dev/null @@ -1,203 +0,0 @@ -From fdd1f3bf672ad8bb0a6db896ec8cbc797c31da1f Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Wed, 24 Jun 2020 13:24:53 -0400 -Subject: [PATCH 11/12] virtio-blk: On restart, process queued requests in the - proper context - -RH-Author: Sergio Lopez Pascual -Message-id: <20200624132453.111276-3-slp@redhat.com> -Patchwork-id: 97798 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] virtio-blk: On restart, process queued requests in the proper context -Bugzilla: -RH-Acked-by: John Snow -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Kevin Wolf - -On restart, we were scheduling a BH to process queued requests, which -would run before starting up the data plane, leading to those requests -being assigned and started on coroutines on the main context. - -This could cause requests to be wrongly processed in parallel from -different threads (the main thread and the iothread managing the data -plane), potentially leading to multiple issues. 
- -For example, stopping and resuming a VM multiple times while the guest -is generating I/O on a virtio_blk device can trigger a crash with a -stack tracing looking like this one: - -<------> - Thread 2 (Thread 0x7ff736765700 (LWP 1062503)): - #0 0x00005567a13b99d6 in iov_memset - (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) - at util/iov.c:69 - #1 0x00005567a13bab73 in qemu_iovec_memset - (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 - #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 - #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 - #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 - #5 0x00005567a12f43d9 in qemu_laio_process_completions_and_submit (s=0x7ff7182e8420) - at block/linux-aio.c:236 - #6 0x00005567a12f44c2 in qemu_laio_poll_cb (opaque=0x7ff7182e8430) at block/linux-aio.c:267 - #7 0x00005567a13aed83 in run_poll_handlers_once (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) - at util/aio-posix.c:520 - #8 0x00005567a13aee9f in run_poll_handlers (ctx=0x5567a2b58c70, max_ns=16000, timeout=0x7ff7367645f8) - at util/aio-posix.c:562 - #9 0x00005567a13aefde in try_poll_mode (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) - at util/aio-posix.c:597 - #10 0x00005567a13af115 in aio_poll (ctx=0x5567a2b58c70, blocking=true) at util/aio-posix.c:639 - #11 0x00005567a109acca in iothread_run (opaque=0x5567a2b29760) at iothread.c:75 - #12 0x00005567a13b2790 in qemu_thread_start (args=0x5567a2b694c0) at util/qemu-thread-posix.c:519 - #13 0x00007ff73eedf2de in start_thread () at /lib64/libpthread.so.0 - #14 0x00007ff73ec10e83 in clone () at /lib64/libc.so.6 - - Thread 1 (Thread 0x7ff743986f00 (LWP 1062500)): - #0 0x00005567a13b99d6 in iov_memset - (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) - at 
util/iov.c:69 - #1 0x00005567a13bab73 in qemu_iovec_memset - (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 - #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 - #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 - #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 - #5 0x00005567a12f4a2f in laio_do_submit (fd=19, laiocb=0x7ff5f4ff9ae0, offset=472363008, type=2) - at block/linux-aio.c:375 - #6 0x00005567a12f4af2 in laio_co_submit - (bs=0x5567a2b8c460, s=0x7ff7182e8420, fd=19, offset=472363008, qiov=0x7ff5f4ff9ca0, type=2) - at block/linux-aio.c:394 - #7 0x00005567a12f1803 in raw_co_prw - (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, type=2) - at block/file-posix.c:1892 - #8 0x00005567a12f1941 in raw_co_pwritev - (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, flags=0) - at block/file-posix.c:1925 - #9 0x00005567a12fe3e1 in bdrv_driver_pwritev - (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, qiov_offset=0, flags=0) - at block/io.c:1183 - #10 0x00005567a1300340 in bdrv_aligned_pwritev - (child=0x5567a2b5b070, req=0x7ff5f4ff9db0, offset=472363008, bytes=20480, align=512, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) at block/io.c:1980 - #11 0x00005567a1300b29 in bdrv_co_pwritev_part - (child=0x5567a2b5b070, offset=472363008, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) - at block/io.c:2137 - #12 0x00005567a12baba1 in qcow2_co_pwritev_task - (bs=0x5567a2b92740, file_cluster_offset=472317952, offset=487305216, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, l2meta=0x0) at block/qcow2.c:2444 - #13 0x00005567a12bacdb in qcow2_co_pwritev_task_entry (task=0x5567a2b48540) at block/qcow2.c:2475 - #14 0x00005567a13167d8 in aio_task_co (opaque=0x5567a2b48540) at block/aio_task.c:45 - #15 0x00005567a13cf00c in 
coroutine_trampoline (i0=738245600, i1=32759) at util/coroutine-ucontext.c:115 - #16 0x00007ff73eb622e0 in __start_context () at /lib64/libc.so.6 - #17 0x00007ff6626f1350 in () - #18 0x0000000000000000 in () -<------> - -This is also known to cause crashes with this message (assertion -failed): - - aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule' - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1812765 -Signed-off-by: Sergio Lopez -Message-Id: <20200603093240.40489-3-slp@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 49b44549ace7890fffdf027fd3695218ee7f1121) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - hw/block/dataplane/virtio-blk.c | 8 ++++++++ - hw/block/virtio-blk.c | 18 ++++++++++++------ - include/hw/virtio/virtio-blk.h | 2 +- - 3 files changed, 21 insertions(+), 7 deletions(-) - -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index 119906a5fe..ac495fd72a 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -220,6 +220,9 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - goto fail_guest_notifiers; - } - -+ /* Process queued requests before the ones in vring */ -+ virtio_blk_process_queued_requests(vblk, false); -+ - /* Kick right away to begin processing requests already in vring */ - for (i = 0; i < nvqs; i++) { - VirtQueue *vq = virtio_get_queue(s->vdev, i); -@@ -239,6 +242,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - return 0; - - fail_guest_notifiers: -+ /* -+ * If we failed to set up the guest notifiers queued requests will be -+ * processed on the main context. 
-+ */ -+ virtio_blk_process_queued_requests(vblk, false); - vblk->dataplane_disabled = true; - s->starting = false; - vblk->dataplane_started = true; -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 6ff29a05d6..493a263fa6 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -819,7 +819,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) - virtio_blk_handle_output_do(s, vq); - } - --void virtio_blk_process_queued_requests(VirtIOBlock *s) -+void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) - { - VirtIOBlockReq *req = s->rq; - MultiReqBuffer mrb = {}; -@@ -847,7 +847,9 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s) - if (mrb.num_reqs) { - virtio_blk_submit_multireq(s->blk, &mrb); - } -- blk_dec_in_flight(s->conf.conf.blk); -+ if (is_bh) { -+ blk_dec_in_flight(s->conf.conf.blk); -+ } - aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - } - -@@ -858,21 +860,25 @@ static void virtio_blk_dma_restart_bh(void *opaque) - qemu_bh_delete(s->bh); - s->bh = NULL; - -- virtio_blk_process_queued_requests(s); -+ virtio_blk_process_queued_requests(s, true); - } - - static void virtio_blk_dma_restart_cb(void *opaque, int running, - RunState state) - { - VirtIOBlock *s = opaque; -+ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); -+ VirtioBusState *bus = VIRTIO_BUS(qbus); - - if (!running) { - return; - } - -- if (!s->bh) { -- /* FIXME The data plane is not started yet, so these requests are -- * processed in the main thread. */ -+ /* -+ * If ioeventfd is enabled, don't schedule the BH here as queued -+ * requests will be processed while starting the data plane. 
-+ */ -+ if (!s->bh && !virtio_bus_ioeventfd_enabled(bus)) { - s->bh = aio_bh_new(blk_get_aio_context(s->conf.conf.blk), - virtio_blk_dma_restart_bh, s); - blk_inc_in_flight(s->conf.conf.blk); -diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h -index cf8eea2f58..e77f0db3b0 100644 ---- a/include/hw/virtio/virtio-blk.h -+++ b/include/hw/virtio/virtio-blk.h -@@ -84,6 +84,6 @@ typedef struct MultiReqBuffer { - } MultiReqBuffer; - - bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); --void virtio_blk_process_queued_requests(VirtIOBlock *s); -+void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh); - - #endif --- -2.27.0 - diff --git a/SOURCES/kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch b/SOURCES/kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch deleted file mode 100644 index 148045d..0000000 --- a/SOURCES/kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 73d83d8880e85eedc22c9651b67d1eacd5de5ff4 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Wed, 24 Jun 2020 13:24:52 -0400 -Subject: [PATCH 10/12] virtio-blk: Refactor the code that processes queued - requests - -RH-Author: Sergio Lopez Pascual -Message-id: <20200624132453.111276-2-slp@redhat.com> -Patchwork-id: 97797 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/2] virtio-blk: Refactor the code that processes queued requests -Bugzilla: -RH-Acked-by: John Snow -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Kevin Wolf - -Move the code that processes queued requests from -virtio_blk_dma_restart_bh() to its own, non-static, function. This -will allow us to call it from the virtio_blk_data_plane_start() in a -future patch. - -Signed-off-by: Sergio Lopez -Message-Id: <20200603093240.40489-2-slp@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 7aa1c247b466870b0704d3ccdc3755e5e7394dca) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/block/virtio-blk.c | 16 +++++++++++----- - include/hw/virtio/virtio-blk.h | 1 + - 2 files changed, 12 insertions(+), 5 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index c4e55fb3de..6ff29a05d6 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -819,15 +819,11 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) - virtio_blk_handle_output_do(s, vq); - } - --static void virtio_blk_dma_restart_bh(void *opaque) -+void virtio_blk_process_queued_requests(VirtIOBlock *s) - { -- VirtIOBlock *s = opaque; - VirtIOBlockReq *req = s->rq; - MultiReqBuffer mrb = {}; - -- qemu_bh_delete(s->bh); -- s->bh = NULL; -- - s->rq = NULL; - - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); -@@ -855,6 +851,16 @@ static void virtio_blk_dma_restart_bh(void *opaque) - aio_context_release(blk_get_aio_context(s->conf.conf.blk)); - } - -+static void virtio_blk_dma_restart_bh(void *opaque) -+{ -+ VirtIOBlock *s = opaque; -+ -+ qemu_bh_delete(s->bh); -+ s->bh = NULL; -+ -+ virtio_blk_process_queued_requests(s); -+} -+ - static void virtio_blk_dma_restart_cb(void *opaque, int running, - RunState state) - { -diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h -index cddcfbebe9..cf8eea2f58 100644 ---- a/include/hw/virtio/virtio-blk.h -+++ b/include/hw/virtio/virtio-blk.h -@@ -84,5 +84,6 @@ typedef struct MultiReqBuffer { - } MultiReqBuffer; - - bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); -+void virtio_blk_process_queued_requests(VirtIOBlock *s); - - #endif --- -2.27.0 - diff --git a/SOURCES/kvm-virtio-don-t-enable-notifications-during-polling.patch b/SOURCES/kvm-virtio-don-t-enable-notifications-during-polling.patch deleted file mode 100644 index 2dffc01..0000000 --- a/SOURCES/kvm-virtio-don-t-enable-notifications-during-polling.patch +++ /dev/null @@ -1,158 +0,0 @@ -From 351dd07d7b5e69cdf47260c9ea848c0c93cd2c8a Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi 
-Date: Thu, 9 Jan 2020 11:13:25 +0000 -Subject: [PATCH 3/5] virtio: don't enable notifications during polling - -RH-Author: Stefan Hajnoczi -Message-id: <20200109111325.559557-2-stefanha@redhat.com> -Patchwork-id: 93298 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] virtio: don't enable notifications during polling -Bugzilla: 1789301 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Michael S. Tsirkin - -Virtqueue notifications are not necessary during polling, so we disable -them. This allows the guest driver to avoid MMIO vmexits. -Unfortunately the virtio-blk and virtio-scsi handler functions re-enable -notifications, defeating this optimization. - -Fix virtio-blk and virtio-scsi emulation so they leave notifications -disabled. The key thing to remember for correctness is that polling -always checks one last time after ending its loop, therefore it's safe -to lose the race when re-enabling notifications at the end of polling. - -There is a measurable performance improvement of 5-10% with the null-co -block driver. Real-life storage configurations will see a smaller -improvement because the MMIO vmexit overhead contributes less to -latency. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20191209210957.65087-1-stefanha@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit d0435bc513e23a4961b6af20164d1c6c219eb4ea) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/block/virtio-blk.c | 9 +++++++-- - hw/scsi/virtio-scsi.c | 9 +++++++-- - hw/virtio/virtio.c | 12 ++++++------ - include/hw/virtio/virtio.h | 1 + - 4 files changed, 21 insertions(+), 10 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 4c357d2..c4e55fb 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -764,13 +764,16 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) - { - VirtIOBlockReq *req; - MultiReqBuffer mrb = {}; -+ bool suppress_notifications = virtio_queue_get_notification(vq); - bool progress = false; - - aio_context_acquire(blk_get_aio_context(s->blk)); - blk_io_plug(s->blk); - - do { -- virtio_queue_set_notification(vq, 0); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 0); -+ } - - while ((req = virtio_blk_get_request(s, vq))) { - progress = true; -@@ -781,7 +784,9 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) - } - } - -- virtio_queue_set_notification(vq, 1); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 1); -+ } - } while (!virtio_queue_empty(vq)); - - if (mrb.num_reqs) { -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 54108c0..e2cd1df 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -597,12 +597,15 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) - { - VirtIOSCSIReq *req, *next; - int ret = 0; -+ bool suppress_notifications = virtio_queue_get_notification(vq); - bool progress = false; - - QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); - - do { -- virtio_queue_set_notification(vq, 0); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 0); -+ } - - while ((req = virtio_scsi_pop_req(s, vq))) { - progress = true; -@@ -622,7 +625,9 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) - } - } - -- virtio_queue_set_notification(vq, 1); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 1); -+ } - } 
while (ret != -EINVAL && !virtio_queue_empty(vq)); - - QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 04716b5..3211135 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -432,6 +432,11 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable) - } - } - -+bool virtio_queue_get_notification(VirtQueue *vq) -+{ -+ return vq->notification; -+} -+ - void virtio_queue_set_notification(VirtQueue *vq, int enable) - { - vq->notification = enable; -@@ -3384,17 +3389,12 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque) - { - EventNotifier *n = opaque; - VirtQueue *vq = container_of(n, VirtQueue, host_notifier); -- bool progress; - - if (!vq->vring.desc || virtio_queue_empty(vq)) { - return false; - } - -- progress = virtio_queue_notify_aio_vq(vq); -- -- /* In case the handler function re-enabled notifications */ -- virtio_queue_set_notification(vq, 0); -- return progress; -+ return virtio_queue_notify_aio_vq(vq); - } - - static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index c32a815..6a20442 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -224,6 +224,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id); - - void virtio_notify_config(VirtIODevice *vdev); - -+bool virtio_queue_get_notification(VirtQueue *vq); - void virtio_queue_set_notification(VirtQueue *vq, int enable); - - int virtio_queue_ready(VirtQueue *vq); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch b/SOURCES/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch deleted file mode 100644 index 9a69ed1..0000000 --- a/SOURCES/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch +++ /dev/null @@ -1,60 +0,0 @@ -From c0cf6d8a1d3b9bf3928f37fcfd5aa8ae6f1338ca Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:36 +0100 -Subject: [PATCH 005/116] virtio-fs: fix MSI-X nvectors calculation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-2-dgilbert@redhat.com> -Patchwork-id: 93455 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 001/112] virtio-fs: fix MSI-X nvectors calculation -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -The following MSI-X vectors are required: - * VIRTIO Configuration Change - * hiprio virtqueue - * requests virtqueues - -Fix the calculation to reserve enough MSI-X vectors. Otherwise guest -drivers fall back to a sub-optional configuration where all virtqueues -share a single vector. - -This change does not break live migration compatibility since -vhost-user-fs-pci devices are not migratable yet. - -Reported-by: Vivek Goyal -Signed-off-by: Stefan Hajnoczi -Message-Id: <20191209110759.35227-1-stefanha@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 366844f3d1329c6423dd752891a28ccb3ee8fddd) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost-user-fs-pci.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c -index 933a3f2..e3a649d 100644 ---- a/hw/virtio/vhost-user-fs-pci.c -+++ b/hw/virtio/vhost-user-fs-pci.c -@@ -40,7 +40,8 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) - DeviceState *vdev = DEVICE(&dev->vdev); - - if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { -- vpci_dev->nvectors = dev->vdev.conf.num_request_queues + 1; -+ /* Also reserve config change and hiprio queue vectors */ -+ vpci_dev->nvectors = dev->vdev.conf.num_request_queues + 2; - } - - qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtio-make-virtio_delete_queue-idempotent.patch b/SOURCES/kvm-virtio-make-virtio_delete_queue-idempotent.patch deleted file mode 100644 index 16eb1da..0000000 --- a/SOURCES/kvm-virtio-make-virtio_delete_queue-idempotent.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 901e65fa6ccbadeacd6c585cf49a0a7cdafb4737 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Wed, 19 Feb 2020 21:34:29 +0000 -Subject: [PATCH 5/7] virtio: make virtio_delete_queue idempotent - -RH-Author: Julia Suvorova -Message-id: <20200219213431.11913-3-jusual@redhat.com> -Patchwork-id: 93981 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/4] virtio: make virtio_delete_queue idempotent -Bugzilla: 1791590 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Michael S. Tsirkin - -From: "Michael S. Tsirkin" - -Let's make sure calling this twice is harmless - -no known instances, but seems safer. - -Suggested-by: Pan Nengyuan -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 8cd353ea0fbf0e334e015d833f612799be642296) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/virtio/virtio.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index d63a369..e6a9ba4 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -2342,6 +2342,7 @@ void virtio_delete_queue(VirtQueue *vq) - vq->handle_output = NULL; - vq->handle_aio_output = NULL; - g_free(vq->used_elems); -+ vq->used_elems = NULL; - } - - void virtio_del_queue(VirtIODevice *vdev, int n) --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch b/SOURCES/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch deleted file mode 100644 index c21c699..0000000 --- a/SOURCES/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 2f494c41715193522c52eafc6af2a5e33f88ceb9 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Wed, 19 Feb 2020 21:34:31 +0000 -Subject: [PATCH 7/7] virtio-net: delete also control queue when TX/RX deleted - -RH-Author: Julia Suvorova -Message-id: <20200219213431.11913-5-jusual@redhat.com> -Patchwork-id: 93983 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/4] virtio-net: delete also control queue when TX/RX deleted -Bugzilla: 1791590 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Michael S. Tsirkin - -From: Yuri Benditovich - -https://bugzilla.redhat.com/show_bug.cgi?id=1708480 -If the control queue is not deleted together with TX/RX, it -later will be ignored in freeing cache resources and hot -unplug will not be completed. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Yuri Benditovich -Message-Id: <20191226043649.14481-3-yuri.benditovich@daynix.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit d945d9f1731244ef341f74ede93120fc9de35913) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index db3d7c3..f325440 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3101,7 +3101,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) - for (i = 0; i < max_queues; i++) { - virtio_net_del_queue(n, i); - } -- -+ /* delete also control vq */ -+ virtio_del_queue(vdev, max_queues * 2); - qemu_announce_timer_del(&n->announce_timer, false); - g_free(n->vqs); - qemu_del_nic(n->nic); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtio-net-fix-removal-of-failover-device.patch b/SOURCES/kvm-virtio-net-fix-removal-of-failover-device.patch deleted file mode 100644 index 6044f3d..0000000 --- a/SOURCES/kvm-virtio-net-fix-removal-of-failover-device.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 92fb4f6cdde32652352a0a831a2ba815701a4014 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Fri, 3 Jul 2020 12:37:05 -0400 -Subject: [PATCH 4/4] virtio-net: fix removal of failover device - -RH-Author: Juan Quintela -Message-id: <20200703123705.7175-2-quintela@redhat.com> -Patchwork-id: 97901 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/1] virtio-net: fix removal of failover device -Bugzilla: -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Laurent Vivier -RH-Acked-by: Dr. David Alan Gilbert - -If you have a networking device and its virtio failover device, and -you remove them in this order: -- virtio device -- the real device - -You get qemu crash. -See bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1820120 - -Bug exist on qemu 4.2 and 5.0. -But in 5.0 don't shows because commit -77b06bba62034a87cc61a9c8de1309ae3e527d97 - -somehow papers over it. - -CC: Jason Wang -CC: Michael S. Tsirkin - -Signed-off-by: Juan Quintela -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index f325440d01..dabeb9e720 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3091,6 +3091,7 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) - g_free(n->vlans); - - if (n->failover) { -+ device_listener_unregister(&n->primary_listener); - g_free(n->primary_device_id); - g_free(n->standby_id); - qobject_unref(n->primary_device_dict); --- -2.27.0 - diff --git a/SOURCES/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch b/SOURCES/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch deleted file mode 100644 index c9f1086..0000000 --- a/SOURCES/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 8bf4f561262d9282cebdb3418cdb9a69c92216a0 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Wed, 19 Feb 2020 21:34:30 +0000 -Subject: [PATCH 6/7] virtio: reset region cache when on queue deletion - -RH-Author: Julia Suvorova -Message-id: <20200219213431.11913-4-jusual@redhat.com> -Patchwork-id: 93982 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/4] virtio: reset region cache when on queue deletion -Bugzilla: 1791590 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Michael S. Tsirkin - -From: Yuri Benditovich - -https://bugzilla.redhat.com/show_bug.cgi?id=1708480 -Fix leak of region reference that prevents complete -device deletion on hot unplug. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Yuri Benditovich -Message-Id: <20191226043649.14481-2-yuri.benditovich@daynix.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 421afd2fe8dd4603216cbf36081877c391f5a2a4) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/virtio/virtio.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index e6a9ba4..f644d9a 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -2343,6 +2343,7 @@ void virtio_delete_queue(VirtQueue *vq) - vq->handle_aio_output = NULL; - g_free(vq->used_elems); - vq->used_elems = NULL; -+ virtio_virtqueue_reset_region_cache(vq); - } - - void virtio_del_queue(VirtIODevice *vdev, int n) --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofs-Add-maintainers-entry.patch b/SOURCES/kvm-virtiofs-Add-maintainers-entry.patch deleted file mode 100644 index fec9371..0000000 --- a/SOURCES/kvm-virtiofs-Add-maintainers-entry.patch +++ /dev/null @@ -1,52 +0,0 @@ -From f4144443eacceb04823ee72cb2d4f9f841f05495 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:11 +0100 -Subject: [PATCH 040/116] virtiofs: Add maintainers entry -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-37-dgilbert@redhat.com> -Patchwork-id: 93491 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 036/112] virtiofs: Add maintainers entry -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit bad7d2c3ad1af9344df035aedaf8e0967a543070) -Signed-off-by: Miroslav Rezanina ---- - MAINTAINERS | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/MAINTAINERS b/MAINTAINERS -index 5e5e3e5..d1b3e26 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -1575,6 +1575,14 @@ T: git https://github.com/cohuck/qemu.git s390-next - T: git https://github.com/borntraeger/qemu.git s390-next - L: qemu-s390x@nongnu.org - -+virtiofs -+M: Dr. David Alan Gilbert -+M: Stefan Hajnoczi -+S: Supported -+F: tools/virtiofsd/* -+F: hw/virtio/vhost-user-fs* -+F: include/hw/virtio/vhost-user-fs.h -+ - virtio-input - M: Gerd Hoffmann - S: Maintained --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch b/SOURCES/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch deleted file mode 100644 index a2b91be..0000000 --- a/SOURCES/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 4d9106acfd7ed9e4d197ddf9f22b79ba6c8afdd8 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:38 +0100 -Subject: [PATCH 067/116] virtiofsd: Add ID to the log with FUSE_LOG_DEBUG - level -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-64-dgilbert@redhat.com> -Patchwork-id: 93514 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 063/112] virtiofsd: Add ID to the log with FUSE_LOG_DEBUG level -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Masayoshi Mizuma - -virtiofsd has some threads, so we see a lot of logs with debug option. -It would be useful for debugging if we can identify the specific thread -from the log. - -Add ID, which is got by gettid(), to the log with FUSE_LOG_DEBUG level -so that we can grep the specific thread. 
- -The log is like as: - - ]# ./virtiofsd -d -o vhost_user_socket=/tmp/vhostqemu0 -o source=/tmp/share0 -o cache=auto - ... - [ID: 00000097] unique: 12696, success, outsize: 120 - [ID: 00000097] virtio_send_msg: elem 18: with 2 in desc of length 120 - [ID: 00000003] fv_queue_thread: Got queue event on Queue 1 - [ID: 00000003] fv_queue_thread: Queue 1 gave evalue: 1 available: in: 65552 out: 80 - [ID: 00000003] fv_queue_thread: Waiting for Queue 1 event - [ID: 00000071] fv_queue_worker: elem 33: with 2 out desc of length 80 bad_in_num=0 bad_out_num=0 - [ID: 00000071] unique: 12694, opcode: READ (15), nodeid: 2, insize: 80, pid: 2014 - [ID: 00000071] lo_read(ino=2, size=65536, off=131072) - -Signed-off-by: Masayoshi Mizuma - -Signed-off-by: Dr. David Alan Gilbert - added rework as suggested by Daniel P. Berrangé during review -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 36f3846902bd41413f6c0bf797dee509028c29f4) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index ff6910f..f08324f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -43,6 +43,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -2268,10 +2269,17 @@ static void setup_nofile_rlimit(void) - - static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) - { -+ g_autofree char *localfmt = NULL; -+ - if (current_log_level < level) { - return; - } - -+ if (current_log_level == FUSE_LOG_DEBUG) { -+ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt); -+ fmt = localfmt; -+ } -+ - if (use_syslog) { - int priority = LOG_ERR; - switch (level) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch 
b/SOURCES/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch deleted file mode 100644 index b017bf4..0000000 --- a/SOURCES/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 709408de33112d32b7c6675f8c9320b8bebccd58 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:05 +0100 -Subject: [PATCH 034/116] virtiofsd: Add Makefile wiring for virtiofsd contrib -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-31-dgilbert@redhat.com> -Patchwork-id: 93482 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 030/112] virtiofsd: Add Makefile wiring for virtiofsd contrib -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Wire up the building of the virtiofsd in tools. - -virtiofsd relies on Linux-specific system calls and seccomp. Anyone -wishing to port it to other host operating systems should do so -carefully and without reducing security. - -Only allow building on Linux hosts. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Liam Merwick -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 81bfc42dcf473bc8d3790622633410da72d8e622) -Signed-off-by: Miroslav Rezanina ---- - Makefile | 10 ++++++++++ - Makefile.objs | 1 + - tools/virtiofsd/Makefile.objs | 9 +++++++++ - 3 files changed, 20 insertions(+) - create mode 100644 tools/virtiofsd/Makefile.objs - -diff --git a/Makefile b/Makefile -index 4254950..1526775 100644 ---- a/Makefile -+++ b/Makefile -@@ -330,6 +330,10 @@ endif - endif - endif - -+ifdef CONFIG_LINUX -+HELPERS-y += virtiofsd$(EXESUF) -+endif -+ - # Sphinx does not allow building manuals into the same directory as - # the source files, so if we're doing an in-tree QEMU build we must - # build the manuals into a subdirectory (and then install them from -@@ -430,6 +434,7 @@ dummy := $(call unnest-vars,, \ - elf2dmp-obj-y \ - ivshmem-client-obj-y \ - ivshmem-server-obj-y \ -+ virtiofsd-obj-y \ - rdmacm-mux-obj-y \ - libvhost-user-obj-y \ - vhost-user-scsi-obj-y \ -@@ -675,6 +680,11 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad" - rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) - $(call LINK, $^) - -+ifdef CONFIG_LINUX # relies on Linux-specific syscalls -+virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) -+ $(call LINK, $^) -+endif -+ - vhost-user-gpu$(EXESUF): $(vhost-user-gpu-obj-y) $(libvhost-user-obj-y) libqemuutil.a libqemustub.a - $(call LINK, $^) - -diff --git a/Makefile.objs b/Makefile.objs -index fcf63e1..1a8f288 100644 ---- a/Makefile.objs -+++ b/Makefile.objs -@@ -125,6 +125,7 @@ vhost-user-blk-obj-y = contrib/vhost-user-blk/ - rdmacm-mux-obj-y = contrib/rdmacm-mux/ - vhost-user-input-obj-y = contrib/vhost-user-input/ - vhost-user-gpu-obj-y = contrib/vhost-user-gpu/ -+virtiofsd-obj-y = tools/virtiofsd/ - - ###################################################################### - trace-events-subdirs = -diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs -new file mode 100644 -index 0000000..45a8075 ---- /dev/null -+++ 
b/tools/virtiofsd/Makefile.objs -@@ -0,0 +1,9 @@ -+virtiofsd-obj-y = buffer.o \ -+ fuse_opt.o \ -+ fuse_log.o \ -+ fuse_lowlevel.o \ -+ fuse_signals.o \ -+ fuse_virtio.o \ -+ helper.o \ -+ passthrough_ll.o -+ --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Add-auxiliary-.c-s.patch b/SOURCES/kvm-virtiofsd-Add-auxiliary-.c-s.patch deleted file mode 100644 index 90150d9..0000000 --- a/SOURCES/kvm-virtiofsd-Add-auxiliary-.c-s.patch +++ /dev/null @@ -1,1387 +0,0 @@ -From 55b4059d6399c212109c758190e15b574accdd07 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:41 +0100 -Subject: [PATCH 010/116] virtiofsd: Add auxiliary .c's -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-7-dgilbert@redhat.com> -Patchwork-id: 93461 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 006/112] virtiofsd: Add auxiliary .c's -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Add most of the non-main .c files we need from upstream fuse-3.8.0 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit ffcf8d9f8649c6e56b1193bbbc9c9f7388920043) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 321 ++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_log.c | 40 ++++ - tools/virtiofsd/fuse_opt.c | 423 +++++++++++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_signals.c | 91 +++++++++ - tools/virtiofsd/helper.c | 440 +++++++++++++++++++++++++++++++++++++++++ - 5 files changed, 1315 insertions(+) - create mode 100644 tools/virtiofsd/buffer.c - create mode 100644 tools/virtiofsd/fuse_log.c - create mode 100644 tools/virtiofsd/fuse_opt.c - create mode 100644 tools/virtiofsd/fuse_signals.c - create mode 100644 tools/virtiofsd/helper.c - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -new file mode 100644 -index 0000000..5ab9b87 ---- /dev/null -+++ b/tools/virtiofsd/buffer.c -@@ -0,0 +1,321 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2010 Miklos Szeredi -+ -+ Functions for dealing with `struct fuse_buf` and `struct -+ fuse_bufvec`. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB -+*/ -+ -+#define _GNU_SOURCE -+ -+#include "config.h" -+#include "fuse_i.h" -+#include "fuse_lowlevel.h" -+#include -+#include -+#include -+#include -+ -+size_t fuse_buf_size(const struct fuse_bufvec *bufv) -+{ -+ size_t i; -+ size_t size = 0; -+ -+ for (i = 0; i < bufv->count; i++) { -+ if (bufv->buf[i].size == SIZE_MAX) -+ size = SIZE_MAX; -+ else -+ size += bufv->buf[i].size; -+ } -+ -+ return size; -+} -+ -+static size_t min_size(size_t s1, size_t s2) -+{ -+ return s1 < s2 ? 
s1 : s2; -+} -+ -+static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) -+{ -+ ssize_t res = 0; -+ size_t copied = 0; -+ -+ while (len) { -+ if (dst->flags & FUSE_BUF_FD_SEEK) { -+ res = pwrite(dst->fd, (char *)src->mem + src_off, len, -+ dst->pos + dst_off); -+ } else { -+ res = write(dst->fd, (char *)src->mem + src_off, len); -+ } -+ if (res == -1) { -+ if (!copied) -+ return -errno; -+ break; -+ } -+ if (res == 0) -+ break; -+ -+ copied += res; -+ if (!(dst->flags & FUSE_BUF_FD_RETRY)) -+ break; -+ -+ src_off += res; -+ dst_off += res; -+ len -= res; -+ } -+ -+ return copied; -+} -+ -+static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) -+{ -+ ssize_t res = 0; -+ size_t copied = 0; -+ -+ while (len) { -+ if (src->flags & FUSE_BUF_FD_SEEK) { -+ res = pread(src->fd, (char *)dst->mem + dst_off, len, -+ src->pos + src_off); -+ } else { -+ res = read(src->fd, (char *)dst->mem + dst_off, len); -+ } -+ if (res == -1) { -+ if (!copied) -+ return -errno; -+ break; -+ } -+ if (res == 0) -+ break; -+ -+ copied += res; -+ if (!(src->flags & FUSE_BUF_FD_RETRY)) -+ break; -+ -+ dst_off += res; -+ src_off += res; -+ len -= res; -+ } -+ -+ return copied; -+} -+ -+static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) -+{ -+ char buf[4096]; -+ struct fuse_buf tmp = { -+ .size = sizeof(buf), -+ .flags = 0, -+ }; -+ ssize_t res; -+ size_t copied = 0; -+ -+ tmp.mem = buf; -+ -+ while (len) { -+ size_t this_len = min_size(tmp.size, len); -+ size_t read_len; -+ -+ res = fuse_buf_read(&tmp, 0, src, src_off, this_len); -+ if (res < 0) { -+ if (!copied) -+ return res; -+ break; -+ } -+ if (res == 0) -+ break; -+ -+ read_len = res; -+ res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); -+ if (res < 0) { -+ if (!copied) -+ return res; -+ 
break; -+ } -+ if (res == 0) -+ break; -+ -+ copied += res; -+ -+ if (res < this_len) -+ break; -+ -+ dst_off += res; -+ src_off += res; -+ len -= res; -+ } -+ -+ return copied; -+} -+ -+#ifdef HAVE_SPLICE -+static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len, enum fuse_buf_copy_flags flags) -+{ -+ int splice_flags = 0; -+ off_t *srcpos = NULL; -+ off_t *dstpos = NULL; -+ off_t srcpos_val; -+ off_t dstpos_val; -+ ssize_t res; -+ size_t copied = 0; -+ -+ if (flags & FUSE_BUF_SPLICE_MOVE) -+ splice_flags |= SPLICE_F_MOVE; -+ if (flags & FUSE_BUF_SPLICE_NONBLOCK) -+ splice_flags |= SPLICE_F_NONBLOCK; -+ -+ if (src->flags & FUSE_BUF_FD_SEEK) { -+ srcpos_val = src->pos + src_off; -+ srcpos = &srcpos_val; -+ } -+ if (dst->flags & FUSE_BUF_FD_SEEK) { -+ dstpos_val = dst->pos + dst_off; -+ dstpos = &dstpos_val; -+ } -+ -+ while (len) { -+ res = splice(src->fd, srcpos, dst->fd, dstpos, len, -+ splice_flags); -+ if (res == -1) { -+ if (copied) -+ break; -+ -+ if (errno != EINVAL || (flags & FUSE_BUF_FORCE_SPLICE)) -+ return -errno; -+ -+ /* Maybe splice is not supported for this combination */ -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, -+ len); -+ } -+ if (res == 0) -+ break; -+ -+ copied += res; -+ if (!(src->flags & FUSE_BUF_FD_RETRY) && -+ !(dst->flags & FUSE_BUF_FD_RETRY)) { -+ break; -+ } -+ -+ len -= res; -+ } -+ -+ return copied; -+} -+#else -+static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len, enum fuse_buf_copy_flags flags) -+{ -+ (void) flags; -+ -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); -+} -+#endif -+ -+ -+static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len, enum fuse_buf_copy_flags flags) -+{ -+ int src_is_fd = src->flags & FUSE_BUF_IS_FD; -+ int dst_is_fd = dst->flags & 
FUSE_BUF_IS_FD; -+ -+ if (!src_is_fd && !dst_is_fd) { -+ char *dstmem = (char *)dst->mem + dst_off; -+ char *srcmem = (char *)src->mem + src_off; -+ -+ if (dstmem != srcmem) { -+ if (dstmem + len <= srcmem || srcmem + len <= dstmem) -+ memcpy(dstmem, srcmem, len); -+ else -+ memmove(dstmem, srcmem, len); -+ } -+ -+ return len; -+ } else if (!src_is_fd) { -+ return fuse_buf_write(dst, dst_off, src, src_off, len); -+ } else if (!dst_is_fd) { -+ return fuse_buf_read(dst, dst_off, src, src_off, len); -+ } else if (flags & FUSE_BUF_NO_SPLICE) { -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); -+ } else { -+ return fuse_buf_splice(dst, dst_off, src, src_off, len, flags); -+ } -+} -+ -+static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv) -+{ -+ if (bufv->idx < bufv->count) -+ return &bufv->buf[bufv->idx]; -+ else -+ return NULL; -+} -+ -+static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) -+{ -+ const struct fuse_buf *buf = fuse_bufvec_current(bufv); -+ -+ bufv->off += len; -+ assert(bufv->off <= buf->size); -+ if (bufv->off == buf->size) { -+ assert(bufv->idx < bufv->count); -+ bufv->idx++; -+ if (bufv->idx == bufv->count) -+ return 0; -+ bufv->off = 0; -+ } -+ return 1; -+} -+ -+ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, -+ enum fuse_buf_copy_flags flags) -+{ -+ size_t copied = 0; -+ -+ if (dstv == srcv) -+ return fuse_buf_size(dstv); -+ -+ for (;;) { -+ const struct fuse_buf *src = fuse_bufvec_current(srcv); -+ const struct fuse_buf *dst = fuse_bufvec_current(dstv); -+ size_t src_len; -+ size_t dst_len; -+ size_t len; -+ ssize_t res; -+ -+ if (src == NULL || dst == NULL) -+ break; -+ -+ src_len = src->size - srcv->off; -+ dst_len = dst->size - dstv->off; -+ len = min_size(src_len, dst_len); -+ -+ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); -+ if (res < 0) { -+ if (!copied) -+ return res; -+ break; -+ } -+ copied += res; -+ -+ if (!fuse_bufvec_advance(srcv, res) 
|| -+ !fuse_bufvec_advance(dstv, res)) -+ break; -+ -+ if (res < len) -+ break; -+ } -+ -+ return copied; -+} -diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c -new file mode 100644 -index 0000000..0d268ab ---- /dev/null -+++ b/tools/virtiofsd/fuse_log.c -@@ -0,0 +1,40 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2019 Red Hat, Inc. -+ -+ Logging API. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB -+*/ -+ -+#include "fuse_log.h" -+ -+#include -+#include -+ -+static void default_log_func( -+ __attribute__(( unused )) enum fuse_log_level level, -+ const char *fmt, va_list ap) -+{ -+ vfprintf(stderr, fmt, ap); -+} -+ -+static fuse_log_func_t log_func = default_log_func; -+ -+void fuse_set_log_func(fuse_log_func_t func) -+{ -+ if (!func) -+ func = default_log_func; -+ -+ log_func = func; -+} -+ -+void fuse_log(enum fuse_log_level level, const char *fmt, ...) -+{ -+ va_list ap; -+ -+ va_start(ap, fmt); -+ log_func(level, fmt, ap); -+ va_end(ap); -+} -diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c -new file mode 100644 -index 0000000..93066b9 ---- /dev/null -+++ b/tools/virtiofsd/fuse_opt.c -@@ -0,0 +1,423 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ Implementation of option parsing routines (dealing with `struct -+ fuse_args`). -+ -+ This program can be distributed under the terms of the GNU LGPLv2. 
-+ See the file COPYING.LIB -+*/ -+ -+#include "config.h" -+#include "fuse_i.h" -+#include "fuse_opt.h" -+#include "fuse_misc.h" -+ -+#include -+#include -+#include -+#include -+ -+struct fuse_opt_context { -+ void *data; -+ const struct fuse_opt *opt; -+ fuse_opt_proc_t proc; -+ int argctr; -+ int argc; -+ char **argv; -+ struct fuse_args outargs; -+ char *opts; -+ int nonopt; -+}; -+ -+void fuse_opt_free_args(struct fuse_args *args) -+{ -+ if (args) { -+ if (args->argv && args->allocated) { -+ int i; -+ for (i = 0; i < args->argc; i++) -+ free(args->argv[i]); -+ free(args->argv); -+ } -+ args->argc = 0; -+ args->argv = NULL; -+ args->allocated = 0; -+ } -+} -+ -+static int alloc_failed(void) -+{ -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; -+} -+ -+int fuse_opt_add_arg(struct fuse_args *args, const char *arg) -+{ -+ char **newargv; -+ char *newarg; -+ -+ assert(!args->argv || args->allocated); -+ -+ newarg = strdup(arg); -+ if (!newarg) -+ return alloc_failed(); -+ -+ newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *)); -+ if (!newargv) { -+ free(newarg); -+ return alloc_failed(); -+ } -+ -+ args->argv = newargv; -+ args->allocated = 1; -+ args->argv[args->argc++] = newarg; -+ args->argv[args->argc] = NULL; -+ return 0; -+} -+ -+static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos, -+ const char *arg) -+{ -+ assert(pos <= args->argc); -+ if (fuse_opt_add_arg(args, arg) == -1) -+ return -1; -+ -+ if (pos != args->argc - 1) { -+ char *newarg = args->argv[args->argc - 1]; -+ memmove(&args->argv[pos + 1], &args->argv[pos], -+ sizeof(char *) * (args->argc - pos - 1)); -+ args->argv[pos] = newarg; -+ } -+ return 0; -+} -+ -+int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg) -+{ -+ return fuse_opt_insert_arg_common(args, pos, arg); -+} -+ -+static int next_arg(struct fuse_opt_context *ctx, const char *opt) -+{ -+ if (ctx->argctr + 1 >= ctx->argc) { -+ fuse_log(FUSE_LOG_ERR, "fuse: 
missing argument after `%s'\n", opt); -+ return -1; -+ } -+ ctx->argctr++; -+ return 0; -+} -+ -+static int add_arg(struct fuse_opt_context *ctx, const char *arg) -+{ -+ return fuse_opt_add_arg(&ctx->outargs, arg); -+} -+ -+static int add_opt_common(char **opts, const char *opt, int esc) -+{ -+ unsigned oldlen = *opts ? strlen(*opts) : 0; -+ char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); -+ -+ if (!d) -+ return alloc_failed(); -+ -+ *opts = d; -+ if (oldlen) { -+ d += oldlen; -+ *d++ = ','; -+ } -+ -+ for (; *opt; opt++) { -+ if (esc && (*opt == ',' || *opt == '\\')) -+ *d++ = '\\'; -+ *d++ = *opt; -+ } -+ *d = '\0'; -+ -+ return 0; -+} -+ -+int fuse_opt_add_opt(char **opts, const char *opt) -+{ -+ return add_opt_common(opts, opt, 0); -+} -+ -+int fuse_opt_add_opt_escaped(char **opts, const char *opt) -+{ -+ return add_opt_common(opts, opt, 1); -+} -+ -+static int add_opt(struct fuse_opt_context *ctx, const char *opt) -+{ -+ return add_opt_common(&ctx->opts, opt, 1); -+} -+ -+static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key, -+ int iso) -+{ -+ if (key == FUSE_OPT_KEY_DISCARD) -+ return 0; -+ -+ if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { -+ int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); -+ if (res == -1 || !res) -+ return res; -+ } -+ if (iso) -+ return add_opt(ctx, arg); -+ else -+ return add_arg(ctx, arg); -+} -+ -+static int match_template(const char *t, const char *arg, unsigned *sepp) -+{ -+ int arglen = strlen(arg); -+ const char *sep = strchr(t, '='); -+ sep = sep ? 
sep : strchr(t, ' '); -+ if (sep && (!sep[1] || sep[1] == '%')) { -+ int tlen = sep - t; -+ if (sep[0] == '=') -+ tlen ++; -+ if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { -+ *sepp = sep - t; -+ return 1; -+ } -+ } -+ if (strcmp(t, arg) == 0) { -+ *sepp = 0; -+ return 1; -+ } -+ return 0; -+} -+ -+static const struct fuse_opt *find_opt(const struct fuse_opt *opt, -+ const char *arg, unsigned *sepp) -+{ -+ for (; opt && opt->templ; opt++) -+ if (match_template(opt->templ, arg, sepp)) -+ return opt; -+ return NULL; -+} -+ -+int fuse_opt_match(const struct fuse_opt *opts, const char *opt) -+{ -+ unsigned dummy; -+ return find_opt(opts, opt, &dummy) ? 1 : 0; -+} -+ -+static int process_opt_param(void *var, const char *format, const char *param, -+ const char *arg) -+{ -+ assert(format[0] == '%'); -+ if (format[1] == 's') { -+ char **s = var; -+ char *copy = strdup(param); -+ if (!copy) -+ return alloc_failed(); -+ -+ free(*s); -+ *s = copy; -+ } else { -+ if (sscanf(param, format, var) != 1) { -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", arg); -+ return -1; -+ } -+ } -+ return 0; -+} -+ -+static int process_opt(struct fuse_opt_context *ctx, -+ const struct fuse_opt *opt, unsigned sep, -+ const char *arg, int iso) -+{ -+ if (opt->offset == -1U) { -+ if (call_proc(ctx, arg, opt->value, iso) == -1) -+ return -1; -+ } else { -+ void *var = (char *)ctx->data + opt->offset; -+ if (sep && opt->templ[sep + 1]) { -+ const char *param = arg + sep; -+ if (opt->templ[sep] == '=') -+ param ++; -+ if (process_opt_param(var, opt->templ + sep + 1, -+ param, arg) == -1) -+ return -1; -+ } else -+ *(int *)var = opt->value; -+ } -+ return 0; -+} -+ -+static int process_opt_sep_arg(struct fuse_opt_context *ctx, -+ const struct fuse_opt *opt, unsigned sep, -+ const char *arg, int iso) -+{ -+ int res; -+ char *newarg; -+ char *param; -+ -+ if (next_arg(ctx, arg) == -1) -+ return -1; -+ -+ param = ctx->argv[ctx->argctr]; -+ newarg = malloc(sep + strlen(param) + 
1); -+ if (!newarg) -+ return alloc_failed(); -+ -+ memcpy(newarg, arg, sep); -+ strcpy(newarg + sep, param); -+ res = process_opt(ctx, opt, sep, newarg, iso); -+ free(newarg); -+ -+ return res; -+} -+ -+static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso) -+{ -+ unsigned sep; -+ const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); -+ if (opt) { -+ for (; opt; opt = find_opt(opt + 1, arg, &sep)) { -+ int res; -+ if (sep && opt->templ[sep] == ' ' && !arg[sep]) -+ res = process_opt_sep_arg(ctx, opt, sep, arg, -+ iso); -+ else -+ res = process_opt(ctx, opt, sep, arg, iso); -+ if (res == -1) -+ return -1; -+ } -+ return 0; -+ } else -+ return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); -+} -+ -+static int process_real_option_group(struct fuse_opt_context *ctx, char *opts) -+{ -+ char *s = opts; -+ char *d = s; -+ int end = 0; -+ -+ while (!end) { -+ if (*s == '\0') -+ end = 1; -+ if (*s == ',' || end) { -+ int res; -+ -+ *d = '\0'; -+ res = process_gopt(ctx, opts, 1); -+ if (res == -1) -+ return -1; -+ d = opts; -+ } else { -+ if (s[0] == '\\' && s[1] != '\0') { -+ s++; -+ if (s[0] >= '0' && s[0] <= '3' && -+ s[1] >= '0' && s[1] <= '7' && -+ s[2] >= '0' && s[2] <= '7') { -+ *d++ = (s[0] - '0') * 0100 + -+ (s[1] - '0') * 0010 + -+ (s[2] - '0'); -+ s += 2; -+ } else { -+ *d++ = *s; -+ } -+ } else { -+ *d++ = *s; -+ } -+ } -+ s++; -+ } -+ -+ return 0; -+} -+ -+static int process_option_group(struct fuse_opt_context *ctx, const char *opts) -+{ -+ int res; -+ char *copy = strdup(opts); -+ -+ if (!copy) { -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; -+ } -+ res = process_real_option_group(ctx, copy); -+ free(copy); -+ return res; -+} -+ -+static int process_one(struct fuse_opt_context *ctx, const char *arg) -+{ -+ if (ctx->nonopt || arg[0] != '-') -+ return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); -+ else if (arg[1] == 'o') { -+ if (arg[2]) -+ return process_option_group(ctx, arg + 2); -+ else { -+ if 
(next_arg(ctx, arg) == -1) -+ return -1; -+ -+ return process_option_group(ctx, -+ ctx->argv[ctx->argctr]); -+ } -+ } else if (arg[1] == '-' && !arg[2]) { -+ if (add_arg(ctx, arg) == -1) -+ return -1; -+ ctx->nonopt = ctx->outargs.argc; -+ return 0; -+ } else -+ return process_gopt(ctx, arg, 0); -+} -+ -+static int opt_parse(struct fuse_opt_context *ctx) -+{ -+ if (ctx->argc) { -+ if (add_arg(ctx, ctx->argv[0]) == -1) -+ return -1; -+ } -+ -+ for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) -+ if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) -+ return -1; -+ -+ if (ctx->opts) { -+ if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || -+ fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) -+ return -1; -+ } -+ -+ /* If option separator ("--") is the last argument, remove it */ -+ if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && -+ strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { -+ free(ctx->outargs.argv[ctx->outargs.argc - 1]); -+ ctx->outargs.argv[--ctx->outargs.argc] = NULL; -+ } -+ -+ return 0; -+} -+ -+int fuse_opt_parse(struct fuse_args *args, void *data, -+ const struct fuse_opt opts[], fuse_opt_proc_t proc) -+{ -+ int res; -+ struct fuse_opt_context ctx = { -+ .data = data, -+ .opt = opts, -+ .proc = proc, -+ }; -+ -+ if (!args || !args->argv || !args->argc) -+ return 0; -+ -+ ctx.argc = args->argc; -+ ctx.argv = args->argv; -+ -+ res = opt_parse(&ctx); -+ if (res != -1) { -+ struct fuse_args tmp = *args; -+ *args = ctx.outargs; -+ ctx.outargs = tmp; -+ } -+ free(ctx.opts); -+ fuse_opt_free_args(&ctx.outargs); -+ return res; -+} -diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c -new file mode 100644 -index 0000000..4271947 ---- /dev/null -+++ b/tools/virtiofsd/fuse_signals.c -@@ -0,0 +1,91 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ Utility functions for setting signal handlers. 
-+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB -+*/ -+ -+#include "config.h" -+#include "fuse_lowlevel.h" -+#include "fuse_i.h" -+ -+#include -+#include -+#include -+#include -+ -+static struct fuse_session *fuse_instance; -+ -+static void exit_handler(int sig) -+{ -+ if (fuse_instance) { -+ fuse_session_exit(fuse_instance); -+ if(sig <= 0) { -+ fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); -+ abort(); -+ } -+ fuse_instance->error = sig; -+ } -+} -+ -+static void do_nothing(int sig) -+{ -+ (void) sig; -+} -+ -+static int set_one_signal_handler(int sig, void (*handler)(int), int remove) -+{ -+ struct sigaction sa; -+ struct sigaction old_sa; -+ -+ memset(&sa, 0, sizeof(struct sigaction)); -+ sa.sa_handler = remove ? SIG_DFL : handler; -+ sigemptyset(&(sa.sa_mask)); -+ sa.sa_flags = 0; -+ -+ if (sigaction(sig, NULL, &old_sa) == -1) { -+ perror("fuse: cannot get old signal handler"); -+ return -1; -+ } -+ -+ if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && -+ sigaction(sig, &sa, NULL) == -1) { -+ perror("fuse: cannot set signal handler"); -+ return -1; -+ } -+ return 0; -+} -+ -+int fuse_set_signal_handlers(struct fuse_session *se) -+{ -+ /* If we used SIG_IGN instead of the do_nothing function, -+ then we would be unable to tell if we set SIG_IGN (and -+ thus should reset to SIG_DFL in fuse_remove_signal_handlers) -+ or if it was already set to SIG_IGN (and should be left -+ untouched. 
*/ -+ if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) -+ return -1; -+ -+ fuse_instance = se; -+ return 0; -+} -+ -+void fuse_remove_signal_handlers(struct fuse_session *se) -+{ -+ if (fuse_instance != se) -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: fuse_remove_signal_handlers: unknown session\n"); -+ else -+ fuse_instance = NULL; -+ -+ set_one_signal_handler(SIGHUP, exit_handler, 1); -+ set_one_signal_handler(SIGINT, exit_handler, 1); -+ set_one_signal_handler(SIGTERM, exit_handler, 1); -+ set_one_signal_handler(SIGPIPE, do_nothing, 1); -+} -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -new file mode 100644 -index 0000000..64ff7ad ---- /dev/null -+++ b/tools/virtiofsd/helper.c -@@ -0,0 +1,440 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ Helper functions to create (simple) standalone programs. With the -+ aid of these functions it should be possible to create full FUSE -+ file system by implementing nothing but the request handlers. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. 
-+*/ -+ -+#include "config.h" -+#include "fuse_i.h" -+#include "fuse_misc.h" -+#include "fuse_opt.h" -+#include "fuse_lowlevel.h" -+#include "mount_util.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define FUSE_HELPER_OPT(t, p) \ -+ { t, offsetof(struct fuse_cmdline_opts, p), 1 } -+ -+static const struct fuse_opt fuse_helper_opts[] = { -+ FUSE_HELPER_OPT("-h", show_help), -+ FUSE_HELPER_OPT("--help", show_help), -+ FUSE_HELPER_OPT("-V", show_version), -+ FUSE_HELPER_OPT("--version", show_version), -+ FUSE_HELPER_OPT("-d", debug), -+ FUSE_HELPER_OPT("debug", debug), -+ FUSE_HELPER_OPT("-d", foreground), -+ FUSE_HELPER_OPT("debug", foreground), -+ FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), -+ FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), -+ FUSE_HELPER_OPT("-f", foreground), -+ FUSE_HELPER_OPT("-s", singlethread), -+ FUSE_HELPER_OPT("fsname=", nodefault_subtype), -+ FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), -+#ifndef __FreeBSD__ -+ FUSE_HELPER_OPT("subtype=", nodefault_subtype), -+ FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), -+#endif -+ FUSE_HELPER_OPT("clone_fd", clone_fd), -+ FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), -+ FUSE_OPT_END -+}; -+ -+struct fuse_conn_info_opts { -+ int atomic_o_trunc; -+ int no_remote_posix_lock; -+ int no_remote_flock; -+ int splice_write; -+ int splice_move; -+ int splice_read; -+ int no_splice_write; -+ int no_splice_move; -+ int no_splice_read; -+ int auto_inval_data; -+ int no_auto_inval_data; -+ int no_readdirplus; -+ int no_readdirplus_auto; -+ int async_dio; -+ int no_async_dio; -+ int writeback_cache; -+ int no_writeback_cache; -+ int async_read; -+ int sync_read; -+ unsigned max_write; -+ unsigned max_readahead; -+ unsigned max_background; -+ unsigned congestion_threshold; -+ unsigned time_gran; -+ int set_max_write; -+ int set_max_readahead; -+ int set_max_background; -+ int set_congestion_threshold; -+ int set_time_gran; -+}; -+ -+#define CONN_OPTION(t, p, v) \ -+ 
{ t, offsetof(struct fuse_conn_info_opts, p), v } -+static const struct fuse_opt conn_info_opt_spec[] = { -+ CONN_OPTION("max_write=%u", max_write, 0), -+ CONN_OPTION("max_write=", set_max_write, 1), -+ CONN_OPTION("max_readahead=%u", max_readahead, 0), -+ CONN_OPTION("max_readahead=", set_max_readahead, 1), -+ CONN_OPTION("max_background=%u", max_background, 0), -+ CONN_OPTION("max_background=", set_max_background, 1), -+ CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), -+ CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), -+ CONN_OPTION("sync_read", sync_read, 1), -+ CONN_OPTION("async_read", async_read, 1), -+ CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), -+ CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), -+ CONN_OPTION("no_remote_lock", no_remote_flock, 1), -+ CONN_OPTION("no_remote_flock", no_remote_flock, 1), -+ CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), -+ CONN_OPTION("splice_write", splice_write, 1), -+ CONN_OPTION("no_splice_write", no_splice_write, 1), -+ CONN_OPTION("splice_move", splice_move, 1), -+ CONN_OPTION("no_splice_move", no_splice_move, 1), -+ CONN_OPTION("splice_read", splice_read, 1), -+ CONN_OPTION("no_splice_read", no_splice_read, 1), -+ CONN_OPTION("auto_inval_data", auto_inval_data, 1), -+ CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), -+ CONN_OPTION("readdirplus=no", no_readdirplus, 1), -+ CONN_OPTION("readdirplus=yes", no_readdirplus, 0), -+ CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), -+ CONN_OPTION("readdirplus=auto", no_readdirplus, 0), -+ CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), -+ CONN_OPTION("async_dio", async_dio, 1), -+ CONN_OPTION("no_async_dio", no_async_dio, 1), -+ CONN_OPTION("writeback_cache", writeback_cache, 1), -+ CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), -+ CONN_OPTION("time_gran=%u", time_gran, 0), -+ CONN_OPTION("time_gran=", set_time_gran, 1), -+ FUSE_OPT_END -+}; -+ -+ -+void fuse_cmdline_help(void) 
-+{ -+ printf(" -h --help print help\n" -+ " -V --version print version\n" -+ " -d -o debug enable debug output (implies -f)\n" -+ " -f foreground operation\n" -+ " -s disable multi-threaded operation\n" -+ " -o clone_fd use separate fuse device fd for each thread\n" -+ " (may improve performance)\n" -+ " -o max_idle_threads the maximum number of idle worker threads\n" -+ " allowed (default: 10)\n"); -+} -+ -+static int fuse_helper_opt_proc(void *data, const char *arg, int key, -+ struct fuse_args *outargs) -+{ -+ (void) outargs; -+ struct fuse_cmdline_opts *opts = data; -+ -+ switch (key) { -+ case FUSE_OPT_KEY_NONOPT: -+ if (!opts->mountpoint) { -+ if (fuse_mnt_parse_fuse_fd(arg) != -1) { -+ return fuse_opt_add_opt(&opts->mountpoint, arg); -+ } -+ -+ char mountpoint[PATH_MAX] = ""; -+ if (realpath(arg, mountpoint) == NULL) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: bad mount point `%s': %s\n", -+ arg, strerror(errno)); -+ return -1; -+ } -+ return fuse_opt_add_opt(&opts->mountpoint, mountpoint); -+ } else { -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -+ return -1; -+ } -+ -+ default: -+ /* Pass through unknown options */ -+ return 1; -+ } -+} -+ -+/* Under FreeBSD, there is no subtype option so this -+ function actually sets the fsname */ -+static int add_default_subtype(const char *progname, struct fuse_args *args) -+{ -+ int res; -+ char *subtype_opt; -+ -+ const char *basename = strrchr(progname, '/'); -+ if (basename == NULL) -+ basename = progname; -+ else if (basename[1] != '\0') -+ basename++; -+ -+ subtype_opt = (char *) malloc(strlen(basename) + 64); -+ if (subtype_opt == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; -+ } -+#ifdef __FreeBSD__ -+ sprintf(subtype_opt, "-ofsname=%s", basename); -+#else -+ sprintf(subtype_opt, "-osubtype=%s", basename); -+#endif -+ res = fuse_opt_add_arg(args, subtype_opt); -+ free(subtype_opt); -+ return res; -+} -+ -+int fuse_parse_cmdline(struct fuse_args *args, -+ 
struct fuse_cmdline_opts *opts) -+{ -+ memset(opts, 0, sizeof(struct fuse_cmdline_opts)); -+ -+ opts->max_idle_threads = 10; -+ -+ if (fuse_opt_parse(args, opts, fuse_helper_opts, -+ fuse_helper_opt_proc) == -1) -+ return -1; -+ -+ /* *Linux*: if neither -o subtype nor -o fsname are specified, -+ set subtype to program's basename. -+ *FreeBSD*: if fsname is not specified, set to program's -+ basename. */ -+ if (!opts->nodefault_subtype) -+ if (add_default_subtype(args->argv[0], args) == -1) -+ return -1; -+ -+ return 0; -+} -+ -+ -+int fuse_daemonize(int foreground) -+{ -+ if (!foreground) { -+ int nullfd; -+ int waiter[2]; -+ char completed; -+ -+ if (pipe(waiter)) { -+ perror("fuse_daemonize: pipe"); -+ return -1; -+ } -+ -+ /* -+ * demonize current process by forking it and killing the -+ * parent. This makes current process as a child of 'init'. -+ */ -+ switch(fork()) { -+ case -1: -+ perror("fuse_daemonize: fork"); -+ return -1; -+ case 0: -+ break; -+ default: -+ (void) read(waiter[0], &completed, sizeof(completed)); -+ _exit(0); -+ } -+ -+ if (setsid() == -1) { -+ perror("fuse_daemonize: setsid"); -+ return -1; -+ } -+ -+ (void) chdir("/"); -+ -+ nullfd = open("/dev/null", O_RDWR, 0); -+ if (nullfd != -1) { -+ (void) dup2(nullfd, 0); -+ (void) dup2(nullfd, 1); -+ (void) dup2(nullfd, 2); -+ if (nullfd > 2) -+ close(nullfd); -+ } -+ -+ /* Propagate completion of daemon initialization */ -+ completed = 1; -+ (void) write(waiter[1], &completed, sizeof(completed)); -+ close(waiter[0]); -+ close(waiter[1]); -+ } else { -+ (void) chdir("/"); -+ } -+ return 0; -+} -+ -+int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, -+ size_t op_size, void *user_data) -+{ -+ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -+ struct fuse *fuse; -+ struct fuse_cmdline_opts opts; -+ int res; -+ -+ if (fuse_parse_cmdline(&args, &opts) != 0) -+ return 1; -+ -+ if (opts.show_version) { -+ printf("FUSE library version %s\n", PACKAGE_VERSION); -+ 
fuse_lowlevel_version(); -+ res = 0; -+ goto out1; -+ } -+ -+ if (opts.show_help) { -+ if(args.argv[0][0] != '\0') -+ printf("usage: %s [options] \n\n", -+ args.argv[0]); -+ printf("FUSE options:\n"); -+ fuse_cmdline_help(); -+ fuse_lib_help(&args); -+ res = 0; -+ goto out1; -+ } -+ -+ if (!opts.show_help && -+ !opts.mountpoint) { -+ fuse_log(FUSE_LOG_ERR, "error: no mountpoint specified\n"); -+ res = 2; -+ goto out1; -+ } -+ -+ -+ fuse = fuse_new_31(&args, op, op_size, user_data); -+ if (fuse == NULL) { -+ res = 3; -+ goto out1; -+ } -+ -+ if (fuse_mount(fuse,opts.mountpoint) != 0) { -+ res = 4; -+ goto out2; -+ } -+ -+ if (fuse_daemonize(opts.foreground) != 0) { -+ res = 5; -+ goto out3; -+ } -+ -+ struct fuse_session *se = fuse_get_session(fuse); -+ if (fuse_set_signal_handlers(se) != 0) { -+ res = 6; -+ goto out3; -+ } -+ -+ if (opts.singlethread) -+ res = fuse_loop(fuse); -+ else { -+ struct fuse_loop_config loop_config; -+ loop_config.clone_fd = opts.clone_fd; -+ loop_config.max_idle_threads = opts.max_idle_threads; -+ res = fuse_loop_mt_32(fuse, &loop_config); -+ } -+ if (res) -+ res = 7; -+ -+ fuse_remove_signal_handlers(se); -+out3: -+ fuse_unmount(fuse); -+out2: -+ fuse_destroy(fuse); -+out1: -+ free(opts.mountpoint); -+ fuse_opt_free_args(&args); -+ return res; -+} -+ -+ -+void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, -+ struct fuse_conn_info *conn) -+{ -+ if(opts->set_max_write) -+ conn->max_write = opts->max_write; -+ if(opts->set_max_background) -+ conn->max_background = opts->max_background; -+ if(opts->set_congestion_threshold) -+ conn->congestion_threshold = opts->congestion_threshold; -+ if(opts->set_time_gran) -+ conn->time_gran = opts->time_gran; -+ if(opts->set_max_readahead) -+ conn->max_readahead = opts->max_readahead; -+ -+#define LL_ENABLE(cond,cap) \ -+ if (cond) conn->want |= (cap) -+#define LL_DISABLE(cond,cap) \ -+ if (cond) conn->want &= ~(cap) -+ -+ LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); -+ 
LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); -+ -+ LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); -+ LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); -+ -+ LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); -+ LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); -+ -+ LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -+ LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -+ -+ LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); -+ LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO); -+ -+ LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); -+ LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); -+ -+ LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -+ LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -+ -+ LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); -+ LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); -+ -+ LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); -+ LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); -+} -+ -+struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) -+{ -+ struct fuse_conn_info_opts *opts; -+ -+ opts = calloc(1, sizeof(struct fuse_conn_info_opts)); -+ if(opts == NULL) { -+ fuse_log(FUSE_LOG_ERR, "calloc failed\n"); -+ return NULL; -+ } -+ if(fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { -+ free(opts); -+ return NULL; -+ } -+ return opts; -+} -+ -+int fuse_open_channel(const char *mountpoint, const char* options) -+{ -+ struct mount_opts *opts = NULL; -+ int fd = -1; -+ const char *argv[] = { "", "-o", options }; -+ int argc = sizeof(argv) / sizeof(argv[0]); -+ struct fuse_args args = FUSE_ARGS_INIT(argc, (char**) argv); -+ -+ opts = parse_mount_opts(&args); -+ if (opts == NULL) -+ return -1; -+ -+ fd = fuse_kern_mount(mountpoint, opts); -+ destroy_mount_opts(opts); -+ -+ return fd; -+} --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Add-fuse_lowlevel.c.patch 
b/SOURCES/kvm-virtiofsd-Add-fuse_lowlevel.c.patch deleted file mode 100644 index 1318fef..0000000 --- a/SOURCES/kvm-virtiofsd-Add-fuse_lowlevel.c.patch +++ /dev/null @@ -1,3172 +0,0 @@ -From f6c6830f772e8060255323d2a458cd0e774d9654 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:42 +0100 -Subject: [PATCH 011/116] virtiofsd: Add fuse_lowlevel.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-8-dgilbert@redhat.com> -Patchwork-id: 93456 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 007/112] virtiofsd: Add fuse_lowlevel.c -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -fuse_lowlevel is one of the largest files from the library -and does most of the work. Add it separately to keep the diff -sizes small. -Again this is from upstream fuse-3.8.0 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 2de121f01e37e2fe98a4362f4abf7c0848697f76) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 3129 +++++++++++++++++++++++++++++++++++++++ - 1 file changed, 3129 insertions(+) - create mode 100644 tools/virtiofsd/fuse_lowlevel.c - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -new file mode 100644 -index 0000000..f2d7038 ---- /dev/null -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -0,0 +1,3129 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ Implementation of (most of) the low-level FUSE API. The session loop -+ functions are implemented in separate files. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. 
-+ See the file COPYING.LIB -+*/ -+ -+#define _GNU_SOURCE -+ -+#include "config.h" -+#include "fuse_i.h" -+#include "fuse_kernel.h" -+#include "fuse_opt.h" -+#include "fuse_misc.h" -+#include "mount_util.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifndef F_LINUX_SPECIFIC_BASE -+#define F_LINUX_SPECIFIC_BASE 1024 -+#endif -+#ifndef F_SETPIPE_SZ -+#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) -+#endif -+ -+ -+#define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) -+#define OFFSET_MAX 0x7fffffffffffffffLL -+ -+#define container_of(ptr, type, member) ({ \ -+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ -+ (type *)( (char *)__mptr - offsetof(type,member) );}) -+ -+struct fuse_pollhandle { -+ uint64_t kh; -+ struct fuse_session *se; -+}; -+ -+static size_t pagesize; -+ -+static __attribute__((constructor)) void fuse_ll_init_pagesize(void) -+{ -+ pagesize = getpagesize(); -+} -+ -+static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) -+{ -+ attr->ino = stbuf->st_ino; -+ attr->mode = stbuf->st_mode; -+ attr->nlink = stbuf->st_nlink; -+ attr->uid = stbuf->st_uid; -+ attr->gid = stbuf->st_gid; -+ attr->rdev = stbuf->st_rdev; -+ attr->size = stbuf->st_size; -+ attr->blksize = stbuf->st_blksize; -+ attr->blocks = stbuf->st_blocks; -+ attr->atime = stbuf->st_atime; -+ attr->mtime = stbuf->st_mtime; -+ attr->ctime = stbuf->st_ctime; -+ attr->atimensec = ST_ATIM_NSEC(stbuf); -+ attr->mtimensec = ST_MTIM_NSEC(stbuf); -+ attr->ctimensec = ST_CTIM_NSEC(stbuf); -+} -+ -+static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) -+{ -+ stbuf->st_mode = attr->mode; -+ stbuf->st_uid = attr->uid; -+ stbuf->st_gid = attr->gid; -+ stbuf->st_size = attr->size; -+ stbuf->st_atime = attr->atime; -+ stbuf->st_mtime = attr->mtime; -+ stbuf->st_ctime = attr->ctime; -+ ST_ATIM_NSEC_SET(stbuf, attr->atimensec); -+ ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); -+ 
ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); -+} -+ -+static size_t iov_length(const struct iovec *iov, size_t count) -+{ -+ size_t seg; -+ size_t ret = 0; -+ -+ for (seg = 0; seg < count; seg++) -+ ret += iov[seg].iov_len; -+ return ret; -+} -+ -+static void list_init_req(struct fuse_req *req) -+{ -+ req->next = req; -+ req->prev = req; -+} -+ -+static void list_del_req(struct fuse_req *req) -+{ -+ struct fuse_req *prev = req->prev; -+ struct fuse_req *next = req->next; -+ prev->next = next; -+ next->prev = prev; -+} -+ -+static void list_add_req(struct fuse_req *req, struct fuse_req *next) -+{ -+ struct fuse_req *prev = next->prev; -+ req->next = next; -+ req->prev = prev; -+ prev->next = req; -+ next->prev = req; -+} -+ -+static void destroy_req(fuse_req_t req) -+{ -+ pthread_mutex_destroy(&req->lock); -+ free(req); -+} -+ -+void fuse_free_req(fuse_req_t req) -+{ -+ int ctr; -+ struct fuse_session *se = req->se; -+ -+ pthread_mutex_lock(&se->lock); -+ req->u.ni.func = NULL; -+ req->u.ni.data = NULL; -+ list_del_req(req); -+ ctr = --req->ctr; -+ fuse_chan_put(req->ch); -+ req->ch = NULL; -+ pthread_mutex_unlock(&se->lock); -+ if (!ctr) -+ destroy_req(req); -+} -+ -+static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se) -+{ -+ struct fuse_req *req; -+ -+ req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req)); -+ if (req == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); -+ } else { -+ req->se = se; -+ req->ctr = 1; -+ list_init_req(req); -+ fuse_mutex_init(&req->lock); -+ } -+ -+ return req; -+} -+ -+/* Send data. 
If *ch* is NULL, send via session master fd */ -+static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count) -+{ -+ struct fuse_out_header *out = iov[0].iov_base; -+ -+ out->len = iov_length(iov, count); -+ if (se->debug) { -+ if (out->unique == 0) { -+ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", -+ out->error, out->len); -+ } else if (out->error) { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, error: %i (%s), outsize: %i\n", -+ (unsigned long long) out->unique, out->error, -+ strerror(-out->error), out->len); -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, success, outsize: %i\n", -+ (unsigned long long) out->unique, out->len); -+ } -+ } -+ -+ ssize_t res = writev(ch ? ch->fd : se->fd, -+ iov, count); -+ int err = errno; -+ -+ if (res == -1) { -+ assert(se != NULL); -+ -+ /* ENOENT means the operation was interrupted */ -+ if (!fuse_session_exited(se) && err != ENOENT) -+ perror("fuse: writing device"); -+ return -err; -+ } -+ -+ return 0; -+} -+ -+ -+int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, -+ int count) -+{ -+ struct fuse_out_header out; -+ -+ if (error <= -1000 || error > 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); -+ error = -ERANGE; -+ } -+ -+ out.unique = req->unique; -+ out.error = error; -+ -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); -+ -+ return fuse_send_msg(req->se, req->ch, iov, count); -+} -+ -+static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, -+ int count) -+{ -+ int res; -+ -+ res = fuse_send_reply_iov_nofree(req, error, iov, count); -+ fuse_free_req(req); -+ return res; -+} -+ -+static int send_reply(fuse_req_t req, int error, const void *arg, -+ size_t argsize) -+{ -+ struct iovec iov[2]; -+ int count = 1; -+ if (argsize) { -+ iov[1].iov_base = (void *) arg; -+ iov[1].iov_len = argsize; -+ count++; -+ } -+ return send_reply_iov(req, error, iov, count); -+} -+ 
-+int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count) -+{ -+ int res; -+ struct iovec *padded_iov; -+ -+ padded_iov = malloc((count + 1) * sizeof(struct iovec)); -+ if (padded_iov == NULL) -+ return fuse_reply_err(req, ENOMEM); -+ -+ memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); -+ count++; -+ -+ res = send_reply_iov(req, 0, padded_iov, count); -+ free(padded_iov); -+ -+ return res; -+} -+ -+ -+/* `buf` is allowed to be empty so that the proper size may be -+ allocated by the caller */ -+size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, -+ const char *name, const struct stat *stbuf, off_t off) -+{ -+ (void)req; -+ size_t namelen; -+ size_t entlen; -+ size_t entlen_padded; -+ struct fuse_dirent *dirent; -+ -+ namelen = strlen(name); -+ entlen = FUSE_NAME_OFFSET + namelen; -+ entlen_padded = FUSE_DIRENT_ALIGN(entlen); -+ -+ if ((buf == NULL) || (entlen_padded > bufsize)) -+ return entlen_padded; -+ -+ dirent = (struct fuse_dirent*) buf; -+ dirent->ino = stbuf->st_ino; -+ dirent->off = off; -+ dirent->namelen = namelen; -+ dirent->type = (stbuf->st_mode & S_IFMT) >> 12; -+ memcpy(dirent->name, name, namelen); -+ memset(dirent->name + namelen, 0, entlen_padded - entlen); -+ -+ return entlen_padded; -+} -+ -+static void convert_statfs(const struct statvfs *stbuf, -+ struct fuse_kstatfs *kstatfs) -+{ -+ kstatfs->bsize = stbuf->f_bsize; -+ kstatfs->frsize = stbuf->f_frsize; -+ kstatfs->blocks = stbuf->f_blocks; -+ kstatfs->bfree = stbuf->f_bfree; -+ kstatfs->bavail = stbuf->f_bavail; -+ kstatfs->files = stbuf->f_files; -+ kstatfs->ffree = stbuf->f_ffree; -+ kstatfs->namelen = stbuf->f_namemax; -+} -+ -+static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) -+{ -+ return send_reply(req, 0, arg, argsize); -+} -+ -+int fuse_reply_err(fuse_req_t req, int err) -+{ -+ return send_reply(req, -err, NULL, 0); -+} -+ -+void fuse_reply_none(fuse_req_t req) -+{ -+ fuse_free_req(req); -+} -+ -+static unsigned long 
calc_timeout_sec(double t) -+{ -+ if (t > (double) ULONG_MAX) -+ return ULONG_MAX; -+ else if (t < 0.0) -+ return 0; -+ else -+ return (unsigned long) t; -+} -+ -+static unsigned int calc_timeout_nsec(double t) -+{ -+ double f = t - (double) calc_timeout_sec(t); -+ if (f < 0.0) -+ return 0; -+ else if (f >= 0.999999999) -+ return 999999999; -+ else -+ return (unsigned int) (f * 1.0e9); -+} -+ -+static void fill_entry(struct fuse_entry_out *arg, -+ const struct fuse_entry_param *e) -+{ -+ arg->nodeid = e->ino; -+ arg->generation = e->generation; -+ arg->entry_valid = calc_timeout_sec(e->entry_timeout); -+ arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); -+ arg->attr_valid = calc_timeout_sec(e->attr_timeout); -+ arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); -+ convert_stat(&e->attr, &arg->attr); -+} -+ -+/* `buf` is allowed to be empty so that the proper size may be -+ allocated by the caller */ -+size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, -+ const char *name, -+ const struct fuse_entry_param *e, off_t off) -+{ -+ (void)req; -+ size_t namelen; -+ size_t entlen; -+ size_t entlen_padded; -+ -+ namelen = strlen(name); -+ entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; -+ entlen_padded = FUSE_DIRENT_ALIGN(entlen); -+ if ((buf == NULL) || (entlen_padded > bufsize)) -+ return entlen_padded; -+ -+ struct fuse_direntplus *dp = (struct fuse_direntplus *) buf; -+ memset(&dp->entry_out, 0, sizeof(dp->entry_out)); -+ fill_entry(&dp->entry_out, e); -+ -+ struct fuse_dirent *dirent = &dp->dirent; -+ dirent->ino = e->attr.st_ino; -+ dirent->off = off; -+ dirent->namelen = namelen; -+ dirent->type = (e->attr.st_mode & S_IFMT) >> 12; -+ memcpy(dirent->name, name, namelen); -+ memset(dirent->name + namelen, 0, entlen_padded - entlen); -+ -+ return entlen_padded; -+} -+ -+static void fill_open(struct fuse_open_out *arg, -+ const struct fuse_file_info *f) -+{ -+ arg->fh = f->fh; -+ if (f->direct_io) -+ arg->open_flags |= 
FOPEN_DIRECT_IO; -+ if (f->keep_cache) -+ arg->open_flags |= FOPEN_KEEP_CACHE; -+ if (f->cache_readdir) -+ arg->open_flags |= FOPEN_CACHE_DIR; -+ if (f->nonseekable) -+ arg->open_flags |= FOPEN_NONSEEKABLE; -+} -+ -+int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) -+{ -+ struct fuse_entry_out arg; -+ size_t size = req->se->conn.proto_minor < 9 ? -+ FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(arg); -+ -+ /* before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant -+ negative entry */ -+ if (!e->ino && req->se->conn.proto_minor < 4) -+ return fuse_reply_err(req, ENOENT); -+ -+ memset(&arg, 0, sizeof(arg)); -+ fill_entry(&arg, e); -+ return send_reply_ok(req, &arg, size); -+} -+ -+int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, -+ const struct fuse_file_info *f) -+{ -+ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; -+ size_t entrysize = req->se->conn.proto_minor < 9 ? -+ FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(struct fuse_entry_out); -+ struct fuse_entry_out *earg = (struct fuse_entry_out *) buf; -+ struct fuse_open_out *oarg = (struct fuse_open_out *) (buf + entrysize); -+ -+ memset(buf, 0, sizeof(buf)); -+ fill_entry(earg, e); -+ fill_open(oarg, f); -+ return send_reply_ok(req, buf, -+ entrysize + sizeof(struct fuse_open_out)); -+} -+ -+int fuse_reply_attr(fuse_req_t req, const struct stat *attr, -+ double attr_timeout) -+{ -+ struct fuse_attr_out arg; -+ size_t size = req->se->conn.proto_minor < 9 ? 
-+ FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.attr_valid = calc_timeout_sec(attr_timeout); -+ arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); -+ convert_stat(attr, &arg.attr); -+ -+ return send_reply_ok(req, &arg, size); -+} -+ -+int fuse_reply_readlink(fuse_req_t req, const char *linkname) -+{ -+ return send_reply_ok(req, linkname, strlen(linkname)); -+} -+ -+int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f) -+{ -+ struct fuse_open_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ fill_open(&arg, f); -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_write(fuse_req_t req, size_t count) -+{ -+ struct fuse_write_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.size = count; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size) -+{ -+ return send_reply_ok(req, buf, size); -+} -+ -+static int fuse_send_data_iov_fallback(struct fuse_session *se, -+ struct fuse_chan *ch, -+ struct iovec *iov, int iov_count, -+ struct fuse_bufvec *buf, -+ size_t len) -+{ -+ struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); -+ void *mbuf; -+ int res; -+ -+ /* Optimize common case */ -+ if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && -+ !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { -+ /* FIXME: also avoid memory copy if there are multiple buffers -+ but none of them contain an fd */ -+ -+ iov[iov_count].iov_base = buf->buf[0].mem; -+ iov[iov_count].iov_len = len; -+ iov_count++; -+ return fuse_send_msg(se, ch, iov, iov_count); -+ } -+ -+ res = posix_memalign(&mbuf, pagesize, len); -+ if (res != 0) -+ return res; -+ -+ mem_buf.buf[0].mem = mbuf; -+ res = fuse_buf_copy(&mem_buf, buf, 0); -+ if (res < 0) { -+ free(mbuf); -+ return -res; -+ } -+ len = res; -+ -+ iov[iov_count].iov_base = mbuf; -+ iov[iov_count].iov_len = len; -+ iov_count++; -+ res = fuse_send_msg(se, ch, iov, iov_count); -+ free(mbuf); -+ -+ return res; -+} -+ 
-+struct fuse_ll_pipe { -+ size_t size; -+ int can_grow; -+ int pipe[2]; -+}; -+ -+static void fuse_ll_pipe_free(struct fuse_ll_pipe *llp) -+{ -+ close(llp->pipe[0]); -+ close(llp->pipe[1]); -+ free(llp); -+} -+ -+#ifdef HAVE_SPLICE -+#if !defined(HAVE_PIPE2) || !defined(O_CLOEXEC) -+static int fuse_pipe(int fds[2]) -+{ -+ int rv = pipe(fds); -+ -+ if (rv == -1) -+ return rv; -+ -+ if (fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1 || -+ fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1 || -+ fcntl(fds[0], F_SETFD, FD_CLOEXEC) == -1 || -+ fcntl(fds[1], F_SETFD, FD_CLOEXEC) == -1) { -+ close(fds[0]); -+ close(fds[1]); -+ rv = -1; -+ } -+ return rv; -+} -+#else -+static int fuse_pipe(int fds[2]) -+{ -+ return pipe2(fds, O_CLOEXEC | O_NONBLOCK); -+} -+#endif -+ -+static struct fuse_ll_pipe *fuse_ll_get_pipe(struct fuse_session *se) -+{ -+ struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); -+ if (llp == NULL) { -+ int res; -+ -+ llp = malloc(sizeof(struct fuse_ll_pipe)); -+ if (llp == NULL) -+ return NULL; -+ -+ res = fuse_pipe(llp->pipe); -+ if (res == -1) { -+ free(llp); -+ return NULL; -+ } -+ -+ /* -+ *the default size is 16 pages on linux -+ */ -+ llp->size = pagesize * 16; -+ llp->can_grow = 1; -+ -+ pthread_setspecific(se->pipe_key, llp); -+ } -+ -+ return llp; -+} -+#endif -+ -+static void fuse_ll_clear_pipe(struct fuse_session *se) -+{ -+ struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); -+ if (llp) { -+ pthread_setspecific(se->pipe_key, NULL); -+ fuse_ll_pipe_free(llp); -+ } -+} -+ -+#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) -+static int read_back(int fd, char *buf, size_t len) -+{ -+ int res; -+ -+ res = read(fd, buf, len); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); -+ return -EIO; -+ } -+ if (res != len) { -+ fuse_log(FUSE_LOG_ERR, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); -+ return -EIO; -+ } -+ return 0; -+} -+ -+static int 
grow_pipe_to_max(int pipefd) -+{ -+ int max; -+ int res; -+ int maxfd; -+ char buf[32]; -+ -+ maxfd = open("/proc/sys/fs/pipe-max-size", O_RDONLY); -+ if (maxfd < 0) -+ return -errno; -+ -+ res = read(maxfd, buf, sizeof(buf) - 1); -+ if (res < 0) { -+ int saved_errno; -+ -+ saved_errno = errno; -+ close(maxfd); -+ return -saved_errno; -+ } -+ close(maxfd); -+ buf[res] = '\0'; -+ -+ max = atoi(buf); -+ res = fcntl(pipefd, F_SETPIPE_SZ, max); -+ if (res < 0) -+ return -errno; -+ return max; -+} -+ -+static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int iov_count, -+ struct fuse_bufvec *buf, unsigned int flags) -+{ -+ int res; -+ size_t len = fuse_buf_size(buf); -+ struct fuse_out_header *out = iov[0].iov_base; -+ struct fuse_ll_pipe *llp; -+ int splice_flags; -+ size_t pipesize; -+ size_t total_fd_size; -+ size_t idx; -+ size_t headerlen; -+ struct fuse_bufvec pipe_buf = FUSE_BUFVEC_INIT(len); -+ -+ if (se->broken_splice_nonblock) -+ goto fallback; -+ -+ if (flags & FUSE_BUF_NO_SPLICE) -+ goto fallback; -+ -+ total_fd_size = 0; -+ for (idx = buf->idx; idx < buf->count; idx++) { -+ if (buf->buf[idx].flags & FUSE_BUF_IS_FD) { -+ total_fd_size = buf->buf[idx].size; -+ if (idx == buf->idx) -+ total_fd_size -= buf->off; -+ } -+ } -+ if (total_fd_size < 2 * pagesize) -+ goto fallback; -+ -+ if (se->conn.proto_minor < 14 || -+ !(se->conn.want & FUSE_CAP_SPLICE_WRITE)) -+ goto fallback; -+ -+ llp = fuse_ll_get_pipe(se); -+ if (llp == NULL) -+ goto fallback; -+ -+ -+ headerlen = iov_length(iov, iov_count); -+ -+ out->len = headerlen + len; -+ -+ /* -+ * Heuristic for the required pipe size, does not work if the -+ * source contains less than page size fragments -+ */ -+ pipesize = pagesize * (iov_count + buf->count + 1) + out->len; -+ -+ if (llp->size < pipesize) { -+ if (llp->can_grow) { -+ res = fcntl(llp->pipe[0], F_SETPIPE_SZ, pipesize); -+ if (res == -1) { -+ res = grow_pipe_to_max(llp->pipe[0]); -+ if (res > 0) -+ 
llp->size = res; -+ llp->can_grow = 0; -+ goto fallback; -+ } -+ llp->size = res; -+ } -+ if (llp->size < pipesize) -+ goto fallback; -+ } -+ -+ -+ res = vmsplice(llp->pipe[1], iov, iov_count, SPLICE_F_NONBLOCK); -+ if (res == -1) -+ goto fallback; -+ -+ if (res != headerlen) { -+ res = -EIO; -+ fuse_log(FUSE_LOG_ERR, "fuse: short vmsplice to pipe: %u/%zu\n", res, -+ headerlen); -+ goto clear_pipe; -+ } -+ -+ pipe_buf.buf[0].flags = FUSE_BUF_IS_FD; -+ pipe_buf.buf[0].fd = llp->pipe[1]; -+ -+ res = fuse_buf_copy(&pipe_buf, buf, -+ FUSE_BUF_FORCE_SPLICE | FUSE_BUF_SPLICE_NONBLOCK); -+ if (res < 0) { -+ if (res == -EAGAIN || res == -EINVAL) { -+ /* -+ * Should only get EAGAIN on kernels with -+ * broken SPLICE_F_NONBLOCK support (<= -+ * 2.6.35) where this error or a short read is -+ * returned even if the pipe itself is not -+ * full -+ * -+ * EINVAL might mean that splice can't handle -+ * this combination of input and output. -+ */ -+ if (res == -EAGAIN) -+ se->broken_splice_nonblock = 1; -+ -+ pthread_setspecific(se->pipe_key, NULL); -+ fuse_ll_pipe_free(llp); -+ goto fallback; -+ } -+ res = -res; -+ goto clear_pipe; -+ } -+ -+ if (res != 0 && res < len) { -+ struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); -+ void *mbuf; -+ size_t now_len = res; -+ /* -+ * For regular files a short count is either -+ * 1) due to EOF, or -+ * 2) because of broken SPLICE_F_NONBLOCK (see above) -+ * -+ * For other inputs it's possible that we overflowed -+ * the pipe because of small buffer fragments. -+ */ -+ -+ res = posix_memalign(&mbuf, pagesize, len); -+ if (res != 0) -+ goto clear_pipe; -+ -+ mem_buf.buf[0].mem = mbuf; -+ mem_buf.off = now_len; -+ res = fuse_buf_copy(&mem_buf, buf, 0); -+ if (res > 0) { -+ char *tmpbuf; -+ size_t extra_len = res; -+ /* -+ * Trickiest case: got more data. Need to get -+ * back the data from the pipe and then fall -+ * back to regular write. 
-+ */ -+ tmpbuf = malloc(headerlen); -+ if (tmpbuf == NULL) { -+ free(mbuf); -+ res = ENOMEM; -+ goto clear_pipe; -+ } -+ res = read_back(llp->pipe[0], tmpbuf, headerlen); -+ free(tmpbuf); -+ if (res != 0) { -+ free(mbuf); -+ goto clear_pipe; -+ } -+ res = read_back(llp->pipe[0], mbuf, now_len); -+ if (res != 0) { -+ free(mbuf); -+ goto clear_pipe; -+ } -+ len = now_len + extra_len; -+ iov[iov_count].iov_base = mbuf; -+ iov[iov_count].iov_len = len; -+ iov_count++; -+ res = fuse_send_msg(se, ch, iov, iov_count); -+ free(mbuf); -+ return res; -+ } -+ free(mbuf); -+ res = now_len; -+ } -+ len = res; -+ out->len = headerlen + len; -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, success, outsize: %i (splice)\n", -+ (unsigned long long) out->unique, out->len); -+ } -+ -+ splice_flags = 0; -+ if ((flags & FUSE_BUF_SPLICE_MOVE) && -+ (se->conn.want & FUSE_CAP_SPLICE_MOVE)) -+ splice_flags |= SPLICE_F_MOVE; -+ -+ res = splice(llp->pipe[0], NULL, ch ? ch->fd : se->fd, -+ NULL, out->len, splice_flags); -+ if (res == -1) { -+ res = -errno; -+ perror("fuse: splice from pipe"); -+ goto clear_pipe; -+ } -+ if (res != out->len) { -+ res = -EIO; -+ fuse_log(FUSE_LOG_ERR, "fuse: short splice from pipe: %u/%u\n", -+ res, out->len); -+ goto clear_pipe; -+ } -+ return 0; -+ -+clear_pipe: -+ fuse_ll_clear_pipe(se); -+ return res; -+ -+fallback: -+ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); -+} -+#else -+static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int iov_count, -+ struct fuse_bufvec *buf, unsigned int flags) -+{ -+ size_t len = fuse_buf_size(buf); -+ (void) flags; -+ -+ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); -+} -+#endif -+ -+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags) -+{ -+ struct iovec iov[2]; -+ struct fuse_out_header out; -+ int res; -+ -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct 
fuse_out_header); -+ -+ out.unique = req->unique; -+ out.error = 0; -+ -+ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); -+ if (res <= 0) { -+ fuse_free_req(req); -+ return res; -+ } else { -+ return fuse_reply_err(req, res); -+ } -+} -+ -+int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) -+{ -+ struct fuse_statfs_out arg; -+ size_t size = req->se->conn.proto_minor < 4 ? -+ FUSE_COMPAT_STATFS_SIZE : sizeof(arg); -+ -+ memset(&arg, 0, sizeof(arg)); -+ convert_statfs(stbuf, &arg.st); -+ -+ return send_reply_ok(req, &arg, size); -+} -+ -+int fuse_reply_xattr(fuse_req_t req, size_t count) -+{ -+ struct fuse_getxattr_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.size = count; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_lock(fuse_req_t req, const struct flock *lock) -+{ -+ struct fuse_lk_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.lk.type = lock->l_type; -+ if (lock->l_type != F_UNLCK) { -+ arg.lk.start = lock->l_start; -+ if (lock->l_len == 0) -+ arg.lk.end = OFFSET_MAX; -+ else -+ arg.lk.end = lock->l_start + lock->l_len - 1; -+ } -+ arg.lk.pid = lock->l_pid; -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_bmap(fuse_req_t req, uint64_t idx) -+{ -+ struct fuse_bmap_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.block = idx; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov, -+ size_t count) -+{ -+ struct fuse_ioctl_iovec *fiov; -+ size_t i; -+ -+ fiov = malloc(sizeof(fiov[0]) * count); -+ if (!fiov) -+ return NULL; -+ -+ for (i = 0; i < count; i++) { -+ fiov[i].base = (uintptr_t) iov[i].iov_base; -+ fiov[i].len = iov[i].iov_len; -+ } -+ -+ return fiov; -+} -+ -+int fuse_reply_ioctl_retry(fuse_req_t req, -+ const struct iovec *in_iov, size_t in_count, -+ const struct iovec *out_iov, size_t out_count) -+{ -+ struct fuse_ioctl_out arg; -+ struct fuse_ioctl_iovec *in_fiov = 
NULL; -+ struct fuse_ioctl_iovec *out_fiov = NULL; -+ struct iovec iov[4]; -+ size_t count = 1; -+ int res; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.flags |= FUSE_IOCTL_RETRY; -+ arg.in_iovs = in_count; -+ arg.out_iovs = out_count; -+ iov[count].iov_base = &arg; -+ iov[count].iov_len = sizeof(arg); -+ count++; -+ -+ if (req->se->conn.proto_minor < 16) { -+ if (in_count) { -+ iov[count].iov_base = (void *)in_iov; -+ iov[count].iov_len = sizeof(in_iov[0]) * in_count; -+ count++; -+ } -+ -+ if (out_count) { -+ iov[count].iov_base = (void *)out_iov; -+ iov[count].iov_len = sizeof(out_iov[0]) * out_count; -+ count++; -+ } -+ } else { -+ /* Can't handle non-compat 64bit ioctls on 32bit */ -+ if (sizeof(void *) == 4 && req->ioctl_64bit) { -+ res = fuse_reply_err(req, EINVAL); -+ goto out; -+ } -+ -+ if (in_count) { -+ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -+ if (!in_fiov) -+ goto enomem; -+ -+ iov[count].iov_base = (void *)in_fiov; -+ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -+ count++; -+ } -+ if (out_count) { -+ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -+ if (!out_fiov) -+ goto enomem; -+ -+ iov[count].iov_base = (void *)out_fiov; -+ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -+ count++; -+ } -+ } -+ -+ res = send_reply_iov(req, 0, iov, count); -+out: -+ free(in_fiov); -+ free(out_fiov); -+ -+ return res; -+ -+enomem: -+ res = fuse_reply_err(req, ENOMEM); -+ goto out; -+} -+ -+int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size) -+{ -+ struct fuse_ioctl_out arg; -+ struct iovec iov[3]; -+ size_t count = 1; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.result = result; -+ iov[count].iov_base = &arg; -+ iov[count].iov_len = sizeof(arg); -+ count++; -+ -+ if (size) { -+ iov[count].iov_base = (char *) buf; -+ iov[count].iov_len = size; -+ count++; -+ } -+ -+ return send_reply_iov(req, 0, iov, count); -+} -+ -+int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, -+ int count) 
-+{ -+ struct iovec *padded_iov; -+ struct fuse_ioctl_out arg; -+ int res; -+ -+ padded_iov = malloc((count + 2) * sizeof(struct iovec)); -+ if (padded_iov == NULL) -+ return fuse_reply_err(req, ENOMEM); -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.result = result; -+ padded_iov[1].iov_base = &arg; -+ padded_iov[1].iov_len = sizeof(arg); -+ -+ memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); -+ -+ res = send_reply_iov(req, 0, padded_iov, count + 2); -+ free(padded_iov); -+ -+ return res; -+} -+ -+int fuse_reply_poll(fuse_req_t req, unsigned revents) -+{ -+ struct fuse_poll_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.revents = revents; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_lseek(fuse_req_t req, off_t off) -+{ -+ struct fuse_lseek_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.offset = off; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ -+ if (req->se->op.lookup) -+ req->se->op.lookup(req, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_forget_in *arg = (struct fuse_forget_in *) inarg; -+ -+ if (req->se->op.forget) -+ req->se->op.forget(req, nodeid, arg->nlookup); -+ else -+ fuse_reply_none(req); -+} -+ -+static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, -+ const void *inarg) -+{ -+ struct fuse_batch_forget_in *arg = (void *) inarg; -+ struct fuse_forget_one *param = (void *) PARAM(arg); -+ unsigned int i; -+ -+ (void) nodeid; -+ -+ if (req->se->op.forget_multi) { -+ req->se->op.forget_multi(req, arg->count, -+ (struct fuse_forget_data *) param); -+ } else if (req->se->op.forget) { -+ for (i = 0; i < arg->count; i++) { -+ struct fuse_forget_one *forget = ¶m[i]; -+ struct fuse_req *dummy_req; -+ -+ dummy_req = fuse_ll_alloc_req(req->se); -+ if (dummy_req == 
NULL) -+ break; -+ -+ dummy_req->unique = req->unique; -+ dummy_req->ctx = req->ctx; -+ dummy_req->ch = NULL; -+ -+ req->se->op.forget(dummy_req, forget->nodeid, -+ forget->nlookup); -+ } -+ fuse_reply_none(req); -+ } else { -+ fuse_reply_none(req); -+ } -+} -+ -+static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_file_info *fip = NULL; -+ struct fuse_file_info fi; -+ -+ if (req->se->conn.proto_minor >= 9) { -+ struct fuse_getattr_in *arg = (struct fuse_getattr_in *) inarg; -+ -+ if (arg->getattr_flags & FUSE_GETATTR_FH) { -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fip = &fi; -+ } -+ } -+ -+ if (req->se->op.getattr) -+ req->se->op.getattr(req, nodeid, fip); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_setattr_in *arg = (struct fuse_setattr_in *) inarg; -+ -+ if (req->se->op.setattr) { -+ struct fuse_file_info *fi = NULL; -+ struct fuse_file_info fi_store; -+ struct stat stbuf; -+ memset(&stbuf, 0, sizeof(stbuf)); -+ convert_attr(arg, &stbuf); -+ if (arg->valid & FATTR_FH) { -+ arg->valid &= ~FATTR_FH; -+ memset(&fi_store, 0, sizeof(fi_store)); -+ fi = &fi_store; -+ fi->fh = arg->fh; -+ } -+ arg->valid &= -+ FUSE_SET_ATTR_MODE | -+ FUSE_SET_ATTR_UID | -+ FUSE_SET_ATTR_GID | -+ FUSE_SET_ATTR_SIZE | -+ FUSE_SET_ATTR_ATIME | -+ FUSE_SET_ATTR_MTIME | -+ FUSE_SET_ATTR_ATIME_NOW | -+ FUSE_SET_ATTR_MTIME_NOW | -+ FUSE_SET_ATTR_CTIME; -+ -+ req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); -+ } else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_access_in *arg = (struct fuse_access_in *) inarg; -+ -+ if (req->se->op.access) -+ req->se->op.access(req, nodeid, arg->mask); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ (void) inarg; -+ -+ if 
(req->se->op.readlink) -+ req->se->op.readlink(req, nodeid); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inarg; -+ char *name = PARAM(arg); -+ -+ if (req->se->conn.proto_minor >= 12) -+ req->ctx.umask = arg->umask; -+ else -+ name = (char *) inarg + FUSE_COMPAT_MKNOD_IN_SIZE; -+ -+ if (req->se->op.mknod) -+ req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inarg; -+ -+ if (req->se->conn.proto_minor >= 12) -+ req->ctx.umask = arg->umask; -+ -+ if (req->se->op.mkdir) -+ req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ -+ if (req->se->op.unlink) -+ req->se->op.unlink(req, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ -+ if (req->se->op.rmdir) -+ req->se->op.rmdir(req, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ char *linkname = ((char *) inarg) + strlen((char *) inarg) + 1; -+ -+ if (req->se->op.symlink) -+ req->se->op.symlink(req, linkname, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_rename_in *arg = (struct fuse_rename_in *) inarg; -+ char *oldname = PARAM(arg); -+ char *newname = oldname + strlen(oldname) + 1; -+ -+ if (req->se->op.rename) -+ req->se->op.rename(req, nodeid, oldname, arg->newdir, 
newname, -+ 0); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_rename2_in *arg = (struct fuse_rename2_in *) inarg; -+ char *oldname = PARAM(arg); -+ char *newname = oldname + strlen(oldname) + 1; -+ -+ if (req->se->op.rename) -+ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -+ arg->flags); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_link_in *arg = (struct fuse_link_in *) inarg; -+ -+ if (req->se->op.link) -+ req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_create_in *arg = (struct fuse_create_in *) inarg; -+ -+ if (req->se->op.create) { -+ struct fuse_file_info fi; -+ char *name = PARAM(arg); -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ -+ if (req->se->conn.proto_minor >= 12) -+ req->ctx.umask = arg->umask; -+ else -+ name = (char *) inarg + sizeof(struct fuse_open_in); -+ -+ req->se->op.create(req, nodeid, name, arg->mode, &fi); -+ } else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_open_in *arg = (struct fuse_open_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ -+ if (req->se->op.open) -+ req->se->op.open(req, nodeid, &fi); -+ else -+ fuse_reply_open(req, &fi); -+} -+ -+static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -+ -+ if (req->se->op.read) { -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ if (req->se->conn.proto_minor >= 9) { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ } -+ 
req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); -+ } else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_write_in *arg = (struct fuse_write_in *) inarg; -+ struct fuse_file_info fi; -+ char *param; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; -+ -+ if (req->se->conn.proto_minor < 9) { -+ param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; -+ } else { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ param = PARAM(arg); -+ } -+ -+ if (req->se->op.write) -+ req->se->op.write(req, nodeid, param, arg->size, -+ arg->offset, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, -+ const struct fuse_buf *ibuf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_bufvec bufv = { -+ .buf[0] = *ibuf, -+ .count = 1, -+ }; -+ struct fuse_write_in *arg = (struct fuse_write_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; -+ -+ if (se->conn.proto_minor < 9) { -+ bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; -+ bufv.buf[0].size -= sizeof(struct fuse_in_header) + -+ FUSE_COMPAT_WRITE_IN_SIZE; -+ assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); -+ } else { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) -+ bufv.buf[0].mem = PARAM(arg); -+ -+ bufv.buf[0].size -= sizeof(struct fuse_in_header) + -+ sizeof(struct fuse_write_in); -+ } -+ if (bufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); -+ fuse_reply_err(req, EIO); -+ goto out; -+ } -+ bufv.buf[0].size = arg->size; -+ -+ se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); -+ -+out: -+ /* Need to reset the pipe if ->write_buf() didn't 
consume all data */ -+ if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) -+ fuse_ll_clear_pipe(se); -+} -+ -+static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.flush = 1; -+ if (req->se->conn.proto_minor >= 7) -+ fi.lock_owner = arg->lock_owner; -+ -+ if (req->se->op.flush) -+ req->se->op.flush(req, nodeid, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_release_in *arg = (struct fuse_release_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ fi.fh = arg->fh; -+ if (req->se->conn.proto_minor >= 8) { -+ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -+ fi.lock_owner = arg->lock_owner; -+ } -+ if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { -+ fi.flock_release = 1; -+ fi.lock_owner = arg->lock_owner; -+ } -+ -+ if (req->se->op.release) -+ req->se->op.release(req, nodeid, &fi); -+ else -+ fuse_reply_err(req, 0); -+} -+ -+static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; -+ struct fuse_file_info fi; -+ int datasync = arg->fsync_flags & 1; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.fsync) -+ req->se->op.fsync(req, nodeid, datasync, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_open_in *arg = (struct fuse_open_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ -+ if (req->se->op.opendir) -+ req->se->op.opendir(req, nodeid, &fi); -+ else -+ fuse_reply_open(req, &fi); -+} -+ -+static void do_readdir(fuse_req_t req, fuse_ino_t 
nodeid, const void *inarg) -+{ -+ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.readdir) -+ req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.readdirplus) -+ req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_release_in *arg = (struct fuse_release_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ fi.fh = arg->fh; -+ -+ if (req->se->op.releasedir) -+ req->se->op.releasedir(req, nodeid, &fi); -+ else -+ fuse_reply_err(req, 0); -+} -+ -+static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; -+ struct fuse_file_info fi; -+ int datasync = arg->fsync_flags & 1; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.fsyncdir) -+ req->se->op.fsyncdir(req, nodeid, datasync, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ (void) nodeid; -+ (void) inarg; -+ -+ if (req->se->op.statfs) -+ req->se->op.statfs(req, nodeid); -+ else { -+ struct statvfs buf = { -+ .f_namemax = 255, -+ .f_bsize = 512, -+ }; -+ fuse_reply_statfs(req, &buf); -+ } -+} -+ -+static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg; -+ char *name = PARAM(arg); -+ char *value = name 
+ strlen(name) + 1; -+ -+ if (req->se->op.setxattr) -+ req->se->op.setxattr(req, nodeid, name, value, arg->size, -+ arg->flags); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; -+ -+ if (req->se->op.getxattr) -+ req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; -+ -+ if (req->se->op.listxattr) -+ req->se->op.listxattr(req, nodeid, arg->size); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ -+ if (req->se->op.removexattr) -+ req->se->op.removexattr(req, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void convert_fuse_file_lock(struct fuse_file_lock *fl, -+ struct flock *flock) -+{ -+ memset(flock, 0, sizeof(struct flock)); -+ flock->l_type = fl->type; -+ flock->l_whence = SEEK_SET; -+ flock->l_start = fl->start; -+ if (fl->end == OFFSET_MAX) -+ flock->l_len = 0; -+ else -+ flock->l_len = fl->end - fl->start + 1; -+ flock->l_pid = fl->pid; -+} -+ -+static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; -+ struct fuse_file_info fi; -+ struct flock flock; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.lock_owner = arg->owner; -+ -+ convert_fuse_file_lock(&arg->lk, &flock); -+ if (req->se->op.getlk) -+ req->se->op.getlk(req, nodeid, &fi, &flock); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, -+ const void *inarg, int sleep) -+{ -+ struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; -+ struct fuse_file_info fi; -+ 
struct flock flock; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.lock_owner = arg->owner; -+ -+ if (arg->lk_flags & FUSE_LK_FLOCK) { -+ int op = 0; -+ -+ switch (arg->lk.type) { -+ case F_RDLCK: -+ op = LOCK_SH; -+ break; -+ case F_WRLCK: -+ op = LOCK_EX; -+ break; -+ case F_UNLCK: -+ op = LOCK_UN; -+ break; -+ } -+ if (!sleep) -+ op |= LOCK_NB; -+ -+ if (req->se->op.flock) -+ req->se->op.flock(req, nodeid, &fi, op); -+ else -+ fuse_reply_err(req, ENOSYS); -+ } else { -+ convert_fuse_file_lock(&arg->lk, &flock); -+ if (req->se->op.setlk) -+ req->se->op.setlk(req, nodeid, &fi, &flock, sleep); -+ else -+ fuse_reply_err(req, ENOSYS); -+ } -+} -+ -+static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ do_setlk_common(req, nodeid, inarg, 0); -+} -+ -+static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ do_setlk_common(req, nodeid, inarg, 1); -+} -+ -+static int find_interrupted(struct fuse_session *se, struct fuse_req *req) -+{ -+ struct fuse_req *curr; -+ -+ for (curr = se->list.next; curr != &se->list; curr = curr->next) { -+ if (curr->unique == req->u.i.unique) { -+ fuse_interrupt_func_t func; -+ void *data; -+ -+ curr->ctr++; -+ pthread_mutex_unlock(&se->lock); -+ -+ /* Ugh, ugly locking */ -+ pthread_mutex_lock(&curr->lock); -+ pthread_mutex_lock(&se->lock); -+ curr->interrupted = 1; -+ func = curr->u.ni.func; -+ data = curr->u.ni.data; -+ pthread_mutex_unlock(&se->lock); -+ if (func) -+ func(curr, data); -+ pthread_mutex_unlock(&curr->lock); -+ -+ pthread_mutex_lock(&se->lock); -+ curr->ctr--; -+ if (!curr->ctr) -+ destroy_req(curr); -+ -+ return 1; -+ } -+ } -+ for (curr = se->interrupts.next; curr != &se->interrupts; -+ curr = curr->next) { -+ if (curr->u.i.unique == req->u.i.unique) -+ return 1; -+ } -+ return 0; -+} -+ -+static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *) inarg; -+ struct 
fuse_session *se = req->se; -+ -+ (void) nodeid; -+ if (se->debug) -+ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -+ (unsigned long long) arg->unique); -+ -+ req->u.i.unique = arg->unique; -+ -+ pthread_mutex_lock(&se->lock); -+ if (find_interrupted(se, req)) -+ destroy_req(req); -+ else -+ list_add_req(req, &se->interrupts); -+ pthread_mutex_unlock(&se->lock); -+} -+ -+static struct fuse_req *check_interrupt(struct fuse_session *se, -+ struct fuse_req *req) -+{ -+ struct fuse_req *curr; -+ -+ for (curr = se->interrupts.next; curr != &se->interrupts; -+ curr = curr->next) { -+ if (curr->u.i.unique == req->unique) { -+ req->interrupted = 1; -+ list_del_req(curr); -+ free(curr); -+ return NULL; -+ } -+ } -+ curr = se->interrupts.next; -+ if (curr != &se->interrupts) { -+ list_del_req(curr); -+ list_init_req(curr); -+ return curr; -+ } else -+ return NULL; -+} -+ -+static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg; -+ -+ if (req->se->op.bmap) -+ req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *) inarg; -+ unsigned int flags = arg->flags; -+ void *in_buf = arg->in_size ? 
PARAM(arg) : NULL; -+ struct fuse_file_info fi; -+ -+ if (flags & FUSE_IOCTL_DIR && -+ !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { -+ fuse_reply_err(req, ENOTTY); -+ return; -+ } -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && -+ !(flags & FUSE_IOCTL_32BIT)) { -+ req->ioctl_64bit = 1; -+ } -+ -+ if (req->se->op.ioctl) -+ req->se->op.ioctl(req, nodeid, arg->cmd, -+ (void *)(uintptr_t)arg->arg, &fi, flags, -+ in_buf, arg->in_size, arg->out_size); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) -+{ -+ free(ph); -+} -+ -+static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_poll_in *arg = (struct fuse_poll_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.poll_events = arg->events; -+ -+ if (req->se->op.poll) { -+ struct fuse_pollhandle *ph = NULL; -+ -+ if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { -+ ph = malloc(sizeof(struct fuse_pollhandle)); -+ if (ph == NULL) { -+ fuse_reply_err(req, ENOMEM); -+ return; -+ } -+ ph->kh = arg->kh; -+ ph->se = req->se; -+ } -+ -+ req->se->op.poll(req, nodeid, &fi, ph); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } -+} -+ -+static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.fallocate) -+ req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, const void *inarg) -+{ -+ struct fuse_copy_file_range_in *arg = (struct fuse_copy_file_range_in *) inarg; -+ struct fuse_file_info fi_in, fi_out; -+ -+ memset(&fi_in, 0, sizeof(fi_in)); -+ fi_in.fh = arg->fh_in; -+ -+ 
memset(&fi_out, 0, sizeof(fi_out)); -+ fi_out.fh = arg->fh_out; -+ -+ -+ if (req->se->op.copy_file_range) -+ req->se->op.copy_file_range(req, nodeid_in, arg->off_in, -+ &fi_in, arg->nodeid_out, -+ arg->off_out, &fi_out, arg->len, -+ arg->flags); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_lseek_in *arg = (struct fuse_lseek_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.lseek) -+ req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_init_in *arg = (struct fuse_init_in *) inarg; -+ struct fuse_init_out outarg; -+ struct fuse_session *se = req->se; -+ size_t bufsize = se->bufsize; -+ size_t outargsize = sizeof(outarg); -+ -+ (void) nodeid; -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -+ if (arg->major == 7 && arg->minor >= 6) { -+ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -+ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", -+ arg->max_readahead); -+ } -+ } -+ se->conn.proto_major = arg->major; -+ se->conn.proto_minor = arg->minor; -+ se->conn.capable = 0; -+ se->conn.want = 0; -+ -+ memset(&outarg, 0, sizeof(outarg)); -+ outarg.major = FUSE_KERNEL_VERSION; -+ outarg.minor = FUSE_KERNEL_MINOR_VERSION; -+ -+ if (arg->major < 7) { -+ fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", -+ arg->major, arg->minor); -+ fuse_reply_err(req, EPROTO); -+ return; -+ } -+ -+ if (arg->major > 7) { -+ /* Wait for a second INIT request with a 7.X version */ -+ send_reply_ok(req, &outarg, sizeof(outarg)); -+ return; -+ } -+ -+ if (arg->minor >= 6) { -+ if (arg->max_readahead < se->conn.max_readahead) -+ se->conn.max_readahead = arg->max_readahead; -+ if (arg->flags & FUSE_ASYNC_READ) -+ 
se->conn.capable |= FUSE_CAP_ASYNC_READ; -+ if (arg->flags & FUSE_POSIX_LOCKS) -+ se->conn.capable |= FUSE_CAP_POSIX_LOCKS; -+ if (arg->flags & FUSE_ATOMIC_O_TRUNC) -+ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -+ if (arg->flags & FUSE_EXPORT_SUPPORT) -+ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -+ if (arg->flags & FUSE_DONT_MASK) -+ se->conn.capable |= FUSE_CAP_DONT_MASK; -+ if (arg->flags & FUSE_FLOCK_LOCKS) -+ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -+ if (arg->flags & FUSE_AUTO_INVAL_DATA) -+ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -+ if (arg->flags & FUSE_DO_READDIRPLUS) -+ se->conn.capable |= FUSE_CAP_READDIRPLUS; -+ if (arg->flags & FUSE_READDIRPLUS_AUTO) -+ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -+ if (arg->flags & FUSE_ASYNC_DIO) -+ se->conn.capable |= FUSE_CAP_ASYNC_DIO; -+ if (arg->flags & FUSE_WRITEBACK_CACHE) -+ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -+ if (arg->flags & FUSE_NO_OPEN_SUPPORT) -+ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -+ if (arg->flags & FUSE_PARALLEL_DIROPS) -+ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -+ if (arg->flags & FUSE_POSIX_ACL) -+ se->conn.capable |= FUSE_CAP_POSIX_ACL; -+ if (arg->flags & FUSE_HANDLE_KILLPRIV) -+ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -+ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) -+ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -+ if (!(arg->flags & FUSE_MAX_PAGES)) { -+ size_t max_bufsize = -+ FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() -+ + FUSE_BUFFER_HEADER_SIZE; -+ if (bufsize > max_bufsize) { -+ bufsize = max_bufsize; -+ } -+ } -+ } else { -+ se->conn.max_readahead = 0; -+ } -+ -+ if (se->conn.proto_minor >= 14) { -+#ifdef HAVE_SPLICE -+#ifdef HAVE_VMSPLICE -+ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; -+#endif -+ se->conn.capable |= FUSE_CAP_SPLICE_READ; -+#endif -+ } -+ if (se->conn.proto_minor >= 18) -+ se->conn.capable |= FUSE_CAP_IOCTL_DIR; -+ -+ /* Default settings for modern filesystems. 
-+ * -+ * Most of these capabilities were disabled by default in -+ * libfuse2 for backwards compatibility reasons. In libfuse3, -+ * we can finally enable them by default (as long as they're -+ * supported by the kernel). -+ */ -+#define LL_SET_DEFAULT(cond, cap) \ -+ if ((cond) && (se->conn.capable & (cap))) \ -+ se->conn.want |= (cap) -+ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); -+ LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); -+ LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); -+ LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); -+ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); -+ LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); -+ LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); -+ LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); -+ LL_SET_DEFAULT(se->op.getlk && se->op.setlk, -+ FUSE_CAP_POSIX_LOCKS); -+ LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); -+ LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); -+ LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, -+ FUSE_CAP_READDIRPLUS_AUTO); -+ se->conn.time_gran = 1; -+ -+ if (bufsize < FUSE_MIN_READ_BUFFER) { -+ fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", -+ bufsize); -+ bufsize = FUSE_MIN_READ_BUFFER; -+ } -+ se->bufsize = bufsize; -+ -+ if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) -+ se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; -+ -+ se->got_init = 1; -+ if (se->op.init) -+ se->op.init(se->userdata, &se->conn); -+ -+ if (se->conn.want & (~se->conn.capable)) { -+ fuse_log(FUSE_LOG_ERR, "fuse: error: filesystem requested capabilities " -+ "0x%x that are not supported by kernel, aborting.\n", -+ se->conn.want & (~se->conn.capable)); -+ fuse_reply_err(req, EPROTO); -+ se->error = -EPROTO; -+ fuse_session_exit(se); -+ return; -+ } -+ -+ unsigned max_read_mo = get_max_read(se->mo); -+ if (se->conn.max_read != max_read_mo) { -+ fuse_log(FUSE_LOG_ERR, "fuse: error: init() and fuse_session_new() " -+ "requested different maximum read size (%u vs %u)\n", -+ se->conn.max_read, 
max_read_mo); -+ fuse_reply_err(req, EPROTO); -+ se->error = -EPROTO; -+ fuse_session_exit(se); -+ return; -+ } -+ -+ if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { -+ se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; -+ } -+ if (arg->flags & FUSE_MAX_PAGES) { -+ outarg.flags |= FUSE_MAX_PAGES; -+ outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; -+ } -+ -+ /* Always enable big writes, this is superseded -+ by the max_write option */ -+ outarg.flags |= FUSE_BIG_WRITES; -+ -+ if (se->conn.want & FUSE_CAP_ASYNC_READ) -+ outarg.flags |= FUSE_ASYNC_READ; -+ if (se->conn.want & FUSE_CAP_POSIX_LOCKS) -+ outarg.flags |= FUSE_POSIX_LOCKS; -+ if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) -+ outarg.flags |= FUSE_ATOMIC_O_TRUNC; -+ if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) -+ outarg.flags |= FUSE_EXPORT_SUPPORT; -+ if (se->conn.want & FUSE_CAP_DONT_MASK) -+ outarg.flags |= FUSE_DONT_MASK; -+ if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) -+ outarg.flags |= FUSE_FLOCK_LOCKS; -+ if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) -+ outarg.flags |= FUSE_AUTO_INVAL_DATA; -+ if (se->conn.want & FUSE_CAP_READDIRPLUS) -+ outarg.flags |= FUSE_DO_READDIRPLUS; -+ if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) -+ outarg.flags |= FUSE_READDIRPLUS_AUTO; -+ if (se->conn.want & FUSE_CAP_ASYNC_DIO) -+ outarg.flags |= FUSE_ASYNC_DIO; -+ if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) -+ outarg.flags |= FUSE_WRITEBACK_CACHE; -+ if (se->conn.want & FUSE_CAP_POSIX_ACL) -+ outarg.flags |= FUSE_POSIX_ACL; -+ outarg.max_readahead = se->conn.max_readahead; -+ outarg.max_write = se->conn.max_write; -+ if (se->conn.proto_minor >= 13) { -+ if (se->conn.max_background >= (1 << 16)) -+ se->conn.max_background = (1 << 16) - 1; -+ if (se->conn.congestion_threshold > se->conn.max_background) -+ se->conn.congestion_threshold = se->conn.max_background; -+ if (!se->conn.congestion_threshold) { -+ se->conn.congestion_threshold = -+ se->conn.max_background * 3 / 4; -+ } -+ -+ 
outarg.max_background = se->conn.max_background; -+ outarg.congestion_threshold = se->conn.congestion_threshold; -+ } -+ if (se->conn.proto_minor >= 23) -+ outarg.time_gran = se->conn.time_gran; -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); -+ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -+ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", -+ outarg.max_readahead); -+ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -+ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", -+ outarg.max_background); -+ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -+ outarg.congestion_threshold); -+ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", -+ outarg.time_gran); -+ } -+ if (arg->minor < 5) -+ outargsize = FUSE_COMPAT_INIT_OUT_SIZE; -+ else if (arg->minor < 23) -+ outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; -+ -+ send_reply_ok(req, &outarg, outargsize); -+} -+ -+static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_session *se = req->se; -+ -+ (void) nodeid; -+ (void) inarg; -+ -+ se->got_destroy = 1; -+ if (se->op.destroy) -+ se->op.destroy(se->userdata); -+ -+ send_reply_ok(req, NULL, 0); -+} -+ -+static void list_del_nreq(struct fuse_notify_req *nreq) -+{ -+ struct fuse_notify_req *prev = nreq->prev; -+ struct fuse_notify_req *next = nreq->next; -+ prev->next = next; -+ next->prev = prev; -+} -+ -+static void list_add_nreq(struct fuse_notify_req *nreq, -+ struct fuse_notify_req *next) -+{ -+ struct fuse_notify_req *prev = next->prev; -+ nreq->next = next; -+ nreq->prev = prev; -+ prev->next = nreq; -+ next->prev = nreq; -+} -+ -+static void list_init_nreq(struct fuse_notify_req *nreq) -+{ -+ nreq->next = nreq; -+ nreq->prev = nreq; -+} -+ -+static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, -+ const void *inarg, const struct fuse_buf *buf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_notify_req *nreq; -+ struct fuse_notify_req 
*head; -+ -+ pthread_mutex_lock(&se->lock); -+ head = &se->notify_list; -+ for (nreq = head->next; nreq != head; nreq = nreq->next) { -+ if (nreq->unique == req->unique) { -+ list_del_nreq(nreq); -+ break; -+ } -+ } -+ pthread_mutex_unlock(&se->lock); -+ -+ if (nreq != head) -+ nreq->reply(nreq, req, nodeid, inarg, buf); -+} -+ -+static int send_notify_iov(struct fuse_session *se, int notify_code, -+ struct iovec *iov, int count) -+{ -+ struct fuse_out_header out; -+ -+ if (!se->got_init) -+ return -ENOTCONN; -+ -+ out.unique = 0; -+ out.error = notify_code; -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); -+ -+ return fuse_send_msg(se, NULL, iov, count); -+} -+ -+int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) -+{ -+ if (ph != NULL) { -+ struct fuse_notify_poll_wakeup_out outarg; -+ struct iovec iov[2]; -+ -+ outarg.kh = ph->kh; -+ -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ -+ return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); -+ } else { -+ return 0; -+ } -+} -+ -+int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, -+ off_t off, off_t len) -+{ -+ struct fuse_notify_inval_inode_out outarg; -+ struct iovec iov[2]; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) -+ return -ENOSYS; -+ -+ outarg.ino = ino; -+ outarg.off = off; -+ outarg.len = len; -+ -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ -+ return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); -+} -+ -+int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, -+ const char *name, size_t namelen) -+{ -+ struct fuse_notify_inval_entry_out outarg; -+ struct iovec iov[3]; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) -+ return -ENOSYS; -+ -+ outarg.parent = parent; -+ outarg.namelen = namelen; -+ outarg.padding = 0; -+ -+ iov[1].iov_base = &outarg; -+ 
iov[1].iov_len = sizeof(outarg); -+ iov[2].iov_base = (void *)name; -+ iov[2].iov_len = namelen + 1; -+ -+ return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); -+} -+ -+int fuse_lowlevel_notify_delete(struct fuse_session *se, -+ fuse_ino_t parent, fuse_ino_t child, -+ const char *name, size_t namelen) -+{ -+ struct fuse_notify_delete_out outarg; -+ struct iovec iov[3]; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) -+ return -ENOSYS; -+ -+ outarg.parent = parent; -+ outarg.child = child; -+ outarg.namelen = namelen; -+ outarg.padding = 0; -+ -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ iov[2].iov_base = (void *)name; -+ iov[2].iov_len = namelen + 1; -+ -+ return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); -+} -+ -+int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -+ off_t offset, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags) -+{ -+ struct fuse_out_header out; -+ struct fuse_notify_store_out outarg; -+ struct iovec iov[3]; -+ size_t size = fuse_buf_size(bufv); -+ int res; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) -+ return -ENOSYS; -+ -+ out.unique = 0; -+ out.error = FUSE_NOTIFY_STORE; -+ -+ outarg.nodeid = ino; -+ outarg.offset = offset; -+ outarg.size = size; -+ outarg.padding = 0; -+ -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(out); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ -+ res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); -+ if (res > 0) -+ res = -res; -+ -+ return res; -+} -+ -+struct fuse_retrieve_req { -+ struct fuse_notify_req nreq; -+ void *cookie; -+}; -+ -+static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, -+ fuse_req_t req, fuse_ino_t ino, -+ const void *inarg, -+ const struct fuse_buf *ibuf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_retrieve_req *rreq = -+ container_of(nreq, struct 
fuse_retrieve_req, nreq); -+ const struct fuse_notify_retrieve_in *arg = inarg; -+ struct fuse_bufvec bufv = { -+ .buf[0] = *ibuf, -+ .count = 1, -+ }; -+ -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) -+ bufv.buf[0].mem = PARAM(arg); -+ -+ bufv.buf[0].size -= sizeof(struct fuse_in_header) + -+ sizeof(struct fuse_notify_retrieve_in); -+ -+ if (bufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); -+ fuse_reply_none(req); -+ goto out; -+ } -+ bufv.buf[0].size = arg->size; -+ -+ if (se->op.retrieve_reply) { -+ se->op.retrieve_reply(req, rreq->cookie, ino, -+ arg->offset, &bufv); -+ } else { -+ fuse_reply_none(req); -+ } -+out: -+ free(rreq); -+ if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) -+ fuse_ll_clear_pipe(se); -+} -+ -+int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -+ size_t size, off_t offset, void *cookie) -+{ -+ struct fuse_notify_retrieve_out outarg; -+ struct iovec iov[2]; -+ struct fuse_retrieve_req *rreq; -+ int err; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) -+ return -ENOSYS; -+ -+ rreq = malloc(sizeof(*rreq)); -+ if (rreq == NULL) -+ return -ENOMEM; -+ -+ pthread_mutex_lock(&se->lock); -+ rreq->cookie = cookie; -+ rreq->nreq.unique = se->notify_ctr++; -+ rreq->nreq.reply = fuse_ll_retrieve_reply; -+ list_add_nreq(&rreq->nreq, &se->notify_list); -+ pthread_mutex_unlock(&se->lock); -+ -+ outarg.notify_unique = rreq->nreq.unique; -+ outarg.nodeid = ino; -+ outarg.offset = offset; -+ outarg.size = size; -+ outarg.padding = 0; -+ -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ -+ err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); -+ if (err) { -+ pthread_mutex_lock(&se->lock); -+ list_del_nreq(&rreq->nreq); -+ pthread_mutex_unlock(&se->lock); -+ free(rreq); -+ } -+ -+ return err; -+} -+ -+void *fuse_req_userdata(fuse_req_t req) -+{ -+ return req->se->userdata; -+} -+ -+const 
struct fuse_ctx *fuse_req_ctx(fuse_req_t req) -+{ -+ return &req->ctx; -+} -+ -+void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, -+ void *data) -+{ -+ pthread_mutex_lock(&req->lock); -+ pthread_mutex_lock(&req->se->lock); -+ req->u.ni.func = func; -+ req->u.ni.data = data; -+ pthread_mutex_unlock(&req->se->lock); -+ if (req->interrupted && func) -+ func(req, data); -+ pthread_mutex_unlock(&req->lock); -+} -+ -+int fuse_req_interrupted(fuse_req_t req) -+{ -+ int interrupted; -+ -+ pthread_mutex_lock(&req->se->lock); -+ interrupted = req->interrupted; -+ pthread_mutex_unlock(&req->se->lock); -+ -+ return interrupted; -+} -+ -+static struct { -+ void (*func)(fuse_req_t, fuse_ino_t, const void *); -+ const char *name; -+} fuse_ll_ops[] = { -+ [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, -+ [FUSE_FORGET] = { do_forget, "FORGET" }, -+ [FUSE_GETATTR] = { do_getattr, "GETATTR" }, -+ [FUSE_SETATTR] = { do_setattr, "SETATTR" }, -+ [FUSE_READLINK] = { do_readlink, "READLINK" }, -+ [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, -+ [FUSE_MKNOD] = { do_mknod, "MKNOD" }, -+ [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, -+ [FUSE_UNLINK] = { do_unlink, "UNLINK" }, -+ [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, -+ [FUSE_RENAME] = { do_rename, "RENAME" }, -+ [FUSE_LINK] = { do_link, "LINK" }, -+ [FUSE_OPEN] = { do_open, "OPEN" }, -+ [FUSE_READ] = { do_read, "READ" }, -+ [FUSE_WRITE] = { do_write, "WRITE" }, -+ [FUSE_STATFS] = { do_statfs, "STATFS" }, -+ [FUSE_RELEASE] = { do_release, "RELEASE" }, -+ [FUSE_FSYNC] = { do_fsync, "FSYNC" }, -+ [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, -+ [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, -+ [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, -+ [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, -+ [FUSE_FLUSH] = { do_flush, "FLUSH" }, -+ [FUSE_INIT] = { do_init, "INIT" }, -+ [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, -+ [FUSE_READDIR] = { do_readdir, "READDIR" }, -+ [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, -+ 
[FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, -+ [FUSE_GETLK] = { do_getlk, "GETLK" }, -+ [FUSE_SETLK] = { do_setlk, "SETLK" }, -+ [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, -+ [FUSE_ACCESS] = { do_access, "ACCESS" }, -+ [FUSE_CREATE] = { do_create, "CREATE" }, -+ [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, -+ [FUSE_BMAP] = { do_bmap, "BMAP" }, -+ [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, -+ [FUSE_POLL] = { do_poll, "POLL" }, -+ [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, -+ [FUSE_DESTROY] = { do_destroy, "DESTROY" }, -+ [FUSE_NOTIFY_REPLY] = { (void *) 1, "NOTIFY_REPLY" }, -+ [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, -+ [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS"}, -+ [FUSE_RENAME2] = { do_rename2, "RENAME2" }, -+ [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, -+ [FUSE_LSEEK] = { do_lseek, "LSEEK" }, -+ [CUSE_INIT] = { cuse_lowlevel_init, "CUSE_INIT" }, -+}; -+ -+#define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) -+ -+static const char *opname(enum fuse_opcode opcode) -+{ -+ if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) -+ return "???"; -+ else -+ return fuse_ll_ops[opcode].name; -+} -+ -+static int fuse_ll_copy_from_pipe(struct fuse_bufvec *dst, -+ struct fuse_bufvec *src) -+{ -+ ssize_t res = fuse_buf_copy(dst, src, 0); -+ if (res < 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", strerror(-res)); -+ return res; -+ } -+ if ((size_t)res < fuse_buf_size(dst)) { -+ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); -+ return -1; -+ } -+ return 0; -+} -+ -+void fuse_session_process_buf(struct fuse_session *se, -+ const struct fuse_buf *buf) -+{ -+ fuse_session_process_buf_int(se, buf, NULL); -+} -+ -+void fuse_session_process_buf_int(struct fuse_session *se, -+ const struct fuse_buf *buf, struct fuse_chan *ch) -+{ -+ const size_t write_header_size = sizeof(struct fuse_in_header) + -+ sizeof(struct fuse_write_in); -+ struct fuse_bufvec bufv = { .buf[0] = *buf, .count 
= 1 }; -+ struct fuse_bufvec tmpbuf = FUSE_BUFVEC_INIT(write_header_size); -+ struct fuse_in_header *in; -+ const void *inarg; -+ struct fuse_req *req; -+ void *mbuf = NULL; -+ int err; -+ int res; -+ -+ if (buf->flags & FUSE_BUF_IS_FD) { -+ if (buf->size < tmpbuf.buf[0].size) -+ tmpbuf.buf[0].size = buf->size; -+ -+ mbuf = malloc(tmpbuf.buf[0].size); -+ if (mbuf == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate header\n"); -+ goto clear_pipe; -+ } -+ tmpbuf.buf[0].mem = mbuf; -+ -+ res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); -+ if (res < 0) -+ goto clear_pipe; -+ -+ in = mbuf; -+ } else { -+ in = buf->mem; -+ } -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", -+ (unsigned long long) in->unique, -+ opname((enum fuse_opcode) in->opcode), in->opcode, -+ (unsigned long long) in->nodeid, buf->size, in->pid); -+ } -+ -+ req = fuse_ll_alloc_req(se); -+ if (req == NULL) { -+ struct fuse_out_header out = { -+ .unique = in->unique, -+ .error = -ENOMEM, -+ }; -+ struct iovec iov = { -+ .iov_base = &out, -+ .iov_len = sizeof(struct fuse_out_header), -+ }; -+ -+ fuse_send_msg(se, ch, &iov, 1); -+ goto clear_pipe; -+ } -+ -+ req->unique = in->unique; -+ req->ctx.uid = in->uid; -+ req->ctx.gid = in->gid; -+ req->ctx.pid = in->pid; -+ req->ch = ch ? fuse_chan_get(ch) : NULL; -+ -+ err = EIO; -+ if (!se->got_init) { -+ enum fuse_opcode expected; -+ -+ expected = se->cuse_data ? 
CUSE_INIT : FUSE_INIT; -+ if (in->opcode != expected) -+ goto reply_err; -+ } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) -+ goto reply_err; -+ -+ err = EACCES; -+ /* Implement -o allow_root */ -+ if (se->deny_others && in->uid != se->owner && in->uid != 0 && -+ in->opcode != FUSE_INIT && in->opcode != FUSE_READ && -+ in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && -+ in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && -+ in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && -+ in->opcode != FUSE_NOTIFY_REPLY && -+ in->opcode != FUSE_READDIRPLUS) -+ goto reply_err; -+ -+ err = ENOSYS; -+ if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) -+ goto reply_err; -+ if (in->opcode != FUSE_INTERRUPT) { -+ struct fuse_req *intr; -+ pthread_mutex_lock(&se->lock); -+ intr = check_interrupt(se, req); -+ list_add_req(req, &se->list); -+ pthread_mutex_unlock(&se->lock); -+ if (intr) -+ fuse_reply_err(intr, EAGAIN); -+ } -+ -+ if ((buf->flags & FUSE_BUF_IS_FD) && write_header_size < buf->size && -+ (in->opcode != FUSE_WRITE || !se->op.write_buf) && -+ in->opcode != FUSE_NOTIFY_REPLY) { -+ void *newmbuf; -+ -+ err = ENOMEM; -+ newmbuf = realloc(mbuf, buf->size); -+ if (newmbuf == NULL) -+ goto reply_err; -+ mbuf = newmbuf; -+ -+ tmpbuf = FUSE_BUFVEC_INIT(buf->size - write_header_size); -+ tmpbuf.buf[0].mem = (char *)mbuf + write_header_size; -+ -+ res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); -+ err = -res; -+ if (res < 0) -+ goto reply_err; -+ -+ in = mbuf; -+ } -+ -+ inarg = (void *) &in[1]; -+ if (in->opcode == FUSE_WRITE && se->op.write_buf) -+ do_write_buf(req, in->nodeid, inarg, buf); -+ else if (in->opcode == FUSE_NOTIFY_REPLY) -+ do_notify_reply(req, in->nodeid, inarg, buf); -+ else -+ fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); -+ -+out_free: -+ free(mbuf); -+ return; -+ -+reply_err: -+ fuse_reply_err(req, err); -+clear_pipe: -+ if (buf->flags & FUSE_BUF_IS_FD) -+ fuse_ll_clear_pipe(se); -+ goto out_free; 
-+} -+ -+#define LL_OPTION(n,o,v) \ -+ { n, offsetof(struct fuse_session, o), v } -+ -+static const struct fuse_opt fuse_ll_opts[] = { -+ LL_OPTION("debug", debug, 1), -+ LL_OPTION("-d", debug, 1), -+ LL_OPTION("--debug", debug, 1), -+ LL_OPTION("allow_root", deny_others, 1), -+ FUSE_OPT_END -+}; -+ -+void fuse_lowlevel_version(void) -+{ -+ printf("using FUSE kernel interface version %i.%i\n", -+ FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); -+ fuse_mount_version(); -+} -+ -+void fuse_lowlevel_help(void) -+{ -+ /* These are not all options, but the ones that are -+ potentially of interest to an end-user */ -+ printf( -+" -o allow_other allow access by all users\n" -+" -o allow_root allow access by root\n" -+" -o auto_unmount auto unmount on process termination\n"); -+} -+ -+void fuse_session_destroy(struct fuse_session *se) -+{ -+ struct fuse_ll_pipe *llp; -+ -+ if (se->got_init && !se->got_destroy) { -+ if (se->op.destroy) -+ se->op.destroy(se->userdata); -+ } -+ llp = pthread_getspecific(se->pipe_key); -+ if (llp != NULL) -+ fuse_ll_pipe_free(llp); -+ pthread_key_delete(se->pipe_key); -+ pthread_mutex_destroy(&se->lock); -+ free(se->cuse_data); -+ if (se->fd != -1) -+ close(se->fd); -+ destroy_mount_opts(se->mo); -+ free(se); -+} -+ -+ -+static void fuse_ll_pipe_destructor(void *data) -+{ -+ struct fuse_ll_pipe *llp = data; -+ fuse_ll_pipe_free(llp); -+} -+ -+int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf) -+{ -+ return fuse_session_receive_buf_int(se, buf, NULL); -+} -+ -+int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, -+ struct fuse_chan *ch) -+{ -+ int err; -+ ssize_t res; -+#ifdef HAVE_SPLICE -+ size_t bufsize = se->bufsize; -+ struct fuse_ll_pipe *llp; -+ struct fuse_buf tmpbuf; -+ -+ if (se->conn.proto_minor < 14 || !(se->conn.want & FUSE_CAP_SPLICE_READ)) -+ goto fallback; -+ -+ llp = fuse_ll_get_pipe(se); -+ if (llp == NULL) -+ goto fallback; -+ -+ if (llp->size < bufsize) { -+ if 
(llp->can_grow) { -+ res = fcntl(llp->pipe[0], F_SETPIPE_SZ, bufsize); -+ if (res == -1) { -+ llp->can_grow = 0; -+ res = grow_pipe_to_max(llp->pipe[0]); -+ if (res > 0) -+ llp->size = res; -+ goto fallback; -+ } -+ llp->size = res; -+ } -+ if (llp->size < bufsize) -+ goto fallback; -+ } -+ -+ res = splice(ch ? ch->fd : se->fd, -+ NULL, llp->pipe[1], NULL, bufsize, 0); -+ err = errno; -+ -+ if (fuse_session_exited(se)) -+ return 0; -+ -+ if (res == -1) { -+ if (err == ENODEV) { -+ /* Filesystem was unmounted, or connection was aborted -+ via /sys/fs/fuse/connections */ -+ fuse_session_exit(se); -+ return 0; -+ } -+ if (err != EINTR && err != EAGAIN) -+ perror("fuse: splice from device"); -+ return -err; -+ } -+ -+ if (res < sizeof(struct fuse_in_header)) { -+ fuse_log(FUSE_LOG_ERR, "short splice from fuse device\n"); -+ return -EIO; -+ } -+ -+ tmpbuf = (struct fuse_buf) { -+ .size = res, -+ .flags = FUSE_BUF_IS_FD, -+ .fd = llp->pipe[0], -+ }; -+ -+ /* -+ * Don't bother with zero copy for small requests. -+ * fuse_loop_mt() needs to check for FORGET so this more than -+ * just an optimization. 
-+ */ -+ if (res < sizeof(struct fuse_in_header) + -+ sizeof(struct fuse_write_in) + pagesize) { -+ struct fuse_bufvec src = { .buf[0] = tmpbuf, .count = 1 }; -+ struct fuse_bufvec dst = { .count = 1 }; -+ -+ if (!buf->mem) { -+ buf->mem = malloc(se->bufsize); -+ if (!buf->mem) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: failed to allocate read buffer\n"); -+ return -ENOMEM; -+ } -+ } -+ buf->size = se->bufsize; -+ buf->flags = 0; -+ dst.buf[0] = *buf; -+ -+ res = fuse_buf_copy(&dst, &src, 0); -+ if (res < 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", -+ strerror(-res)); -+ fuse_ll_clear_pipe(se); -+ return res; -+ } -+ if (res < tmpbuf.size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); -+ fuse_ll_clear_pipe(se); -+ return -EIO; -+ } -+ assert(res == tmpbuf.size); -+ -+ } else { -+ /* Don't overwrite buf->mem, as that would cause a leak */ -+ buf->fd = tmpbuf.fd; -+ buf->flags = tmpbuf.flags; -+ } -+ buf->size = tmpbuf.size; -+ -+ return res; -+ -+fallback: -+#endif -+ if (!buf->mem) { -+ buf->mem = malloc(se->bufsize); -+ if (!buf->mem) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: failed to allocate read buffer\n"); -+ return -ENOMEM; -+ } -+ } -+ -+restart: -+ res = read(ch ? 
ch->fd : se->fd, buf->mem, se->bufsize); -+ err = errno; -+ -+ if (fuse_session_exited(se)) -+ return 0; -+ if (res == -1) { -+ /* ENOENT means the operation was interrupted, it's safe -+ to restart */ -+ if (err == ENOENT) -+ goto restart; -+ -+ if (err == ENODEV) { -+ /* Filesystem was unmounted, or connection was aborted -+ via /sys/fs/fuse/connections */ -+ fuse_session_exit(se); -+ return 0; -+ } -+ /* Errors occurring during normal operation: EINTR (read -+ interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem -+ umounted) */ -+ if (err != EINTR && err != EAGAIN) -+ perror("fuse: reading device"); -+ return -err; -+ } -+ if ((size_t) res < sizeof(struct fuse_in_header)) { -+ fuse_log(FUSE_LOG_ERR, "short read on fuse device\n"); -+ return -EIO; -+ } -+ -+ buf->size = res; -+ -+ return res; -+} -+ -+struct fuse_session *fuse_session_new(struct fuse_args *args, -+ const struct fuse_lowlevel_ops *op, -+ size_t op_size, void *userdata) -+{ -+ int err; -+ struct fuse_session *se; -+ struct mount_opts *mo; -+ -+ if (sizeof(struct fuse_lowlevel_ops) < op_size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); -+ op_size = sizeof(struct fuse_lowlevel_ops); -+ } -+ -+ if (args->argc == 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: empty argv passed to fuse_session_new().\n"); -+ return NULL; -+ } -+ -+ se = (struct fuse_session *) calloc(1, sizeof(struct fuse_session)); -+ if (se == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n"); -+ goto out1; -+ } -+ se->fd = -1; -+ se->conn.max_write = UINT_MAX; -+ se->conn.max_readahead = UINT_MAX; -+ -+ /* Parse options */ -+ if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) -+ goto out2; -+ if(se->deny_others) { -+ /* Allowing access only by root is done by instructing -+ * kernel to allow access by everyone, and then restricting -+ * access to root and mountpoint owner in libfuse. 
-+ */ -+ // We may be adding the option a second time, but -+ // that doesn't hurt. -+ if(fuse_opt_add_arg(args, "-oallow_other") == -1) -+ goto out2; -+ } -+ mo = parse_mount_opts(args); -+ if (mo == NULL) -+ goto out3; -+ -+ if(args->argc == 1 && -+ args->argv[0][0] == '-') { -+ fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " -+ "will be ignored\n"); -+ } else if (args->argc != 1) { -+ int i; -+ fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); -+ for(i = 1; i < args->argc-1; i++) -+ fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); -+ fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); -+ goto out4; -+ } -+ -+ if (se->debug) -+ fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION); -+ -+ se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + -+ FUSE_BUFFER_HEADER_SIZE; -+ -+ list_init_req(&se->list); -+ list_init_req(&se->interrupts); -+ list_init_nreq(&se->notify_list); -+ se->notify_ctr = 1; -+ fuse_mutex_init(&se->lock); -+ -+ err = pthread_key_create(&se->pipe_key, fuse_ll_pipe_destructor); -+ if (err) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to create thread specific key: %s\n", -+ strerror(err)); -+ goto out5; -+ } -+ -+ memcpy(&se->op, op, op_size); -+ se->owner = getuid(); -+ se->userdata = userdata; -+ -+ se->mo = mo; -+ return se; -+ -+out5: -+ pthread_mutex_destroy(&se->lock); -+out4: -+ fuse_opt_free_args(args); -+out3: -+ free(mo); -+out2: -+ free(se); -+out1: -+ return NULL; -+} -+ -+int fuse_session_mount(struct fuse_session *se, const char *mountpoint) -+{ -+ int fd; -+ -+ /* -+ * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos -+ * would ensue. -+ */ -+ do { -+ fd = open("/dev/null", O_RDWR); -+ if (fd > 2) -+ close(fd); -+ } while (fd >= 0 && fd <= 2); -+ -+ /* -+ * To allow FUSE daemons to run without privileges, the caller may open -+ * /dev/fuse before launching the file system and pass on the file -+ * descriptor by specifying /dev/fd/N as the mount point. 
Note that the -+ * parent process takes care of performing the mount in this case. -+ */ -+ fd = fuse_mnt_parse_fuse_fd(mountpoint); -+ if (fd != -1) { -+ if (fcntl(fd, F_GETFD) == -1) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: Invalid file descriptor /dev/fd/%u\n", -+ fd); -+ return -1; -+ } -+ se->fd = fd; -+ return 0; -+ } -+ -+ /* Open channel */ -+ fd = fuse_kern_mount(mountpoint, se->mo); -+ if (fd == -1) -+ return -1; -+ se->fd = fd; -+ -+ /* Save mountpoint */ -+ se->mountpoint = strdup(mountpoint); -+ if (se->mountpoint == NULL) -+ goto error_out; -+ -+ return 0; -+ -+error_out: -+ fuse_kern_unmount(mountpoint, fd); -+ return -1; -+} -+ -+int fuse_session_fd(struct fuse_session *se) -+{ -+ return se->fd; -+} -+ -+void fuse_session_unmount(struct fuse_session *se) -+{ -+ if (se->mountpoint != NULL) { -+ fuse_kern_unmount(se->mountpoint, se->fd); -+ free(se->mountpoint); -+ se->mountpoint = NULL; -+ } -+} -+ -+#ifdef linux -+int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) -+{ -+ char *buf; -+ size_t bufsize = 1024; -+ char path[128]; -+ int ret; -+ int fd; -+ unsigned long pid = req->ctx.pid; -+ char *s; -+ -+ sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); -+ -+retry: -+ buf = malloc(bufsize); -+ if (buf == NULL) -+ return -ENOMEM; -+ -+ ret = -EIO; -+ fd = open(path, O_RDONLY); -+ if (fd == -1) -+ goto out_free; -+ -+ ret = read(fd, buf, bufsize); -+ close(fd); -+ if (ret < 0) { -+ ret = -EIO; -+ goto out_free; -+ } -+ -+ if ((size_t)ret == bufsize) { -+ free(buf); -+ bufsize *= 4; -+ goto retry; -+ } -+ -+ ret = -EIO; -+ s = strstr(buf, "\nGroups:"); -+ if (s == NULL) -+ goto out_free; -+ -+ s += 8; -+ ret = 0; -+ while (1) { -+ char *end; -+ unsigned long val = strtoul(s, &end, 0); -+ if (end == s) -+ break; -+ -+ s = end; -+ if (ret < size) -+ list[ret] = val; -+ ret++; -+ } -+ -+out_free: -+ free(buf); -+ return ret; -+} -+#else /* linux */ -+/* -+ * This is currently not implemented on other than Linux... 
-+ */ -+int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) -+{ -+ (void) req; (void) size; (void) list; -+ return -ENOSYS; -+} -+#endif -+ -+void fuse_session_exit(struct fuse_session *se) -+{ -+ se->exited = 1; -+} -+ -+void fuse_session_reset(struct fuse_session *se) -+{ -+ se->exited = 0; -+ se->error = 0; -+} -+ -+int fuse_session_exited(struct fuse_session *se) -+{ -+ return se->exited; -+} --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Add-main-virtio-loop.patch b/SOURCES/kvm-virtiofsd-Add-main-virtio-loop.patch deleted file mode 100644 index c0ba96a..0000000 --- a/SOURCES/kvm-virtiofsd-Add-main-virtio-loop.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 6f413d8b76ff38e5bc01f36515ca71d7fd6e6144 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:58 +0100 -Subject: [PATCH 027/116] virtiofsd: Add main virtio loop -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-24-dgilbert@redhat.com> -Patchwork-id: 93475 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 023/112] virtiofsd: Add main virtio loop -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Processes incoming requests on the vhost-user fd. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 204d8ae57b3c57098642c79b3c03d42495149c09) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 42 +++++++++++++++++++++++++++++++++++++++--- - 1 file changed, 39 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 2ae3c76..1928a20 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -11,12 +11,14 @@ - * See the file COPYING.LIB - */ - -+#include "fuse_virtio.h" - #include "fuse_i.h" - #include "standard-headers/linux/fuse.h" - #include "fuse_misc.h" - #include "fuse_opt.h" --#include "fuse_virtio.h" - -+#include -+#include - #include - #include - #include -@@ -80,15 +82,49 @@ static const VuDevIface fv_iface = { - .queue_is_processed_in_order = fv_queue_order, - }; - -+/* -+ * Main loop; this mostly deals with events on the vhost-user -+ * socket itself, and not actual fuse data. -+ */ - int virtio_loop(struct fuse_session *se) - { - fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__); - -- while (1) { -- /* TODO: Add stuffing */ -+ while (!fuse_session_exited(se)) { -+ struct pollfd pf[1]; -+ pf[0].fd = se->vu_socketfd; -+ pf[0].events = POLLIN; -+ pf[0].revents = 0; -+ -+ fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for VU event\n", __func__); -+ int poll_res = ppoll(pf, 1, NULL, NULL); -+ -+ if (poll_res == -1) { -+ if (errno == EINTR) { -+ fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n", -+ __func__); -+ continue; -+ } -+ fuse_log(FUSE_LOG_ERR, "virtio_loop ppoll: %m\n"); -+ break; -+ } -+ assert(poll_res == 1); -+ if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { -+ fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", __func__, -+ pf[0].revents); -+ break; -+ } -+ assert(pf[0].revents & POLLIN); -+ fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__); -+ if (!vu_dispatch(&se->virtio_dev->dev)) { -+ fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__); -+ break; -+ } - } - - 
fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); -+ -+ return 0; - } - - int virtio_session_mount(struct fuse_session *se) --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Add-options-for-virtio.patch b/SOURCES/kvm-virtiofsd-Add-options-for-virtio.patch deleted file mode 100644 index 8ac7fa7..0000000 --- a/SOURCES/kvm-virtiofsd-Add-options-for-virtio.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 9c1bbe327cf8f88ffc78eed0fce8cdd6f3f006ef Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:54 +0100 -Subject: [PATCH 023/116] virtiofsd: Add options for virtio -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-20-dgilbert@redhat.com> -Patchwork-id: 93473 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 019/112] virtiofsd: Add options for virtio -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Add options to specify parameters for virtio-fs paths, i.e. - - ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 205de006aab8dcbe546a7e3a51d295c2d05e654b) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 1 + - tools/virtiofsd/fuse_lowlevel.c | 11 ++++++++--- - tools/virtiofsd/helper.c | 14 +++++++------- - 3 files changed, 16 insertions(+), 10 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index bae0699..26b1a7d 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -63,6 +63,7 @@ struct fuse_session { - struct fuse_notify_req notify_list; - size_t bufsize; - int error; -+ char *vu_socket_path; - }; - - struct fuse_chan { -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 8552cfb..17e8718 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2115,8 +2115,11 @@ reply_err: - } - - static const struct fuse_opt fuse_ll_opts[] = { -- LL_OPTION("debug", debug, 1), LL_OPTION("-d", debug, 1), -- LL_OPTION("--debug", debug, 1), LL_OPTION("allow_root", deny_others, 1), -+ LL_OPTION("debug", debug, 1), -+ LL_OPTION("-d", debug, 1), -+ LL_OPTION("--debug", debug, 1), -+ LL_OPTION("allow_root", deny_others, 1), -+ LL_OPTION("--socket-path=%s", vu_socket_path, 0), - FUSE_OPT_END - }; - -@@ -2132,7 +2135,9 @@ void fuse_lowlevel_help(void) - * These are not all options, but the ones that are - * potentially of interest to an end-user - */ -- printf(" -o allow_root allow access by root\n"); -+ printf( -+ " -o allow_root allow access by root\n" -+ " --socket-path=PATH path for the vhost-user socket\n"); - } - - void fuse_session_destroy(struct fuse_session *se) -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 9333691..676032e 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -127,13 +127,13 @@ static const struct fuse_opt conn_info_opt_spec[] = { - - void fuse_cmdline_help(void) - { -- printf( -- " -h --help print help\n" -- " -V --version print version\n" 
-- " -d -o debug enable debug output (implies -f)\n" -- " -f foreground operation\n" -- " -o max_idle_threads the maximum number of idle worker threads\n" -- " allowed (default: 10)\n"); -+ printf(" -h --help print help\n" -+ " -V --version print version\n" -+ " -d -o debug enable debug output (implies -f)\n" -+ " -f foreground operation\n" -+ " -o max_idle_threads the maximum number of idle worker " -+ "threads\n" -+ " allowed (default: 10)\n"); - } - - static int fuse_helper_opt_proc(void *data, const char *arg, int key, --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Add-passthrough_ll.patch b/SOURCES/kvm-virtiofsd-Add-passthrough_ll.patch deleted file mode 100644 index 2510551..0000000 --- a/SOURCES/kvm-virtiofsd-Add-passthrough_ll.patch +++ /dev/null @@ -1,1387 +0,0 @@ -From 18ef831cac81a6bd2336c73dda357d9d69f8fd25 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:43 +0100 -Subject: [PATCH 012/116] virtiofsd: Add passthrough_ll -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-9-dgilbert@redhat.com> -Patchwork-id: 93462 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 008/112] virtiofsd: Add passthrough_ll -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -passthrough_ll is one of the examples in the upstream fuse project -and is the main part of our daemon here. It passes through requests -from fuse to the underlying filesystem, using syscalls as directly -as possible. - ->From libfuse fuse-3.8.0 - -Signed-off-by: Dr. David Alan Gilbert - Fixed up 'GPL' to 'GPLv2' as per Dan's comments and consistent - with the 'LICENSE' file in libfuse; patch sent to libfuse to fix - it upstream. -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 7c6b66027241f41720240fc6ee1021cdbd975b2e) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 1338 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 1338 insertions(+) - create mode 100644 tools/virtiofsd/passthrough_ll.c - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -new file mode 100644 -index 0000000..e1a6056 ---- /dev/null -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -0,0 +1,1338 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU GPLv2. -+ See the file COPYING. -+*/ -+ -+/** @file -+ * -+ * This file system mirrors the existing file system hierarchy of the -+ * system, starting at the root file system. This is implemented by -+ * just "passing through" all requests to the corresponding user-space -+ * libc functions. In contrast to passthrough.c and passthrough_fh.c, -+ * this implementation uses the low-level API. Its performance should -+ * be the least bad among the three, but many operations are not -+ * implemented. In particular, it is not possible to remove files (or -+ * directories) because the code necessary to defer actual removal -+ * until the file is not opened anymore would make the example much -+ * more complicated. -+ * -+ * When writeback caching is enabled (-o writeback mount option), it -+ * is only possible to write to files for which the mounting user has -+ * read permissions. This is because the writeback cache requires the -+ * kernel to be able to issue read requests for all files (which the -+ * passthrough filesystem cannot satisfy if it can't read the file in -+ * the underlying filesystem). 
-+ * -+ * Compile with: -+ * -+ * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o passthrough_ll -+ * -+ * ## Source code ## -+ * \include passthrough_ll.c -+ */ -+ -+#define _GNU_SOURCE -+#define FUSE_USE_VERSION 31 -+ -+#include "config.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "passthrough_helpers.h" -+ -+/* We are re-using pointers to our `struct lo_inode` and `struct -+ lo_dirp` elements as inodes. This means that we must be able to -+ store uintptr_t values in a fuse_ino_t variable. The following -+ incantation checks this condition at compile time. */ -+#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus -+_Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), -+ "fuse_ino_t too small to hold uintptr_t values!"); -+#else -+struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct \ -+ { unsigned _uintptr_to_must_hold_fuse_ino_t: -+ ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 
1 : -1); }; -+#endif -+ -+struct lo_inode { -+ struct lo_inode *next; /* protected by lo->mutex */ -+ struct lo_inode *prev; /* protected by lo->mutex */ -+ int fd; -+ bool is_symlink; -+ ino_t ino; -+ dev_t dev; -+ uint64_t refcount; /* protected by lo->mutex */ -+}; -+ -+enum { -+ CACHE_NEVER, -+ CACHE_NORMAL, -+ CACHE_ALWAYS, -+}; -+ -+struct lo_data { -+ pthread_mutex_t mutex; -+ int debug; -+ int writeback; -+ int flock; -+ int xattr; -+ const char *source; -+ double timeout; -+ int cache; -+ int timeout_set; -+ struct lo_inode root; /* protected by lo->mutex */ -+}; -+ -+static const struct fuse_opt lo_opts[] = { -+ { "writeback", -+ offsetof(struct lo_data, writeback), 1 }, -+ { "no_writeback", -+ offsetof(struct lo_data, writeback), 0 }, -+ { "source=%s", -+ offsetof(struct lo_data, source), 0 }, -+ { "flock", -+ offsetof(struct lo_data, flock), 1 }, -+ { "no_flock", -+ offsetof(struct lo_data, flock), 0 }, -+ { "xattr", -+ offsetof(struct lo_data, xattr), 1 }, -+ { "no_xattr", -+ offsetof(struct lo_data, xattr), 0 }, -+ { "timeout=%lf", -+ offsetof(struct lo_data, timeout), 0 }, -+ { "timeout=", -+ offsetof(struct lo_data, timeout_set), 1 }, -+ { "cache=never", -+ offsetof(struct lo_data, cache), CACHE_NEVER }, -+ { "cache=auto", -+ offsetof(struct lo_data, cache), CACHE_NORMAL }, -+ { "cache=always", -+ offsetof(struct lo_data, cache), CACHE_ALWAYS }, -+ -+ FUSE_OPT_END -+}; -+ -+static struct lo_data *lo_data(fuse_req_t req) -+{ -+ return (struct lo_data *) fuse_req_userdata(req); -+} -+ -+static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) -+{ -+ if (ino == FUSE_ROOT_ID) -+ return &lo_data(req)->root; -+ else -+ return (struct lo_inode *) (uintptr_t) ino; -+} -+ -+static int lo_fd(fuse_req_t req, fuse_ino_t ino) -+{ -+ return lo_inode(req, ino)->fd; -+} -+ -+static bool lo_debug(fuse_req_t req) -+{ -+ return lo_data(req)->debug != 0; -+} -+ -+static void lo_init(void *userdata, -+ struct fuse_conn_info *conn) -+{ -+ struct lo_data *lo = 
(struct lo_data*) userdata; -+ -+ if(conn->capable & FUSE_CAP_EXPORT_SUPPORT) -+ conn->want |= FUSE_CAP_EXPORT_SUPPORT; -+ -+ if (lo->writeback && -+ conn->capable & FUSE_CAP_WRITEBACK_CACHE) { -+ if (lo->debug) -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); -+ conn->want |= FUSE_CAP_WRITEBACK_CACHE; -+ } -+ if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -+ if (lo->debug) -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -+ conn->want |= FUSE_CAP_FLOCK_LOCKS; -+ } -+} -+ -+static void lo_getattr(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi) -+{ -+ int res; -+ struct stat buf; -+ struct lo_data *lo = lo_data(req); -+ -+ (void) fi; -+ -+ res = fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) -+ return (void) fuse_reply_err(req, errno); -+ -+ fuse_reply_attr(req, &buf, lo->timeout); -+} -+ -+static int utimensat_empty_nofollow(struct lo_inode *inode, -+ const struct timespec *tv) -+{ -+ int res; -+ char procname[64]; -+ -+ if (inode->is_symlink) { -+ res = utimensat(inode->fd, "", tv, -+ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1 && errno == EINVAL) { -+ /* Sorry, no race free way to set times on symlink. */ -+ errno = EPERM; -+ } -+ return res; -+ } -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ return utimensat(AT_FDCWD, procname, tv, 0); -+} -+ -+static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, -+ int valid, struct fuse_file_info *fi) -+{ -+ int saverr; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ int ifd = inode->fd; -+ int res; -+ -+ if (valid & FUSE_SET_ATTR_MODE) { -+ if (fi) { -+ res = fchmod(fi->fh, attr->st_mode); -+ } else { -+ sprintf(procname, "/proc/self/fd/%i", ifd); -+ res = chmod(procname, attr->st_mode); -+ } -+ if (res == -1) -+ goto out_err; -+ } -+ if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { -+ uid_t uid = (valid & FUSE_SET_ATTR_UID) ? 
-+ attr->st_uid : (uid_t) -1; -+ gid_t gid = (valid & FUSE_SET_ATTR_GID) ? -+ attr->st_gid : (gid_t) -1; -+ -+ res = fchownat(ifd, "", uid, gid, -+ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) -+ goto out_err; -+ } -+ if (valid & FUSE_SET_ATTR_SIZE) { -+ if (fi) { -+ res = ftruncate(fi->fh, attr->st_size); -+ } else { -+ sprintf(procname, "/proc/self/fd/%i", ifd); -+ res = truncate(procname, attr->st_size); -+ } -+ if (res == -1) -+ goto out_err; -+ } -+ if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { -+ struct timespec tv[2]; -+ -+ tv[0].tv_sec = 0; -+ tv[1].tv_sec = 0; -+ tv[0].tv_nsec = UTIME_OMIT; -+ tv[1].tv_nsec = UTIME_OMIT; -+ -+ if (valid & FUSE_SET_ATTR_ATIME_NOW) -+ tv[0].tv_nsec = UTIME_NOW; -+ else if (valid & FUSE_SET_ATTR_ATIME) -+ tv[0] = attr->st_atim; -+ -+ if (valid & FUSE_SET_ATTR_MTIME_NOW) -+ tv[1].tv_nsec = UTIME_NOW; -+ else if (valid & FUSE_SET_ATTR_MTIME) -+ tv[1] = attr->st_mtim; -+ -+ if (fi) -+ res = futimens(fi->fh, tv); -+ else -+ res = utimensat_empty_nofollow(inode, tv); -+ if (res == -1) -+ goto out_err; -+ } -+ -+ return lo_getattr(req, ino, fi); -+ -+out_err: -+ saverr = errno; -+ fuse_reply_err(req, saverr); -+} -+ -+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) -+{ -+ struct lo_inode *p; -+ struct lo_inode *ret = NULL; -+ -+ pthread_mutex_lock(&lo->mutex); -+ for (p = lo->root.next; p != &lo->root; p = p->next) { -+ if (p->ino == st->st_ino && p->dev == st->st_dev) { -+ assert(p->refcount > 0); -+ ret = p; -+ ret->refcount++; -+ break; -+ } -+ } -+ pthread_mutex_unlock(&lo->mutex); -+ return ret; -+} -+ -+static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, -+ struct fuse_entry_param *e) -+{ -+ int newfd; -+ int res; -+ int saverr; -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode; -+ -+ memset(e, 0, sizeof(*e)); -+ e->attr_timeout = lo->timeout; -+ e->entry_timeout = lo->timeout; -+ -+ newfd = openat(lo_fd(req, parent), name, O_PATH | 
O_NOFOLLOW); -+ if (newfd == -1) -+ goto out_err; -+ -+ res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) -+ goto out_err; -+ -+ inode = lo_find(lo_data(req), &e->attr); -+ if (inode) { -+ close(newfd); -+ newfd = -1; -+ } else { -+ struct lo_inode *prev, *next; -+ -+ saverr = ENOMEM; -+ inode = calloc(1, sizeof(struct lo_inode)); -+ if (!inode) -+ goto out_err; -+ -+ inode->is_symlink = S_ISLNK(e->attr.st_mode); -+ inode->refcount = 1; -+ inode->fd = newfd; -+ inode->ino = e->attr.st_ino; -+ inode->dev = e->attr.st_dev; -+ -+ pthread_mutex_lock(&lo->mutex); -+ prev = &lo->root; -+ next = prev->next; -+ next->prev = inode; -+ inode->next = next; -+ inode->prev = prev; -+ prev->next = inode; -+ pthread_mutex_unlock(&lo->mutex); -+ } -+ e->ino = (uintptr_t) inode; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long) parent, name, (unsigned long long) e->ino); -+ -+ return 0; -+ -+out_err: -+ saverr = errno; -+ if (newfd != -1) -+ close(newfd); -+ return saverr; -+} -+ -+static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) -+{ -+ struct fuse_entry_param e; -+ int err; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", -+ parent, name); -+ -+ err = lo_do_lookup(req, parent, name, &e); -+ if (err) -+ fuse_reply_err(req, err); -+ else -+ fuse_reply_entry(req, &e); -+} -+ -+static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, -+ const char *name, mode_t mode, dev_t rdev, -+ const char *link) -+{ -+ int res; -+ int saverr; -+ struct lo_inode *dir = lo_inode(req, parent); -+ struct fuse_entry_param e; -+ -+ saverr = ENOMEM; -+ -+ res = mknod_wrapper(dir->fd, name, link, mode, rdev); -+ -+ saverr = errno; -+ if (res == -1) -+ goto out; -+ -+ saverr = lo_do_lookup(req, parent, name, &e); -+ if (saverr) -+ goto out; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long) 
parent, name, (unsigned long long) e.ino); -+ -+ fuse_reply_entry(req, &e); -+ return; -+ -+out: -+ fuse_reply_err(req, saverr); -+} -+ -+static void lo_mknod(fuse_req_t req, fuse_ino_t parent, -+ const char *name, mode_t mode, dev_t rdev) -+{ -+ lo_mknod_symlink(req, parent, name, mode, rdev, NULL); -+} -+ -+static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode) -+{ -+ lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); -+} -+ -+static void lo_symlink(fuse_req_t req, const char *link, -+ fuse_ino_t parent, const char *name) -+{ -+ lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); -+} -+ -+static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, -+ const char *name) -+{ -+ int res; -+ char procname[64]; -+ -+ if (inode->is_symlink) { -+ res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); -+ if (res == -1 && (errno == ENOENT || errno == EINVAL)) { -+ /* Sorry, no race free way to hard-link a symlink. */ -+ errno = EPERM; -+ } -+ return res; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); -+} -+ -+static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, -+ const char *name) -+{ -+ int res; -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode = lo_inode(req, ino); -+ struct fuse_entry_param e; -+ int saverr; -+ -+ memset(&e, 0, sizeof(struct fuse_entry_param)); -+ e.attr_timeout = lo->timeout; -+ e.entry_timeout = lo->timeout; -+ -+ res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); -+ if (res == -1) -+ goto out_err; -+ -+ res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) -+ goto out_err; -+ -+ pthread_mutex_lock(&lo->mutex); -+ inode->refcount++; -+ pthread_mutex_unlock(&lo->mutex); -+ e.ino = (uintptr_t) inode; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long) parent, name, -+ (unsigned long 
long) e.ino); -+ -+ fuse_reply_entry(req, &e); -+ return; -+ -+out_err: -+ saverr = errno; -+ fuse_reply_err(req, saverr); -+} -+ -+static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) -+{ -+ int res; -+ -+ res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); -+ -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, -+ fuse_ino_t newparent, const char *newname, -+ unsigned int flags) -+{ -+ int res; -+ -+ if (flags) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ res = renameat(lo_fd(req, parent), name, -+ lo_fd(req, newparent), newname); -+ -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) -+{ -+ int res; -+ -+ res = unlinkat(lo_fd(req, parent), name, 0); -+ -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) -+{ -+ if (!inode) -+ return; -+ -+ pthread_mutex_lock(&lo->mutex); -+ assert(inode->refcount >= n); -+ inode->refcount -= n; -+ if (!inode->refcount) { -+ struct lo_inode *prev, *next; -+ -+ prev = inode->prev; -+ next = inode->next; -+ next->prev = prev; -+ prev->next = next; -+ -+ pthread_mutex_unlock(&lo->mutex); -+ close(inode->fd); -+ free(inode); -+ -+ } else { -+ pthread_mutex_unlock(&lo->mutex); -+ } -+} -+ -+static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) -+{ -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode = lo_inode(req, ino); -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -+ (unsigned long long) ino, -+ (unsigned long long) inode->refcount, -+ (unsigned long long) nlookup); -+ } -+ -+ unref_inode(lo, inode, nlookup); -+} -+ -+static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) -+{ -+ lo_forget_one(req, ino, nlookup); -+ fuse_reply_none(req); -+} -+ -+static void 
lo_forget_multi(fuse_req_t req, size_t count, -+ struct fuse_forget_data *forgets) -+{ -+ int i; -+ -+ for (i = 0; i < count; i++) -+ lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); -+ fuse_reply_none(req); -+} -+ -+static void lo_readlink(fuse_req_t req, fuse_ino_t ino) -+{ -+ char buf[PATH_MAX + 1]; -+ int res; -+ -+ res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); -+ if (res == -1) -+ return (void) fuse_reply_err(req, errno); -+ -+ if (res == sizeof(buf)) -+ return (void) fuse_reply_err(req, ENAMETOOLONG); -+ -+ buf[res] = '\0'; -+ -+ fuse_reply_readlink(req, buf); -+} -+ -+struct lo_dirp { -+ DIR *dp; -+ struct dirent *entry; -+ off_t offset; -+}; -+ -+static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) -+{ -+ return (struct lo_dirp *) (uintptr_t) fi->fh; -+} -+ -+static void lo_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ int error = ENOMEM; -+ struct lo_data *lo = lo_data(req); -+ struct lo_dirp *d; -+ int fd; -+ -+ d = calloc(1, sizeof(struct lo_dirp)); -+ if (d == NULL) -+ goto out_err; -+ -+ fd = openat(lo_fd(req, ino), ".", O_RDONLY); -+ if (fd == -1) -+ goto out_errno; -+ -+ d->dp = fdopendir(fd); -+ if (d->dp == NULL) -+ goto out_errno; -+ -+ d->offset = 0; -+ d->entry = NULL; -+ -+ fi->fh = (uintptr_t) d; -+ if (lo->cache == CACHE_ALWAYS) -+ fi->keep_cache = 1; -+ fuse_reply_open(req, fi); -+ return; -+ -+out_errno: -+ error = errno; -+out_err: -+ if (d) { -+ if (fd != -1) -+ close(fd); -+ free(d); -+ } -+ fuse_reply_err(req, error); -+} -+ -+static int is_dot_or_dotdot(const char *name) -+{ -+ return name[0] == '.' && (name[1] == '\0' || -+ (name[1] == '.' 
&& name[2] == '\0')); -+} -+ -+static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, -+ off_t offset, struct fuse_file_info *fi, int plus) -+{ -+ struct lo_dirp *d = lo_dirp(fi); -+ char *buf; -+ char *p; -+ size_t rem = size; -+ int err; -+ -+ (void) ino; -+ -+ buf = calloc(1, size); -+ if (!buf) { -+ err = ENOMEM; -+ goto error; -+ } -+ p = buf; -+ -+ if (offset != d->offset) { -+ seekdir(d->dp, offset); -+ d->entry = NULL; -+ d->offset = offset; -+ } -+ while (1) { -+ size_t entsize; -+ off_t nextoff; -+ const char *name; -+ -+ if (!d->entry) { -+ errno = 0; -+ d->entry = readdir(d->dp); -+ if (!d->entry) { -+ if (errno) { // Error -+ err = errno; -+ goto error; -+ } else { // End of stream -+ break; -+ } -+ } -+ } -+ nextoff = d->entry->d_off; -+ name = d->entry->d_name; -+ fuse_ino_t entry_ino = 0; -+ if (plus) { -+ struct fuse_entry_param e; -+ if (is_dot_or_dotdot(name)) { -+ e = (struct fuse_entry_param) { -+ .attr.st_ino = d->entry->d_ino, -+ .attr.st_mode = d->entry->d_type << 12, -+ }; -+ } else { -+ err = lo_do_lookup(req, ino, name, &e); -+ if (err) -+ goto error; -+ entry_ino = e.ino; -+ } -+ -+ entsize = fuse_add_direntry_plus(req, p, rem, name, -+ &e, nextoff); -+ } else { -+ struct stat st = { -+ .st_ino = d->entry->d_ino, -+ .st_mode = d->entry->d_type << 12, -+ }; -+ entsize = fuse_add_direntry(req, p, rem, name, -+ &st, nextoff); -+ } -+ if (entsize > rem) { -+ if (entry_ino != 0) -+ lo_forget_one(req, entry_ino, 1); -+ break; -+ } -+ -+ p += entsize; -+ rem -= entsize; -+ -+ d->entry = NULL; -+ d->offset = nextoff; -+ } -+ -+ err = 0; -+error: -+ // If there's an error, we can only signal it if we haven't stored -+ // any entries yet - otherwise we'd end up with wrong lookup -+ // counts for the entries that are already in the buffer. So we -+ // return what we've collected until that point. 
-+ if (err && rem == size) -+ fuse_reply_err(req, err); -+ else -+ fuse_reply_buf(req, buf, size - rem); -+ free(buf); -+} -+ -+static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, -+ off_t offset, struct fuse_file_info *fi) -+{ -+ lo_do_readdir(req, ino, size, offset, fi, 0); -+} -+ -+static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, -+ off_t offset, struct fuse_file_info *fi) -+{ -+ lo_do_readdir(req, ino, size, offset, fi, 1); -+} -+ -+static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ struct lo_dirp *d = lo_dirp(fi); -+ (void) ino; -+ closedir(d->dp); -+ free(d); -+ fuse_reply_err(req, 0); -+} -+ -+static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, struct fuse_file_info *fi) -+{ -+ int fd; -+ struct lo_data *lo = lo_data(req); -+ struct fuse_entry_param e; -+ int err; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", -+ parent, name); -+ -+ fd = openat(lo_fd(req, parent), name, -+ (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); -+ if (fd == -1) -+ return (void) fuse_reply_err(req, errno); -+ -+ fi->fh = fd; -+ if (lo->cache == CACHE_NEVER) -+ fi->direct_io = 1; -+ else if (lo->cache == CACHE_ALWAYS) -+ fi->keep_cache = 1; -+ -+ err = lo_do_lookup(req, parent, name, &e); -+ if (err) -+ fuse_reply_err(req, err); -+ else -+ fuse_reply_create(req, &e, fi); -+} -+ -+static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi) -+{ -+ int res; -+ int fd = dirfd(lo_dirp(fi)->dp); -+ (void) ino; -+ if (datasync) -+ res = fdatasync(fd); -+ else -+ res = fsync(fd); -+ fuse_reply_err(req, res == -1 ? 
errno : 0); -+} -+ -+static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ int fd; -+ char buf[64]; -+ struct lo_data *lo = lo_data(req); -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", -+ ino, fi->flags); -+ -+ /* With writeback cache, kernel may send read requests even -+ when userspace opened write-only */ -+ if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -+ fi->flags &= ~O_ACCMODE; -+ fi->flags |= O_RDWR; -+ } -+ -+ /* With writeback cache, O_APPEND is handled by the kernel. -+ This breaks atomicity (since the file may change in the -+ underlying filesystem, so that the kernel's idea of the -+ end of the file isn't accurate anymore). In this example, -+ we just accept that. A more rigorous filesystem may want -+ to return an error here */ -+ if (lo->writeback && (fi->flags & O_APPEND)) -+ fi->flags &= ~O_APPEND; -+ -+ sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); -+ fd = open(buf, fi->flags & ~O_NOFOLLOW); -+ if (fd == -1) -+ return (void) fuse_reply_err(req, errno); -+ -+ fi->fh = fd; -+ if (lo->cache == CACHE_NEVER) -+ fi->direct_io = 1; -+ else if (lo->cache == CACHE_ALWAYS) -+ fi->keep_cache = 1; -+ fuse_reply_open(req, fi); -+} -+ -+static void lo_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ (void) ino; -+ -+ close(fi->fh); -+ fuse_reply_err(req, 0); -+} -+ -+static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ int res; -+ (void) ino; -+ res = close(dup(fi->fh)); -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi) -+{ -+ int res; -+ (void) ino; -+ if (datasync) -+ res = fdatasync(fi->fh); -+ else -+ res = fsync(fi->fh); -+ fuse_reply_err(req, res == -1 ? 
errno : 0); -+} -+ -+static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, -+ off_t offset, struct fuse_file_info *fi) -+{ -+ struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_read(ino=%" PRIu64 ", size=%zd, " -+ "off=%lu)\n", ino, size, (unsigned long) offset); -+ -+ buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -+ buf.buf[0].fd = fi->fh; -+ buf.buf[0].pos = offset; -+ -+ fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); -+} -+ -+static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_bufvec *in_buf, off_t off, -+ struct fuse_file_info *fi) -+{ -+ (void) ino; -+ ssize_t res; -+ struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); -+ -+ out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -+ out_buf.buf[0].fd = fi->fh; -+ out_buf.buf[0].pos = off; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", -+ ino, out_buf.buf[0].size, (unsigned long) off); -+ -+ res = fuse_buf_copy(&out_buf, in_buf, 0); -+ if(res < 0) -+ fuse_reply_err(req, -res); -+ else -+ fuse_reply_write(req, (size_t) res); -+} -+ -+static void lo_statfs(fuse_req_t req, fuse_ino_t ino) -+{ -+ int res; -+ struct statvfs stbuf; -+ -+ res = fstatvfs(lo_fd(req, ino), &stbuf); -+ if (res == -1) -+ fuse_reply_err(req, errno); -+ else -+ fuse_reply_statfs(req, &stbuf); -+} -+ -+static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, -+ off_t offset, off_t length, struct fuse_file_info *fi) -+{ -+ int err = EOPNOTSUPP; -+ (void) ino; -+ -+#ifdef HAVE_FALLOCATE -+ err = fallocate(fi->fh, mode, offset, length); -+ if (err < 0) -+ err = errno; -+ -+#elif defined(HAVE_POSIX_FALLOCATE) -+ if (mode) { -+ fuse_reply_err(req, EOPNOTSUPP); -+ return; -+ } -+ -+ err = posix_fallocate(fi->fh, offset, length); -+#endif -+ -+ fuse_reply_err(req, err); -+} -+ -+static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ int op) 
-+{ -+ int res; -+ (void) ino; -+ -+ res = flock(fi->fh, op); -+ -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, -+ size_t size) -+{ -+ char *value = NULL; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) -+ goto out; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", -+ ino, name, size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to getxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ if (size) { -+ value = malloc(size); -+ if (!value) -+ goto out_err; -+ -+ ret = getxattr(procname, name, value, size); -+ if (ret == -1) -+ goto out_err; -+ saverr = 0; -+ if (ret == 0) -+ goto out; -+ -+ fuse_reply_buf(req, value, ret); -+ } else { -+ ret = getxattr(procname, name, NULL, 0); -+ if (ret == -1) -+ goto out_err; -+ -+ fuse_reply_xattr(req, ret); -+ } -+out_free: -+ free(value); -+ return; -+ -+out_err: -+ saverr = errno; -+out: -+ fuse_reply_err(req, saverr); -+ goto out_free; -+} -+ -+static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) -+{ -+ char *value = NULL; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) -+ goto out; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", -+ ino, size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to listxattr on symlink. 
*/ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ if (size) { -+ value = malloc(size); -+ if (!value) -+ goto out_err; -+ -+ ret = listxattr(procname, value, size); -+ if (ret == -1) -+ goto out_err; -+ saverr = 0; -+ if (ret == 0) -+ goto out; -+ -+ fuse_reply_buf(req, value, ret); -+ } else { -+ ret = listxattr(procname, NULL, 0); -+ if (ret == -1) -+ goto out_err; -+ -+ fuse_reply_xattr(req, ret); -+ } -+out_free: -+ free(value); -+ return; -+ -+out_err: -+ saverr = errno; -+out: -+ fuse_reply_err(req, saverr); -+ goto out_free; -+} -+ -+static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, -+ const char *value, size_t size, int flags) -+{ -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) -+ goto out; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", -+ ino, name, value, size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to setxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ ret = setxattr(procname, name, value, size, flags); -+ saverr = ret == -1 ? errno : 0; -+ -+out: -+ fuse_reply_err(req, saverr); -+} -+ -+static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) -+{ -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) -+ goto out; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", -+ ino, name); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to setxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ ret = removexattr(procname, name); -+ saverr = ret == -1 ? 
errno : 0; -+ -+out: -+ fuse_reply_err(req, saverr); -+} -+ -+#ifdef HAVE_COPY_FILE_RANGE -+static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, -+ struct fuse_file_info *fi_in, -+ fuse_ino_t ino_out, off_t off_out, -+ struct fuse_file_info *fi_out, size_t len, -+ int flags) -+{ -+ ssize_t res; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " -+ "off=%lu, ino=%" PRIu64 "/fd=%lu, " -+ "off=%lu, size=%zd, flags=0x%x)\n", -+ ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, -+ len, flags); -+ -+ res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, -+ flags); -+ if (res < 0) -+ fuse_reply_err(req, -errno); -+ else -+ fuse_reply_write(req, res); -+} -+#endif -+ -+static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -+ struct fuse_file_info *fi) -+{ -+ off_t res; -+ -+ (void)ino; -+ res = lseek(fi->fh, off, whence); -+ if (res != -1) -+ fuse_reply_lseek(req, res); -+ else -+ fuse_reply_err(req, errno); -+} -+ -+static struct fuse_lowlevel_ops lo_oper = { -+ .init = lo_init, -+ .lookup = lo_lookup, -+ .mkdir = lo_mkdir, -+ .mknod = lo_mknod, -+ .symlink = lo_symlink, -+ .link = lo_link, -+ .unlink = lo_unlink, -+ .rmdir = lo_rmdir, -+ .rename = lo_rename, -+ .forget = lo_forget, -+ .forget_multi = lo_forget_multi, -+ .getattr = lo_getattr, -+ .setattr = lo_setattr, -+ .readlink = lo_readlink, -+ .opendir = lo_opendir, -+ .readdir = lo_readdir, -+ .readdirplus = lo_readdirplus, -+ .releasedir = lo_releasedir, -+ .fsyncdir = lo_fsyncdir, -+ .create = lo_create, -+ .open = lo_open, -+ .release = lo_release, -+ .flush = lo_flush, -+ .fsync = lo_fsync, -+ .read = lo_read, -+ .write_buf = lo_write_buf, -+ .statfs = lo_statfs, -+ .fallocate = lo_fallocate, -+ .flock = lo_flock, -+ .getxattr = lo_getxattr, -+ .listxattr = lo_listxattr, -+ .setxattr = lo_setxattr, -+ .removexattr = lo_removexattr, -+#ifdef HAVE_COPY_FILE_RANGE -+ .copy_file_range = 
lo_copy_file_range, -+#endif -+ .lseek = lo_lseek, -+}; -+ -+int main(int argc, char *argv[]) -+{ -+ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -+ struct fuse_session *se; -+ struct fuse_cmdline_opts opts; -+ struct lo_data lo = { .debug = 0, -+ .writeback = 0 }; -+ int ret = -1; -+ -+ /* Don't mask creation mode, kernel already did that */ -+ umask(0); -+ -+ pthread_mutex_init(&lo.mutex, NULL); -+ lo.root.next = lo.root.prev = &lo.root; -+ lo.root.fd = -1; -+ lo.cache = CACHE_NORMAL; -+ -+ if (fuse_parse_cmdline(&args, &opts) != 0) -+ return 1; -+ if (opts.show_help) { -+ printf("usage: %s [options] \n\n", argv[0]); -+ fuse_cmdline_help(); -+ fuse_lowlevel_help(); -+ ret = 0; -+ goto err_out1; -+ } else if (opts.show_version) { -+ printf("FUSE library version %s\n", fuse_pkgversion()); -+ fuse_lowlevel_version(); -+ ret = 0; -+ goto err_out1; -+ } -+ -+ if(opts.mountpoint == NULL) { -+ printf("usage: %s [options] \n", argv[0]); -+ printf(" %s --help\n", argv[0]); -+ ret = 1; -+ goto err_out1; -+ } -+ -+ if (fuse_opt_parse(&args, &lo, lo_opts, NULL)== -1) -+ return 1; -+ -+ lo.debug = opts.debug; -+ lo.root.refcount = 2; -+ if (lo.source) { -+ struct stat stat; -+ int res; -+ -+ res = lstat(lo.source, &stat); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", -+ lo.source); -+ exit(1); -+ } -+ if (!S_ISDIR(stat.st_mode)) { -+ fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); -+ exit(1); -+ } -+ -+ } else { -+ lo.source = "/"; -+ } -+ lo.root.is_symlink = false; -+ if (!lo.timeout_set) { -+ switch (lo.cache) { -+ case CACHE_NEVER: -+ lo.timeout = 0.0; -+ break; -+ -+ case CACHE_NORMAL: -+ lo.timeout = 1.0; -+ break; -+ -+ case CACHE_ALWAYS: -+ lo.timeout = 86400.0; -+ break; -+ } -+ } else if (lo.timeout < 0) { -+ fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", -+ lo.timeout); -+ exit(1); -+ } -+ -+ lo.root.fd = open(lo.source, O_PATH); -+ if (lo.root.fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): 
%m\n", -+ lo.source); -+ exit(1); -+ } -+ -+ se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); -+ if (se == NULL) -+ goto err_out1; -+ -+ if (fuse_set_signal_handlers(se) != 0) -+ goto err_out2; -+ -+ if (fuse_session_mount(se, opts.mountpoint) != 0) -+ goto err_out3; -+ -+ fuse_daemonize(opts.foreground); -+ -+ /* Block until ctrl+c or fusermount -u */ -+ if (opts.singlethread) -+ ret = fuse_session_loop(se); -+ else -+ ret = fuse_session_loop_mt(se, opts.clone_fd); -+ -+ fuse_session_unmount(se); -+err_out3: -+ fuse_remove_signal_handlers(se); -+err_out2: -+ fuse_session_destroy(se); -+err_out1: -+ free(opts.mountpoint); -+ fuse_opt_free_args(&args); -+ -+ if (lo.root.fd >= 0) -+ close(lo.root.fd); -+ -+ return ret ? 1 : 0; -+} --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch b/SOURCES/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch deleted file mode 100644 index cef537a..0000000 --- a/SOURCES/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 52e93f2dc499ead339bf808dac3480b369dfadd1 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:39 +0100 -Subject: [PATCH 068/116] virtiofsd: Add timestamp to the log with - FUSE_LOG_DEBUG level -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-65-dgilbert@redhat.com> -Patchwork-id: 93517 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 064/112] virtiofsd: Add timestamp to the log with FUSE_LOG_DEBUG level -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Masayoshi Mizuma - -virtiofsd has some threads, so we see a lot of logs with debug option. -It would be useful for debugging if we can see the timestamp. 
- -Add nano second timestamp, which got by get_clock(), to the log with -FUSE_LOG_DEBUG level if the syslog option isn't set. - -The log is like as: - - # ./virtiofsd -d -o vhost_user_socket=/tmp/vhostqemu0 -o source=/tmp/share0 -o cache=auto - ... - [5365943125463727] [ID: 00000002] fv_queue_thread: Start for queue 0 kick_fd 9 - [5365943125568644] [ID: 00000002] fv_queue_thread: Waiting for Queue 0 event - [5365943125573561] [ID: 00000002] fv_queue_thread: Got queue event on Queue 0 - -Signed-off-by: Masayoshi Mizuma -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 50fb955aa0e6ede929422146936cf68bf1ca876f) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index f08324f..98114a3 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -36,6 +36,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/timer.h" - #include "fuse_virtio.h" - #include "fuse_log.h" - #include "fuse_lowlevel.h" -@@ -2276,7 +2277,13 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) - } - - if (current_log_level == FUSE_LOG_DEBUG) { -- localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt); -+ if (!use_syslog) { -+ localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s", -+ get_clock(), syscall(__NR_gettid), fmt); -+ } else { -+ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), -+ fmt); -+ } - fmt = localfmt; - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch b/SOURCES/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch deleted file mode 100644 index 4713a0d..0000000 --- a/SOURCES/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 2b921f7162b53204051955228bf99bbed55d2457 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:53 +0100 -Subject: [PATCH 082/116] virtiofsd: Clean up inodes on destroy -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-79-dgilbert@redhat.com> -Patchwork-id: 93532 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 078/112] virtiofsd: Clean up inodes on destroy -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Clear out our inodes and fd's on a 'destroy' - so we get rid -of them if we reboot the guest. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 771b01eb76ff480fee984bd1d21727147cc3e702) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 26 ++++++++++++++++++++++++++ - 1 file changed, 26 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index b176a31..9ed77a1 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1169,6 +1169,25 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - } - } - -+static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) -+{ -+ struct lo_inode *inode = value; -+ struct lo_data *lo = user_data; -+ -+ inode->refcount = 0; -+ lo_map_remove(&lo->ino_map, inode->fuse_ino); -+ close(inode->fd); -+ -+ return TRUE; -+} -+ -+static void unref_all_inodes(struct lo_data *lo) -+{ -+ pthread_mutex_lock(&lo->mutex); -+ g_hash_table_foreach_remove(lo->inodes, unref_all_inodes_cb, lo); -+ pthread_mutex_unlock(&lo->mutex); -+} -+ - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { - struct lo_data *lo = lo_data(req); -@@ -2035,6 +2054,12 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int 
whence, - } - } - -+static void lo_destroy(void *userdata) -+{ -+ struct lo_data *lo = (struct lo_data *)userdata; -+ unref_all_inodes(lo); -+} -+ - static struct fuse_lowlevel_ops lo_oper = { - .init = lo_init, - .lookup = lo_lookup, -@@ -2073,6 +2098,7 @@ static struct fuse_lowlevel_ops lo_oper = { - .copy_file_range = lo_copy_file_range, - #endif - .lseek = lo_lseek, -+ .destroy = lo_destroy, - }; - - /* Print vhost-user.json backend program capabilities */ --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch b/SOURCES/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch deleted file mode 100644 index c421365..0000000 --- a/SOURCES/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 24f91062f571ad2dd2ac22db3b7d456a2c8bd2cb Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:23 +0100 -Subject: [PATCH 112/116] virtiofsd: Convert lo_destroy to take the lo->mutex - lock itself -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-109-dgilbert@redhat.com> -Patchwork-id: 93563 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 108/112] virtiofsd: Convert lo_destroy to take the lo->mutex lock itself -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -lo_destroy was relying on some implicit knowledge of the locking; -we can avoid this if we create an unref_inode that doesn't take -the lock and then grab it for the whole of the lo_destroy. - -Suggested-by: Vivek Goyal -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit fe4c15798a48143dd6b1f58d2d3cad12206ce211) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 31 +++++++++++++++++-------------- - 1 file changed, 17 insertions(+), 14 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index eb001b9..fc15d61 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1344,14 +1344,13 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - lo_inode_put(lo, &inode); - } - --static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -- uint64_t n) -+/* To be called with lo->mutex held */ -+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) - { - if (!inode) { - return; - } - -- pthread_mutex_lock(&lo->mutex); - assert(inode->nlookup >= n); - inode->nlookup -= n; - if (!inode->nlookup) { -@@ -1362,15 +1361,24 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - } - g_hash_table_destroy(inode->posix_locks); - pthread_mutex_destroy(&inode->plock_mutex); -- pthread_mutex_unlock(&lo->mutex); - - /* Drop our refcount from lo_do_lookup() */ - lo_inode_put(lo, &inode); -- } else { -- pthread_mutex_unlock(&lo->mutex); - } - } - -+static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -+ uint64_t n) -+{ -+ if (!inode) { -+ return; -+ } -+ -+ pthread_mutex_lock(&lo->mutex); -+ unref_inode(lo, inode, n); -+ pthread_mutex_unlock(&lo->mutex); -+} -+ - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { - struct lo_data *lo = lo_data(req); -@@ -2458,13 +2466,7 @@ static void lo_destroy(void *userdata) - { - struct lo_data *lo = (struct lo_data *)userdata; - -- /* -- * Normally lo->mutex must be taken when traversing lo->inodes but -- * lo_destroy() is a serialized request so no races are possible here. 
-- * -- * In addition, we cannot acquire lo->mutex since unref_inode() takes it -- * too and this would result in a recursive lock. -- */ -+ pthread_mutex_lock(&lo->mutex); - while (true) { - GHashTableIter iter; - gpointer key, value; -@@ -2475,8 +2477,9 @@ static void lo_destroy(void *userdata) - } - - struct lo_inode *inode = value; -- unref_inode_lolocked(lo, inode, inode->nlookup); -+ unref_inode(lo, inode, inode->nlookup); - } -+ pthread_mutex_unlock(&lo->mutex); - } - - static struct fuse_lowlevel_ops lo_oper = { --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Disable-remote-posix-locks-by-default.patch b/SOURCES/kvm-virtiofsd-Disable-remote-posix-locks-by-default.patch deleted file mode 100644 index 90b6b35..0000000 --- a/SOURCES/kvm-virtiofsd-Disable-remote-posix-locks-by-default.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 3ec945ba7c2649cca13cf6070c6365b1262ad1ec Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Fri, 6 Aug 2021 11:58:26 -0400 -Subject: [PATCH 1/2] virtiofsd: Disable remote posix locks by default -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Max Reitz -Message-id: <20210806115827.740945-2-mreitz@redhat.com> -Patchwork-id: 101970 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/2] virtiofsd: Disable remote posix locks by default -Bugzilla: 1967496 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Vivek Goyal - -From: Vivek Goyal - -Right now we enable remote posix locks by default. That means when guest -does a posix lock it sends request to server (virtiofsd). But currently -we only support non-blocking posix lock and return -EOPNOTSUPP for -blocking version. - -This means that existing applications which are doing blocking posix -locks get -EOPNOTSUPP and fail. To avoid this, people have been -running virtiosd with option "-o no_posix_lock". For new users it -is still a surprise and trial and error takes them to this option. 
- -Given posix lock implementation is not complete in virtiofsd, disable -it by default. This means that posix locks will work with-in applications -in a guest but not across guests. Anyway we don't support sharing -filesystem among different guests yet in virtiofs so this should -not lead to any kind of surprise or regression and will make life -little easier for virtiofs users. - -Reported-by: Aa Aa -Suggested-by: Miklos Szeredi -Signed-off-by: Vivek Goyal -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 88fc107956a5812649e5918e0c092d3f78bb28ad) - -Conflicts: - docs/tools/virtiofsd.rst - We do not have virtiofsd.rst downstream (added upstream in - commit 6a7e2bbee5fa), so I dropped that hunk (which effectively - updated the default value in the man page). - -Signed-off-by: Max Reitz -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/passthrough_ll.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index cb0992f2db..b47029da89 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -3001,7 +3001,7 @@ int main(int argc, char *argv[]) - struct lo_data lo = { - .debug = 0, - .writeback = 0, -- .posix_lock = 1, -+ .posix_lock = 0, - .proc_self_fd = -1, - }; - struct lo_map_elem *root_elem; --- -2.27.0 - diff --git a/SOURCES/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch b/SOURCES/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch deleted file mode 100644 index 9f198c2..0000000 --- a/SOURCES/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch +++ /dev/null @@ -1,176 +0,0 @@ -From e217ab392e0d4c770ec18dbfbe986771773cb557 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:33 +0100 -Subject: [PATCH 062/116] virtiofsd: Drop CAP_FSETID if client asked for it -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-59-dgilbert@redhat.com> -Patchwork-id: 93513 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 058/112] virtiofsd: Drop CAP_FSETID if client asked for it -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -If client requested killing setuid/setgid bits on file being written, drop -CAP_FSETID capability so that setuid/setgid bits are cleared upon write -automatically. - -pjdfstest chown/12.t needs this. - -Signed-off-by: Vivek Goyal - dgilbert: reworked for libcap-ng -Reviewed-by: Misono Tomohiro -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit ee88465224b3aed2596049caa28f86cbe0d5a3d0) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++++++++++++++++++++++++ - 1 file changed, 105 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 97e7c75..d53cb1e 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -201,6 +201,91 @@ static int load_capng(void) - return 0; - } - -+/* -+ * Helpers for dropping and regaining effective capabilities. 
Returns 0 -+ * on success, error otherwise -+ */ -+static int drop_effective_cap(const char *cap_name, bool *cap_dropped) -+{ -+ int cap, ret; -+ -+ cap = capng_name_to_capability(cap_name); -+ if (cap < 0) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", -+ cap_name, strerror(errno)); -+ goto out; -+ } -+ -+ if (load_capng()) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); -+ goto out; -+ } -+ -+ /* We dont have this capability in effective set already. */ -+ if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) { -+ ret = 0; -+ goto out; -+ } -+ -+ if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n"); -+ goto out; -+ } -+ -+ if (capng_apply(CAPNG_SELECT_CAPS)) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n"); -+ goto out; -+ } -+ -+ ret = 0; -+ if (cap_dropped) { -+ *cap_dropped = true; -+ } -+ -+out: -+ return ret; -+} -+ -+static int gain_effective_cap(const char *cap_name) -+{ -+ int cap; -+ int ret = 0; -+ -+ cap = capng_name_to_capability(cap_name); -+ if (cap < 0) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", -+ cap_name, strerror(errno)); -+ goto out; -+ } -+ -+ if (load_capng()) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); -+ goto out; -+ } -+ -+ if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n"); -+ goto out; -+ } -+ -+ if (capng_apply(CAPNG_SELECT_CAPS)) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n"); -+ goto out; -+ } -+ ret = 0; -+ -+out: -+ return ret; -+} -+ - static void lo_map_init(struct lo_map *map) - { - map->elems = NULL; -@@ -1577,6 +1662,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - (void)ino; - ssize_t res; - struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); -+ bool 
cap_fsetid_dropped = false; - - out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; - out_buf.buf[0].fd = lo_fi_fd(req, fi); -@@ -1588,12 +1674,31 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - out_buf.buf[0].size, (unsigned long)off); - } - -+ /* -+ * If kill_priv is set, drop CAP_FSETID which should lead to kernel -+ * clearing setuid/setgid on file. -+ */ -+ if (fi->kill_priv) { -+ res = drop_effective_cap("FSETID", &cap_fsetid_dropped); -+ if (res != 0) { -+ fuse_reply_err(req, res); -+ return; -+ } -+ } -+ - res = fuse_buf_copy(&out_buf, in_buf); - if (res < 0) { - fuse_reply_err(req, -res); - } else { - fuse_reply_write(req, (size_t)res); - } -+ -+ if (cap_fsetid_dropped) { -+ res = gain_effective_cap("FSETID"); -+ if (res) { -+ fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n"); -+ } -+ } - } - - static void lo_statfs(fuse_req_t req, fuse_ino_t ino) --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch b/SOURCES/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch new file mode 100644 index 0000000..face8e6 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch @@ -0,0 +1,110 @@ +From 2754dc2c7def01d7dd1bb39f3e86ef444652d397 Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Tue, 25 Jan 2022 13:51:14 -0500 +Subject: [PATCH 1/6] virtiofsd: Drop membership of all supplementary groups + (CVE-2022-0358) + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 102: virtiofsd: Drop membership of all supplementary groups (CVE-2022-0358) +RH-Commit: [1/1] 93e56c88277fec8e42559a899d32b80fac4a923f +RH-Bugzilla: 2046198 +RH-Acked-by: Greg Kurz +RH-Acked-by: Sergio Lopez +RH-Acked-by: Laszlo Ersek + +At the start, drop membership of all supplementary groups. This is +not required. + +If we have membership of "root" supplementary group and when we switch +uid/gid using setresuid/setsgid, we still retain membership of existing +supplemntary groups. 
And that can allow some operations which are not +normally allowed. + +For example, if root in guest creates a dir as follows. + +$ mkdir -m 03777 test_dir + +This sets SGID on dir as well as allows unprivileged users to write into +this dir. + +And now as unprivileged user open file as follows. + +$ su test +$ fd = open("test_dir/priviledge_id", O_RDWR|O_CREAT|O_EXCL, 02755); + +This will create SGID set executable in test_dir/. + +And that's a problem because now an unpriviliged user can execute it, +get egid=0 and get access to resources owned by "root" group. This is +privilege escalation. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2044863 +Fixes: CVE-2022-0358 +Reported-by: JIETAO XIAO +Suggested-by: Miklos Szeredi +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Vivek Goyal +Message-Id: +Signed-off-by: Dr. David Alan Gilbert + dgilbert: Fixed missing {}'s style nit +(cherry picked from commit 449e8171f96a6a944d1f3b7d3627ae059eae21ca) +--- + tools/virtiofsd/passthrough_ll.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 64b5b4fbb1..b3d0674f6d 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -54,6 +54,7 @@ + #include + #include + #include ++#include + + #include "qemu/cutils.h" + #include "passthrough_helpers.h" +@@ -1161,6 +1162,30 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + #define OURSYS_setresuid SYS_setresuid + #endif + ++static void drop_supplementary_groups(void) ++{ ++ int ret; ++ ++ ret = getgroups(0, NULL); ++ if (ret == -1) { ++ fuse_log(FUSE_LOG_ERR, "getgroups() failed with error=%d:%s\n", ++ errno, strerror(errno)); ++ exit(1); ++ } ++ ++ if (!ret) { ++ return; ++ } ++ ++ /* Drop all supplementary groups. 
We should not need it */ ++ ret = setgroups(0, NULL); ++ if (ret == -1) { ++ fuse_log(FUSE_LOG_ERR, "setgroups() failed with error=%d:%s\n", ++ errno, strerror(errno)); ++ exit(1); ++ } ++} ++ + /* + * Change to uid/gid of caller so that file is created with + * ownership of caller. +@@ -3926,6 +3951,8 @@ int main(int argc, char *argv[]) + + qemu_init_exec_dir(argv[0]); + ++ drop_supplementary_groups(); ++ + pthread_mutex_init(&lo.mutex, NULL); + lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); + lo.root.fd = -1; +-- +2.27.0 + diff --git a/SOURCES/kvm-virtiofsd-Fast-path-for-virtio-read.patch b/SOURCES/kvm-virtiofsd-Fast-path-for-virtio-read.patch deleted file mode 100644 index 03874ce..0000000 --- a/SOURCES/kvm-virtiofsd-Fast-path-for-virtio-read.patch +++ /dev/null @@ -1,240 +0,0 @@ -From 7d2efc3e4af15eff57b0c38cff7c81b371a98303 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:06 +0100 -Subject: [PATCH 035/116] virtiofsd: Fast path for virtio read -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-32-dgilbert@redhat.com> -Patchwork-id: 93480 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 031/112] virtiofsd: Fast path for virtio read -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Readv the data straight into the guests buffer. - -Signed-off-by: Dr. David Alan Gilbert -With fix by: -Signed-off-by: Eryu Guan -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit eb49d187ef5134483a34c970bbfece28aaa686a7) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 5 ++ - tools/virtiofsd/fuse_virtio.c | 162 ++++++++++++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_virtio.h | 4 + - 3 files changed, 171 insertions(+) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 380d93b..4f4684d 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -475,6 +475,11 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, - return fuse_send_msg(se, ch, iov, iov_count); - } - -+ if (fuse_lowlevel_is_virtio(se) && buf->count == 1 && -+ buf->buf[0].flags == (FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK)) { -+ return virtio_send_data_iov(se, ch, iov, iov_count, buf, len); -+ } -+ - abort(); /* Will have taken vhost path */ - return 0; - } -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index f1adeb6..7e2711b 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -230,6 +230,168 @@ err: - return ret; - } - -+/* -+ * Callback from fuse_send_data_iov_* when it's virtio and the buffer -+ * is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK -+ * We need send the iov and then the buffer. -+ * Return 0 on success -+ */ -+int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count, struct fuse_bufvec *buf, -+ size_t len) -+{ -+ int ret = 0; -+ VuVirtqElement *elem; -+ VuVirtq *q; -+ -+ assert(count >= 1); -+ assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); -+ -+ struct fuse_out_header *out = iov[0].iov_base; -+ /* TODO: Endianness! 
*/ -+ -+ size_t iov_len = iov_size(iov, count); -+ size_t tosend_len = iov_len + len; -+ -+ out->len = tosend_len; -+ -+ fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n", __func__, -+ count, len, iov_len); -+ -+ /* unique == 0 is notification which we don't support */ -+ assert(out->unique); -+ -+ /* For virtio we always have ch */ -+ assert(ch); -+ assert(!ch->qi->reply_sent); -+ elem = ch->qi->qe; -+ q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; -+ -+ /* The 'in' part of the elem is to qemu */ -+ unsigned int in_num = elem->in_num; -+ struct iovec *in_sg = elem->in_sg; -+ size_t in_len = iov_size(in_sg, in_num); -+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", -+ __func__, elem->index, in_num, in_len); -+ -+ /* -+ * The elem should have room for a 'fuse_out_header' (out from fuse) -+ * plus the data based on the len in the header. -+ */ -+ if (in_len < sizeof(struct fuse_out_header)) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", -+ __func__, elem->index); -+ ret = E2BIG; -+ goto err; -+ } -+ if (in_len < tosend_len) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", -+ __func__, elem->index, tosend_len); -+ ret = E2BIG; -+ goto err; -+ } -+ -+ /* TODO: Limit to 'len' */ -+ -+ /* First copy the header data from iov->in_sg */ -+ copy_iov(iov, count, in_sg, in_num, iov_len); -+ -+ /* -+ * Build a copy of the the in_sg iov so we can skip bits in it, -+ * including changing the offsets -+ */ -+ struct iovec *in_sg_cpy = calloc(sizeof(struct iovec), in_num); -+ assert(in_sg_cpy); -+ memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num); -+ /* These get updated as we skip */ -+ struct iovec *in_sg_ptr = in_sg_cpy; -+ int in_sg_cpy_count = in_num; -+ -+ /* skip over parts of in_sg that contained the header iov */ -+ size_t skip_size = iov_len; -+ -+ size_t in_sg_left = 0; -+ do { -+ while (skip_size != 0 && in_sg_cpy_count) { -+ if (skip_size >= in_sg_ptr[0].iov_len) { -+ skip_size -= 
in_sg_ptr[0].iov_len; -+ in_sg_ptr++; -+ in_sg_cpy_count--; -+ } else { -+ in_sg_ptr[0].iov_len -= skip_size; -+ in_sg_ptr[0].iov_base += skip_size; -+ break; -+ } -+ } -+ -+ int i; -+ for (i = 0, in_sg_left = 0; i < in_sg_cpy_count; i++) { -+ in_sg_left += in_sg_ptr[i].iov_len; -+ } -+ fuse_log(FUSE_LOG_DEBUG, -+ "%s: after skip skip_size=%zd in_sg_cpy_count=%d " -+ "in_sg_left=%zd\n", -+ __func__, skip_size, in_sg_cpy_count, in_sg_left); -+ ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count, -+ buf->buf[0].pos); -+ -+ if (ret == -1) { -+ ret = errno; -+ fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n", -+ __func__, len); -+ free(in_sg_cpy); -+ goto err; -+ } -+ fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__, -+ ret, len); -+ if (ret < len && ret) { -+ fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__); -+ /* Skip over this much next time around */ -+ skip_size = ret; -+ buf->buf[0].pos += ret; -+ len -= ret; -+ -+ /* Lets do another read */ -+ continue; -+ } -+ if (!ret) { -+ /* EOF case? 
*/ -+ fuse_log(FUSE_LOG_DEBUG, "%s: !ret in_sg_left=%zd\n", __func__, -+ in_sg_left); -+ break; -+ } -+ if (ret != len) { -+ fuse_log(FUSE_LOG_DEBUG, "%s: ret!=len\n", __func__); -+ ret = EIO; -+ free(in_sg_cpy); -+ goto err; -+ } -+ in_sg_left -= ret; -+ len -= ret; -+ } while (in_sg_left); -+ free(in_sg_cpy); -+ -+ /* Need to fix out->len on EOF */ -+ if (len) { -+ struct fuse_out_header *out_sg = in_sg[0].iov_base; -+ -+ tosend_len -= len; -+ out_sg->len = tosend_len; -+ } -+ -+ ret = 0; -+ -+ vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); -+ vu_queue_notify(&se->virtio_dev->dev, q); -+ -+err: -+ if (ret == 0) { -+ ch->qi->reply_sent = true; -+ } -+ -+ return ret; -+} -+ - /* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -index 135a148..cc676b9 100644 ---- a/tools/virtiofsd/fuse_virtio.h -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -26,4 +26,8 @@ int virtio_loop(struct fuse_session *se); - int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int count); - -+int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count, -+ struct fuse_bufvec *buf, size_t len); -+ - #endif --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch b/SOURCES/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch deleted file mode 100644 index 12bb9a2..0000000 --- a/SOURCES/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch +++ /dev/null @@ -1,164 +0,0 @@ -From 6d41fc549198e140f38fddcb02975098df040ae1 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:50 +0100 -Subject: [PATCH 019/116] virtiofsd: Fix common header and define for QEMU - builds -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-16-dgilbert@redhat.com> -Patchwork-id: 93470 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 015/112] virtiofsd: Fix common header and define for QEMU builds -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -All of the fuse files include config.h and define GNU_SOURCE -where we don't have either under our build - remove them. -Fixup path to the kernel's fuse.h in the QEMUs world. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 09863ebc7e32a107235b3c815ad54d26cc64f07a) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 4 +--- - tools/virtiofsd/fuse_i.h | 3 +++ - tools/virtiofsd/fuse_log.c | 1 + - tools/virtiofsd/fuse_lowlevel.c | 6 ++---- - tools/virtiofsd/fuse_opt.c | 2 +- - tools/virtiofsd/fuse_signals.c | 2 +- - tools/virtiofsd/helper.c | 1 + - tools/virtiofsd/passthrough_ll.c | 8 ++------ - 8 files changed, 12 insertions(+), 15 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 4d507f3..772efa9 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -9,9 +9,7 @@ - * See the file COPYING.LIB - */ - --#define _GNU_SOURCE -- --#include "config.h" -+#include "qemu/osdep.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" - #include -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index e63cb58..bae0699 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -6,6 +6,9 @@ - * See the file COPYING.LIB - */ - -+#define FUSE_USE_VERSION 31 -+ -+ - #include "fuse.h" - #include "fuse_lowlevel.h" - -diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c -index 11345f9..c301ff6 100644 ---- a/tools/virtiofsd/fuse_log.c -+++ b/tools/virtiofsd/fuse_log.c -@@ -8,6 +8,7 @@ - 
* See the file COPYING.LIB - */ - -+#include "qemu/osdep.h" - #include "fuse_log.h" - - #include -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 3da80de..07fb8a6 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -9,11 +9,9 @@ - * See the file COPYING.LIB - */ - --#define _GNU_SOURCE -- --#include "config.h" -+#include "qemu/osdep.h" - #include "fuse_i.h" --#include "fuse_kernel.h" -+#include "standard-headers/linux/fuse.h" - #include "fuse_misc.h" - #include "fuse_opt.h" - -diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c -index edd36f4..2892236 100644 ---- a/tools/virtiofsd/fuse_opt.c -+++ b/tools/virtiofsd/fuse_opt.c -@@ -9,8 +9,8 @@ - * See the file COPYING.LIB - */ - -+#include "qemu/osdep.h" - #include "fuse_opt.h" --#include "config.h" - #include "fuse_i.h" - #include "fuse_misc.h" - -diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c -index 19d6791..dc7c8ac 100644 ---- a/tools/virtiofsd/fuse_signals.c -+++ b/tools/virtiofsd/fuse_signals.c -@@ -8,7 +8,7 @@ - * See the file COPYING.LIB - */ - --#include "config.h" -+#include "qemu/osdep.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index d9227d7..9333691 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -10,6 +10,7 @@ - * See the file COPYING.LIB. 
- */ - -+#include "qemu/osdep.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" - #include "fuse_misc.h" -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 126a56c..322a889 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -35,15 +35,11 @@ - * \include passthrough_ll.c - */ - --#define _GNU_SOURCE --#define FUSE_USE_VERSION 31 -- --#include "config.h" -- -+#include "qemu/osdep.h" -+#include "fuse_lowlevel.h" - #include - #include - #include --#include - #include - #include - #include --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch b/SOURCES/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch deleted file mode 100644 index f929bab..0000000 --- a/SOURCES/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch +++ /dev/null @@ -1,136 +0,0 @@ -From 9b5fbc95a287b2ce9448142194b161d8360d5e4e Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:15 +0100 -Subject: [PATCH 104/116] virtiofsd: Fix data corruption with O_APPEND write in - writeback mode -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-101-dgilbert@redhat.com> -Patchwork-id: 93556 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 100/112] virtiofsd: Fix data corruption with O_APPEND write in writeback mode -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Misono Tomohiro - -When writeback mode is enabled (-o writeback), O_APPEND handling is -done in kernel. Therefore virtiofsd clears O_APPEND flag when open. -Otherwise O_APPEND flag takes precedence over pwrite() and write -data may corrupt. - -Currently clearing O_APPEND flag is done in lo_open(), but we also -need the same operation in lo_create(). 
So, factor out the flag -update operation in lo_open() to update_open_flags() and call it -in both lo_open() and lo_create(). - -This fixes the failure of xfstest generic/069 in writeback mode -(which tests O_APPEND write data integrity). - -Signed-off-by: Misono Tomohiro -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 8e4e41e39eac5ee5f378d66f069a2f70a1734317) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 66 ++++++++++++++++++++-------------------- - 1 file changed, 33 insertions(+), 33 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 948cb19..4c61ac5 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1692,6 +1692,37 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, - fuse_reply_err(req, 0); - } - -+static void update_open_flags(int writeback, struct fuse_file_info *fi) -+{ -+ /* -+ * With writeback cache, kernel may send read requests even -+ * when userspace opened write-only -+ */ -+ if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -+ fi->flags &= ~O_ACCMODE; -+ fi->flags |= O_RDWR; -+ } -+ -+ /* -+ * With writeback cache, O_APPEND is handled by the kernel. -+ * This breaks atomicity (since the file may change in the -+ * underlying filesystem, so that the kernel's idea of the -+ * end of the file isn't accurate anymore). In this example, -+ * we just accept that. A more rigorous filesystem may want -+ * to return an error here -+ */ -+ if (writeback && (fi->flags & O_APPEND)) { -+ fi->flags &= ~O_APPEND; -+ } -+ -+ /* -+ * O_DIRECT in guest should not necessarily mean bypassing page -+ * cache on host as well. If somebody needs that behavior, it -+ * probably should be a configuration knob in daemon. 
-+ */ -+ fi->flags &= ~O_DIRECT; -+} -+ - static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - mode_t mode, struct fuse_file_info *fi) - { -@@ -1721,12 +1752,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out; - } - -- /* -- * O_DIRECT in guest should not necessarily mean bypassing page -- * cache on host as well. If somebody needs that behavior, it -- * probably should be a configuration knob in daemon. -- */ -- fi->flags &= ~O_DIRECT; -+ update_open_flags(lo->writeback, fi); - - fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, - mode); -@@ -1936,33 +1962,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, - fi->flags); - -- /* -- * With writeback cache, kernel may send read requests even -- * when userspace opened write-only -- */ -- if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -- fi->flags &= ~O_ACCMODE; -- fi->flags |= O_RDWR; -- } -- -- /* -- * With writeback cache, O_APPEND is handled by the kernel. -- * This breaks atomicity (since the file may change in the -- * underlying filesystem, so that the kernel's idea of the -- * end of the file isn't accurate anymore). In this example, -- * we just accept that. A more rigorous filesystem may want -- * to return an error here -- */ -- if (lo->writeback && (fi->flags & O_APPEND)) { -- fi->flags &= ~O_APPEND; -- } -- -- /* -- * O_DIRECT in guest should not necessarily mean bypassing page -- * cache on host as well. If somebody needs that behavior, it -- * probably should be a configuration knob in daemon. 
-- */ -- fi->flags &= ~O_DIRECT; -+ update_open_flags(lo->writeback, fi); - - sprintf(buf, "%i", lo_fd(req, ino)); - fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch b/SOURCES/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch deleted file mode 100644 index 306c183..0000000 --- a/SOURCES/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 9f726593bc3acbc247876dcc4d79fbf046958003 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:49 +0100 -Subject: [PATCH 018/116] virtiofsd: Fix fuse_daemonize ignored return values -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-15-dgilbert@redhat.com> -Patchwork-id: 93469 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 014/112] virtiofsd: Fix fuse_daemonize ignored return values -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -QEMU's compiler enables warnings/errors for ignored values -and the (void) trick used in the fuse code isn't enough. -Turn all the return values into a return value on the function. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 30d8e49760712d65697ea517c53671bd1d214fc7) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 33 ++++++++++++++++++++++----------- - 1 file changed, 22 insertions(+), 11 deletions(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5e6f205..d9227d7 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -10,12 +10,10 @@ - * See the file COPYING.LIB. 
- */ - --#include "config.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" - #include "fuse_misc.h" - #include "fuse_opt.h" --#include "mount_util.h" - - #include - #include -@@ -171,6 +169,7 @@ int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) - - int fuse_daemonize(int foreground) - { -+ int ret = 0, rett; - if (!foreground) { - int nullfd; - int waiter[2]; -@@ -192,8 +191,8 @@ int fuse_daemonize(int foreground) - case 0: - break; - default: -- (void)read(waiter[0], &completed, sizeof(completed)); -- _exit(0); -+ _exit(read(waiter[0], &completed, -+ sizeof(completed) != sizeof(completed))); - } - - if (setsid() == -1) { -@@ -201,13 +200,22 @@ int fuse_daemonize(int foreground) - return -1; - } - -- (void)chdir("/"); -+ ret = chdir("/"); - - nullfd = open("/dev/null", O_RDWR, 0); - if (nullfd != -1) { -- (void)dup2(nullfd, 0); -- (void)dup2(nullfd, 1); -- (void)dup2(nullfd, 2); -+ rett = dup2(nullfd, 0); -+ if (!ret) { -+ ret = rett; -+ } -+ rett = dup2(nullfd, 1); -+ if (!ret) { -+ ret = rett; -+ } -+ rett = dup2(nullfd, 2); -+ if (!ret) { -+ ret = rett; -+ } - if (nullfd > 2) { - close(nullfd); - } -@@ -215,13 +223,16 @@ int fuse_daemonize(int foreground) - - /* Propagate completion of daemon initialization */ - completed = 1; -- (void)write(waiter[1], &completed, sizeof(completed)); -+ rett = write(waiter[1], &completed, sizeof(completed)); -+ if (!ret) { -+ ret = rett; -+ } - close(waiter[0]); - close(waiter[1]); - } else { -- (void)chdir("/"); -+ ret = chdir("/"); - } -- return 0; -+ return ret; - } - - void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Fix-the-help-message-of-posix-lock.patch b/SOURCES/kvm-virtiofsd-Fix-the-help-message-of-posix-lock.patch deleted file mode 100644 index 98907a5..0000000 --- a/SOURCES/kvm-virtiofsd-Fix-the-help-message-of-posix-lock.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 6abfb7b3c37015ff901d11f178bc6900deec2acf Mon Sep 17 
00:00:00 2001 -From: Max Reitz -Date: Fri, 6 Aug 2021 11:58:27 -0400 -Subject: [PATCH 2/2] virtiofsd: Fix the help message of posix lock -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Max Reitz -Message-id: <20210806115827.740945-3-mreitz@redhat.com> -Patchwork-id: 101969 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 2/2] virtiofsd: Fix the help message of posix lock -Bugzilla: 1967496 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Vivek Goyal - -From: Jiachen Zhang - -The commit 88fc107956a5812649e5918e0c092d3f78bb28ad disabled remote -posix locks by default. But the --help message still says it is enabled -by default. So fix it to output no_posix_lock. - -Signed-off-by: Jiachen Zhang -Message-Id: <20201027081558.29904-1-zhangjiachen.jaycee@bytedance.com> -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 0429eaf518be1d4742356056e6c886b7f9bc9712) -Signed-off-by: Max Reitz -Signed-off-by: Danilo C. L. 
de Paula ---- - tools/virtiofsd/helper.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5b222ea49b..813d9490e5 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -163,7 +163,7 @@ void fuse_cmdline_help(void) - " default: false\n" - " -o posix_lock|no_posix_lock\n" - " enable/disable remote posix lock\n" -- " default: posix_lock\n" -+ " default: no_posix_lock\n" - " -o readdirplus|no_readdirplus\n" - " enable/disable readirplus\n" - " default: readdirplus except with " --- -2.27.0 - diff --git a/SOURCES/kvm-virtiofsd-Fix-xattr-operations.patch b/SOURCES/kvm-virtiofsd-Fix-xattr-operations.patch deleted file mode 100644 index 532948f..0000000 --- a/SOURCES/kvm-virtiofsd-Fix-xattr-operations.patch +++ /dev/null @@ -1,327 +0,0 @@ -From 8721796f22a8a61d82974088e542377ee6db209e Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:14 +0000 -Subject: [PATCH 18/18] virtiofsd: Fix xattr operations -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-8-dgilbert@redhat.com> -Patchwork-id: 94123 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 7/7] virtiofsd: Fix xattr operations -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: Misono Tomohiro - -Current virtiofsd has problems about xattr operations and -they does not work properly for directory/symlink/special file. - -The fundamental cause is that virtiofsd uses openat() + f...xattr() -systemcalls for xattr operation but we should not open symlink/special -file in the daemon. Therefore the function is restricted. - -Fix this problem by: - 1. during setup of each thread, call unshare(CLONE_FS) - 2. in xattr operations (i.e. 
lo_getxattr), if inode is not a regular - file or directory, use fchdir(proc_loot_fd) + ...xattr() + - fchdir(root.fd) instead of openat() + f...xattr() - - (Note: for a regular file/directory openat() + f...xattr() - is still used for performance reason) - -With this patch, xfstests generic/062 passes on virtiofs. - -This fix is suggested by Miklos Szeredi and Stefan Hajnoczi. -The original discussion can be found here: - https://www.redhat.com/archives/virtio-fs/2019-October/msg00046.html - -Signed-off-by: Misono Tomohiro -Message-Id: <20200227055927.24566-3-misono.tomohiro@jp.fujitsu.com> -Acked-by: Vivek Goyal -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit bdfd66788349acc43cd3f1298718ad491663cfcc) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/fuse_virtio.c | 13 +++++ - tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++++++------------------ - tools/virtiofsd/seccomp.c | 6 +++ - 3 files changed, 77 insertions(+), 47 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index dd1c605..3b6d16a 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -426,6 +426,8 @@ err: - return ret; - } - -+static __thread bool clone_fs_called; -+ - /* Process one FVRequest in a thread pool */ - static void fv_queue_worker(gpointer data, gpointer user_data) - { -@@ -441,6 +443,17 @@ static void fv_queue_worker(gpointer data, gpointer user_data) - - assert(se->bufsize > sizeof(struct fuse_in_header)); - -+ if (!clone_fs_called) { -+ int ret; -+ -+ /* unshare FS for xattr operation */ -+ ret = unshare(CLONE_FS); -+ /* should not fail */ -+ assert(ret == 0); -+ -+ clone_fs_called = true; -+ } -+ - /* - * An element contains one request and the space to send our response - * They're spread over multiple descriptors in a scatter/gather set -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 
50c7273..9cba3f1 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -123,7 +123,7 @@ struct lo_inode { - pthread_mutex_t plock_mutex; - GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ - -- bool is_symlink; -+ mode_t filetype; - }; - - struct lo_cred { -@@ -695,7 +695,7 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, - struct lo_inode *parent; - char path[PATH_MAX]; - -- if (inode->is_symlink) { -+ if (S_ISLNK(inode->filetype)) { - res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); - if (res == -1 && errno == EINVAL) { - /* Sorry, no race free way to set times on symlink. */ -@@ -929,7 +929,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out_err; - } - -- inode->is_symlink = S_ISLNK(e->attr.st_mode); -+ /* cache only filetype */ -+ inode->filetype = (e->attr.st_mode & S_IFMT); - - /* - * One for the caller and one for nlookup (released in -@@ -1139,7 +1140,7 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, - struct lo_inode *parent; - char path[PATH_MAX]; - -- if (inode->is_symlink) { -+ if (S_ISLNK(inode->filetype)) { - res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); - if (res == -1 && (errno == ENOENT || errno == EINVAL)) { - /* Sorry, no race free way to hard-link a symlink. */ -@@ -2193,12 +2194,6 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", - ino, name, size); - -- if (inode->is_symlink) { -- /* Sorry, no race free way to getxattr on symlink. 
*/ -- saverr = EPERM; -- goto out; -- } -- - if (size) { - value = malloc(size); - if (!value) { -@@ -2207,12 +2202,25 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - } - - sprintf(procname, "%i", inode->fd); -- fd = openat(lo->proc_self_fd, procname, O_RDONLY); -- if (fd < 0) { -- goto out_err; -+ /* -+ * It is not safe to open() non-regular/non-dir files in file server -+ * unless O_PATH is used, so use that method for regular files/dir -+ * only (as it seems giving less performance overhead). -+ * Otherwise, call fchdir() to avoid open(). -+ */ -+ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ goto out_err; -+ } -+ ret = fgetxattr(fd, name, value, size); -+ } else { -+ /* fchdir should not fail here */ -+ assert(fchdir(lo->proc_self_fd) == 0); -+ ret = getxattr(procname, name, value, size); -+ assert(fchdir(lo->root.fd) == 0); - } - -- ret = fgetxattr(fd, name, value, size); - if (ret == -1) { - goto out_err; - } -@@ -2266,12 +2274,6 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, - size); - -- if (inode->is_symlink) { -- /* Sorry, no race free way to listxattr on symlink. 
*/ -- saverr = EPERM; -- goto out; -- } -- - if (size) { - value = malloc(size); - if (!value) { -@@ -2280,12 +2282,19 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - } - - sprintf(procname, "%i", inode->fd); -- fd = openat(lo->proc_self_fd, procname, O_RDONLY); -- if (fd < 0) { -- goto out_err; -+ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ goto out_err; -+ } -+ ret = flistxattr(fd, value, size); -+ } else { -+ /* fchdir should not fail here */ -+ assert(fchdir(lo->proc_self_fd) == 0); -+ ret = listxattr(procname, value, size); -+ assert(fchdir(lo->root.fd) == 0); - } - -- ret = flistxattr(fd, value, size); - if (ret == -1) { - goto out_err; - } -@@ -2339,20 +2348,21 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 - ", name=%s value=%s size=%zd)\n", ino, name, value, size); - -- if (inode->is_symlink) { -- /* Sorry, no race free way to setxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -- - sprintf(procname, "%i", inode->fd); -- fd = openat(lo->proc_self_fd, procname, O_RDWR); -- if (fd < 0) { -- saverr = errno; -- goto out; -+ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ saverr = errno; -+ goto out; -+ } -+ ret = fsetxattr(fd, name, value, size, flags); -+ } else { -+ /* fchdir should not fail here */ -+ assert(fchdir(lo->proc_self_fd) == 0); -+ ret = setxattr(procname, name, value, size, flags); -+ assert(fchdir(lo->root.fd) == 0); - } - -- ret = fsetxattr(fd, name, value, size, flags); - saverr = ret == -1 ? 
errno : 0; - - out: -@@ -2387,20 +2397,21 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, - name); - -- if (inode->is_symlink) { -- /* Sorry, no race free way to setxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -- - sprintf(procname, "%i", inode->fd); -- fd = openat(lo->proc_self_fd, procname, O_RDWR); -- if (fd < 0) { -- saverr = errno; -- goto out; -+ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ saverr = errno; -+ goto out; -+ } -+ ret = fremovexattr(fd, name); -+ } else { -+ /* fchdir should not fail here */ -+ assert(fchdir(lo->proc_self_fd) == 0); -+ ret = removexattr(procname, name); -+ assert(fchdir(lo->root.fd) == 0); - } - -- ret = fremovexattr(fd, name); - saverr = ret == -1 ? errno : 0; - - out: -@@ -2800,7 +2811,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) - exit(1); - } - -- root->is_symlink = false; -+ root->filetype = S_IFDIR; - root->fd = fd; - root->key.ino = stat.st_ino; - root->key.dev = stat.st_dev; -diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c -index 2d9d4a7..bd9e7b0 100644 ---- a/tools/virtiofsd/seccomp.c -+++ b/tools/virtiofsd/seccomp.c -@@ -41,6 +41,7 @@ static const int syscall_whitelist[] = { - SCMP_SYS(exit), - SCMP_SYS(exit_group), - SCMP_SYS(fallocate), -+ SCMP_SYS(fchdir), - SCMP_SYS(fchmodat), - SCMP_SYS(fchownat), - SCMP_SYS(fcntl), -@@ -62,7 +63,9 @@ static const int syscall_whitelist[] = { - SCMP_SYS(getpid), - SCMP_SYS(gettid), - SCMP_SYS(gettimeofday), -+ SCMP_SYS(getxattr), - SCMP_SYS(linkat), -+ SCMP_SYS(listxattr), - SCMP_SYS(lseek), - SCMP_SYS(madvise), - SCMP_SYS(mkdirat), -@@ -85,6 +88,7 @@ static const int syscall_whitelist[] = { - SCMP_SYS(recvmsg), - SCMP_SYS(renameat), - SCMP_SYS(renameat2), -+ SCMP_SYS(removexattr), - SCMP_SYS(rt_sigaction), - 
SCMP_SYS(rt_sigprocmask), - SCMP_SYS(rt_sigreturn), -@@ -98,10 +102,12 @@ static const int syscall_whitelist[] = { - SCMP_SYS(setresuid32), - #endif - SCMP_SYS(set_robust_list), -+ SCMP_SYS(setxattr), - SCMP_SYS(symlinkat), - SCMP_SYS(time), /* Rarely needed, except on static builds */ - SCMP_SYS(tgkill), - SCMP_SYS(unlinkat), -+ SCMP_SYS(unshare), - SCMP_SYS(utimensat), - SCMP_SYS(write), - SCMP_SYS(writev), --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch b/SOURCES/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch deleted file mode 100644 index 5593a33..0000000 --- a/SOURCES/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch +++ /dev/null @@ -1,14743 +0,0 @@ -From e313ab94af558bbc133e7a93b0a6dbff706dd1d8 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:45 +0100 -Subject: [PATCH 014/116] virtiofsd: Format imported files to qemu style -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-11-dgilbert@redhat.com> -Patchwork-id: 93464 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 010/112] virtiofsd: Format imported files to qemu style -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Mostly using a set like: - -indent -nut -i 4 -nlp -br -cs -ce --no-space-after-function-call-names file -clang-format -style=file -i -- file -clang-tidy -fix-errors -checks=readability-braces-around-statements file -clang-format -style=file -i -- file - -With manual cleanups. - -The .clang-format used is below. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. 
Berrangé -Reviewed by: Aleksandar Markovic - -Language: Cpp -AlignAfterOpenBracket: Align -AlignConsecutiveAssignments: false # although we like it, it creates churn -AlignConsecutiveDeclarations: false -AlignEscapedNewlinesLeft: true -AlignOperands: true -AlignTrailingComments: false # churn -AllowAllParametersOfDeclarationOnNextLine: true -AllowShortBlocksOnASingleLine: false -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: None -AllowShortIfStatementsOnASingleLine: false -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account -AlwaysBreakBeforeMultilineStrings: false -BinPackArguments: true -BinPackParameters: true -BraceWrapping: - AfterControlStatement: false - AfterEnum: false - AfterFunction: true - AfterStruct: false - AfterUnion: false - BeforeElse: false - IndentBraces: false -BreakBeforeBinaryOperators: None -BreakBeforeBraces: Custom -BreakBeforeTernaryOperators: false -BreakStringLiterals: true -ColumnLimit: 80 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: false -DerivePointerAlignment: false -DisableFormat: false -ForEachMacros: [ - 'CPU_FOREACH', - 'CPU_FOREACH_REVERSE', - 'CPU_FOREACH_SAFE', - 'IOMMU_NOTIFIER_FOREACH', - 'QLIST_FOREACH', - 'QLIST_FOREACH_ENTRY', - 'QLIST_FOREACH_RCU', - 'QLIST_FOREACH_SAFE', - 'QLIST_FOREACH_SAFE_RCU', - 'QSIMPLEQ_FOREACH', - 'QSIMPLEQ_FOREACH_SAFE', - 'QSLIST_FOREACH', - 'QSLIST_FOREACH_SAFE', - 'QTAILQ_FOREACH', - 'QTAILQ_FOREACH_REVERSE', - 'QTAILQ_FOREACH_SAFE', - 'QTAILQ_RAW_FOREACH', - 'RAMBLOCK_FOREACH' -] -IncludeCategories: - - Regex: '^"qemu/osdep.h' - Priority: -3 - - Regex: '^"(block|chardev|crypto|disas|exec|fpu|hw|io|libdecnumber|migration|monitor|net|qapi|qemu|qom|standard-headers|sysemu|ui)/' - Priority: -2 - - Regex: '^"(elf.h|qemu-common.h|glib-compat.h|qemu-io.h|trace-tcg.h)' - Priority: -1 - - Regex: '.*' - Priority: 1 -IncludeIsMainRegex: '$' -IndentCaseLabels: false -IndentWidth: 4 
-IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? -MacroBlockEnd: '.*_END$' -MaxEmptyLinesToKeep: 2 -PointerAlignment: Right -ReflowComments: true -SortIncludes: true -SpaceAfterCStyleCast: false -SpaceBeforeAssignmentOperators: true -SpaceBeforeParens: ControlStatements -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 -SpacesInContainerLiterals: true -SpacesInParentheses: false -SpacesInSquareBrackets: false -Standard: Auto -UseTab: Never -... - -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 7387863d033e8028aa09a815736617a7c4490827) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 434 ++-- - tools/virtiofsd/fuse.h | 1572 +++++++------- - tools/virtiofsd/fuse_common.h | 730 +++---- - tools/virtiofsd/fuse_i.h | 121 +- - tools/virtiofsd/fuse_log.c | 38 +- - tools/virtiofsd/fuse_log.h | 32 +- - tools/virtiofsd/fuse_lowlevel.c | 3638 +++++++++++++++++---------------- - tools/virtiofsd/fuse_lowlevel.h | 2392 +++++++++++----------- - tools/virtiofsd/fuse_misc.h | 30 +- - tools/virtiofsd/fuse_opt.c | 659 +++--- - tools/virtiofsd/fuse_opt.h | 79 +- - tools/virtiofsd/fuse_signals.c | 118 +- - tools/virtiofsd/helper.c | 506 ++--- - tools/virtiofsd/passthrough_helpers.h | 33 +- - tools/virtiofsd/passthrough_ll.c | 2061 ++++++++++--------- - 15 files changed, 6382 insertions(+), 6061 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index aefb7db..5df946c 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -1,252 +1,272 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2010 Miklos Szeredi -- -- Functions for dealing with `struct fuse_buf` and `struct -- fuse_bufvec`. -- -- This program can be distributed under the terms of the GNU LGPLv2. 
-- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2010 Miklos Szeredi -+ * -+ * Functions for dealing with `struct fuse_buf` and `struct -+ * fuse_bufvec`. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB -+ */ - - #define _GNU_SOURCE - - #include "config.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" -+#include -+#include - #include - #include --#include --#include - - size_t fuse_buf_size(const struct fuse_bufvec *bufv) - { -- size_t i; -- size_t size = 0; -- -- for (i = 0; i < bufv->count; i++) { -- if (bufv->buf[i].size == SIZE_MAX) -- size = SIZE_MAX; -- else -- size += bufv->buf[i].size; -- } -- -- return size; -+ size_t i; -+ size_t size = 0; -+ -+ for (i = 0; i < bufv->count; i++) { -+ if (bufv->buf[i].size == SIZE_MAX) { -+ size = SIZE_MAX; -+ } else { -+ size += bufv->buf[i].size; -+ } -+ } -+ -+ return size; - } - - static size_t min_size(size_t s1, size_t s2) - { -- return s1 < s2 ? s1 : s2; -+ return s1 < s2 ? 
s1 : s2; - } - - static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len) -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) - { -- ssize_t res = 0; -- size_t copied = 0; -- -- while (len) { -- if (dst->flags & FUSE_BUF_FD_SEEK) { -- res = pwrite(dst->fd, (char *)src->mem + src_off, len, -- dst->pos + dst_off); -- } else { -- res = write(dst->fd, (char *)src->mem + src_off, len); -- } -- if (res == -1) { -- if (!copied) -- return -errno; -- break; -- } -- if (res == 0) -- break; -- -- copied += res; -- if (!(dst->flags & FUSE_BUF_FD_RETRY)) -- break; -- -- src_off += res; -- dst_off += res; -- len -= res; -- } -- -- return copied; -+ ssize_t res = 0; -+ size_t copied = 0; -+ -+ while (len) { -+ if (dst->flags & FUSE_BUF_FD_SEEK) { -+ res = pwrite(dst->fd, (char *)src->mem + src_off, len, -+ dst->pos + dst_off); -+ } else { -+ res = write(dst->fd, (char *)src->mem + src_off, len); -+ } -+ if (res == -1) { -+ if (!copied) { -+ return -errno; -+ } -+ break; -+ } -+ if (res == 0) { -+ break; -+ } -+ -+ copied += res; -+ if (!(dst->flags & FUSE_BUF_FD_RETRY)) { -+ break; -+ } -+ -+ src_off += res; -+ dst_off += res; -+ len -= res; -+ } -+ -+ return copied; - } - - static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len) -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) - { -- ssize_t res = 0; -- size_t copied = 0; -- -- while (len) { -- if (src->flags & FUSE_BUF_FD_SEEK) { -- res = pread(src->fd, (char *)dst->mem + dst_off, len, -- src->pos + src_off); -- } else { -- res = read(src->fd, (char *)dst->mem + dst_off, len); -- } -- if (res == -1) { -- if (!copied) -- return -errno; -- break; -- } -- if (res == 0) -- break; -- -- copied += res; -- if (!(src->flags & FUSE_BUF_FD_RETRY)) -- break; -- -- dst_off += res; -- src_off += res; -- len -= res; -- } -- -- return copied; -+ ssize_t res = 0; -+ 
size_t copied = 0; -+ -+ while (len) { -+ if (src->flags & FUSE_BUF_FD_SEEK) { -+ res = pread(src->fd, (char *)dst->mem + dst_off, len, -+ src->pos + src_off); -+ } else { -+ res = read(src->fd, (char *)dst->mem + dst_off, len); -+ } -+ if (res == -1) { -+ if (!copied) { -+ return -errno; -+ } -+ break; -+ } -+ if (res == 0) { -+ break; -+ } -+ -+ copied += res; -+ if (!(src->flags & FUSE_BUF_FD_RETRY)) { -+ break; -+ } -+ -+ dst_off += res; -+ src_off += res; -+ len -= res; -+ } -+ -+ return copied; - } - - static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len) -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) - { -- char buf[4096]; -- struct fuse_buf tmp = { -- .size = sizeof(buf), -- .flags = 0, -- }; -- ssize_t res; -- size_t copied = 0; -- -- tmp.mem = buf; -- -- while (len) { -- size_t this_len = min_size(tmp.size, len); -- size_t read_len; -- -- res = fuse_buf_read(&tmp, 0, src, src_off, this_len); -- if (res < 0) { -- if (!copied) -- return res; -- break; -- } -- if (res == 0) -- break; -- -- read_len = res; -- res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); -- if (res < 0) { -- if (!copied) -- return res; -- break; -- } -- if (res == 0) -- break; -- -- copied += res; -- -- if (res < this_len) -- break; -- -- dst_off += res; -- src_off += res; -- len -= res; -- } -- -- return copied; -+ char buf[4096]; -+ struct fuse_buf tmp = { -+ .size = sizeof(buf), -+ .flags = 0, -+ }; -+ ssize_t res; -+ size_t copied = 0; -+ -+ tmp.mem = buf; -+ -+ while (len) { -+ size_t this_len = min_size(tmp.size, len); -+ size_t read_len; -+ -+ res = fuse_buf_read(&tmp, 0, src, src_off, this_len); -+ if (res < 0) { -+ if (!copied) { -+ return res; -+ } -+ break; -+ } -+ if (res == 0) { -+ break; -+ } -+ -+ read_len = res; -+ res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); -+ if (res < 0) { -+ if (!copied) { -+ return res; -+ } -+ break; -+ } -+ if (res == 0) { -+ break; -+ 
} -+ -+ copied += res; -+ -+ if (res < this_len) { -+ break; -+ } -+ -+ dst_off += res; -+ src_off += res; -+ len -= res; -+ } -+ -+ return copied; - } - - static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len, enum fuse_buf_copy_flags flags) -+ const struct fuse_buf *src, size_t src_off, -+ size_t len, enum fuse_buf_copy_flags flags) - { -- int src_is_fd = src->flags & FUSE_BUF_IS_FD; -- int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; -- -- if (!src_is_fd && !dst_is_fd) { -- char *dstmem = (char *)dst->mem + dst_off; -- char *srcmem = (char *)src->mem + src_off; -- -- if (dstmem != srcmem) { -- if (dstmem + len <= srcmem || srcmem + len <= dstmem) -- memcpy(dstmem, srcmem, len); -- else -- memmove(dstmem, srcmem, len); -- } -- -- return len; -- } else if (!src_is_fd) { -- return fuse_buf_write(dst, dst_off, src, src_off, len); -- } else if (!dst_is_fd) { -- return fuse_buf_read(dst, dst_off, src, src_off, len); -- } else { -- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); -- } -+ int src_is_fd = src->flags & FUSE_BUF_IS_FD; -+ int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; -+ -+ if (!src_is_fd && !dst_is_fd) { -+ char *dstmem = (char *)dst->mem + dst_off; -+ char *srcmem = (char *)src->mem + src_off; -+ -+ if (dstmem != srcmem) { -+ if (dstmem + len <= srcmem || srcmem + len <= dstmem) { -+ memcpy(dstmem, srcmem, len); -+ } else { -+ memmove(dstmem, srcmem, len); -+ } -+ } -+ -+ return len; -+ } else if (!src_is_fd) { -+ return fuse_buf_write(dst, dst_off, src, src_off, len); -+ } else if (!dst_is_fd) { -+ return fuse_buf_read(dst, dst_off, src, src_off, len); -+ } else { -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); -+ } - } - - static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv) - { -- if (bufv->idx < bufv->count) -- return &bufv->buf[bufv->idx]; -- else -- return NULL; -+ if (bufv->idx < bufv->count) { -+ return 
&bufv->buf[bufv->idx]; -+ } else { -+ return NULL; -+ } - } - - static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) - { -- const struct fuse_buf *buf = fuse_bufvec_current(bufv); -- -- bufv->off += len; -- assert(bufv->off <= buf->size); -- if (bufv->off == buf->size) { -- assert(bufv->idx < bufv->count); -- bufv->idx++; -- if (bufv->idx == bufv->count) -- return 0; -- bufv->off = 0; -- } -- return 1; -+ const struct fuse_buf *buf = fuse_bufvec_current(bufv); -+ -+ bufv->off += len; -+ assert(bufv->off <= buf->size); -+ if (bufv->off == buf->size) { -+ assert(bufv->idx < bufv->count); -+ bufv->idx++; -+ if (bufv->idx == bufv->count) { -+ return 0; -+ } -+ bufv->off = 0; -+ } -+ return 1; - } - - ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, -- enum fuse_buf_copy_flags flags) -+ enum fuse_buf_copy_flags flags) - { -- size_t copied = 0; -- -- if (dstv == srcv) -- return fuse_buf_size(dstv); -- -- for (;;) { -- const struct fuse_buf *src = fuse_bufvec_current(srcv); -- const struct fuse_buf *dst = fuse_bufvec_current(dstv); -- size_t src_len; -- size_t dst_len; -- size_t len; -- ssize_t res; -- -- if (src == NULL || dst == NULL) -- break; -- -- src_len = src->size - srcv->off; -- dst_len = dst->size - dstv->off; -- len = min_size(src_len, dst_len); -- -- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); -- if (res < 0) { -- if (!copied) -- return res; -- break; -- } -- copied += res; -- -- if (!fuse_bufvec_advance(srcv, res) || -- !fuse_bufvec_advance(dstv, res)) -- break; -- -- if (res < len) -- break; -- } -- -- return copied; -+ size_t copied = 0; -+ -+ if (dstv == srcv) { -+ return fuse_buf_size(dstv); -+ } -+ -+ for (;;) { -+ const struct fuse_buf *src = fuse_bufvec_current(srcv); -+ const struct fuse_buf *dst = fuse_bufvec_current(dstv); -+ size_t src_len; -+ size_t dst_len; -+ size_t len; -+ ssize_t res; -+ -+ if (src == NULL || dst == NULL) { -+ break; -+ } -+ -+ src_len = src->size - srcv->off; 
-+ dst_len = dst->size - dstv->off; -+ len = min_size(src_len, dst_len); -+ -+ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); -+ if (res < 0) { -+ if (!copied) { -+ return res; -+ } -+ break; -+ } -+ copied += res; -+ -+ if (!fuse_bufvec_advance(srcv, res) || -+ !fuse_bufvec_advance(dstv, res)) { -+ break; -+ } -+ -+ if (res < len) { -+ break; -+ } -+ } -+ -+ return copied; - } -diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h -index 3202fba..7a4c713 100644 ---- a/tools/virtiofsd/fuse.h -+++ b/tools/virtiofsd/fuse.h -@@ -1,15 +1,15 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. -+ */ - - #ifndef FUSE_H_ - #define FUSE_H_ - --/** @file -+/* - * - * This file defines the library interface of FUSE - * -@@ -19,15 +19,15 @@ - #include "fuse_common.h" - - #include --#include --#include - #include - #include -+#include - #include -+#include - --/* ----------------------------------------------------------- * -- * Basic FUSE API * -- * ----------------------------------------------------------- */ -+/* -+ * Basic FUSE API -+ */ - - /** Handle for a FUSE filesystem */ - struct fuse; -@@ -36,38 +36,39 @@ struct fuse; - * Readdir flags, passed to ->readdir() - */ - enum fuse_readdir_flags { -- /** -- * "Plus" mode. -- * -- * The kernel wants to prefill the inode cache during readdir. The -- * filesystem may honour this by filling in the attributes and setting -- * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also -- * just ignore this flag completely. -- */ -- FUSE_READDIR_PLUS = (1 << 0), -+ /** -+ * "Plus" mode. -+ * -+ * The kernel wants to prefill the inode cache during readdir. 
The -+ * filesystem may honour this by filling in the attributes and setting -+ * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also -+ * just ignore this flag completely. -+ */ -+ FUSE_READDIR_PLUS = (1 << 0), - }; - - enum fuse_fill_dir_flags { -- /** -- * "Plus" mode: all file attributes are valid -- * -- * The attributes are used by the kernel to prefill the inode cache -- * during a readdir. -- * -- * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set -- * and vice versa. -- */ -- FUSE_FILL_DIR_PLUS = (1 << 1), -+ /** -+ * "Plus" mode: all file attributes are valid -+ * -+ * The attributes are used by the kernel to prefill the inode cache -+ * during a readdir. -+ * -+ * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set -+ * and vice versa. -+ */ -+ FUSE_FILL_DIR_PLUS = (1 << 1), - }; - --/** Function to add an entry in a readdir() operation -+/** -+ * Function to add an entry in a readdir() operation - * - * The *off* parameter can be any non-zero value that enables the - * filesystem to identify the current point in the directory - * stream. It does not need to be the actual physical position. A - * value of zero is reserved to indicate that seeking in directories - * is not supported. 
-- * -+ * - * @param buf the buffer passed to the readdir() operation - * @param name the file name of the directory entry - * @param stat file attributes, can be NULL -@@ -75,9 +76,9 @@ enum fuse_fill_dir_flags { - * @param flags fill flags - * @return 1 if buffer is full, zero otherwise - */ --typedef int (*fuse_fill_dir_t) (void *buf, const char *name, -- const struct stat *stbuf, off_t off, -- enum fuse_fill_dir_flags flags); -+typedef int (*fuse_fill_dir_t)(void *buf, const char *name, -+ const struct stat *stbuf, off_t off, -+ enum fuse_fill_dir_flags flags); - /** - * Configuration of the high-level API - * -@@ -87,186 +88,186 @@ typedef int (*fuse_fill_dir_t) (void *buf, const char *name, - * file system implementation. - */ - struct fuse_config { -- /** -- * If `set_gid` is non-zero, the st_gid attribute of each file -- * is overwritten with the value of `gid`. -- */ -- int set_gid; -- unsigned int gid; -- -- /** -- * If `set_uid` is non-zero, the st_uid attribute of each file -- * is overwritten with the value of `uid`. -- */ -- int set_uid; -- unsigned int uid; -- -- /** -- * If `set_mode` is non-zero, the any permissions bits set in -- * `umask` are unset in the st_mode attribute of each file. -- */ -- int set_mode; -- unsigned int umask; -- -- /** -- * The timeout in seconds for which name lookups will be -- * cached. -- */ -- double entry_timeout; -- -- /** -- * The timeout in seconds for which a negative lookup will be -- * cached. This means, that if file did not exist (lookup -- * retuned ENOENT), the lookup will only be redone after the -- * timeout, and the file/directory will be assumed to not -- * exist until then. A value of zero means that negative -- * lookups are not cached. -- */ -- double negative_timeout; -- -- /** -- * The timeout in seconds for which file/directory attributes -- * (as returned by e.g. the `getattr` handler) are cached. 
-- */ -- double attr_timeout; -- -- /** -- * Allow requests to be interrupted -- */ -- int intr; -- -- /** -- * Specify which signal number to send to the filesystem when -- * a request is interrupted. The default is hardcoded to -- * USR1. -- */ -- int intr_signal; -- -- /** -- * Normally, FUSE assigns inodes to paths only for as long as -- * the kernel is aware of them. With this option inodes are -- * instead remembered for at least this many seconds. This -- * will require more memory, but may be necessary when using -- * applications that make use of inode numbers. -- * -- * A number of -1 means that inodes will be remembered for the -- * entire life-time of the file-system process. -- */ -- int remember; -- -- /** -- * The default behavior is that if an open file is deleted, -- * the file is renamed to a hidden file (.fuse_hiddenXXX), and -- * only removed when the file is finally released. This -- * relieves the filesystem implementation of having to deal -- * with this problem. This option disables the hiding -- * behavior, and files are removed immediately in an unlink -- * operation (or in a rename operation which overwrites an -- * existing file). -- * -- * It is recommended that you not use the hard_remove -- * option. When hard_remove is set, the following libc -- * functions fail on unlinked files (returning errno of -- * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), -- * ftruncate(2), fstat(2), fchmod(2), fchown(2) -- */ -- int hard_remove; -- -- /** -- * Honor the st_ino field in the functions getattr() and -- * fill_dir(). This value is used to fill in the st_ino field -- * in the stat(2), lstat(2), fstat(2) functions and the d_ino -- * field in the readdir(2) function. The filesystem does not -- * have to guarantee uniqueness, however some applications -- * rely on this value being unique for the whole filesystem. 
-- * -- * Note that this does *not* affect the inode that libfuse -- * and the kernel use internally (also called the "nodeid"). -- */ -- int use_ino; -- -- /** -- * If use_ino option is not given, still try to fill in the -- * d_ino field in readdir(2). If the name was previously -- * looked up, and is still in the cache, the inode number -- * found there will be used. Otherwise it will be set to -1. -- * If use_ino option is given, this option is ignored. -- */ -- int readdir_ino; -- -- /** -- * This option disables the use of page cache (file content cache) -- * in the kernel for this filesystem. This has several affects: -- * -- * 1. Each read(2) or write(2) system call will initiate one -- * or more read or write operations, data will not be -- * cached in the kernel. -- * -- * 2. The return value of the read() and write() system calls -- * will correspond to the return values of the read and -- * write operations. This is useful for example if the -- * file size is not known in advance (before reading it). -- * -- * Internally, enabling this option causes fuse to set the -- * `direct_io` field of `struct fuse_file_info` - overwriting -- * any value that was put there by the file system. -- */ -- int direct_io; -- -- /** -- * This option disables flushing the cache of the file -- * contents on every open(2). This should only be enabled on -- * filesystems where the file data is never changed -- * externally (not through the mounted FUSE filesystem). Thus -- * it is not suitable for network filesystems and other -- * intermediate filesystems. -- * -- * NOTE: if this option is not specified (and neither -- * direct_io) data is still cached after the open(2), so a -- * read(2) system call will not always initiate a read -- * operation. -- * -- * Internally, enabling this option causes fuse to set the -- * `keep_cache` field of `struct fuse_file_info` - overwriting -- * any value that was put there by the file system. 
-- */ -- int kernel_cache; -- -- /** -- * This option is an alternative to `kernel_cache`. Instead of -- * unconditionally keeping cached data, the cached data is -- * invalidated on open(2) if if the modification time or the -- * size of the file has changed since it was last opened. -- */ -- int auto_cache; -- -- /** -- * The timeout in seconds for which file attributes are cached -- * for the purpose of checking if auto_cache should flush the -- * file data on open. -- */ -- int ac_attr_timeout_set; -- double ac_attr_timeout; -- -- /** -- * If this option is given the file-system handlers for the -- * following operations will not receive path information: -- * read, write, flush, release, fsync, readdir, releasedir, -- * fsyncdir, lock, ioctl and poll. -- * -- * For the truncate, getattr, chmod, chown and utimens -- * operations the path will be provided only if the struct -- * fuse_file_info argument is NULL. -- */ -- int nullpath_ok; -- -- /** -- * The remaining options are used by libfuse internally and -- * should not be touched. -- */ -- int show_help; -- char *modules; -- int debug; -+ /** -+ * If `set_gid` is non-zero, the st_gid attribute of each file -+ * is overwritten with the value of `gid`. -+ */ -+ int set_gid; -+ unsigned int gid; -+ -+ /** -+ * If `set_uid` is non-zero, the st_uid attribute of each file -+ * is overwritten with the value of `uid`. -+ */ -+ int set_uid; -+ unsigned int uid; -+ -+ /** -+ * If `set_mode` is non-zero, the any permissions bits set in -+ * `umask` are unset in the st_mode attribute of each file. -+ */ -+ int set_mode; -+ unsigned int umask; -+ -+ /** -+ * The timeout in seconds for which name lookups will be -+ * cached. -+ */ -+ double entry_timeout; -+ -+ /** -+ * The timeout in seconds for which a negative lookup will be -+ * cached. 
This means, that if file did not exist (lookup -+ * retuned ENOENT), the lookup will only be redone after the -+ * timeout, and the file/directory will be assumed to not -+ * exist until then. A value of zero means that negative -+ * lookups are not cached. -+ */ -+ double negative_timeout; -+ -+ /** -+ * The timeout in seconds for which file/directory attributes -+ * (as returned by e.g. the `getattr` handler) are cached. -+ */ -+ double attr_timeout; -+ -+ /** -+ * Allow requests to be interrupted -+ */ -+ int intr; -+ -+ /** -+ * Specify which signal number to send to the filesystem when -+ * a request is interrupted. The default is hardcoded to -+ * USR1. -+ */ -+ int intr_signal; -+ -+ /** -+ * Normally, FUSE assigns inodes to paths only for as long as -+ * the kernel is aware of them. With this option inodes are -+ * instead remembered for at least this many seconds. This -+ * will require more memory, but may be necessary when using -+ * applications that make use of inode numbers. -+ * -+ * A number of -1 means that inodes will be remembered for the -+ * entire life-time of the file-system process. -+ */ -+ int remember; -+ -+ /** -+ * The default behavior is that if an open file is deleted, -+ * the file is renamed to a hidden file (.fuse_hiddenXXX), and -+ * only removed when the file is finally released. This -+ * relieves the filesystem implementation of having to deal -+ * with this problem. This option disables the hiding -+ * behavior, and files are removed immediately in an unlink -+ * operation (or in a rename operation which overwrites an -+ * existing file). -+ * -+ * It is recommended that you not use the hard_remove -+ * option. 
When hard_remove is set, the following libc -+ * functions fail on unlinked files (returning errno of -+ * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), -+ * ftruncate(2), fstat(2), fchmod(2), fchown(2) -+ */ -+ int hard_remove; -+ -+ /** -+ * Honor the st_ino field in the functions getattr() and -+ * fill_dir(). This value is used to fill in the st_ino field -+ * in the stat(2), lstat(2), fstat(2) functions and the d_ino -+ * field in the readdir(2) function. The filesystem does not -+ * have to guarantee uniqueness, however some applications -+ * rely on this value being unique for the whole filesystem. -+ * -+ * Note that this does *not* affect the inode that libfuse -+ * and the kernel use internally (also called the "nodeid"). -+ */ -+ int use_ino; -+ -+ /** -+ * If use_ino option is not given, still try to fill in the -+ * d_ino field in readdir(2). If the name was previously -+ * looked up, and is still in the cache, the inode number -+ * found there will be used. Otherwise it will be set to -1. -+ * If use_ino option is given, this option is ignored. -+ */ -+ int readdir_ino; -+ -+ /** -+ * This option disables the use of page cache (file content cache) -+ * in the kernel for this filesystem. This has several affects: -+ * -+ * 1. Each read(2) or write(2) system call will initiate one -+ * or more read or write operations, data will not be -+ * cached in the kernel. -+ * -+ * 2. The return value of the read() and write() system calls -+ * will correspond to the return values of the read and -+ * write operations. This is useful for example if the -+ * file size is not known in advance (before reading it). -+ * -+ * Internally, enabling this option causes fuse to set the -+ * `direct_io` field of `struct fuse_file_info` - overwriting -+ * any value that was put there by the file system. -+ */ -+ int direct_io; -+ -+ /** -+ * This option disables flushing the cache of the file -+ * contents on every open(2). 
This should only be enabled on -+ * filesystems where the file data is never changed -+ * externally (not through the mounted FUSE filesystem). Thus -+ * it is not suitable for network filesystems and other -+ * intermediate filesystems. -+ * -+ * NOTE: if this option is not specified (and neither -+ * direct_io) data is still cached after the open(2), so a -+ * read(2) system call will not always initiate a read -+ * operation. -+ * -+ * Internally, enabling this option causes fuse to set the -+ * `keep_cache` field of `struct fuse_file_info` - overwriting -+ * any value that was put there by the file system. -+ */ -+ int kernel_cache; -+ -+ /** -+ * This option is an alternative to `kernel_cache`. Instead of -+ * unconditionally keeping cached data, the cached data is -+ * invalidated on open(2) if if the modification time or the -+ * size of the file has changed since it was last opened. -+ */ -+ int auto_cache; -+ -+ /** -+ * The timeout in seconds for which file attributes are cached -+ * for the purpose of checking if auto_cache should flush the -+ * file data on open. -+ */ -+ int ac_attr_timeout_set; -+ double ac_attr_timeout; -+ -+ /** -+ * If this option is given the file-system handlers for the -+ * following operations will not receive path information: -+ * read, write, flush, release, fsync, readdir, releasedir, -+ * fsyncdir, lock, ioctl and poll. -+ * -+ * For the truncate, getattr, chmod, chown and utimens -+ * operations the path will be provided only if the struct -+ * fuse_file_info argument is NULL. -+ */ -+ int nullpath_ok; -+ -+ /** -+ * The remaining options are used by libfuse internally and -+ * should not be touched. -+ */ -+ int show_help; -+ char *modules; -+ int debug; - }; - - -@@ -293,515 +294,535 @@ struct fuse_config { - * Almost all operations take a path which can be of any length. - */ - struct fuse_operations { -- /** Get file attributes. -- * -- * Similar to stat(). The 'st_dev' and 'st_blksize' fields are -- * ignored. 
The 'st_ino' field is ignored except if the 'use_ino' -- * mount option is given. In that case it is passed to userspace, -- * but libfuse and the kernel will still assign a different -- * inode for internal use (called the "nodeid"). -- * -- * `fi` will always be NULL if the file is not currently open, but -- * may also be NULL if the file is open. -- */ -- int (*getattr) (const char *, struct stat *, struct fuse_file_info *fi); -- -- /** Read the target of a symbolic link -- * -- * The buffer should be filled with a null terminated string. The -- * buffer size argument includes the space for the terminating -- * null character. If the linkname is too long to fit in the -- * buffer, it should be truncated. The return value should be 0 -- * for success. -- */ -- int (*readlink) (const char *, char *, size_t); -- -- /** Create a file node -- * -- * This is called for creation of all non-directory, non-symlink -- * nodes. If the filesystem defines a create() method, then for -- * regular files that will be called instead. -- */ -- int (*mknod) (const char *, mode_t, dev_t); -- -- /** Create a directory -- * -- * Note that the mode argument may not have the type specification -- * bits set, i.e. S_ISDIR(mode) can be false. To obtain the -- * correct directory type bits use mode|S_IFDIR -- * */ -- int (*mkdir) (const char *, mode_t); -- -- /** Remove a file */ -- int (*unlink) (const char *); -- -- /** Remove a directory */ -- int (*rmdir) (const char *); -- -- /** Create a symbolic link */ -- int (*symlink) (const char *, const char *); -- -- /** Rename a file -- * -- * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -- * RENAME_NOREPLACE is specified, the filesystem must not -- * overwrite *newname* if it exists and return an error -- * instead. If `RENAME_EXCHANGE` is specified, the filesystem -- * must atomically exchange the two files, i.e. both must -- * exist and neither may be deleted. 
-- */ -- int (*rename) (const char *, const char *, unsigned int flags); -- -- /** Create a hard link to a file */ -- int (*link) (const char *, const char *); -- -- /** Change the permission bits of a file -- * -- * `fi` will always be NULL if the file is not currenlty open, but -- * may also be NULL if the file is open. -- */ -- int (*chmod) (const char *, mode_t, struct fuse_file_info *fi); -- -- /** Change the owner and group of a file -- * -- * `fi` will always be NULL if the file is not currenlty open, but -- * may also be NULL if the file is open. -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- */ -- int (*chown) (const char *, uid_t, gid_t, struct fuse_file_info *fi); -- -- /** Change the size of a file -- * -- * `fi` will always be NULL if the file is not currenlty open, but -- * may also be NULL if the file is open. -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- */ -- int (*truncate) (const char *, off_t, struct fuse_file_info *fi); -- -- /** Open a file -- * -- * Open flags are available in fi->flags. The following rules -- * apply. -- * -- * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -- * filtered out / handled by the kernel. -- * -- * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) -- * should be used by the filesystem to check if the operation is -- * permitted. If the ``-o default_permissions`` mount option is -- * given, this check is already done by the kernel before calling -- * open() and may thus be omitted by the filesystem. -- * -- * - When writeback caching is enabled, the kernel may send -- * read requests even for files opened with O_WRONLY. The -- * filesystem should be prepared to handle this. 
-- * -- * - When writeback caching is disabled, the filesystem is -- * expected to properly handle the O_APPEND flag and ensure -- * that each write is appending to the end of the file. -- * -- * - When writeback caching is enabled, the kernel will -- * handle O_APPEND. However, unless all changes to the file -- * come through the kernel this will not work reliably. The -- * filesystem should thus either ignore the O_APPEND flag -- * (and let the kernel handle it), or return an error -- * (indicating that reliably O_APPEND is not available). -- * -- * Filesystem may store an arbitrary file handle (pointer, -- * index, etc) in fi->fh, and use this in other all other file -- * operations (read, write, flush, release, fsync). -- * -- * Filesystem may also implement stateless file I/O and not store -- * anything in fi->fh. -- * -- * There are also some flags (direct_io, keep_cache) which the -- * filesystem may set in fi, to change the way the file is opened. -- * See fuse_file_info structure in for more details. -- * -- * If this request is answered with an error code of ENOSYS -- * and FUSE_CAP_NO_OPEN_SUPPORT is set in -- * `fuse_conn_info.capable`, this is treated as success and -- * future calls to open will also succeed without being send -- * to the filesystem process. -- * -- */ -- int (*open) (const char *, struct fuse_file_info *); -- -- /** Read data from an open file -- * -- * Read should return exactly the number of bytes requested except -- * on EOF or error, otherwise the rest of the data will be -- * substituted with zeroes. An exception to this is when the -- * 'direct_io' mount option is specified, in which case the return -- * value of the read system call will reflect the return value of -- * this operation. -- */ -- int (*read) (const char *, char *, size_t, off_t, -- struct fuse_file_info *); -- -- /** Write data to an open file -- * -- * Write should return exactly the number of bytes requested -- * except on error. 
An exception to this is when the 'direct_io' -- * mount option is specified (see read operation). -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- */ -- int (*write) (const char *, const char *, size_t, off_t, -- struct fuse_file_info *); -- -- /** Get file system statistics -- * -- * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored -- */ -- int (*statfs) (const char *, struct statvfs *); -- -- /** Possibly flush cached data -- * -- * BIG NOTE: This is not equivalent to fsync(). It's not a -- * request to sync dirty data. -- * -- * Flush is called on each close() of a file descriptor, as opposed to -- * release which is called on the close of the last file descriptor for -- * a file. Under Linux, errors returned by flush() will be passed to -- * userspace as errors from close(), so flush() is a good place to write -- * back any cached dirty data. However, many applications ignore errors -- * on close(), and on non-Linux systems, close() may succeed even if flush() -- * returns an error. For these reasons, filesystems should not assume -- * that errors returned by flush will ever be noticed or even -- * delivered. -- * -- * NOTE: The flush() method may be called more than once for each -- * open(). This happens if more than one file descriptor refers to an -- * open file handle, e.g. due to dup(), dup2() or fork() calls. It is -- * not possible to determine if a flush is final, so each flush should -- * be treated equally. Multiple write-flush sequences are relatively -- * rare, so this shouldn't be a problem. -- * -- * Filesystems shouldn't assume that flush will be called at any -- * particular point. It may be called more times than expected, or not -- * at all. 
-- * -- * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -- */ -- int (*flush) (const char *, struct fuse_file_info *); -- -- /** Release an open file -- * -- * Release is called when there are no more references to an open -- * file: all file descriptors are closed and all memory mappings -- * are unmapped. -- * -- * For every open() call there will be exactly one release() call -- * with the same flags and file handle. It is possible to -- * have a file opened more than once, in which case only the last -- * release will mean, that no more reads/writes will happen on the -- * file. The return value of release is ignored. -- */ -- int (*release) (const char *, struct fuse_file_info *); -- -- /** Synchronize file contents -- * -- * If the datasync parameter is non-zero, then only the user data -- * should be flushed, not the meta data. -- */ -- int (*fsync) (const char *, int, struct fuse_file_info *); -- -- /** Set extended attributes */ -- int (*setxattr) (const char *, const char *, const char *, size_t, int); -- -- /** Get extended attributes */ -- int (*getxattr) (const char *, const char *, char *, size_t); -- -- /** List extended attributes */ -- int (*listxattr) (const char *, char *, size_t); -- -- /** Remove extended attributes */ -- int (*removexattr) (const char *, const char *); -- -- /** Open directory -- * -- * Unless the 'default_permissions' mount option is given, -- * this method should check if opendir is permitted for this -- * directory. Optionally opendir may also return an arbitrary -- * filehandle in the fuse_file_info structure, which will be -- * passed to readdir, releasedir and fsyncdir. -- */ -- int (*opendir) (const char *, struct fuse_file_info *); -- -- /** Read directory -- * -- * The filesystem may choose between two modes of operation: -- * -- * 1) The readdir implementation ignores the offset parameter, and -- * passes zero to the filler function's offset. 
The filler -- * function will not return '1' (unless an error happens), so the -- * whole directory is read in a single readdir operation. -- * -- * 2) The readdir implementation keeps track of the offsets of the -- * directory entries. It uses the offset parameter and always -- * passes non-zero offset to the filler function. When the buffer -- * is full (or an error happens) the filler function will return -- * '1'. -- */ -- int (*readdir) (const char *, void *, fuse_fill_dir_t, off_t, -- struct fuse_file_info *, enum fuse_readdir_flags); -- -- /** Release directory -- */ -- int (*releasedir) (const char *, struct fuse_file_info *); -- -- /** Synchronize directory contents -- * -- * If the datasync parameter is non-zero, then only the user data -- * should be flushed, not the meta data -- */ -- int (*fsyncdir) (const char *, int, struct fuse_file_info *); -- -- /** -- * Initialize filesystem -- * -- * The return value will passed in the `private_data` field of -- * `struct fuse_context` to all file operations, and as a -- * parameter to the destroy() method. It overrides the initial -- * value provided to fuse_main() / fuse_new(). -- */ -- void *(*init) (struct fuse_conn_info *conn, -- struct fuse_config *cfg); -- -- /** -- * Clean up filesystem -- * -- * Called on filesystem exit. -- */ -- void (*destroy) (void *private_data); -- -- /** -- * Check file access permissions -- * -- * This will be called for the access() system call. If the -- * 'default_permissions' mount option is given, this method is not -- * called. -- * -- * This method is not called under Linux kernel versions 2.4.x -- */ -- int (*access) (const char *, int); -- -- /** -- * Create and open a file -- * -- * If the file does not exist, first create it with the specified -- * mode, and then open it. -- * -- * If this method is not implemented or under Linux kernel -- * versions earlier than 2.6.15, the mknod() and open() methods -- * will be called instead. 
-- */ -- int (*create) (const char *, mode_t, struct fuse_file_info *); -- -- /** -- * Perform POSIX file locking operation -- * -- * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. -- * -- * For the meaning of fields in 'struct flock' see the man page -- * for fcntl(2). The l_whence field will always be set to -- * SEEK_SET. -- * -- * For checking lock ownership, the 'fuse_file_info->owner' -- * argument must be used. -- * -- * For F_GETLK operation, the library will first check currently -- * held locks, and if a conflicting lock is found it will return -- * information without calling this method. This ensures, that -- * for local locks the l_pid field is correctly filled in. The -- * results may not be accurate in case of race conditions and in -- * the presence of hard links, but it's unlikely that an -- * application would rely on accurate GETLK results in these -- * cases. If a conflicting lock is not found, this method will be -- * called, and the filesystem may fill out l_pid by a meaningful -- * value, or it may leave this field zero. -- * -- * For F_SETLK and F_SETLKW the l_pid field will be set to the pid -- * of the process performing the locking operation. -- * -- * Note: if this method is not implemented, the kernel will still -- * allow file locking to work locally. Hence it is only -- * interesting for network filesystems and similar. -- */ -- int (*lock) (const char *, struct fuse_file_info *, int cmd, -- struct flock *); -- -- /** -- * Change the access and modification times of a file with -- * nanosecond resolution -- * -- * This supersedes the old utime() interface. New applications -- * should use this. -- * -- * `fi` will always be NULL if the file is not currenlty open, but -- * may also be NULL if the file is open. -- * -- * See the utimensat(2) man page for details. 
-- */ -- int (*utimens) (const char *, const struct timespec tv[2], -- struct fuse_file_info *fi); -- -- /** -- * Map block index within file to block index within device -- * -- * Note: This makes sense only for block device backed filesystems -- * mounted with the 'blkdev' option -- */ -- int (*bmap) (const char *, size_t blocksize, uint64_t *idx); -- -- /** -- * Ioctl -- * -- * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in -- * 64bit environment. The size and direction of data is -- * determined by _IOC_*() decoding of cmd. For _IOC_NONE, -- * data will be NULL, for _IOC_WRITE data is out area, for -- * _IOC_READ in area and if both are set in/out area. In all -- * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. -- * -- * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a -- * directory file handle. -- * -- * Note : the unsigned long request submitted by the application -- * is truncated to 32 bits. -- */ -- int (*ioctl) (const char *, unsigned int cmd, void *arg, -- struct fuse_file_info *, unsigned int flags, void *data); -- -- /** -- * Poll for IO readiness events -- * -- * Note: If ph is non-NULL, the client should notify -- * when IO readiness events occur by calling -- * fuse_notify_poll() with the specified ph. -- * -- * Regardless of the number of times poll with a non-NULL ph -- * is received, single notification is enough to clear all. -- * Notifying more times incurs overhead but doesn't harm -- * correctness. -- * -- * The callee is responsible for destroying ph with -- * fuse_pollhandle_destroy() when no longer in use. -- */ -- int (*poll) (const char *, struct fuse_file_info *, -- struct fuse_pollhandle *ph, unsigned *reventsp); -- -- /** Write contents of buffer to an open file -- * -- * Similar to the write() method, but data is supplied in a -- * generic buffer. Use fuse_buf_copy() to transfer data to -- * the destination. 
-- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- */ -- int (*write_buf) (const char *, struct fuse_bufvec *buf, off_t off, -- struct fuse_file_info *); -- -- /** Store data from an open file in a buffer -- * -- * Similar to the read() method, but data is stored and -- * returned in a generic buffer. -- * -- * No actual copying of data has to take place, the source -- * file descriptor may simply be stored in the buffer for -- * later data transfer. -- * -- * The buffer must be allocated dynamically and stored at the -- * location pointed to by bufp. If the buffer contains memory -- * regions, they too must be allocated using malloc(). The -- * allocated memory will be freed by the caller. -- */ -- int (*read_buf) (const char *, struct fuse_bufvec **bufp, -- size_t size, off_t off, struct fuse_file_info *); -- /** -- * Perform BSD file locking operation -- * -- * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN -- * -- * Nonblocking requests will be indicated by ORing LOCK_NB to -- * the above operations -- * -- * For more information see the flock(2) manual page. -- * -- * Additionally fi->owner will be set to a value unique to -- * this open file. This same value will be supplied to -- * ->release() when the file is released. -- * -- * Note: if this method is not implemented, the kernel will still -- * allow file locking to work locally. Hence it is only -- * interesting for network filesystems and similar. -- */ -- int (*flock) (const char *, struct fuse_file_info *, int op); -- -- /** -- * Allocates space for an open file -- * -- * This function ensures that required space is allocated for specified -- * file. If this function returns success then any subsequent write -- * request to specified range is guaranteed not to fail because of lack -- * of space on the file system media. 
-- */ -- int (*fallocate) (const char *, int, off_t, off_t, -- struct fuse_file_info *); -- -- /** -- * Copy a range of data from one file to another -- * -- * Performs an optimized copy between two file descriptors without the -- * additional cost of transferring data through the FUSE kernel module -- * to user space (glibc) and then back into the FUSE filesystem again. -- * -- * In case this method is not implemented, glibc falls back to reading -- * data from the source and writing to the destination. Effectively -- * doing an inefficient copy of the data. -- */ -- ssize_t (*copy_file_range) (const char *path_in, -- struct fuse_file_info *fi_in, -- off_t offset_in, const char *path_out, -- struct fuse_file_info *fi_out, -- off_t offset_out, size_t size, int flags); -- -- /** -- * Find next data or hole after the specified offset -- */ -- off_t (*lseek) (const char *, off_t off, int whence, struct fuse_file_info *); -+ /** -+ * Get file attributes. -+ * -+ * Similar to stat(). The 'st_dev' and 'st_blksize' fields are -+ * ignored. The 'st_ino' field is ignored except if the 'use_ino' -+ * mount option is given. In that case it is passed to userspace, -+ * but libfuse and the kernel will still assign a different -+ * inode for internal use (called the "nodeid"). -+ * -+ * `fi` will always be NULL if the file is not currently open, but -+ * may also be NULL if the file is open. -+ */ -+ int (*getattr)(const char *, struct stat *, struct fuse_file_info *fi); -+ -+ /** -+ * Read the target of a symbolic link -+ * -+ * The buffer should be filled with a null terminated string. The -+ * buffer size argument includes the space for the terminating -+ * null character. If the linkname is too long to fit in the -+ * buffer, it should be truncated. The return value should be 0 -+ * for success. -+ */ -+ int (*readlink)(const char *, char *, size_t); -+ -+ /** -+ * Create a file node -+ * -+ * This is called for creation of all non-directory, non-symlink -+ * nodes. 
If the filesystem defines a create() method, then for -+ * regular files that will be called instead. -+ */ -+ int (*mknod)(const char *, mode_t, dev_t); -+ -+ /** -+ * Create a directory -+ * -+ * Note that the mode argument may not have the type specification -+ * bits set, i.e. S_ISDIR(mode) can be false. To obtain the -+ * correct directory type bits use mode|S_IFDIR -+ */ -+ int (*mkdir)(const char *, mode_t); -+ -+ /** Remove a file */ -+ int (*unlink)(const char *); -+ -+ /** Remove a directory */ -+ int (*rmdir)(const char *); -+ -+ /** Create a symbolic link */ -+ int (*symlink)(const char *, const char *); -+ -+ /** -+ * Rename a file -+ * -+ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -+ * RENAME_NOREPLACE is specified, the filesystem must not -+ * overwrite *newname* if it exists and return an error -+ * instead. If `RENAME_EXCHANGE` is specified, the filesystem -+ * must atomically exchange the two files, i.e. both must -+ * exist and neither may be deleted. -+ */ -+ int (*rename)(const char *, const char *, unsigned int flags); -+ -+ /** Create a hard link to a file */ -+ int (*link)(const char *, const char *); -+ -+ /** -+ * Change the permission bits of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ */ -+ int (*chmod)(const char *, mode_t, struct fuse_file_info *fi); -+ -+ /** -+ * Change the owner and group of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*chown)(const char *, uid_t, gid_t, struct fuse_file_info *fi); -+ -+ /** -+ * Change the size of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. 
-+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*truncate)(const char *, off_t, struct fuse_file_info *fi); -+ -+ /** -+ * Open a file -+ * -+ * Open flags are available in fi->flags. The following rules -+ * apply. -+ * -+ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -+ * filtered out / handled by the kernel. -+ * -+ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) -+ * should be used by the filesystem to check if the operation is -+ * permitted. If the ``-o default_permissions`` mount option is -+ * given, this check is already done by the kernel before calling -+ * open() and may thus be omitted by the filesystem. -+ * -+ * - When writeback caching is enabled, the kernel may send -+ * read requests even for files opened with O_WRONLY. The -+ * filesystem should be prepared to handle this. -+ * -+ * - When writeback caching is disabled, the filesystem is -+ * expected to properly handle the O_APPEND flag and ensure -+ * that each write is appending to the end of the file. -+ * -+ * - When writeback caching is enabled, the kernel will -+ * handle O_APPEND. However, unless all changes to the file -+ * come through the kernel this will not work reliably. The -+ * filesystem should thus either ignore the O_APPEND flag -+ * (and let the kernel handle it), or return an error -+ * (indicating that reliably O_APPEND is not available). -+ * -+ * Filesystem may store an arbitrary file handle (pointer, -+ * index, etc) in fi->fh, and use this in other all other file -+ * operations (read, write, flush, release, fsync). -+ * -+ * Filesystem may also implement stateless file I/O and not store -+ * anything in fi->fh. -+ * -+ * There are also some flags (direct_io, keep_cache) which the -+ * filesystem may set in fi, to change the way the file is opened. -+ * See fuse_file_info structure in for more details. 
-+ * -+ * If this request is answered with an error code of ENOSYS -+ * and FUSE_CAP_NO_OPEN_SUPPORT is set in -+ * `fuse_conn_info.capable`, this is treated as success and -+ * future calls to open will also succeed without being send -+ * to the filesystem process. -+ * -+ */ -+ int (*open)(const char *, struct fuse_file_info *); -+ -+ /** -+ * Read data from an open file -+ * -+ * Read should return exactly the number of bytes requested except -+ * on EOF or error, otherwise the rest of the data will be -+ * substituted with zeroes. An exception to this is when the -+ * 'direct_io' mount option is specified, in which case the return -+ * value of the read system call will reflect the return value of -+ * this operation. -+ */ -+ int (*read)(const char *, char *, size_t, off_t, struct fuse_file_info *); -+ -+ /** -+ * Write data to an open file -+ * -+ * Write should return exactly the number of bytes requested -+ * except on error. An exception to this is when the 'direct_io' -+ * mount option is specified (see read operation). -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*write)(const char *, const char *, size_t, off_t, -+ struct fuse_file_info *); -+ -+ /** -+ * Get file system statistics -+ * -+ * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored -+ */ -+ int (*statfs)(const char *, struct statvfs *); -+ -+ /** -+ * Possibly flush cached data -+ * -+ * BIG NOTE: This is not equivalent to fsync(). It's not a -+ * request to sync dirty data. -+ * -+ * Flush is called on each close() of a file descriptor, as opposed to -+ * release which is called on the close of the last file descriptor for -+ * a file. Under Linux, errors returned by flush() will be passed to -+ * userspace as errors from close(), so flush() is a good place to write -+ * back any cached dirty data. 
However, many applications ignore errors -+ * on close(), and on non-Linux systems, close() may succeed even if flush() -+ * returns an error. For these reasons, filesystems should not assume -+ * that errors returned by flush will ever be noticed or even -+ * delivered. -+ * -+ * NOTE: The flush() method may be called more than once for each -+ * open(). This happens if more than one file descriptor refers to an -+ * open file handle, e.g. due to dup(), dup2() or fork() calls. It is -+ * not possible to determine if a flush is final, so each flush should -+ * be treated equally. Multiple write-flush sequences are relatively -+ * rare, so this shouldn't be a problem. -+ * -+ * Filesystems shouldn't assume that flush will be called at any -+ * particular point. It may be called more times than expected, or not -+ * at all. -+ * -+ * [close]: -+ * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -+ */ -+ int (*flush)(const char *, struct fuse_file_info *); -+ -+ /** -+ * Release an open file -+ * -+ * Release is called when there are no more references to an open -+ * file: all file descriptors are closed and all memory mappings -+ * are unmapped. -+ * -+ * For every open() call there will be exactly one release() call -+ * with the same flags and file handle. It is possible to -+ * have a file opened more than once, in which case only the last -+ * release will mean, that no more reads/writes will happen on the -+ * file. The return value of release is ignored. -+ */ -+ int (*release)(const char *, struct fuse_file_info *); -+ -+ /* -+ * Synchronize file contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data. 
-+ */ -+ int (*fsync)(const char *, int, struct fuse_file_info *); -+ -+ /** Set extended attributes */ -+ int (*setxattr)(const char *, const char *, const char *, size_t, int); -+ -+ /** Get extended attributes */ -+ int (*getxattr)(const char *, const char *, char *, size_t); -+ -+ /** List extended attributes */ -+ int (*listxattr)(const char *, char *, size_t); -+ -+ /** Remove extended attributes */ -+ int (*removexattr)(const char *, const char *); -+ -+ /* -+ * Open directory -+ * -+ * Unless the 'default_permissions' mount option is given, -+ * this method should check if opendir is permitted for this -+ * directory. Optionally opendir may also return an arbitrary -+ * filehandle in the fuse_file_info structure, which will be -+ * passed to readdir, releasedir and fsyncdir. -+ */ -+ int (*opendir)(const char *, struct fuse_file_info *); -+ -+ /* -+ * Read directory -+ * -+ * The filesystem may choose between two modes of operation: -+ * -+ * 1) The readdir implementation ignores the offset parameter, and -+ * passes zero to the filler function's offset. The filler -+ * function will not return '1' (unless an error happens), so the -+ * whole directory is read in a single readdir operation. -+ * -+ * 2) The readdir implementation keeps track of the offsets of the -+ * directory entries. It uses the offset parameter and always -+ * passes non-zero offset to the filler function. When the buffer -+ * is full (or an error happens) the filler function will return -+ * '1'. 
-+ */ -+ int (*readdir)(const char *, void *, fuse_fill_dir_t, off_t, -+ struct fuse_file_info *, enum fuse_readdir_flags); -+ -+ /** -+ * Release directory -+ */ -+ int (*releasedir)(const char *, struct fuse_file_info *); -+ -+ /** -+ * Synchronize directory contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data -+ */ -+ int (*fsyncdir)(const char *, int, struct fuse_file_info *); -+ -+ /** -+ * Initialize filesystem -+ * -+ * The return value will passed in the `private_data` field of -+ * `struct fuse_context` to all file operations, and as a -+ * parameter to the destroy() method. It overrides the initial -+ * value provided to fuse_main() / fuse_new(). -+ */ -+ void *(*init)(struct fuse_conn_info *conn, struct fuse_config *cfg); -+ -+ /** -+ * Clean up filesystem -+ * -+ * Called on filesystem exit. -+ */ -+ void (*destroy)(void *private_data); -+ -+ /** -+ * Check file access permissions -+ * -+ * This will be called for the access() system call. If the -+ * 'default_permissions' mount option is given, this method is not -+ * called. -+ * -+ * This method is not called under Linux kernel versions 2.4.x -+ */ -+ int (*access)(const char *, int); -+ -+ /** -+ * Create and open a file -+ * -+ * If the file does not exist, first create it with the specified -+ * mode, and then open it. -+ * -+ * If this method is not implemented or under Linux kernel -+ * versions earlier than 2.6.15, the mknod() and open() methods -+ * will be called instead. -+ */ -+ int (*create)(const char *, mode_t, struct fuse_file_info *); -+ -+ /** -+ * Perform POSIX file locking operation -+ * -+ * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. -+ * -+ * For the meaning of fields in 'struct flock' see the man page -+ * for fcntl(2). The l_whence field will always be set to -+ * SEEK_SET. -+ * -+ * For checking lock ownership, the 'fuse_file_info->owner' -+ * argument must be used. 
-+ * -+ * For F_GETLK operation, the library will first check currently -+ * held locks, and if a conflicting lock is found it will return -+ * information without calling this method. This ensures, that -+ * for local locks the l_pid field is correctly filled in. The -+ * results may not be accurate in case of race conditions and in -+ * the presence of hard links, but it's unlikely that an -+ * application would rely on accurate GETLK results in these -+ * cases. If a conflicting lock is not found, this method will be -+ * called, and the filesystem may fill out l_pid by a meaningful -+ * value, or it may leave this field zero. -+ * -+ * For F_SETLK and F_SETLKW the l_pid field will be set to the pid -+ * of the process performing the locking operation. -+ * -+ * Note: if this method is not implemented, the kernel will still -+ * allow file locking to work locally. Hence it is only -+ * interesting for network filesystems and similar. -+ */ -+ int (*lock)(const char *, struct fuse_file_info *, int cmd, struct flock *); -+ -+ /** -+ * Change the access and modification times of a file with -+ * nanosecond resolution -+ * -+ * This supersedes the old utime() interface. New applications -+ * should use this. -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ * -+ * See the utimensat(2) man page for details. -+ */ -+ int (*utimens)(const char *, const struct timespec tv[2], -+ struct fuse_file_info *fi); -+ -+ /** -+ * Map block index within file to block index within device -+ * -+ * Note: This makes sense only for block device backed filesystems -+ * mounted with the 'blkdev' option -+ */ -+ int (*bmap)(const char *, size_t blocksize, uint64_t *idx); -+ -+ /** -+ * Ioctl -+ * -+ * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in -+ * 64bit environment. The size and direction of data is -+ * determined by _IOC_*() decoding of cmd. 
For _IOC_NONE, -+ * data will be NULL, for _IOC_WRITE data is out area, for -+ * _IOC_READ in area and if both are set in/out area. In all -+ * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. -+ * -+ * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a -+ * directory file handle. -+ * -+ * Note : the unsigned long request submitted by the application -+ * is truncated to 32 bits. -+ */ -+ int (*ioctl)(const char *, unsigned int cmd, void *arg, -+ struct fuse_file_info *, unsigned int flags, void *data); -+ -+ /** -+ * Poll for IO readiness events -+ * -+ * Note: If ph is non-NULL, the client should notify -+ * when IO readiness events occur by calling -+ * fuse_notify_poll() with the specified ph. -+ * -+ * Regardless of the number of times poll with a non-NULL ph -+ * is received, single notification is enough to clear all. -+ * Notifying more times incurs overhead but doesn't harm -+ * correctness. -+ * -+ * The callee is responsible for destroying ph with -+ * fuse_pollhandle_destroy() when no longer in use. -+ */ -+ int (*poll)(const char *, struct fuse_file_info *, -+ struct fuse_pollhandle *ph, unsigned *reventsp); -+ -+ /* -+ * Write contents of buffer to an open file -+ * -+ * Similar to the write() method, but data is supplied in a -+ * generic buffer. Use fuse_buf_copy() to transfer data to -+ * the destination. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*write_buf)(const char *, struct fuse_bufvec *buf, off_t off, -+ struct fuse_file_info *); -+ -+ /* -+ * Store data from an open file in a buffer -+ * -+ * Similar to the read() method, but data is stored and -+ * returned in a generic buffer. -+ * -+ * No actual copying of data has to take place, the source -+ * file descriptor may simply be stored in the buffer for -+ * later data transfer. -+ * -+ * The buffer must be allocated dynamically and stored at the -+ * location pointed to by bufp. 
If the buffer contains memory -+ * regions, they too must be allocated using malloc(). The -+ * allocated memory will be freed by the caller. -+ */ -+ int (*read_buf)(const char *, struct fuse_bufvec **bufp, size_t size, -+ off_t off, struct fuse_file_info *); -+ /** -+ * Perform BSD file locking operation -+ * -+ * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN -+ * -+ * Nonblocking requests will be indicated by ORing LOCK_NB to -+ * the above operations -+ * -+ * For more information see the flock(2) manual page. -+ * -+ * Additionally fi->owner will be set to a value unique to -+ * this open file. This same value will be supplied to -+ * ->release() when the file is released. -+ * -+ * Note: if this method is not implemented, the kernel will still -+ * allow file locking to work locally. Hence it is only -+ * interesting for network filesystems and similar. -+ */ -+ int (*flock)(const char *, struct fuse_file_info *, int op); -+ -+ /** -+ * Allocates space for an open file -+ * -+ * This function ensures that required space is allocated for specified -+ * file. If this function returns success then any subsequent write -+ * request to specified range is guaranteed not to fail because of lack -+ * of space on the file system media. -+ */ -+ int (*fallocate)(const char *, int, off_t, off_t, struct fuse_file_info *); -+ -+ /** -+ * Copy a range of data from one file to another -+ * -+ * Performs an optimized copy between two file descriptors without the -+ * additional cost of transferring data through the FUSE kernel module -+ * to user space (glibc) and then back into the FUSE filesystem again. -+ * -+ * In case this method is not implemented, glibc falls back to reading -+ * data from the source and writing to the destination. Effectively -+ * doing an inefficient copy of the data. 
-+ */ -+ ssize_t (*copy_file_range)(const char *path_in, -+ struct fuse_file_info *fi_in, off_t offset_in, -+ const char *path_out, -+ struct fuse_file_info *fi_out, off_t offset_out, -+ size_t size, int flags); -+ -+ /** -+ * Find next data or hole after the specified offset -+ */ -+ off_t (*lseek)(const char *, off_t off, int whence, -+ struct fuse_file_info *); - }; - --/** Extra context that may be needed by some filesystems -+/* -+ * Extra context that may be needed by some filesystems - * - * The uid, gid and pid fields are not filled in case of a writepage - * operation. - */ - struct fuse_context { -- /** Pointer to the fuse object */ -- struct fuse *fuse; -+ /** Pointer to the fuse object */ -+ struct fuse *fuse; - -- /** User ID of the calling process */ -- uid_t uid; -+ /** User ID of the calling process */ -+ uid_t uid; - -- /** Group ID of the calling process */ -- gid_t gid; -+ /** Group ID of the calling process */ -+ gid_t gid; - -- /** Process ID of the calling thread */ -- pid_t pid; -+ /** Process ID of the calling thread */ -+ pid_t pid; - -- /** Private filesystem data */ -- void *private_data; -+ /** Private filesystem data */ -+ void *private_data; - -- /** Umask of the calling process */ -- mode_t umask; -+ /** Umask of the calling process */ -+ mode_t umask; - }; - - /** -@@ -859,15 +880,15 @@ struct fuse_context { - * Example usage, see hello.c - */ - /* -- int fuse_main(int argc, char *argv[], const struct fuse_operations *op, -- void *private_data); --*/ --#define fuse_main(argc, argv, op, private_data) \ -- fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) -+ * int fuse_main(int argc, char *argv[], const struct fuse_operations *op, -+ * void *private_data); -+ */ -+#define fuse_main(argc, argv, op, private_data) \ -+ fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) - --/* ----------------------------------------------------------- * -- * More detailed API * -- * 
----------------------------------------------------------- */ -+/* -+ * More detailed API -+ */ - - /** - * Print available options (high- and low-level) to stdout. This is -@@ -910,12 +931,13 @@ void fuse_lib_help(struct fuse_args *args); - * @return the created FUSE handle - */ - #if FUSE_USE_VERSION == 30 --struct fuse *fuse_new_30(struct fuse_args *args, const struct fuse_operations *op, -- size_t op_size, void *private_data); -+struct fuse *fuse_new_30(struct fuse_args *args, -+ const struct fuse_operations *op, size_t op_size, -+ void *private_data); - #define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data) - #else - struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op, -- size_t op_size, void *private_data); -+ size_t op_size, void *private_data); - #endif - - /** -@@ -940,7 +962,7 @@ void fuse_unmount(struct fuse *f); - /** - * Destroy the FUSE handle. - * -- * NOTE: This function does not unmount the filesystem. If this is -+ * NOTE: This function does not unmount the filesystem. If this is - * needed, call fuse_unmount() before calling this function. - * - * @param f the FUSE handle -@@ -1030,7 +1052,7 @@ int fuse_invalidate_path(struct fuse *f, const char *path); - * Do not call this directly, use fuse_main() - */ - int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, -- size_t op_size, void *private_data); -+ size_t op_size, void *private_data); - - /** - * Start the cleanup thread when using option "remember". 
-@@ -1081,89 +1103,87 @@ struct fuse_fs; - */ - - int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, -- struct fuse_file_info *fi); --int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, -- const char *newpath, unsigned int flags); -+ struct fuse_file_info *fi); -+int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, const char *newpath, -+ unsigned int flags); - int fuse_fs_unlink(struct fuse_fs *fs, const char *path); - int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); --int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, -- const char *path); -+int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, const char *path); - int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); --int fuse_fs_release(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+int fuse_fs_release(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); - int fuse_fs_open(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, -- off_t off, struct fuse_file_info *fi); -+ off_t off, struct fuse_file_info *fi); - int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, -- struct fuse_bufvec **bufp, size_t size, off_t off, -- struct fuse_file_info *fi); -+ struct fuse_bufvec **bufp, size_t size, off_t off, -+ struct fuse_file_info *fi); - int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, -- size_t size, off_t off, struct fuse_file_info *fi); -+ size_t size, off_t off, struct fuse_file_info *fi); - int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, -- struct fuse_bufvec *buf, off_t off, -- struct fuse_file_info *fi); -+ struct fuse_bufvec *buf, off_t off, -+ struct fuse_file_info *fi); - int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int 
fuse_fs_flush(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf); - int fuse_fs_opendir(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, -- fuse_fill_dir_t filler, off_t off, -- struct fuse_file_info *fi, enum fuse_readdir_flags flags); -+ fuse_fill_dir_t filler, off_t off, -+ struct fuse_file_info *fi, enum fuse_readdir_flags flags); - int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_lock(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi, int cmd, struct flock *lock); -+ struct fuse_file_info *fi, int cmd, struct flock *lock); - int fuse_fs_flock(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi, int op); -+ struct fuse_file_info *fi, int op); - int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_utimens(struct fuse_fs *fs, const char *path, -- const struct timespec tv[2], struct fuse_file_info *fi); -+ const struct timespec tv[2], struct fuse_file_info *fi); - int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask); - int fuse_fs_readlink(struct fuse_fs *fs, 
const char *path, char *buf, -- size_t len); -+ size_t len); - int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, -- dev_t rdev); -+ dev_t rdev); - int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode); - int fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, -- const char *value, size_t size, int flags); -+ const char *value, size_t size, int flags); - int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, -- char *value, size_t size); -+ char *value, size_t size); - int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, -- size_t size); --int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, -- const char *name); -+ size_t size); -+int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, const char *name); - int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, -- uint64_t *idx); -+ uint64_t *idx); - int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, -- void *arg, struct fuse_file_info *fi, unsigned int flags, -- void *data); -+ void *arg, struct fuse_file_info *fi, unsigned int flags, -+ void *data); - int fuse_fs_poll(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi, struct fuse_pollhandle *ph, -- unsigned *reventsp); -+ struct fuse_file_info *fi, struct fuse_pollhandle *ph, -+ unsigned *reventsp); - int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, -- off_t offset, off_t length, struct fuse_file_info *fi); -+ off_t offset, off_t length, struct fuse_file_info *fi); - ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, -- struct fuse_file_info *fi_in, off_t off_in, -- const char *path_out, -- struct fuse_file_info *fi_out, off_t off_out, -- size_t len, int flags); -+ struct fuse_file_info *fi_in, off_t off_in, -+ const char *path_out, -+ struct fuse_file_info *fi_out, off_t off_out, -+ size_t len, int flags); - off_t fuse_fs_lseek(struct fuse_fs *fs, const 
char *path, off_t off, int whence, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, -- struct fuse_config *cfg); -+ struct fuse_config *cfg); - void fuse_fs_destroy(struct fuse_fs *fs); - - int fuse_notify_poll(struct fuse_pollhandle *ph); -@@ -1182,7 +1202,7 @@ int fuse_notify_poll(struct fuse_pollhandle *ph); - * @return a new filesystem object - */ - struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, -- void *private_data); -+ void *private_data); - - /** - * Factory for creating filesystem objects -@@ -1199,7 +1219,7 @@ struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, - * @return the new filesystem object - */ - typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, -- struct fuse_fs *fs[]); -+ struct fuse_fs *fs[]); - /** - * Register filesystem module - * -@@ -1211,7 +1231,7 @@ typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, - * @param factory_ the factory function for this filesystem module - */ - #define FUSE_REGISTER_MODULE(name_, factory_) \ -- fuse_module_factory_t fuse_module_ ## name_ ## _factory = factory_ -+ fuse_module_factory_t fuse_module_##name_##_factory = factory_ - - /** Get session from fuse object */ - struct fuse_session *fuse_get_session(struct fuse *f); -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index bf8f8cc..bd9bf86 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -1,21 +1,23 @@ --/* FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+/* -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. 
-+ */ - - /** @file */ - - #if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_) --#error "Never include directly; use or instead." -+#error \ -+ "Never include directly; use or instead." - #endif - - #ifndef FUSE_COMMON_H_ - #define FUSE_COMMON_H_ - --#include "fuse_opt.h" - #include "fuse_log.h" -+#include "fuse_opt.h" - #include - #include - -@@ -25,7 +27,7 @@ - /** Minor version of FUSE library interface */ - #define FUSE_MINOR_VERSION 2 - --#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) -+#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) - #define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) - - /** -@@ -38,67 +40,83 @@ - * descriptors can share a single file handle. - */ - struct fuse_file_info { -- /** Open flags. Available in open() and release() */ -- int flags; -- -- /** In case of a write operation indicates if this was caused -- by a delayed write from the page cache. If so, then the -- context's pid, uid, and gid fields will not be valid, and -- the *fh* value may not match the *fh* value that would -- have been sent with the corresponding individual write -- requests if write caching had been disabled. */ -- unsigned int writepage : 1; -- -- /** Can be filled in by open, to use direct I/O on this file. */ -- unsigned int direct_io : 1; -- -- /** Can be filled in by open. It signals the kernel that any -- currently cached file data (ie., data that the filesystem -- provided the last time the file was open) need not be -- invalidated. Has no effect when set in other contexts (in -- particular it does nothing when set by opendir()). */ -- unsigned int keep_cache : 1; -- -- /** Indicates a flush operation. Set in flush operation, also -- maybe set in highlevel lock operation and lowlevel release -- operation. */ -- unsigned int flush : 1; -- -- /** Can be filled in by open, to indicate that the file is not -- seekable. 
*/ -- unsigned int nonseekable : 1; -- -- /* Indicates that flock locks for this file should be -- released. If set, lock_owner shall contain a valid value. -- May only be set in ->release(). */ -- unsigned int flock_release : 1; -- -- /** Can be filled in by opendir. It signals the kernel to -- enable caching of entries returned by readdir(). Has no -- effect when set in other contexts (in particular it does -- nothing when set by open()). */ -- unsigned int cache_readdir : 1; -- -- /** Padding. Reserved for future use*/ -- unsigned int padding : 25; -- unsigned int padding2 : 32; -- -- /** File handle id. May be filled in by filesystem in create, -- * open, and opendir(). Available in most other file operations on the -- * same file handle. */ -- uint64_t fh; -- -- /** Lock owner id. Available in locking operations and flush */ -- uint64_t lock_owner; -- -- /** Requested poll events. Available in ->poll. Only set on kernels -- which support it. If unsupported, this field is set to zero. */ -- uint32_t poll_events; -+ /** Open flags. Available in open() and release() */ -+ int flags; -+ -+ /* -+ * In case of a write operation indicates if this was caused -+ * by a delayed write from the page cache. If so, then the -+ * context's pid, uid, and gid fields will not be valid, and -+ * the *fh* value may not match the *fh* value that would -+ * have been sent with the corresponding individual write -+ * requests if write caching had been disabled. -+ */ -+ unsigned int writepage:1; -+ -+ /** Can be filled in by open, to use direct I/O on this file. */ -+ unsigned int direct_io:1; -+ -+ /* -+ * Can be filled in by open. It signals the kernel that any -+ * currently cached file data (ie., data that the filesystem -+ * provided the last time the file was open) need not be -+ * invalidated. Has no effect when set in other contexts (in -+ * particular it does nothing when set by opendir()). -+ */ -+ unsigned int keep_cache:1; -+ -+ /* -+ * Indicates a flush operation. 
Set in flush operation, also -+ * maybe set in highlevel lock operation and lowlevel release -+ * operation. -+ */ -+ unsigned int flush:1; -+ -+ /* -+ * Can be filled in by open, to indicate that the file is not -+ * seekable. -+ */ -+ unsigned int nonseekable:1; -+ -+ /* -+ * Indicates that flock locks for this file should be -+ * released. If set, lock_owner shall contain a valid value. -+ * May only be set in ->release(). -+ */ -+ unsigned int flock_release:1; -+ -+ /* -+ * Can be filled in by opendir. It signals the kernel to -+ * enable caching of entries returned by readdir(). Has no -+ * effect when set in other contexts (in particular it does -+ * nothing when set by open()). -+ */ -+ unsigned int cache_readdir:1; -+ -+ /** Padding. Reserved for future use*/ -+ unsigned int padding:25; -+ unsigned int padding2:32; -+ -+ /* -+ * File handle id. May be filled in by filesystem in create, -+ * open, and opendir(). Available in most other file operations on the -+ * same file handle. -+ */ -+ uint64_t fh; -+ -+ /** Lock owner id. Available in locking operations and flush */ -+ uint64_t lock_owner; -+ -+ /* -+ * Requested poll events. Available in ->poll. Only set on kernels -+ * which support it. If unsupported, this field is set to zero. -+ */ -+ uint32_t poll_events; - }; - --/************************************************************************** -- * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * -- **************************************************************************/ -+/* -+ * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' -+ */ - - /** - * Indicates that the filesystem supports asynchronous read requests. -@@ -110,7 +128,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_ASYNC_READ (1 << 0) -+#define FUSE_CAP_ASYNC_READ (1 << 0) - - /** - * Indicates that the filesystem supports "remote" locking. 
-@@ -118,7 +136,7 @@ struct fuse_file_info { - * This feature is enabled by default when supported by the kernel, - * and if getlk() and setlk() handlers are implemented. - */ --#define FUSE_CAP_POSIX_LOCKS (1 << 1) -+#define FUSE_CAP_POSIX_LOCKS (1 << 1) - - /** - * Indicates that the filesystem supports the O_TRUNC open flag. If -@@ -127,14 +145,14 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) -+#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) - - /** - * Indicates that the filesystem supports lookups of "." and "..". - * - * This feature is disabled by default. - */ --#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) -+#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) - - /** - * Indicates that the kernel should not apply the umask to the -@@ -142,7 +160,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_DONT_MASK (1 << 6) -+#define FUSE_CAP_DONT_MASK (1 << 6) - - /** - * Indicates that libfuse should try to use splice() when writing to -@@ -150,7 +168,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_SPLICE_WRITE (1 << 7) -+#define FUSE_CAP_SPLICE_WRITE (1 << 7) - - /** - * Indicates that libfuse should try to move pages instead of copying when -@@ -158,7 +176,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_SPLICE_MOVE (1 << 8) -+#define FUSE_CAP_SPLICE_MOVE (1 << 8) - - /** - * Indicates that libfuse should try to use splice() when reading from -@@ -167,7 +185,7 @@ struct fuse_file_info { - * This feature is enabled by default when supported by the kernel and - * if the filesystem implements a write_buf() handler. 
- */ --#define FUSE_CAP_SPLICE_READ (1 << 9) -+#define FUSE_CAP_SPLICE_READ (1 << 9) - - /** - * If set, the calls to flock(2) will be emulated using POSIX locks and must -@@ -180,14 +198,14 @@ struct fuse_file_info { - * This feature is enabled by default when supported by the kernel and - * if the filesystem implements a flock() handler. - */ --#define FUSE_CAP_FLOCK_LOCKS (1 << 10) -+#define FUSE_CAP_FLOCK_LOCKS (1 << 10) - - /** - * Indicates that the filesystem supports ioctl's on directories. - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_IOCTL_DIR (1 << 11) -+#define FUSE_CAP_IOCTL_DIR (1 << 11) - - /** - * Traditionally, while a file is open the FUSE kernel module only -@@ -209,7 +227,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) -+#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) - - /** - * Indicates that the filesystem supports readdirplus. -@@ -217,7 +235,7 @@ struct fuse_file_info { - * This feature is enabled by default when supported by the kernel and if the - * filesystem implements a readdirplus() handler. - */ --#define FUSE_CAP_READDIRPLUS (1 << 13) -+#define FUSE_CAP_READDIRPLUS (1 << 13) - - /** - * Indicates that the filesystem supports adaptive readdirplus. -@@ -245,7 +263,7 @@ struct fuse_file_info { - * if the filesystem implements both a readdirplus() and a readdir() - * handler. - */ --#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) -+#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) - - /** - * Indicates that the filesystem supports asynchronous direct I/O submission. -@@ -256,7 +274,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_ASYNC_DIO (1 << 15) -+#define FUSE_CAP_ASYNC_DIO (1 << 15) - - /** - * Indicates that writeback caching should be enabled. 
This means that -@@ -265,7 +283,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) -+#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) - - /** - * Indicates support for zero-message opens. If this flag is set in -@@ -278,7 +296,7 @@ struct fuse_file_info { - * Setting (or unsetting) this flag in the `want` field has *no - * effect*. - */ --#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) -+#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) - - /** - * Indicates support for parallel directory operations. If this flag -@@ -288,7 +306,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) -+#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) - - /** - * Indicates support for POSIX ACLs. -@@ -307,7 +325,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_POSIX_ACL (1 << 19) -+#define FUSE_CAP_POSIX_ACL (1 << 19) - - /** - * Indicates that the filesystem is responsible for unsetting -@@ -316,7 +334,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) -+#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) - - /** - * Indicates support for zero-message opendirs. If this flag is set in -@@ -328,7 +346,7 @@ struct fuse_file_info { - * - * Setting (or unsetting) this flag in the `want` field has *no effect*. 
- */ --#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) -+#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) - - /** - * Ioctl flags -@@ -340,12 +358,12 @@ struct fuse_file_info { - * - * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs - */ --#define FUSE_IOCTL_COMPAT (1 << 0) --#define FUSE_IOCTL_UNRESTRICTED (1 << 1) --#define FUSE_IOCTL_RETRY (1 << 2) --#define FUSE_IOCTL_DIR (1 << 4) -+#define FUSE_IOCTL_COMPAT (1 << 0) -+#define FUSE_IOCTL_UNRESTRICTED (1 << 1) -+#define FUSE_IOCTL_RETRY (1 << 2) -+#define FUSE_IOCTL_DIR (1 << 4) - --#define FUSE_IOCTL_MAX_IOV 256 -+#define FUSE_IOCTL_MAX_IOV 256 - - /** - * Connection information, passed to the ->init() method -@@ -355,114 +373,114 @@ struct fuse_file_info { - * value must usually be smaller than the indicated value. - */ - struct fuse_conn_info { -- /** -- * Major version of the protocol (read-only) -- */ -- unsigned proto_major; -- -- /** -- * Minor version of the protocol (read-only) -- */ -- unsigned proto_minor; -- -- /** -- * Maximum size of the write buffer -- */ -- unsigned max_write; -- -- /** -- * Maximum size of read requests. A value of zero indicates no -- * limit. However, even if the filesystem does not specify a -- * limit, the maximum size of read requests will still be -- * limited by the kernel. -- * -- * NOTE: For the time being, the maximum size of read requests -- * must be set both here *and* passed to fuse_session_new() -- * using the ``-o max_read=`` mount option. At some point -- * in the future, specifying the mount option will no longer -- * be necessary. -- */ -- unsigned max_read; -- -- /** -- * Maximum readahead -- */ -- unsigned max_readahead; -- -- /** -- * Capability flags that the kernel supports (read-only) -- */ -- unsigned capable; -- -- /** -- * Capability flags that the filesystem wants to enable. -- * -- * libfuse attempts to initialize this field with -- * reasonable default values before calling the init() handler. 
-- */ -- unsigned want; -- -- /** -- * Maximum number of pending "background" requests. A -- * background request is any type of request for which the -- * total number is not limited by other means. As of kernel -- * 4.8, only two types of requests fall into this category: -- * -- * 1. Read-ahead requests -- * 2. Asynchronous direct I/O requests -- * -- * Read-ahead requests are generated (if max_readahead is -- * non-zero) by the kernel to preemptively fill its caches -- * when it anticipates that userspace will soon read more -- * data. -- * -- * Asynchronous direct I/O requests are generated if -- * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large -- * direct I/O request. In this case the kernel will internally -- * split it up into multiple smaller requests and submit them -- * to the filesystem concurrently. -- * -- * Note that the following requests are *not* background -- * requests: writeback requests (limited by the kernel's -- * flusher algorithm), regular (i.e., synchronous and -- * buffered) userspace read/write requests (limited to one per -- * thread), asynchronous read requests (Linux's io_submit(2) -- * call actually blocks, so these are also limited to one per -- * thread). -- */ -- unsigned max_background; -- -- /** -- * Kernel congestion threshold parameter. If the number of pending -- * background requests exceeds this number, the FUSE kernel module will -- * mark the filesystem as "congested". This instructs the kernel to -- * expect that queued requests will take some time to complete, and to -- * adjust its algorithms accordingly (e.g. by putting a waiting thread -- * to sleep instead of using a busy-loop). -- */ -- unsigned congestion_threshold; -- -- /** -- * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible -- * for updating mtime and ctime when write requests are received. The -- * updated values are passed to the filesystem with setattr() requests. 
-- * However, if the filesystem does not support the full resolution of -- * the kernel timestamps (nanoseconds), the mtime and ctime values used -- * by kernel and filesystem will differ (and result in an apparent -- * change of times after a cache flush). -- * -- * To prevent this problem, this variable can be used to inform the -- * kernel about the timestamp granularity supported by the file-system. -- * The value should be power of 10. The default is 1, i.e. full -- * nano-second resolution. Filesystems supporting only second resolution -- * should set this to 1000000000. -- */ -- unsigned time_gran; -- -- /** -- * For future use. -- */ -- unsigned reserved[22]; -+ /** -+ * Major version of the protocol (read-only) -+ */ -+ unsigned proto_major; -+ -+ /** -+ * Minor version of the protocol (read-only) -+ */ -+ unsigned proto_minor; -+ -+ /** -+ * Maximum size of the write buffer -+ */ -+ unsigned max_write; -+ -+ /** -+ * Maximum size of read requests. A value of zero indicates no -+ * limit. However, even if the filesystem does not specify a -+ * limit, the maximum size of read requests will still be -+ * limited by the kernel. -+ * -+ * NOTE: For the time being, the maximum size of read requests -+ * must be set both here *and* passed to fuse_session_new() -+ * using the ``-o max_read=`` mount option. At some point -+ * in the future, specifying the mount option will no longer -+ * be necessary. -+ */ -+ unsigned max_read; -+ -+ /** -+ * Maximum readahead -+ */ -+ unsigned max_readahead; -+ -+ /** -+ * Capability flags that the kernel supports (read-only) -+ */ -+ unsigned capable; -+ -+ /** -+ * Capability flags that the filesystem wants to enable. -+ * -+ * libfuse attempts to initialize this field with -+ * reasonable default values before calling the init() handler. -+ */ -+ unsigned want; -+ -+ /** -+ * Maximum number of pending "background" requests. 
A -+ * background request is any type of request for which the -+ * total number is not limited by other means. As of kernel -+ * 4.8, only two types of requests fall into this category: -+ * -+ * 1. Read-ahead requests -+ * 2. Asynchronous direct I/O requests -+ * -+ * Read-ahead requests are generated (if max_readahead is -+ * non-zero) by the kernel to preemptively fill its caches -+ * when it anticipates that userspace will soon read more -+ * data. -+ * -+ * Asynchronous direct I/O requests are generated if -+ * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large -+ * direct I/O request. In this case the kernel will internally -+ * split it up into multiple smaller requests and submit them -+ * to the filesystem concurrently. -+ * -+ * Note that the following requests are *not* background -+ * requests: writeback requests (limited by the kernel's -+ * flusher algorithm), regular (i.e., synchronous and -+ * buffered) userspace read/write requests (limited to one per -+ * thread), asynchronous read requests (Linux's io_submit(2) -+ * call actually blocks, so these are also limited to one per -+ * thread). -+ */ -+ unsigned max_background; -+ -+ /** -+ * Kernel congestion threshold parameter. If the number of pending -+ * background requests exceeds this number, the FUSE kernel module will -+ * mark the filesystem as "congested". This instructs the kernel to -+ * expect that queued requests will take some time to complete, and to -+ * adjust its algorithms accordingly (e.g. by putting a waiting thread -+ * to sleep instead of using a busy-loop). -+ */ -+ unsigned congestion_threshold; -+ -+ /** -+ * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible -+ * for updating mtime and ctime when write requests are received. The -+ * updated values are passed to the filesystem with setattr() requests. 
-+ * However, if the filesystem does not support the full resolution of -+ * the kernel timestamps (nanoseconds), the mtime and ctime values used -+ * by kernel and filesystem will differ (and result in an apparent -+ * change of times after a cache flush). -+ * -+ * To prevent this problem, this variable can be used to inform the -+ * kernel about the timestamp granularity supported by the file-system. -+ * The value should be power of 10. The default is 1, i.e. full -+ * nano-second resolution. Filesystems supporting only second resolution -+ * should set this to 1000000000. -+ */ -+ unsigned time_gran; -+ -+ /** -+ * For future use. -+ */ -+ unsigned reserved[22]; - }; - - struct fuse_session; -@@ -489,21 +507,20 @@ struct fuse_conn_info_opts; - * -o async_read sets FUSE_CAP_ASYNC_READ in conn->want - * -o sync_read unsets FUSE_CAP_ASYNC_READ in conn->want - * -o atomic_o_trunc sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want -- * -o no_remote_lock Equivalent to -o no_remote_flock,no_remote_posix_lock -- * -o no_remote_flock Unsets FUSE_CAP_FLOCK_LOCKS in conn->want -- * -o no_remote_posix_lock Unsets FUSE_CAP_POSIX_LOCKS in conn->want -- * -o [no_]splice_write (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want -- * -o [no_]splice_move (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want -- * -o [no_]splice_read (un-)sets FUSE_CAP_SPLICE_READ in conn->want -- * -o [no_]auto_inval_data (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want -- * -o readdirplus=no unsets FUSE_CAP_READDIRPLUS in conn->want -- * -o readdirplus=yes sets FUSE_CAP_READDIRPLUS and unsets -- * FUSE_CAP_READDIRPLUS_AUTO in conn->want -- * -o readdirplus=auto sets FUSE_CAP_READDIRPLUS and -- * FUSE_CAP_READDIRPLUS_AUTO in conn->want -- * -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in conn->want -- * -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want -- * -o time_gran=N sets conn->time_gran -+ * -o no_remote_lock Equivalent to -o -+ *no_remote_flock,no_remote_posix_lock -o no_remote_flock 
Unsets -+ *FUSE_CAP_FLOCK_LOCKS in conn->want -o no_remote_posix_lock Unsets -+ *FUSE_CAP_POSIX_LOCKS in conn->want -o [no_]splice_write (un-)sets -+ *FUSE_CAP_SPLICE_WRITE in conn->want -o [no_]splice_move (un-)sets -+ *FUSE_CAP_SPLICE_MOVE in conn->want -o [no_]splice_read (un-)sets -+ *FUSE_CAP_SPLICE_READ in conn->want -o [no_]auto_inval_data (un-)sets -+ *FUSE_CAP_AUTO_INVAL_DATA in conn->want -o readdirplus=no unsets -+ *FUSE_CAP_READDIRPLUS in conn->want -o readdirplus=yes sets -+ *FUSE_CAP_READDIRPLUS and unsets FUSE_CAP_READDIRPLUS_AUTO in conn->want -o -+ *readdirplus=auto sets FUSE_CAP_READDIRPLUS and FUSE_CAP_READDIRPLUS_AUTO -+ *in conn->want -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in -+ *conn->want -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in -+ *conn->want -o time_gran=N sets conn->time_gran - * - * Known options will be removed from *args*, unknown options will be - * passed through unchanged. -@@ -511,7 +528,7 @@ struct fuse_conn_info_opts; - * @param args argument vector (input+output) - * @return parsed options - **/ --struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); -+struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args); - - /** - * This function applies the (parsed) parameters in *opts* to the -@@ -521,7 +538,7 @@ struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); - * option has been explicitly set. 
- */ - void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, -- struct fuse_conn_info *conn); -+ struct fuse_conn_info *conn); - - /** - * Go into the background -@@ -552,81 +569,81 @@ const char *fuse_pkgversion(void); - */ - void fuse_pollhandle_destroy(struct fuse_pollhandle *ph); - --/* ----------------------------------------------------------- * -- * Data buffer * -- * ----------------------------------------------------------- */ -+/* -+ * Data buffer -+ */ - - /** - * Buffer flags - */ - enum fuse_buf_flags { -- /** -- * Buffer contains a file descriptor -- * -- * If this flag is set, the .fd field is valid, otherwise the -- * .mem fields is valid. -- */ -- FUSE_BUF_IS_FD = (1 << 1), -- -- /** -- * Seek on the file descriptor -- * -- * If this flag is set then the .pos field is valid and is -- * used to seek to the given offset before performing -- * operation on file descriptor. -- */ -- FUSE_BUF_FD_SEEK = (1 << 2), -- -- /** -- * Retry operation on file descriptor -- * -- * If this flag is set then retry operation on file descriptor -- * until .size bytes have been copied or an error or EOF is -- * detected. -- */ -- FUSE_BUF_FD_RETRY = (1 << 3), -+ /** -+ * Buffer contains a file descriptor -+ * -+ * If this flag is set, the .fd field is valid, otherwise the -+ * .mem fields is valid. -+ */ -+ FUSE_BUF_IS_FD = (1 << 1), -+ -+ /** -+ * Seek on the file descriptor -+ * -+ * If this flag is set then the .pos field is valid and is -+ * used to seek to the given offset before performing -+ * operation on file descriptor. -+ */ -+ FUSE_BUF_FD_SEEK = (1 << 2), -+ -+ /** -+ * Retry operation on file descriptor -+ * -+ * If this flag is set then retry operation on file descriptor -+ * until .size bytes have been copied or an error or EOF is -+ * detected. 
-+ */ -+ FUSE_BUF_FD_RETRY = (1 << 3), - }; - - /** - * Buffer copy flags - */ - enum fuse_buf_copy_flags { -- /** -- * Don't use splice(2) -- * -- * Always fall back to using read and write instead of -- * splice(2) to copy data from one file descriptor to another. -- * -- * If this flag is not set, then only fall back if splice is -- * unavailable. -- */ -- FUSE_BUF_NO_SPLICE = (1 << 1), -- -- /** -- * Force splice -- * -- * Always use splice(2) to copy data from one file descriptor -- * to another. If splice is not available, return -EINVAL. -- */ -- FUSE_BUF_FORCE_SPLICE = (1 << 2), -- -- /** -- * Try to move data with splice. -- * -- * If splice is used, try to move pages from the source to the -- * destination instead of copying. See documentation of -- * SPLICE_F_MOVE in splice(2) man page. -- */ -- FUSE_BUF_SPLICE_MOVE = (1 << 3), -- -- /** -- * Don't block on the pipe when copying data with splice -- * -- * Makes the operations on the pipe non-blocking (if the pipe -- * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) -- * man page. -- */ -- FUSE_BUF_SPLICE_NONBLOCK= (1 << 4), -+ /** -+ * Don't use splice(2) -+ * -+ * Always fall back to using read and write instead of -+ * splice(2) to copy data from one file descriptor to another. -+ * -+ * If this flag is not set, then only fall back if splice is -+ * unavailable. -+ */ -+ FUSE_BUF_NO_SPLICE = (1 << 1), -+ -+ /** -+ * Force splice -+ * -+ * Always use splice(2) to copy data from one file descriptor -+ * to another. If splice is not available, return -EINVAL. -+ */ -+ FUSE_BUF_FORCE_SPLICE = (1 << 2), -+ -+ /** -+ * Try to move data with splice. -+ * -+ * If splice is used, try to move pages from the source to the -+ * destination instead of copying. See documentation of -+ * SPLICE_F_MOVE in splice(2) man page. 
-+ */ -+ FUSE_BUF_SPLICE_MOVE = (1 << 3), -+ -+ /** -+ * Don't block on the pipe when copying data with splice -+ * -+ * Makes the operations on the pipe non-blocking (if the pipe -+ * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) -+ * man page. -+ */ -+ FUSE_BUF_SPLICE_NONBLOCK = (1 << 4), - }; - - /** -@@ -636,36 +653,36 @@ enum fuse_buf_copy_flags { - * be supplied as a memory pointer or as a file descriptor - */ - struct fuse_buf { -- /** -- * Size of data in bytes -- */ -- size_t size; -- -- /** -- * Buffer flags -- */ -- enum fuse_buf_flags flags; -- -- /** -- * Memory pointer -- * -- * Used unless FUSE_BUF_IS_FD flag is set. -- */ -- void *mem; -- -- /** -- * File descriptor -- * -- * Used if FUSE_BUF_IS_FD flag is set. -- */ -- int fd; -- -- /** -- * File position -- * -- * Used if FUSE_BUF_FD_SEEK flag is set. -- */ -- off_t pos; -+ /** -+ * Size of data in bytes -+ */ -+ size_t size; -+ -+ /** -+ * Buffer flags -+ */ -+ enum fuse_buf_flags flags; -+ -+ /** -+ * Memory pointer -+ * -+ * Used unless FUSE_BUF_IS_FD flag is set. -+ */ -+ void *mem; -+ -+ /** -+ * File descriptor -+ * -+ * Used if FUSE_BUF_IS_FD flag is set. -+ */ -+ int fd; -+ -+ /** -+ * File position -+ * -+ * Used if FUSE_BUF_FD_SEEK flag is set. -+ */ -+ off_t pos; - }; - - /** -@@ -677,41 +694,39 @@ struct fuse_buf { - * Allocate dynamically to add more than one buffer. 
- */ - struct fuse_bufvec { -- /** -- * Number of buffers in the array -- */ -- size_t count; -- -- /** -- * Index of current buffer within the array -- */ -- size_t idx; -- -- /** -- * Current offset within the current buffer -- */ -- size_t off; -- -- /** -- * Array of buffers -- */ -- struct fuse_buf buf[1]; -+ /** -+ * Number of buffers in the array -+ */ -+ size_t count; -+ -+ /** -+ * Index of current buffer within the array -+ */ -+ size_t idx; -+ -+ /** -+ * Current offset within the current buffer -+ */ -+ size_t off; -+ -+ /** -+ * Array of buffers -+ */ -+ struct fuse_buf buf[1]; - }; - - /* Initialize bufvec with a single buffer of given size */ --#define FUSE_BUFVEC_INIT(size__) \ -- ((struct fuse_bufvec) { \ -- /* .count= */ 1, \ -- /* .idx = */ 0, \ -- /* .off = */ 0, \ -- /* .buf = */ { /* [0] = */ { \ -- /* .size = */ (size__), \ -- /* .flags = */ (enum fuse_buf_flags) 0, \ -- /* .mem = */ NULL, \ -- /* .fd = */ -1, \ -- /* .pos = */ 0, \ -- } } \ -- } ) -+#define FUSE_BUFVEC_INIT(size__) \ -+ ((struct fuse_bufvec){ /* .count= */ 1, \ -+ /* .idx = */ 0, \ -+ /* .off = */ 0, /* .buf = */ \ -+ { /* [0] = */ { \ -+ /* .size = */ (size__), \ -+ /* .flags = */ (enum fuse_buf_flags)0, \ -+ /* .mem = */ NULL, \ -+ /* .fd = */ -1, \ -+ /* .pos = */ 0, \ -+ } } }) - - /** - * Get total size of data in a fuse buffer vector -@@ -730,16 +745,16 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); - * @return actual number of bytes copied or -errno on error - */ - ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, -- enum fuse_buf_copy_flags flags); -+ enum fuse_buf_copy_flags flags); - --/* ----------------------------------------------------------- * -- * Signal handling * -- * ----------------------------------------------------------- */ -+/* -+ * Signal handling -+ */ - - /** - * Exit session on HUP, TERM and INT signals and ignore PIPE signal - * -- * Stores session in a global variable. 
May only be called once per -+ * Stores session in a global variable. May only be called once per - * process until fuse_remove_signal_handlers() is called. - * - * Once either of the POSIX signals arrives, the signal handler calls -@@ -766,12 +781,12 @@ int fuse_set_signal_handlers(struct fuse_session *se); - */ - void fuse_remove_signal_handlers(struct fuse_session *se); - --/* ----------------------------------------------------------- * -- * Compatibility stuff * -- * ----------------------------------------------------------- */ -+/* -+ * Compatibility stuff -+ */ - - #if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30 --# error only API version 30 or greater is supported -+#error only API version 30 or greater is supported - #endif - - -@@ -781,11 +796,14 @@ void fuse_remove_signal_handlers(struct fuse_session *se); - * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags! - */ - --#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus -+#if defined(__GNUC__) && \ -+ (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ -+ !defined __cplusplus - _Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit"); - #else --struct _fuse_off_t_must_be_64bit_dummy_struct \ -- { unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); }; -+struct _fuse_off_t_must_be_64bit_dummy_struct { -+ unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); -+}; - #endif - - #endif /* FUSE_COMMON_H_ */ -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index b39522e..e63cb58 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -1,71 +1,71 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. 
-- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB -+ */ - - #include "fuse.h" - #include "fuse_lowlevel.h" - - struct fuse_req { -- struct fuse_session *se; -- uint64_t unique; -- int ctr; -- pthread_mutex_t lock; -- struct fuse_ctx ctx; -- struct fuse_chan *ch; -- int interrupted; -- unsigned int ioctl_64bit : 1; -- union { -- struct { -- uint64_t unique; -- } i; -- struct { -- fuse_interrupt_func_t func; -- void *data; -- } ni; -- } u; -- struct fuse_req *next; -- struct fuse_req *prev; -+ struct fuse_session *se; -+ uint64_t unique; -+ int ctr; -+ pthread_mutex_t lock; -+ struct fuse_ctx ctx; -+ struct fuse_chan *ch; -+ int interrupted; -+ unsigned int ioctl_64bit:1; -+ union { -+ struct { -+ uint64_t unique; -+ } i; -+ struct { -+ fuse_interrupt_func_t func; -+ void *data; -+ } ni; -+ } u; -+ struct fuse_req *next; -+ struct fuse_req *prev; - }; - - struct fuse_notify_req { -- uint64_t unique; -- void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, -- const void *, const struct fuse_buf *); -- struct fuse_notify_req *next; -- struct fuse_notify_req *prev; -+ uint64_t unique; -+ void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, -+ const void *, const struct fuse_buf *); -+ struct fuse_notify_req *next; -+ struct fuse_notify_req *prev; - }; - - struct fuse_session { -- char *mountpoint; -- volatile int exited; -- int fd; -- int debug; -- int deny_others; -- struct fuse_lowlevel_ops op; -- int got_init; -- struct cuse_data *cuse_data; -- void *userdata; -- uid_t owner; -- struct fuse_conn_info conn; -- struct fuse_req list; -- struct fuse_req interrupts; -- pthread_mutex_t lock; -- int got_destroy; -- int broken_splice_nonblock; -- uint64_t notify_ctr; -- struct fuse_notify_req notify_list; -- size_t bufsize; -- int error; -+ char *mountpoint; -+ volatile int exited; -+ int 
fd; -+ int debug; -+ int deny_others; -+ struct fuse_lowlevel_ops op; -+ int got_init; -+ struct cuse_data *cuse_data; -+ void *userdata; -+ uid_t owner; -+ struct fuse_conn_info conn; -+ struct fuse_req list; -+ struct fuse_req interrupts; -+ pthread_mutex_t lock; -+ int got_destroy; -+ int broken_splice_nonblock; -+ uint64_t notify_ctr; -+ struct fuse_notify_req notify_list; -+ size_t bufsize; -+ int error; - }; - - struct fuse_chan { -- pthread_mutex_t lock; -- int ctr; -- int fd; -+ pthread_mutex_t lock; -+ int ctr; -+ int fd; - }; - - /** -@@ -76,19 +76,20 @@ struct fuse_chan { - * - */ - struct fuse_module { -- char *name; -- fuse_module_factory_t factory; -- struct fuse_module *next; -- struct fusemod_so *so; -- int ctr; -+ char *name; -+ fuse_module_factory_t factory; -+ struct fuse_module *next; -+ struct fusemod_so *so; -+ int ctr; - }; - - int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, -- int count); -+ int count); - void fuse_free_req(fuse_req_t req); - - void fuse_session_process_buf_int(struct fuse_session *se, -- const struct fuse_buf *buf, struct fuse_chan *ch); -+ const struct fuse_buf *buf, -+ struct fuse_chan *ch); - - - #define FUSE_MAX_MAX_PAGES 256 -diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c -index 0d268ab..11345f9 100644 ---- a/tools/virtiofsd/fuse_log.c -+++ b/tools/virtiofsd/fuse_log.c -@@ -1,40 +1,40 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2019 Red Hat, Inc. -- -- Logging API. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2019 Red Hat, Inc. -+ * -+ * Logging API. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ - - #include "fuse_log.h" - - #include - #include - --static void default_log_func( -- __attribute__(( unused )) enum fuse_log_level level, -- const char *fmt, va_list ap) -+static void default_log_func(__attribute__((unused)) enum fuse_log_level level, -+ const char *fmt, va_list ap) - { -- vfprintf(stderr, fmt, ap); -+ vfprintf(stderr, fmt, ap); - } - - static fuse_log_func_t log_func = default_log_func; - - void fuse_set_log_func(fuse_log_func_t func) - { -- if (!func) -- func = default_log_func; -+ if (!func) { -+ func = default_log_func; -+ } - -- log_func = func; -+ log_func = func; - } - - void fuse_log(enum fuse_log_level level, const char *fmt, ...) - { -- va_list ap; -+ va_list ap; - -- va_start(ap, fmt); -- log_func(level, fmt, ap); -- va_end(ap); -+ va_start(ap, fmt); -+ log_func(level, fmt, ap); -+ va_end(ap); - } -diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h -index 0af700d..bf6c11f 100644 ---- a/tools/virtiofsd/fuse_log.h -+++ b/tools/virtiofsd/fuse_log.h -@@ -1,10 +1,10 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2019 Red Hat, Inc. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2019 Red Hat, Inc. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. -+ */ - - #ifndef FUSE_LOG_H_ - #define FUSE_LOG_H_ -@@ -22,14 +22,14 @@ - * These levels correspond to syslog(2) log levels since they are widely used. 
- */ - enum fuse_log_level { -- FUSE_LOG_EMERG, -- FUSE_LOG_ALERT, -- FUSE_LOG_CRIT, -- FUSE_LOG_ERR, -- FUSE_LOG_WARNING, -- FUSE_LOG_NOTICE, -- FUSE_LOG_INFO, -- FUSE_LOG_DEBUG -+ FUSE_LOG_EMERG, -+ FUSE_LOG_ALERT, -+ FUSE_LOG_CRIT, -+ FUSE_LOG_ERR, -+ FUSE_LOG_WARNING, -+ FUSE_LOG_NOTICE, -+ FUSE_LOG_INFO, -+ FUSE_LOG_DEBUG - }; - - /** -@@ -45,8 +45,8 @@ enum fuse_log_level { - * @param fmt sprintf-style format string including newline - * @param ap format string arguments - */ --typedef void (*fuse_log_func_t)(enum fuse_log_level level, -- const char *fmt, va_list ap); -+typedef void (*fuse_log_func_t)(enum fuse_log_level level, const char *fmt, -+ va_list ap); - - /** - * Install a custom log handler function. -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index e6fa247..5c9cb52 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1,2380 +1,2515 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- Implementation of (most of) the low-level FUSE API. The session loop -- functions are implemented in separate files. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * Implementation of (most of) the low-level FUSE API. The session loop -+ * functions are implemented in separate files. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ - - #define _GNU_SOURCE - - #include "config.h" - #include "fuse_i.h" - #include "fuse_kernel.h" --#include "fuse_opt.h" - #include "fuse_misc.h" -+#include "fuse_opt.h" - -+#include -+#include -+#include -+#include - #include - #include --#include - #include --#include --#include --#include --#include - #include -- -+#include - - - #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) - #define OFFSET_MAX 0x7fffffffffffffffLL - --#define container_of(ptr, type, member) ({ \ -- const typeof( ((type *)0)->member ) *__mptr = (ptr); \ -- (type *)( (char *)__mptr - offsetof(type,member) );}) -+#define container_of(ptr, type, member) \ -+ ({ \ -+ const typeof(((type *)0)->member) *__mptr = (ptr); \ -+ (type *)((char *)__mptr - offsetof(type, member)); \ -+ }) - - struct fuse_pollhandle { -- uint64_t kh; -- struct fuse_session *se; -+ uint64_t kh; -+ struct fuse_session *se; - }; - - static size_t pagesize; - - static __attribute__((constructor)) void fuse_ll_init_pagesize(void) - { -- pagesize = getpagesize(); -+ pagesize = getpagesize(); - } - - static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) - { -- attr->ino = stbuf->st_ino; -- attr->mode = stbuf->st_mode; -- attr->nlink = stbuf->st_nlink; -- attr->uid = stbuf->st_uid; -- attr->gid = stbuf->st_gid; -- attr->rdev = stbuf->st_rdev; -- attr->size = stbuf->st_size; -- attr->blksize = stbuf->st_blksize; -- attr->blocks = stbuf->st_blocks; -- attr->atime = stbuf->st_atime; -- attr->mtime = stbuf->st_mtime; -- attr->ctime = stbuf->st_ctime; -- attr->atimensec = ST_ATIM_NSEC(stbuf); -- attr->mtimensec = ST_MTIM_NSEC(stbuf); -- attr->ctimensec = ST_CTIM_NSEC(stbuf); -+ attr->ino = stbuf->st_ino; -+ attr->mode = stbuf->st_mode; -+ attr->nlink = stbuf->st_nlink; -+ attr->uid = stbuf->st_uid; -+ attr->gid = stbuf->st_gid; -+ attr->rdev = stbuf->st_rdev; -+ attr->size = stbuf->st_size; -+ attr->blksize = stbuf->st_blksize; -+ attr->blocks = stbuf->st_blocks; -+ 
attr->atime = stbuf->st_atime; -+ attr->mtime = stbuf->st_mtime; -+ attr->ctime = stbuf->st_ctime; -+ attr->atimensec = ST_ATIM_NSEC(stbuf); -+ attr->mtimensec = ST_MTIM_NSEC(stbuf); -+ attr->ctimensec = ST_CTIM_NSEC(stbuf); - } - - static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) - { -- stbuf->st_mode = attr->mode; -- stbuf->st_uid = attr->uid; -- stbuf->st_gid = attr->gid; -- stbuf->st_size = attr->size; -- stbuf->st_atime = attr->atime; -- stbuf->st_mtime = attr->mtime; -- stbuf->st_ctime = attr->ctime; -- ST_ATIM_NSEC_SET(stbuf, attr->atimensec); -- ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); -- ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); -+ stbuf->st_mode = attr->mode; -+ stbuf->st_uid = attr->uid; -+ stbuf->st_gid = attr->gid; -+ stbuf->st_size = attr->size; -+ stbuf->st_atime = attr->atime; -+ stbuf->st_mtime = attr->mtime; -+ stbuf->st_ctime = attr->ctime; -+ ST_ATIM_NSEC_SET(stbuf, attr->atimensec); -+ ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); -+ ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); - } - --static size_t iov_length(const struct iovec *iov, size_t count) -+static size_t iov_length(const struct iovec *iov, size_t count) - { -- size_t seg; -- size_t ret = 0; -+ size_t seg; -+ size_t ret = 0; - -- for (seg = 0; seg < count; seg++) -- ret += iov[seg].iov_len; -- return ret; -+ for (seg = 0; seg < count; seg++) { -+ ret += iov[seg].iov_len; -+ } -+ return ret; - } - - static void list_init_req(struct fuse_req *req) - { -- req->next = req; -- req->prev = req; -+ req->next = req; -+ req->prev = req; - } - - static void list_del_req(struct fuse_req *req) - { -- struct fuse_req *prev = req->prev; -- struct fuse_req *next = req->next; -- prev->next = next; -- next->prev = prev; -+ struct fuse_req *prev = req->prev; -+ struct fuse_req *next = req->next; -+ prev->next = next; -+ next->prev = prev; - } - - static void list_add_req(struct fuse_req *req, struct fuse_req *next) - { -- struct fuse_req *prev = next->prev; -- req->next = next; 
-- req->prev = prev; -- prev->next = req; -- next->prev = req; -+ struct fuse_req *prev = next->prev; -+ req->next = next; -+ req->prev = prev; -+ prev->next = req; -+ next->prev = req; - } - - static void destroy_req(fuse_req_t req) - { -- pthread_mutex_destroy(&req->lock); -- free(req); -+ pthread_mutex_destroy(&req->lock); -+ free(req); - } - - void fuse_free_req(fuse_req_t req) - { -- int ctr; -- struct fuse_session *se = req->se; -+ int ctr; -+ struct fuse_session *se = req->se; - -- pthread_mutex_lock(&se->lock); -- req->u.ni.func = NULL; -- req->u.ni.data = NULL; -- list_del_req(req); -- ctr = --req->ctr; -- req->ch = NULL; -- pthread_mutex_unlock(&se->lock); -- if (!ctr) -- destroy_req(req); -+ pthread_mutex_lock(&se->lock); -+ req->u.ni.func = NULL; -+ req->u.ni.data = NULL; -+ list_del_req(req); -+ ctr = --req->ctr; -+ req->ch = NULL; -+ pthread_mutex_unlock(&se->lock); -+ if (!ctr) { -+ destroy_req(req); -+ } - } - - static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se) - { -- struct fuse_req *req; -+ struct fuse_req *req; - -- req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req)); -- if (req == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); -- } else { -- req->se = se; -- req->ctr = 1; -- list_init_req(req); -- fuse_mutex_init(&req->lock); -- } -+ req = (struct fuse_req *)calloc(1, sizeof(struct fuse_req)); -+ if (req == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); -+ } else { -+ req->se = se; -+ req->ctr = 1; -+ list_init_req(req); -+ fuse_mutex_init(&req->lock); -+ } - -- return req; -+ return req; - } - - /* Send data. 
If *ch* is NULL, send via session master fd */ - static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, -- struct iovec *iov, int count) -+ struct iovec *iov, int count) - { -- struct fuse_out_header *out = iov[0].iov_base; -+ struct fuse_out_header *out = iov[0].iov_base; - -- out->len = iov_length(iov, count); -- if (se->debug) { -- if (out->unique == 0) { -- fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", -- out->error, out->len); -- } else if (out->error) { -- fuse_log(FUSE_LOG_DEBUG, -- " unique: %llu, error: %i (%s), outsize: %i\n", -- (unsigned long long) out->unique, out->error, -- strerror(-out->error), out->len); -- } else { -- fuse_log(FUSE_LOG_DEBUG, -- " unique: %llu, success, outsize: %i\n", -- (unsigned long long) out->unique, out->len); -- } -- } -+ out->len = iov_length(iov, count); -+ if (se->debug) { -+ if (out->unique == 0) { -+ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, -+ out->len); -+ } else if (out->error) { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, error: %i (%s), outsize: %i\n", -+ (unsigned long long)out->unique, out->error, -+ strerror(-out->error), out->len); -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", -+ (unsigned long long)out->unique, out->len); -+ } -+ } - -- abort(); /* virtio should have taken it before here */ -- return 0; -+ abort(); /* virtio should have taken it before here */ -+ return 0; - } - - - int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, -- int count) -+ int count) - { -- struct fuse_out_header out; -+ struct fuse_out_header out; - -- if (error <= -1000 || error > 0) { -- fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); -- error = -ERANGE; -- } -+ if (error <= -1000 || error > 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); -+ error = -ERANGE; -+ } - -- out.unique = req->unique; -- out.error = error; -+ out.unique = req->unique; -+ out.error = error; - -- 
iov[0].iov_base = &out; -- iov[0].iov_len = sizeof(struct fuse_out_header); -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); - -- return fuse_send_msg(req->se, req->ch, iov, count); -+ return fuse_send_msg(req->se, req->ch, iov, count); - } - - static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, -- int count) -+ int count) - { -- int res; -+ int res; - -- res = fuse_send_reply_iov_nofree(req, error, iov, count); -- fuse_free_req(req); -- return res; -+ res = fuse_send_reply_iov_nofree(req, error, iov, count); -+ fuse_free_req(req); -+ return res; - } - - static int send_reply(fuse_req_t req, int error, const void *arg, -- size_t argsize) -+ size_t argsize) - { -- struct iovec iov[2]; -- int count = 1; -- if (argsize) { -- iov[1].iov_base = (void *) arg; -- iov[1].iov_len = argsize; -- count++; -- } -- return send_reply_iov(req, error, iov, count); -+ struct iovec iov[2]; -+ int count = 1; -+ if (argsize) { -+ iov[1].iov_base = (void *)arg; -+ iov[1].iov_len = argsize; -+ count++; -+ } -+ return send_reply_iov(req, error, iov, count); - } - - int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count) - { -- int res; -- struct iovec *padded_iov; -+ int res; -+ struct iovec *padded_iov; - -- padded_iov = malloc((count + 1) * sizeof(struct iovec)); -- if (padded_iov == NULL) -- return fuse_reply_err(req, ENOMEM); -+ padded_iov = malloc((count + 1) * sizeof(struct iovec)); -+ if (padded_iov == NULL) { -+ return fuse_reply_err(req, ENOMEM); -+ } - -- memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); -- count++; -+ memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); -+ count++; - -- res = send_reply_iov(req, 0, padded_iov, count); -- free(padded_iov); -+ res = send_reply_iov(req, 0, padded_iov, count); -+ free(padded_iov); - -- return res; -+ return res; - } - - --/* `buf` is allowed to be empty so that the proper size may be -- allocated by the caller */ -+/* -+ * 'buf` is allowed to be empty 
so that the proper size may be -+ * allocated by the caller -+ */ - size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, -- const char *name, const struct stat *stbuf, off_t off) -+ const char *name, const struct stat *stbuf, off_t off) - { -- (void)req; -- size_t namelen; -- size_t entlen; -- size_t entlen_padded; -- struct fuse_dirent *dirent; -+ (void)req; -+ size_t namelen; -+ size_t entlen; -+ size_t entlen_padded; -+ struct fuse_dirent *dirent; - -- namelen = strlen(name); -- entlen = FUSE_NAME_OFFSET + namelen; -- entlen_padded = FUSE_DIRENT_ALIGN(entlen); -+ namelen = strlen(name); -+ entlen = FUSE_NAME_OFFSET + namelen; -+ entlen_padded = FUSE_DIRENT_ALIGN(entlen); - -- if ((buf == NULL) || (entlen_padded > bufsize)) -- return entlen_padded; -+ if ((buf == NULL) || (entlen_padded > bufsize)) { -+ return entlen_padded; -+ } - -- dirent = (struct fuse_dirent*) buf; -- dirent->ino = stbuf->st_ino; -- dirent->off = off; -- dirent->namelen = namelen; -- dirent->type = (stbuf->st_mode & S_IFMT) >> 12; -- memcpy(dirent->name, name, namelen); -- memset(dirent->name + namelen, 0, entlen_padded - entlen); -+ dirent = (struct fuse_dirent *)buf; -+ dirent->ino = stbuf->st_ino; -+ dirent->off = off; -+ dirent->namelen = namelen; -+ dirent->type = (stbuf->st_mode & S_IFMT) >> 12; -+ memcpy(dirent->name, name, namelen); -+ memset(dirent->name + namelen, 0, entlen_padded - entlen); - -- return entlen_padded; -+ return entlen_padded; - } - - static void convert_statfs(const struct statvfs *stbuf, -- struct fuse_kstatfs *kstatfs) -+ struct fuse_kstatfs *kstatfs) - { -- kstatfs->bsize = stbuf->f_bsize; -- kstatfs->frsize = stbuf->f_frsize; -- kstatfs->blocks = stbuf->f_blocks; -- kstatfs->bfree = stbuf->f_bfree; -- kstatfs->bavail = stbuf->f_bavail; -- kstatfs->files = stbuf->f_files; -- kstatfs->ffree = stbuf->f_ffree; -- kstatfs->namelen = stbuf->f_namemax; -+ kstatfs->bsize = stbuf->f_bsize; -+ kstatfs->frsize = stbuf->f_frsize; -+ kstatfs->blocks = 
stbuf->f_blocks; -+ kstatfs->bfree = stbuf->f_bfree; -+ kstatfs->bavail = stbuf->f_bavail; -+ kstatfs->files = stbuf->f_files; -+ kstatfs->ffree = stbuf->f_ffree; -+ kstatfs->namelen = stbuf->f_namemax; - } - - static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) - { -- return send_reply(req, 0, arg, argsize); -+ return send_reply(req, 0, arg, argsize); - } - - int fuse_reply_err(fuse_req_t req, int err) - { -- return send_reply(req, -err, NULL, 0); -+ return send_reply(req, -err, NULL, 0); - } - - void fuse_reply_none(fuse_req_t req) - { -- fuse_free_req(req); -+ fuse_free_req(req); - } - - static unsigned long calc_timeout_sec(double t) - { -- if (t > (double) ULONG_MAX) -- return ULONG_MAX; -- else if (t < 0.0) -- return 0; -- else -- return (unsigned long) t; -+ if (t > (double)ULONG_MAX) { -+ return ULONG_MAX; -+ } else if (t < 0.0) { -+ return 0; -+ } else { -+ return (unsigned long)t; -+ } - } - - static unsigned int calc_timeout_nsec(double t) - { -- double f = t - (double) calc_timeout_sec(t); -- if (f < 0.0) -- return 0; -- else if (f >= 0.999999999) -- return 999999999; -- else -- return (unsigned int) (f * 1.0e9); -+ double f = t - (double)calc_timeout_sec(t); -+ if (f < 0.0) { -+ return 0; -+ } else if (f >= 0.999999999) { -+ return 999999999; -+ } else { -+ return (unsigned int)(f * 1.0e9); -+ } - } - - static void fill_entry(struct fuse_entry_out *arg, -- const struct fuse_entry_param *e) -+ const struct fuse_entry_param *e) - { -- arg->nodeid = e->ino; -- arg->generation = e->generation; -- arg->entry_valid = calc_timeout_sec(e->entry_timeout); -- arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); -- arg->attr_valid = calc_timeout_sec(e->attr_timeout); -- arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); -- convert_stat(&e->attr, &arg->attr); -+ arg->nodeid = e->ino; -+ arg->generation = e->generation; -+ arg->entry_valid = calc_timeout_sec(e->entry_timeout); -+ arg->entry_valid_nsec = 
calc_timeout_nsec(e->entry_timeout); -+ arg->attr_valid = calc_timeout_sec(e->attr_timeout); -+ arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); -+ convert_stat(&e->attr, &arg->attr); - } - --/* `buf` is allowed to be empty so that the proper size may be -- allocated by the caller */ -+/* -+ * `buf` is allowed to be empty so that the proper size may be -+ * allocated by the caller -+ */ - size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, -- const char *name, -- const struct fuse_entry_param *e, off_t off) --{ -- (void)req; -- size_t namelen; -- size_t entlen; -- size_t entlen_padded; -- -- namelen = strlen(name); -- entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; -- entlen_padded = FUSE_DIRENT_ALIGN(entlen); -- if ((buf == NULL) || (entlen_padded > bufsize)) -- return entlen_padded; -- -- struct fuse_direntplus *dp = (struct fuse_direntplus *) buf; -- memset(&dp->entry_out, 0, sizeof(dp->entry_out)); -- fill_entry(&dp->entry_out, e); -- -- struct fuse_dirent *dirent = &dp->dirent; -- dirent->ino = e->attr.st_ino; -- dirent->off = off; -- dirent->namelen = namelen; -- dirent->type = (e->attr.st_mode & S_IFMT) >> 12; -- memcpy(dirent->name, name, namelen); -- memset(dirent->name + namelen, 0, entlen_padded - entlen); -- -- return entlen_padded; --} -- --static void fill_open(struct fuse_open_out *arg, -- const struct fuse_file_info *f) --{ -- arg->fh = f->fh; -- if (f->direct_io) -- arg->open_flags |= FOPEN_DIRECT_IO; -- if (f->keep_cache) -- arg->open_flags |= FOPEN_KEEP_CACHE; -- if (f->cache_readdir) -- arg->open_flags |= FOPEN_CACHE_DIR; -- if (f->nonseekable) -- arg->open_flags |= FOPEN_NONSEEKABLE; -+ const char *name, -+ const struct fuse_entry_param *e, off_t off) -+{ -+ (void)req; -+ size_t namelen; -+ size_t entlen; -+ size_t entlen_padded; -+ -+ namelen = strlen(name); -+ entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; -+ entlen_padded = FUSE_DIRENT_ALIGN(entlen); -+ if ((buf == NULL) || (entlen_padded > bufsize)) { -+ 
return entlen_padded; -+ } -+ -+ struct fuse_direntplus *dp = (struct fuse_direntplus *)buf; -+ memset(&dp->entry_out, 0, sizeof(dp->entry_out)); -+ fill_entry(&dp->entry_out, e); -+ -+ struct fuse_dirent *dirent = &dp->dirent; -+ dirent->ino = e->attr.st_ino; -+ dirent->off = off; -+ dirent->namelen = namelen; -+ dirent->type = (e->attr.st_mode & S_IFMT) >> 12; -+ memcpy(dirent->name, name, namelen); -+ memset(dirent->name + namelen, 0, entlen_padded - entlen); -+ -+ return entlen_padded; -+} -+ -+static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) -+{ -+ arg->fh = f->fh; -+ if (f->direct_io) { -+ arg->open_flags |= FOPEN_DIRECT_IO; -+ } -+ if (f->keep_cache) { -+ arg->open_flags |= FOPEN_KEEP_CACHE; -+ } -+ if (f->cache_readdir) { -+ arg->open_flags |= FOPEN_CACHE_DIR; -+ } -+ if (f->nonseekable) { -+ arg->open_flags |= FOPEN_NONSEEKABLE; -+ } - } - - int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) - { -- struct fuse_entry_out arg; -- size_t size = req->se->conn.proto_minor < 9 ? -- FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(arg); -+ struct fuse_entry_out arg; -+ size_t size = req->se->conn.proto_minor < 9 ? 
FUSE_COMPAT_ENTRY_OUT_SIZE : -+ sizeof(arg); - -- /* before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant -- negative entry */ -- if (!e->ino && req->se->conn.proto_minor < 4) -- return fuse_reply_err(req, ENOENT); -+ /* -+ * before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant -+ * negative entry -+ */ -+ if (!e->ino && req->se->conn.proto_minor < 4) { -+ return fuse_reply_err(req, ENOENT); -+ } - -- memset(&arg, 0, sizeof(arg)); -- fill_entry(&arg, e); -- return send_reply_ok(req, &arg, size); -+ memset(&arg, 0, sizeof(arg)); -+ fill_entry(&arg, e); -+ return send_reply_ok(req, &arg, size); - } - - int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, -- const struct fuse_file_info *f) -+ const struct fuse_file_info *f) - { -- char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; -- size_t entrysize = req->se->conn.proto_minor < 9 ? -- FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(struct fuse_entry_out); -- struct fuse_entry_out *earg = (struct fuse_entry_out *) buf; -- struct fuse_open_out *oarg = (struct fuse_open_out *) (buf + entrysize); -+ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; -+ size_t entrysize = req->se->conn.proto_minor < 9 ? -+ FUSE_COMPAT_ENTRY_OUT_SIZE : -+ sizeof(struct fuse_entry_out); -+ struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; -+ struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); - -- memset(buf, 0, sizeof(buf)); -- fill_entry(earg, e); -- fill_open(oarg, f); -- return send_reply_ok(req, buf, -- entrysize + sizeof(struct fuse_open_out)); -+ memset(buf, 0, sizeof(buf)); -+ fill_entry(earg, e); -+ fill_open(oarg, f); -+ return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out)); - } - - int fuse_reply_attr(fuse_req_t req, const struct stat *attr, -- double attr_timeout) -+ double attr_timeout) - { -- struct fuse_attr_out arg; -- size_t size = req->se->conn.proto_minor < 9 ? 
-- FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); -+ struct fuse_attr_out arg; -+ size_t size = -+ req->se->conn.proto_minor < 9 ? FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); - -- memset(&arg, 0, sizeof(arg)); -- arg.attr_valid = calc_timeout_sec(attr_timeout); -- arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); -- convert_stat(attr, &arg.attr); -+ memset(&arg, 0, sizeof(arg)); -+ arg.attr_valid = calc_timeout_sec(attr_timeout); -+ arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); -+ convert_stat(attr, &arg.attr); - -- return send_reply_ok(req, &arg, size); -+ return send_reply_ok(req, &arg, size); - } - - int fuse_reply_readlink(fuse_req_t req, const char *linkname) - { -- return send_reply_ok(req, linkname, strlen(linkname)); -+ return send_reply_ok(req, linkname, strlen(linkname)); - } - - int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f) - { -- struct fuse_open_out arg; -+ struct fuse_open_out arg; - -- memset(&arg, 0, sizeof(arg)); -- fill_open(&arg, f); -- return send_reply_ok(req, &arg, sizeof(arg)); -+ memset(&arg, 0, sizeof(arg)); -+ fill_open(&arg, f); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_write(fuse_req_t req, size_t count) - { -- struct fuse_write_out arg; -+ struct fuse_write_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.size = count; -+ memset(&arg, 0, sizeof(arg)); -+ arg.size = count; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size) - { -- return send_reply_ok(req, buf, size); -+ return send_reply_ok(req, buf, size); - } - - static int fuse_send_data_iov_fallback(struct fuse_session *se, -- struct fuse_chan *ch, -- struct iovec *iov, int iov_count, -- struct fuse_bufvec *buf, -- size_t len) -+ struct fuse_chan *ch, struct iovec *iov, -+ int iov_count, struct fuse_bufvec *buf, -+ size_t len) - { -- /* Optimize common case */ -- if (buf->count == 1 && buf->idx == 0 && 
buf->off == 0 && -- !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { -- /* FIXME: also avoid memory copy if there are multiple buffers -- but none of them contain an fd */ -+ /* Optimize common case */ -+ if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && -+ !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { -+ /* -+ * FIXME: also avoid memory copy if there are multiple buffers -+ * but none of them contain an fd -+ */ - -- iov[iov_count].iov_base = buf->buf[0].mem; -- iov[iov_count].iov_len = len; -- iov_count++; -- return fuse_send_msg(se, ch, iov, iov_count); -- } -+ iov[iov_count].iov_base = buf->buf[0].mem; -+ iov[iov_count].iov_len = len; -+ iov_count++; -+ return fuse_send_msg(se, ch, iov, iov_count); -+ } - -- abort(); /* Will have taken vhost path */ -- return 0; -+ abort(); /* Will have taken vhost path */ -+ return 0; - } - - static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -- struct iovec *iov, int iov_count, -- struct fuse_bufvec *buf, unsigned int flags) -+ struct iovec *iov, int iov_count, -+ struct fuse_bufvec *buf, unsigned int flags) - { -- size_t len = fuse_buf_size(buf); -- (void) flags; -+ size_t len = fuse_buf_size(buf); -+ (void)flags; - -- return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); -+ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); - } - - int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags) -+ enum fuse_buf_copy_flags flags) - { -- struct iovec iov[2]; -- struct fuse_out_header out; -- int res; -+ struct iovec iov[2]; -+ struct fuse_out_header out; -+ int res; - -- iov[0].iov_base = &out; -- iov[0].iov_len = sizeof(struct fuse_out_header); -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); - -- out.unique = req->unique; -- out.error = 0; -+ out.unique = req->unique; -+ out.error = 0; - -- res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); -- if (res <= 0) { -- fuse_free_req(req); -- return 
res; -- } else { -- return fuse_reply_err(req, res); -- } -+ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); -+ if (res <= 0) { -+ fuse_free_req(req); -+ return res; -+ } else { -+ return fuse_reply_err(req, res); -+ } - } - - int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) - { -- struct fuse_statfs_out arg; -- size_t size = req->se->conn.proto_minor < 4 ? -- FUSE_COMPAT_STATFS_SIZE : sizeof(arg); -+ struct fuse_statfs_out arg; -+ size_t size = -+ req->se->conn.proto_minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(arg); - -- memset(&arg, 0, sizeof(arg)); -- convert_statfs(stbuf, &arg.st); -+ memset(&arg, 0, sizeof(arg)); -+ convert_statfs(stbuf, &arg.st); - -- return send_reply_ok(req, &arg, size); -+ return send_reply_ok(req, &arg, size); - } - - int fuse_reply_xattr(fuse_req_t req, size_t count) - { -- struct fuse_getxattr_out arg; -+ struct fuse_getxattr_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.size = count; -+ memset(&arg, 0, sizeof(arg)); -+ arg.size = count; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_lock(fuse_req_t req, const struct flock *lock) - { -- struct fuse_lk_out arg; -+ struct fuse_lk_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.lk.type = lock->l_type; -- if (lock->l_type != F_UNLCK) { -- arg.lk.start = lock->l_start; -- if (lock->l_len == 0) -- arg.lk.end = OFFSET_MAX; -- else -- arg.lk.end = lock->l_start + lock->l_len - 1; -- } -- arg.lk.pid = lock->l_pid; -- return send_reply_ok(req, &arg, sizeof(arg)); -+ memset(&arg, 0, sizeof(arg)); -+ arg.lk.type = lock->l_type; -+ if (lock->l_type != F_UNLCK) { -+ arg.lk.start = lock->l_start; -+ if (lock->l_len == 0) { -+ arg.lk.end = OFFSET_MAX; -+ } else { -+ arg.lk.end = lock->l_start + lock->l_len - 1; -+ } -+ } -+ arg.lk.pid = lock->l_pid; -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_bmap(fuse_req_t req, uint64_t idx) - { -- struct fuse_bmap_out 
arg; -+ struct fuse_bmap_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.block = idx; -+ memset(&arg, 0, sizeof(arg)); -+ arg.block = idx; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov, -- size_t count) --{ -- struct fuse_ioctl_iovec *fiov; -- size_t i; -- -- fiov = malloc(sizeof(fiov[0]) * count); -- if (!fiov) -- return NULL; -- -- for (i = 0; i < count; i++) { -- fiov[i].base = (uintptr_t) iov[i].iov_base; -- fiov[i].len = iov[i].iov_len; -- } -- -- return fiov; --} -- --int fuse_reply_ioctl_retry(fuse_req_t req, -- const struct iovec *in_iov, size_t in_count, -- const struct iovec *out_iov, size_t out_count) --{ -- struct fuse_ioctl_out arg; -- struct fuse_ioctl_iovec *in_fiov = NULL; -- struct fuse_ioctl_iovec *out_fiov = NULL; -- struct iovec iov[4]; -- size_t count = 1; -- int res; -- -- memset(&arg, 0, sizeof(arg)); -- arg.flags |= FUSE_IOCTL_RETRY; -- arg.in_iovs = in_count; -- arg.out_iovs = out_count; -- iov[count].iov_base = &arg; -- iov[count].iov_len = sizeof(arg); -- count++; -- -- if (req->se->conn.proto_minor < 16) { -- if (in_count) { -- iov[count].iov_base = (void *)in_iov; -- iov[count].iov_len = sizeof(in_iov[0]) * in_count; -- count++; -- } -- -- if (out_count) { -- iov[count].iov_base = (void *)out_iov; -- iov[count].iov_len = sizeof(out_iov[0]) * out_count; -- count++; -- } -- } else { -- /* Can't handle non-compat 64bit ioctls on 32bit */ -- if (sizeof(void *) == 4 && req->ioctl_64bit) { -- res = fuse_reply_err(req, EINVAL); -- goto out; -- } -- -- if (in_count) { -- in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -- if (!in_fiov) -- goto enomem; -- -- iov[count].iov_base = (void *)in_fiov; -- iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -- count++; -- } -- if (out_count) { -- out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -- if (!out_fiov) -- goto enomem; -- -- iov[count].iov_base = 
(void *)out_fiov; -- iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -- count++; -- } -- } -- -- res = send_reply_iov(req, 0, iov, count); -+ size_t count) -+{ -+ struct fuse_ioctl_iovec *fiov; -+ size_t i; -+ -+ fiov = malloc(sizeof(fiov[0]) * count); -+ if (!fiov) { -+ return NULL; -+ } -+ -+ for (i = 0; i < count; i++) { -+ fiov[i].base = (uintptr_t)iov[i].iov_base; -+ fiov[i].len = iov[i].iov_len; -+ } -+ -+ return fiov; -+} -+ -+int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, -+ size_t in_count, const struct iovec *out_iov, -+ size_t out_count) -+{ -+ struct fuse_ioctl_out arg; -+ struct fuse_ioctl_iovec *in_fiov = NULL; -+ struct fuse_ioctl_iovec *out_fiov = NULL; -+ struct iovec iov[4]; -+ size_t count = 1; -+ int res; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.flags |= FUSE_IOCTL_RETRY; -+ arg.in_iovs = in_count; -+ arg.out_iovs = out_count; -+ iov[count].iov_base = &arg; -+ iov[count].iov_len = sizeof(arg); -+ count++; -+ -+ if (req->se->conn.proto_minor < 16) { -+ if (in_count) { -+ iov[count].iov_base = (void *)in_iov; -+ iov[count].iov_len = sizeof(in_iov[0]) * in_count; -+ count++; -+ } -+ -+ if (out_count) { -+ iov[count].iov_base = (void *)out_iov; -+ iov[count].iov_len = sizeof(out_iov[0]) * out_count; -+ count++; -+ } -+ } else { -+ /* Can't handle non-compat 64bit ioctls on 32bit */ -+ if (sizeof(void *) == 4 && req->ioctl_64bit) { -+ res = fuse_reply_err(req, EINVAL); -+ goto out; -+ } -+ -+ if (in_count) { -+ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -+ if (!in_fiov) { -+ goto enomem; -+ } -+ -+ iov[count].iov_base = (void *)in_fiov; -+ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -+ count++; -+ } -+ if (out_count) { -+ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -+ if (!out_fiov) { -+ goto enomem; -+ } -+ -+ iov[count].iov_base = (void *)out_fiov; -+ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -+ count++; -+ } -+ } -+ -+ res = send_reply_iov(req, 0, iov, count); - out: -- 
free(in_fiov); -- free(out_fiov); -+ free(in_fiov); -+ free(out_fiov); - -- return res; -+ return res; - - enomem: -- res = fuse_reply_err(req, ENOMEM); -- goto out; -+ res = fuse_reply_err(req, ENOMEM); -+ goto out; - } - - int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size) - { -- struct fuse_ioctl_out arg; -- struct iovec iov[3]; -- size_t count = 1; -+ struct fuse_ioctl_out arg; -+ struct iovec iov[3]; -+ size_t count = 1; - -- memset(&arg, 0, sizeof(arg)); -- arg.result = result; -- iov[count].iov_base = &arg; -- iov[count].iov_len = sizeof(arg); -- count++; -+ memset(&arg, 0, sizeof(arg)); -+ arg.result = result; -+ iov[count].iov_base = &arg; -+ iov[count].iov_len = sizeof(arg); -+ count++; - -- if (size) { -- iov[count].iov_base = (char *) buf; -- iov[count].iov_len = size; -- count++; -- } -+ if (size) { -+ iov[count].iov_base = (char *)buf; -+ iov[count].iov_len = size; -+ count++; -+ } - -- return send_reply_iov(req, 0, iov, count); -+ return send_reply_iov(req, 0, iov, count); - } - - int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, -- int count) -+ int count) - { -- struct iovec *padded_iov; -- struct fuse_ioctl_out arg; -- int res; -+ struct iovec *padded_iov; -+ struct fuse_ioctl_out arg; -+ int res; - -- padded_iov = malloc((count + 2) * sizeof(struct iovec)); -- if (padded_iov == NULL) -- return fuse_reply_err(req, ENOMEM); -+ padded_iov = malloc((count + 2) * sizeof(struct iovec)); -+ if (padded_iov == NULL) { -+ return fuse_reply_err(req, ENOMEM); -+ } - -- memset(&arg, 0, sizeof(arg)); -- arg.result = result; -- padded_iov[1].iov_base = &arg; -- padded_iov[1].iov_len = sizeof(arg); -+ memset(&arg, 0, sizeof(arg)); -+ arg.result = result; -+ padded_iov[1].iov_base = &arg; -+ padded_iov[1].iov_len = sizeof(arg); - -- memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); -+ memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); - -- res = send_reply_iov(req, 0, padded_iov, count + 2); 
-- free(padded_iov); -+ res = send_reply_iov(req, 0, padded_iov, count + 2); -+ free(padded_iov); - -- return res; -+ return res; - } - - int fuse_reply_poll(fuse_req_t req, unsigned revents) - { -- struct fuse_poll_out arg; -+ struct fuse_poll_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.revents = revents; -+ memset(&arg, 0, sizeof(arg)); -+ arg.revents = revents; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_lseek(fuse_req_t req, off_t off) - { -- struct fuse_lseek_out arg; -+ struct fuse_lseek_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.offset = off; -+ memset(&arg, 0, sizeof(arg)); -+ arg.offset = off; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -+ char *name = (char *)inarg; - -- if (req->se->op.lookup) -- req->se->op.lookup(req, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.lookup) { -+ req->se->op.lookup(req, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_forget_in *arg = (struct fuse_forget_in *) inarg; -+ struct fuse_forget_in *arg = (struct fuse_forget_in *)inarg; - -- if (req->se->op.forget) -- req->se->op.forget(req, nodeid, arg->nlookup); -- else -- fuse_reply_none(req); -+ if (req->se->op.forget) { -+ req->se->op.forget(req, nodeid, arg->nlookup); -+ } else { -+ fuse_reply_none(req); -+ } - } - - static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg) -+ const void *inarg) - { -- struct fuse_batch_forget_in *arg = (void *) inarg; -- struct fuse_forget_one *param = (void *) PARAM(arg); -- unsigned int i; -+ struct fuse_batch_forget_in *arg = (void *)inarg; -+ struct fuse_forget_one *param = (void *)PARAM(arg); -+ 
unsigned int i; - -- (void) nodeid; -+ (void)nodeid; - -- if (req->se->op.forget_multi) { -- req->se->op.forget_multi(req, arg->count, -- (struct fuse_forget_data *) param); -- } else if (req->se->op.forget) { -- for (i = 0; i < arg->count; i++) { -- struct fuse_forget_one *forget = &param[i]; -- struct fuse_req *dummy_req; -+ if (req->se->op.forget_multi) { -+ req->se->op.forget_multi(req, arg->count, -+ (struct fuse_forget_data *)param); -+ } else if (req->se->op.forget) { -+ for (i = 0; i < arg->count; i++) { -+ struct fuse_forget_one *forget = &param[i]; -+ struct fuse_req *dummy_req; - -- dummy_req = fuse_ll_alloc_req(req->se); -- if (dummy_req == NULL) -- break; -+ dummy_req = fuse_ll_alloc_req(req->se); -+ if (dummy_req == NULL) { -+ break; -+ } - -- dummy_req->unique = req->unique; -- dummy_req->ctx = req->ctx; -- dummy_req->ch = NULL; -+ dummy_req->unique = req->unique; -+ dummy_req->ctx = req->ctx; -+ dummy_req->ch = NULL; - -- req->se->op.forget(dummy_req, forget->nodeid, -- forget->nlookup); -- } -- fuse_reply_none(req); -- } else { -- fuse_reply_none(req); -- } -+ req->se->op.forget(dummy_req, forget->nodeid, forget->nlookup); -+ } -+ fuse_reply_none(req); -+ } else { -+ fuse_reply_none(req); -+ } - } - - static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_file_info *fip = NULL; -- struct fuse_file_info fi; -+ struct fuse_file_info *fip = NULL; -+ struct fuse_file_info fi; - -- if (req->se->conn.proto_minor >= 9) { -- struct fuse_getattr_in *arg = (struct fuse_getattr_in *) inarg; -+ if (req->se->conn.proto_minor >= 9) { -+ struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; - -- if (arg->getattr_flags & FUSE_GETATTR_FH) { -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fip = &fi; -- } -- } -+ if (arg->getattr_flags & FUSE_GETATTR_FH) { -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fip = &fi; -+ } -+ } - -- if (req->se->op.getattr) -- req->se->op.getattr(req, nodeid, fip); -- else -- 
fuse_reply_err(req, ENOSYS); -+ if (req->se->op.getattr) { -+ req->se->op.getattr(req, nodeid, fip); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_setattr_in *arg = (struct fuse_setattr_in *) inarg; -- -- if (req->se->op.setattr) { -- struct fuse_file_info *fi = NULL; -- struct fuse_file_info fi_store; -- struct stat stbuf; -- memset(&stbuf, 0, sizeof(stbuf)); -- convert_attr(arg, &stbuf); -- if (arg->valid & FATTR_FH) { -- arg->valid &= ~FATTR_FH; -- memset(&fi_store, 0, sizeof(fi_store)); -- fi = &fi_store; -- fi->fh = arg->fh; -- } -- arg->valid &= -- FUSE_SET_ATTR_MODE | -- FUSE_SET_ATTR_UID | -- FUSE_SET_ATTR_GID | -- FUSE_SET_ATTR_SIZE | -- FUSE_SET_ATTR_ATIME | -- FUSE_SET_ATTR_MTIME | -- FUSE_SET_ATTR_ATIME_NOW | -- FUSE_SET_ATTR_MTIME_NOW | -- FUSE_SET_ATTR_CTIME; -- -- req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); -- } else -- fuse_reply_err(req, ENOSYS); -+ struct fuse_setattr_in *arg = (struct fuse_setattr_in *)inarg; -+ -+ if (req->se->op.setattr) { -+ struct fuse_file_info *fi = NULL; -+ struct fuse_file_info fi_store; -+ struct stat stbuf; -+ memset(&stbuf, 0, sizeof(stbuf)); -+ convert_attr(arg, &stbuf); -+ if (arg->valid & FATTR_FH) { -+ arg->valid &= ~FATTR_FH; -+ memset(&fi_store, 0, sizeof(fi_store)); -+ fi = &fi_store; -+ fi->fh = arg->fh; -+ } -+ arg->valid &= FUSE_SET_ATTR_MODE | FUSE_SET_ATTR_UID | -+ FUSE_SET_ATTR_GID | FUSE_SET_ATTR_SIZE | -+ FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME | -+ FUSE_SET_ATTR_ATIME_NOW | FUSE_SET_ATTR_MTIME_NOW | -+ FUSE_SET_ATTR_CTIME; -+ -+ req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_access_in *arg = (struct fuse_access_in *) inarg; -+ struct fuse_access_in *arg = (struct fuse_access_in *)inarg; - -- if (req->se->op.access) -- 
req->se->op.access(req, nodeid, arg->mask); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.access) { -+ req->se->op.access(req, nodeid, arg->mask); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- (void) inarg; -+ (void)inarg; - -- if (req->se->op.readlink) -- req->se->op.readlink(req, nodeid); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.readlink) { -+ req->se->op.readlink(req, nodeid); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inarg; -- char *name = PARAM(arg); -+ struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; -+ char *name = PARAM(arg); - -- if (req->se->conn.proto_minor >= 12) -- req->ctx.umask = arg->umask; -- else -- name = (char *) inarg + FUSE_COMPAT_MKNOD_IN_SIZE; -+ if (req->se->conn.proto_minor >= 12) { -+ req->ctx.umask = arg->umask; -+ } else { -+ name = (char *)inarg + FUSE_COMPAT_MKNOD_IN_SIZE; -+ } - -- if (req->se->op.mknod) -- req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.mknod) { -+ req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inarg; -+ struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; - -- if (req->se->conn.proto_minor >= 12) -- req->ctx.umask = arg->umask; -+ if (req->se->conn.proto_minor >= 12) { -+ req->ctx.umask = arg->umask; -+ } - -- if (req->se->op.mkdir) -- req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.mkdir) { -+ req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -+ } else { -+ fuse_reply_err(req, ENOSYS); 
-+ } - } - - static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -+ char *name = (char *)inarg; - -- if (req->se->op.unlink) -- req->se->op.unlink(req, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.unlink) { -+ req->se->op.unlink(req, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -+ char *name = (char *)inarg; - -- if (req->se->op.rmdir) -- req->se->op.rmdir(req, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.rmdir) { -+ req->se->op.rmdir(req, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -- char *linkname = ((char *) inarg) + strlen((char *) inarg) + 1; -+ char *name = (char *)inarg; -+ char *linkname = ((char *)inarg) + strlen((char *)inarg) + 1; - -- if (req->se->op.symlink) -- req->se->op.symlink(req, linkname, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.symlink) { -+ req->se->op.symlink(req, linkname, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_rename_in *arg = (struct fuse_rename_in *) inarg; -- char *oldname = PARAM(arg); -- char *newname = oldname + strlen(oldname) + 1; -+ struct fuse_rename_in *arg = (struct fuse_rename_in *)inarg; -+ char *oldname = PARAM(arg); -+ char *newname = oldname + strlen(oldname) + 1; - -- if (req->se->op.rename) -- req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -- 0); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.rename) { -+ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void 
do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_rename2_in *arg = (struct fuse_rename2_in *) inarg; -- char *oldname = PARAM(arg); -- char *newname = oldname + strlen(oldname) + 1; -+ struct fuse_rename2_in *arg = (struct fuse_rename2_in *)inarg; -+ char *oldname = PARAM(arg); -+ char *newname = oldname + strlen(oldname) + 1; - -- if (req->se->op.rename) -- req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -- arg->flags); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.rename) { -+ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -+ arg->flags); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_link_in *arg = (struct fuse_link_in *) inarg; -+ struct fuse_link_in *arg = (struct fuse_link_in *)inarg; - -- if (req->se->op.link) -- req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.link) { -+ req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_create_in *arg = (struct fuse_create_in *) inarg; -+ struct fuse_create_in *arg = (struct fuse_create_in *)inarg; - -- if (req->se->op.create) { -- struct fuse_file_info fi; -- char *name = PARAM(arg); -+ if (req->se->op.create) { -+ struct fuse_file_info fi; -+ char *name = PARAM(arg); - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; - -- if (req->se->conn.proto_minor >= 12) -- req->ctx.umask = arg->umask; -- else -- name = (char *) inarg + sizeof(struct fuse_open_in); -+ if (req->se->conn.proto_minor >= 12) { -+ req->ctx.umask = arg->umask; -+ } else { -+ name = (char *)inarg + sizeof(struct fuse_open_in); -+ } - -- req->se->op.create(req, nodeid, name, arg->mode, &fi); 
-- } else -- fuse_reply_err(req, ENOSYS); -+ req->se->op.create(req, nodeid, name, arg->mode, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_open_in *arg = (struct fuse_open_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_open_in *arg = (struct fuse_open_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; - -- if (req->se->op.open) -- req->se->op.open(req, nodeid, &fi); -- else -- fuse_reply_open(req, &fi); -+ if (req->se->op.open) { -+ req->se->op.open(req, nodeid, &fi); -+ } else { -+ fuse_reply_open(req, &fi); -+ } - } - - static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -+ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; - -- if (req->se->op.read) { -- struct fuse_file_info fi; -+ if (req->se->op.read) { -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- if (req->se->conn.proto_minor >= 9) { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- } -- req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); -- } else -- fuse_reply_err(req, ENOSYS); -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ if (req->se->conn.proto_minor >= 9) { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ } -+ req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_write_in *arg = (struct fuse_write_in *) inarg; -- struct fuse_file_info fi; -- char *param; -+ struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -+ struct fuse_file_info fi; -+ char *param; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.writepage = (arg->write_flags & 
FUSE_WRITE_CACHE) != 0; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; - -- if (req->se->conn.proto_minor < 9) { -- param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; -- } else { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- param = PARAM(arg); -- } -+ if (req->se->conn.proto_minor < 9) { -+ param = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; -+ } else { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ param = PARAM(arg); -+ } - -- if (req->se->op.write) -- req->se->op.write(req, nodeid, param, arg->size, -- arg->offset, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.write) { -+ req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, -- const struct fuse_buf *ibuf) --{ -- struct fuse_session *se = req->se; -- struct fuse_bufvec bufv = { -- .buf[0] = *ibuf, -- .count = 1, -- }; -- struct fuse_write_in *arg = (struct fuse_write_in *) inarg; -- struct fuse_file_info fi; -- -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; -- -- if (se->conn.proto_minor < 9) { -- bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; -- bufv.buf[0].size -= sizeof(struct fuse_in_header) + -- FUSE_COMPAT_WRITE_IN_SIZE; -- assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); -- } else { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) -- bufv.buf[0].mem = PARAM(arg); -- -- bufv.buf[0].size -= sizeof(struct fuse_in_header) + -- sizeof(struct fuse_write_in); -- } -- if (bufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); -- fuse_reply_err(req, EIO); -- return; -- } -- bufv.buf[0].size = arg->size; -- -- se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); -+ 
const struct fuse_buf *ibuf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_bufvec bufv = { -+ .buf[0] = *ibuf, -+ .count = 1, -+ }; -+ struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; -+ -+ if (se->conn.proto_minor < 9) { -+ bufv.buf[0].mem = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; -+ bufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + FUSE_COMPAT_WRITE_IN_SIZE; -+ assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); -+ } else { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -+ bufv.buf[0].mem = PARAM(arg); -+ } -+ -+ bufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); -+ } -+ if (bufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ bufv.buf[0].size = arg->size; -+ -+ se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); - } - - static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_flush_in *arg = (struct fuse_flush_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.flush = 1; -- if (req->se->conn.proto_minor >= 7) -- fi.lock_owner = arg->lock_owner; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.flush = 1; -+ if (req->se->conn.proto_minor >= 7) { -+ fi.lock_owner = arg->lock_owner; -+ } - -- if (req->se->op.flush) -- req->se->op.flush(req, nodeid, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.flush) { -+ req->se->op.flush(req, nodeid, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_release_in *arg 
= (struct fuse_release_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_release_in *arg = (struct fuse_release_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -- fi.fh = arg->fh; -- if (req->se->conn.proto_minor >= 8) { -- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -- fi.lock_owner = arg->lock_owner; -- } -- if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { -- fi.flock_release = 1; -- fi.lock_owner = arg->lock_owner; -- } -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ fi.fh = arg->fh; -+ if (req->se->conn.proto_minor >= 8) { -+ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -+ fi.lock_owner = arg->lock_owner; -+ } -+ if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { -+ fi.flock_release = 1; -+ fi.lock_owner = arg->lock_owner; -+ } - -- if (req->se->op.release) -- req->se->op.release(req, nodeid, &fi); -- else -- fuse_reply_err(req, 0); -+ if (req->se->op.release) { -+ req->se->op.release(req, nodeid, &fi); -+ } else { -+ fuse_reply_err(req, 0); -+ } - } - - static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; -- struct fuse_file_info fi; -- int datasync = arg->fsync_flags & 1; -+ struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; -+ struct fuse_file_info fi; -+ int datasync = arg->fsync_flags & 1; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.fsync) -- req->se->op.fsync(req, nodeid, datasync, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.fsync) { -+ req->se->op.fsync(req, nodeid, datasync, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_open_in *arg = (struct fuse_open_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_open_in *arg = (struct 
fuse_open_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; - -- if (req->se->op.opendir) -- req->se->op.opendir(req, nodeid, &fi); -- else -- fuse_reply_open(req, &fi); -+ if (req->se->op.opendir) { -+ req->se->op.opendir(req, nodeid, &fi); -+ } else { -+ fuse_reply_open(req, &fi); -+ } - } - - static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.readdir) -- req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.readdir) { -+ req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.readdirplus) -- req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.readdirplus) { -+ req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_release_in *arg = (struct fuse_release_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_release_in *arg = (struct fuse_release_in *)inarg; -+ struct 
fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ fi.fh = arg->fh; - -- if (req->se->op.releasedir) -- req->se->op.releasedir(req, nodeid, &fi); -- else -- fuse_reply_err(req, 0); -+ if (req->se->op.releasedir) { -+ req->se->op.releasedir(req, nodeid, &fi); -+ } else { -+ fuse_reply_err(req, 0); -+ } - } - - static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; -- struct fuse_file_info fi; -- int datasync = arg->fsync_flags & 1; -+ struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; -+ struct fuse_file_info fi; -+ int datasync = arg->fsync_flags & 1; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.fsyncdir) -- req->se->op.fsyncdir(req, nodeid, datasync, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.fsyncdir) { -+ req->se->op.fsyncdir(req, nodeid, datasync, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- (void) nodeid; -- (void) inarg; -+ (void)nodeid; -+ (void)inarg; - -- if (req->se->op.statfs) -- req->se->op.statfs(req, nodeid); -- else { -- struct statvfs buf = { -- .f_namemax = 255, -- .f_bsize = 512, -- }; -- fuse_reply_statfs(req, &buf); -- } -+ if (req->se->op.statfs) { -+ req->se->op.statfs(req, nodeid); -+ } else { -+ struct statvfs buf = { -+ .f_namemax = 255, -+ .f_bsize = 512, -+ }; -+ fuse_reply_statfs(req, &buf); -+ } - } - - static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg; -- char *name = PARAM(arg); -- char *value = name + strlen(name) + 1; -+ struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *)inarg; -+ char *name = PARAM(arg); -+ char *value = name 
+ strlen(name) + 1; - -- if (req->se->op.setxattr) -- req->se->op.setxattr(req, nodeid, name, value, arg->size, -- arg->flags); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.setxattr) { -+ req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; -+ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; - -- if (req->se->op.getxattr) -- req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.getxattr) { -+ req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; -+ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; - -- if (req->se->op.listxattr) -- req->se->op.listxattr(req, nodeid, arg->size); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.listxattr) { -+ req->se->op.listxattr(req, nodeid, arg->size); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -+ char *name = (char *)inarg; - -- if (req->se->op.removexattr) -- req->se->op.removexattr(req, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.removexattr) { -+ req->se->op.removexattr(req, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void convert_fuse_file_lock(struct fuse_file_lock *fl, -- struct flock *flock) -+ struct flock *flock) - { -- memset(flock, 0, sizeof(struct flock)); -- flock->l_type = fl->type; -- flock->l_whence = SEEK_SET; -- flock->l_start = fl->start; -- if (fl->end == OFFSET_MAX) -- 
flock->l_len = 0; -- else -- flock->l_len = fl->end - fl->start + 1; -- flock->l_pid = fl->pid; -+ memset(flock, 0, sizeof(struct flock)); -+ flock->l_type = fl->type; -+ flock->l_whence = SEEK_SET; -+ flock->l_start = fl->start; -+ if (fl->end == OFFSET_MAX) { -+ flock->l_len = 0; -+ } else { -+ flock->l_len = fl->end - fl->start + 1; -+ } -+ flock->l_pid = fl->pid; - } - - static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; -- struct fuse_file_info fi; -- struct flock flock; -+ struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; -+ struct fuse_file_info fi; -+ struct flock flock; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.lock_owner = arg->owner; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.lock_owner = arg->owner; - -- convert_fuse_file_lock(&arg->lk, &flock); -- if (req->se->op.getlk) -- req->se->op.getlk(req, nodeid, &fi, &flock); -- else -- fuse_reply_err(req, ENOSYS); -+ convert_fuse_file_lock(&arg->lk, &flock); -+ if (req->se->op.getlk) { -+ req->se->op.getlk(req, nodeid, &fi, &flock); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg, int sleep) --{ -- struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; -- struct fuse_file_info fi; -- struct flock flock; -- -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.lock_owner = arg->owner; -- -- if (arg->lk_flags & FUSE_LK_FLOCK) { -- int op = 0; -- -- switch (arg->lk.type) { -- case F_RDLCK: -- op = LOCK_SH; -- break; -- case F_WRLCK: -- op = LOCK_EX; -- break; -- case F_UNLCK: -- op = LOCK_UN; -- break; -- } -- if (!sleep) -- op |= LOCK_NB; -- -- if (req->se->op.flock) -- req->se->op.flock(req, nodeid, &fi, op); -- else -- fuse_reply_err(req, ENOSYS); -- } else { -- convert_fuse_file_lock(&arg->lk, &flock); -- if (req->se->op.setlk) -- req->se->op.setlk(req, nodeid, &fi, &flock, sleep); -- else -- 
fuse_reply_err(req, ENOSYS); -- } -+ const void *inarg, int sleep) -+{ -+ struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; -+ struct fuse_file_info fi; -+ struct flock flock; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.lock_owner = arg->owner; -+ -+ if (arg->lk_flags & FUSE_LK_FLOCK) { -+ int op = 0; -+ -+ switch (arg->lk.type) { -+ case F_RDLCK: -+ op = LOCK_SH; -+ break; -+ case F_WRLCK: -+ op = LOCK_EX; -+ break; -+ case F_UNLCK: -+ op = LOCK_UN; -+ break; -+ } -+ if (!sleep) { -+ op |= LOCK_NB; -+ } -+ -+ if (req->se->op.flock) { -+ req->se->op.flock(req, nodeid, &fi, op); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } -+ } else { -+ convert_fuse_file_lock(&arg->lk, &flock); -+ if (req->se->op.setlk) { -+ req->se->op.setlk(req, nodeid, &fi, &flock, sleep); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } -+ } - } - - static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- do_setlk_common(req, nodeid, inarg, 0); -+ do_setlk_common(req, nodeid, inarg, 0); - } - - static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- do_setlk_common(req, nodeid, inarg, 1); -+ do_setlk_common(req, nodeid, inarg, 1); - } - - static int find_interrupted(struct fuse_session *se, struct fuse_req *req) - { -- struct fuse_req *curr; -- -- for (curr = se->list.next; curr != &se->list; curr = curr->next) { -- if (curr->unique == req->u.i.unique) { -- fuse_interrupt_func_t func; -- void *data; -- -- curr->ctr++; -- pthread_mutex_unlock(&se->lock); -- -- /* Ugh, ugly locking */ -- pthread_mutex_lock(&curr->lock); -- pthread_mutex_lock(&se->lock); -- curr->interrupted = 1; -- func = curr->u.ni.func; -- data = curr->u.ni.data; -- pthread_mutex_unlock(&se->lock); -- if (func) -- func(curr, data); -- pthread_mutex_unlock(&curr->lock); -- -- pthread_mutex_lock(&se->lock); -- curr->ctr--; -- if (!curr->ctr) -- destroy_req(curr); -- -- return 1; -- } -- } -- for (curr = se->interrupts.next; curr != &se->interrupts; -- curr 
= curr->next) { -- if (curr->u.i.unique == req->u.i.unique) -- return 1; -- } -- return 0; -+ struct fuse_req *curr; -+ -+ for (curr = se->list.next; curr != &se->list; curr = curr->next) { -+ if (curr->unique == req->u.i.unique) { -+ fuse_interrupt_func_t func; -+ void *data; -+ -+ curr->ctr++; -+ pthread_mutex_unlock(&se->lock); -+ -+ /* Ugh, ugly locking */ -+ pthread_mutex_lock(&curr->lock); -+ pthread_mutex_lock(&se->lock); -+ curr->interrupted = 1; -+ func = curr->u.ni.func; -+ data = curr->u.ni.data; -+ pthread_mutex_unlock(&se->lock); -+ if (func) { -+ func(curr, data); -+ } -+ pthread_mutex_unlock(&curr->lock); -+ -+ pthread_mutex_lock(&se->lock); -+ curr->ctr--; -+ if (!curr->ctr) { -+ destroy_req(curr); -+ } -+ -+ return 1; -+ } -+ } -+ for (curr = se->interrupts.next; curr != &se->interrupts; -+ curr = curr->next) { -+ if (curr->u.i.unique == req->u.i.unique) { -+ return 1; -+ } -+ } -+ return 0; - } - - static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *) inarg; -- struct fuse_session *se = req->se; -+ struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *)inarg; -+ struct fuse_session *se = req->se; - -- (void) nodeid; -- if (se->debug) -- fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -- (unsigned long long) arg->unique); -+ (void)nodeid; -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -+ (unsigned long long)arg->unique); -+ } - -- req->u.i.unique = arg->unique; -+ req->u.i.unique = arg->unique; - -- pthread_mutex_lock(&se->lock); -- if (find_interrupted(se, req)) -- destroy_req(req); -- else -- list_add_req(req, &se->interrupts); -- pthread_mutex_unlock(&se->lock); -+ pthread_mutex_lock(&se->lock); -+ if (find_interrupted(se, req)) { -+ destroy_req(req); -+ } else { -+ list_add_req(req, &se->interrupts); -+ } -+ pthread_mutex_unlock(&se->lock); - } - - static struct fuse_req *check_interrupt(struct fuse_session *se, -- struct 
fuse_req *req) --{ -- struct fuse_req *curr; -- -- for (curr = se->interrupts.next; curr != &se->interrupts; -- curr = curr->next) { -- if (curr->u.i.unique == req->unique) { -- req->interrupted = 1; -- list_del_req(curr); -- free(curr); -- return NULL; -- } -- } -- curr = se->interrupts.next; -- if (curr != &se->interrupts) { -- list_del_req(curr); -- list_init_req(curr); -- return curr; -- } else -- return NULL; -+ struct fuse_req *req) -+{ -+ struct fuse_req *curr; -+ -+ for (curr = se->interrupts.next; curr != &se->interrupts; -+ curr = curr->next) { -+ if (curr->u.i.unique == req->unique) { -+ req->interrupted = 1; -+ list_del_req(curr); -+ free(curr); -+ return NULL; -+ } -+ } -+ curr = se->interrupts.next; -+ if (curr != &se->interrupts) { -+ list_del_req(curr); -+ list_init_req(curr); -+ return curr; -+ } else { -+ return NULL; -+ } - } - - static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg; -+ struct fuse_bmap_in *arg = (struct fuse_bmap_in *)inarg; - -- if (req->se->op.bmap) -- req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.bmap) { -+ req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *) inarg; -- unsigned int flags = arg->flags; -- void *in_buf = arg->in_size ? PARAM(arg) : NULL; -- struct fuse_file_info fi; -+ struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *)inarg; -+ unsigned int flags = arg->flags; -+ void *in_buf = arg->in_size ? 
PARAM(arg) : NULL; -+ struct fuse_file_info fi; - -- if (flags & FUSE_IOCTL_DIR && -- !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { -- fuse_reply_err(req, ENOTTY); -- return; -- } -+ if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { -+ fuse_reply_err(req, ENOTTY); -+ return; -+ } - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && -- !(flags & FUSE_IOCTL_32BIT)) { -- req->ioctl_64bit = 1; -- } -+ if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && -+ !(flags & FUSE_IOCTL_32BIT)) { -+ req->ioctl_64bit = 1; -+ } - -- if (req->se->op.ioctl) -- req->se->op.ioctl(req, nodeid, arg->cmd, -- (void *)(uintptr_t)arg->arg, &fi, flags, -- in_buf, arg->in_size, arg->out_size); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.ioctl) { -+ req->se->op.ioctl(req, nodeid, arg->cmd, (void *)(uintptr_t)arg->arg, -+ &fi, flags, in_buf, arg->in_size, arg->out_size); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) - { -- free(ph); -+ free(ph); - } - - static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_poll_in *arg = (struct fuse_poll_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_poll_in *arg = (struct fuse_poll_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.poll_events = arg->events; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.poll_events = arg->events; - -- if (req->se->op.poll) { -- struct fuse_pollhandle *ph = NULL; -+ if (req->se->op.poll) { -+ struct fuse_pollhandle *ph = NULL; - -- if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { -- ph = malloc(sizeof(struct fuse_pollhandle)); -- if (ph == NULL) { -- fuse_reply_err(req, ENOMEM); -- return; -- } -- ph->kh = arg->kh; -- ph->se = req->se; -- } -+ if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { -+ 
ph = malloc(sizeof(struct fuse_pollhandle)); -+ if (ph == NULL) { -+ fuse_reply_err(req, ENOMEM); -+ return; -+ } -+ ph->kh = arg->kh; -+ ph->se = req->se; -+ } - -- req->se->op.poll(req, nodeid, &fi, ph); -- } else { -- fuse_reply_err(req, ENOSYS); -- } -+ req->se->op.poll(req, nodeid, &fi, ph); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.fallocate) -- req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.fallocate) { -+ req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, -+ &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - --static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, const void *inarg) -+static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, -+ const void *inarg) - { -- struct fuse_copy_file_range_in *arg = (struct fuse_copy_file_range_in *) inarg; -- struct fuse_file_info fi_in, fi_out; -+ struct fuse_copy_file_range_in *arg = -+ (struct fuse_copy_file_range_in *)inarg; -+ struct fuse_file_info fi_in, fi_out; - -- memset(&fi_in, 0, sizeof(fi_in)); -- fi_in.fh = arg->fh_in; -+ memset(&fi_in, 0, sizeof(fi_in)); -+ fi_in.fh = arg->fh_in; - -- memset(&fi_out, 0, sizeof(fi_out)); -- fi_out.fh = arg->fh_out; -+ memset(&fi_out, 0, sizeof(fi_out)); -+ fi_out.fh = arg->fh_out; - - -- if (req->se->op.copy_file_range) -- req->se->op.copy_file_range(req, nodeid_in, arg->off_in, -- &fi_in, arg->nodeid_out, -- arg->off_out, &fi_out, arg->len, -- arg->flags); -- else -- fuse_reply_err(req, ENOSYS); -+ if 
(req->se->op.copy_file_range) { -+ req->se->op.copy_file_range(req, nodeid_in, arg->off_in, &fi_in, -+ arg->nodeid_out, arg->off_out, &fi_out, -+ arg->len, arg->flags); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_lseek_in *arg = (struct fuse_lseek_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_lseek_in *arg = (struct fuse_lseek_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.lseek) -- req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.lseek) { -+ req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_init_in *arg = (struct fuse_init_in *) inarg; -- struct fuse_init_out outarg; -- struct fuse_session *se = req->se; -- size_t bufsize = se->bufsize; -- size_t outargsize = sizeof(outarg); -- -- (void) nodeid; -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -- if (arg->major == 7 && arg->minor >= 6) { -- fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -- fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", -- arg->max_readahead); -- } -- } -- se->conn.proto_major = arg->major; -- se->conn.proto_minor = arg->minor; -- se->conn.capable = 0; -- se->conn.want = 0; -- -- memset(&outarg, 0, sizeof(outarg)); -- outarg.major = FUSE_KERNEL_VERSION; -- outarg.minor = FUSE_KERNEL_MINOR_VERSION; -- -- if (arg->major < 7) { -- fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", -- arg->major, arg->minor); -- fuse_reply_err(req, EPROTO); -- return; -- } -- -- if (arg->major > 7) { -- /* Wait for a second INIT request with a 7.X version */ -- send_reply_ok(req, 
&outarg, sizeof(outarg)); -- return; -- } -- -- if (arg->minor >= 6) { -- if (arg->max_readahead < se->conn.max_readahead) -- se->conn.max_readahead = arg->max_readahead; -- if (arg->flags & FUSE_ASYNC_READ) -- se->conn.capable |= FUSE_CAP_ASYNC_READ; -- if (arg->flags & FUSE_POSIX_LOCKS) -- se->conn.capable |= FUSE_CAP_POSIX_LOCKS; -- if (arg->flags & FUSE_ATOMIC_O_TRUNC) -- se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -- if (arg->flags & FUSE_EXPORT_SUPPORT) -- se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -- if (arg->flags & FUSE_DONT_MASK) -- se->conn.capable |= FUSE_CAP_DONT_MASK; -- if (arg->flags & FUSE_FLOCK_LOCKS) -- se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -- if (arg->flags & FUSE_AUTO_INVAL_DATA) -- se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -- if (arg->flags & FUSE_DO_READDIRPLUS) -- se->conn.capable |= FUSE_CAP_READDIRPLUS; -- if (arg->flags & FUSE_READDIRPLUS_AUTO) -- se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -- if (arg->flags & FUSE_ASYNC_DIO) -- se->conn.capable |= FUSE_CAP_ASYNC_DIO; -- if (arg->flags & FUSE_WRITEBACK_CACHE) -- se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -- if (arg->flags & FUSE_NO_OPEN_SUPPORT) -- se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -- if (arg->flags & FUSE_PARALLEL_DIROPS) -- se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -- if (arg->flags & FUSE_POSIX_ACL) -- se->conn.capable |= FUSE_CAP_POSIX_ACL; -- if (arg->flags & FUSE_HANDLE_KILLPRIV) -- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -- if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) -- se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -- if (!(arg->flags & FUSE_MAX_PAGES)) { -- size_t max_bufsize = -- FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() -- + FUSE_BUFFER_HEADER_SIZE; -- if (bufsize > max_bufsize) { -- bufsize = max_bufsize; -- } -- } -- } else { -- se->conn.max_readahead = 0; -- } -- -- if (se->conn.proto_minor >= 14) { -+ struct fuse_init_in *arg = (struct fuse_init_in *)inarg; -+ struct fuse_init_out outarg; -+ struct fuse_session *se = req->se; -+ 
size_t bufsize = se->bufsize; -+ size_t outargsize = sizeof(outarg); -+ -+ (void)nodeid; -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -+ if (arg->major == 7 && arg->minor >= 6) { -+ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -+ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", -+ arg->max_readahead); -+ } -+ } -+ se->conn.proto_major = arg->major; -+ se->conn.proto_minor = arg->minor; -+ se->conn.capable = 0; -+ se->conn.want = 0; -+ -+ memset(&outarg, 0, sizeof(outarg)); -+ outarg.major = FUSE_KERNEL_VERSION; -+ outarg.minor = FUSE_KERNEL_MINOR_VERSION; -+ -+ if (arg->major < 7) { -+ fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", -+ arg->major, arg->minor); -+ fuse_reply_err(req, EPROTO); -+ return; -+ } -+ -+ if (arg->major > 7) { -+ /* Wait for a second INIT request with a 7.X version */ -+ send_reply_ok(req, &outarg, sizeof(outarg)); -+ return; -+ } -+ -+ if (arg->minor >= 6) { -+ if (arg->max_readahead < se->conn.max_readahead) { -+ se->conn.max_readahead = arg->max_readahead; -+ } -+ if (arg->flags & FUSE_ASYNC_READ) { -+ se->conn.capable |= FUSE_CAP_ASYNC_READ; -+ } -+ if (arg->flags & FUSE_POSIX_LOCKS) { -+ se->conn.capable |= FUSE_CAP_POSIX_LOCKS; -+ } -+ if (arg->flags & FUSE_ATOMIC_O_TRUNC) { -+ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -+ } -+ if (arg->flags & FUSE_EXPORT_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -+ } -+ if (arg->flags & FUSE_DONT_MASK) { -+ se->conn.capable |= FUSE_CAP_DONT_MASK; -+ } -+ if (arg->flags & FUSE_FLOCK_LOCKS) { -+ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -+ } -+ if (arg->flags & FUSE_AUTO_INVAL_DATA) { -+ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -+ } -+ if (arg->flags & FUSE_DO_READDIRPLUS) { -+ se->conn.capable |= FUSE_CAP_READDIRPLUS; -+ } -+ if (arg->flags & FUSE_READDIRPLUS_AUTO) { -+ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -+ } -+ if (arg->flags & FUSE_ASYNC_DIO) { -+ se->conn.capable |= 
FUSE_CAP_ASYNC_DIO; -+ } -+ if (arg->flags & FUSE_WRITEBACK_CACHE) { -+ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -+ } -+ if (arg->flags & FUSE_NO_OPEN_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -+ } -+ if (arg->flags & FUSE_PARALLEL_DIROPS) { -+ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -+ } -+ if (arg->flags & FUSE_POSIX_ACL) { -+ se->conn.capable |= FUSE_CAP_POSIX_ACL; -+ } -+ if (arg->flags & FUSE_HANDLE_KILLPRIV) { -+ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -+ } -+ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -+ } -+ if (!(arg->flags & FUSE_MAX_PAGES)) { -+ size_t max_bufsize = -+ FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + -+ FUSE_BUFFER_HEADER_SIZE; -+ if (bufsize > max_bufsize) { -+ bufsize = max_bufsize; -+ } -+ } -+ } else { -+ se->conn.max_readahead = 0; -+ } -+ -+ if (se->conn.proto_minor >= 14) { - #ifdef HAVE_SPLICE - #ifdef HAVE_VMSPLICE -- se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; -+ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; - #endif -- se->conn.capable |= FUSE_CAP_SPLICE_READ; -+ se->conn.capable |= FUSE_CAP_SPLICE_READ; - #endif -- } -- if (se->conn.proto_minor >= 18) -- se->conn.capable |= FUSE_CAP_IOCTL_DIR; -- -- /* Default settings for modern filesystems. -- * -- * Most of these capabilities were disabled by default in -- * libfuse2 for backwards compatibility reasons. In libfuse3, -- * we can finally enable them by default (as long as they're -- * supported by the kernel). 
-- */ --#define LL_SET_DEFAULT(cond, cap) \ -- if ((cond) && (se->conn.capable & (cap))) \ -- se->conn.want |= (cap) -- LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); -- LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); -- LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); -- LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); -- LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); -- LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); -- LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); -- LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); -- LL_SET_DEFAULT(se->op.getlk && se->op.setlk, -- FUSE_CAP_POSIX_LOCKS); -- LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); -- LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); -- LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, -- FUSE_CAP_READDIRPLUS_AUTO); -- se->conn.time_gran = 1; -- -- if (bufsize < FUSE_MIN_READ_BUFFER) { -- fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", -- bufsize); -- bufsize = FUSE_MIN_READ_BUFFER; -- } -- se->bufsize = bufsize; -- -- if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) -- se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; -- -- se->got_init = 1; -- if (se->op.init) -- se->op.init(se->userdata, &se->conn); -- -- if (se->conn.want & (~se->conn.capable)) { -- fuse_log(FUSE_LOG_ERR, "fuse: error: filesystem requested capabilities " -- "0x%x that are not supported by kernel, aborting.\n", -- se->conn.want & (~se->conn.capable)); -- fuse_reply_err(req, EPROTO); -- se->error = -EPROTO; -- fuse_session_exit(se); -- return; -- } -- -- if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { -- se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; -- } -- if (arg->flags & FUSE_MAX_PAGES) { -- outarg.flags |= FUSE_MAX_PAGES; -- outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; -- } -- -- /* Always enable big writes, this is superseded -- by the max_write option */ -- outarg.flags |= FUSE_BIG_WRITES; -- -- if (se->conn.want & FUSE_CAP_ASYNC_READ) -- outarg.flags |= 
FUSE_ASYNC_READ; -- if (se->conn.want & FUSE_CAP_POSIX_LOCKS) -- outarg.flags |= FUSE_POSIX_LOCKS; -- if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) -- outarg.flags |= FUSE_ATOMIC_O_TRUNC; -- if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) -- outarg.flags |= FUSE_EXPORT_SUPPORT; -- if (se->conn.want & FUSE_CAP_DONT_MASK) -- outarg.flags |= FUSE_DONT_MASK; -- if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) -- outarg.flags |= FUSE_FLOCK_LOCKS; -- if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) -- outarg.flags |= FUSE_AUTO_INVAL_DATA; -- if (se->conn.want & FUSE_CAP_READDIRPLUS) -- outarg.flags |= FUSE_DO_READDIRPLUS; -- if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) -- outarg.flags |= FUSE_READDIRPLUS_AUTO; -- if (se->conn.want & FUSE_CAP_ASYNC_DIO) -- outarg.flags |= FUSE_ASYNC_DIO; -- if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) -- outarg.flags |= FUSE_WRITEBACK_CACHE; -- if (se->conn.want & FUSE_CAP_POSIX_ACL) -- outarg.flags |= FUSE_POSIX_ACL; -- outarg.max_readahead = se->conn.max_readahead; -- outarg.max_write = se->conn.max_write; -- if (se->conn.proto_minor >= 13) { -- if (se->conn.max_background >= (1 << 16)) -- se->conn.max_background = (1 << 16) - 1; -- if (se->conn.congestion_threshold > se->conn.max_background) -- se->conn.congestion_threshold = se->conn.max_background; -- if (!se->conn.congestion_threshold) { -- se->conn.congestion_threshold = -- se->conn.max_background * 3 / 4; -- } -- -- outarg.max_background = se->conn.max_background; -- outarg.congestion_threshold = se->conn.congestion_threshold; -- } -- if (se->conn.proto_minor >= 23) -- outarg.time_gran = se->conn.time_gran; -- -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); -- fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -- fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", -- outarg.max_readahead); -- fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -- fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", -- outarg.max_background); 
-- fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -- outarg.congestion_threshold); -- fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", -- outarg.time_gran); -- } -- if (arg->minor < 5) -- outargsize = FUSE_COMPAT_INIT_OUT_SIZE; -- else if (arg->minor < 23) -- outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; -- -- send_reply_ok(req, &outarg, outargsize); -+ } -+ if (se->conn.proto_minor >= 18) { -+ se->conn.capable |= FUSE_CAP_IOCTL_DIR; -+ } -+ -+ /* -+ * Default settings for modern filesystems. -+ * -+ * Most of these capabilities were disabled by default in -+ * libfuse2 for backwards compatibility reasons. In libfuse3, -+ * we can finally enable them by default (as long as they're -+ * supported by the kernel). -+ */ -+#define LL_SET_DEFAULT(cond, cap) \ -+ if ((cond) && (se->conn.capable & (cap))) \ -+ se->conn.want |= (cap) -+ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); -+ LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); -+ LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); -+ LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); -+ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); -+ LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); -+ LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); -+ LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); -+ LL_SET_DEFAULT(se->op.getlk && se->op.setlk, FUSE_CAP_POSIX_LOCKS); -+ LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); -+ LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); -+ LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, -+ FUSE_CAP_READDIRPLUS_AUTO); -+ se->conn.time_gran = 1; -+ -+ if (bufsize < FUSE_MIN_READ_BUFFER) { -+ fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", -+ bufsize); -+ bufsize = FUSE_MIN_READ_BUFFER; -+ } -+ se->bufsize = bufsize; -+ -+ if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) { -+ se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; -+ } -+ -+ se->got_init = 1; -+ if (se->op.init) { -+ se->op.init(se->userdata, &se->conn); -+ } -+ -+ if (se->conn.want & (~se->conn.capable)) { -+ 
fuse_log(FUSE_LOG_ERR, -+ "fuse: error: filesystem requested capabilities " -+ "0x%x that are not supported by kernel, aborting.\n", -+ se->conn.want & (~se->conn.capable)); -+ fuse_reply_err(req, EPROTO); -+ se->error = -EPROTO; -+ fuse_session_exit(se); -+ return; -+ } -+ -+ if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { -+ se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; -+ } -+ if (arg->flags & FUSE_MAX_PAGES) { -+ outarg.flags |= FUSE_MAX_PAGES; -+ outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; -+ } -+ -+ /* -+ * Always enable big writes, this is superseded -+ * by the max_write option -+ */ -+ outarg.flags |= FUSE_BIG_WRITES; -+ -+ if (se->conn.want & FUSE_CAP_ASYNC_READ) { -+ outarg.flags |= FUSE_ASYNC_READ; -+ } -+ if (se->conn.want & FUSE_CAP_POSIX_LOCKS) { -+ outarg.flags |= FUSE_POSIX_LOCKS; -+ } -+ if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) { -+ outarg.flags |= FUSE_ATOMIC_O_TRUNC; -+ } -+ if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) { -+ outarg.flags |= FUSE_EXPORT_SUPPORT; -+ } -+ if (se->conn.want & FUSE_CAP_DONT_MASK) { -+ outarg.flags |= FUSE_DONT_MASK; -+ } -+ if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) { -+ outarg.flags |= FUSE_FLOCK_LOCKS; -+ } -+ if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) { -+ outarg.flags |= FUSE_AUTO_INVAL_DATA; -+ } -+ if (se->conn.want & FUSE_CAP_READDIRPLUS) { -+ outarg.flags |= FUSE_DO_READDIRPLUS; -+ } -+ if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) { -+ outarg.flags |= FUSE_READDIRPLUS_AUTO; -+ } -+ if (se->conn.want & FUSE_CAP_ASYNC_DIO) { -+ outarg.flags |= FUSE_ASYNC_DIO; -+ } -+ if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) { -+ outarg.flags |= FUSE_WRITEBACK_CACHE; -+ } -+ if (se->conn.want & FUSE_CAP_POSIX_ACL) { -+ outarg.flags |= FUSE_POSIX_ACL; -+ } -+ outarg.max_readahead = se->conn.max_readahead; -+ outarg.max_write = se->conn.max_write; -+ if (se->conn.proto_minor >= 13) { -+ if (se->conn.max_background >= (1 << 16)) { -+ se->conn.max_background = (1 << 16) - 
1; -+ } -+ if (se->conn.congestion_threshold > se->conn.max_background) { -+ se->conn.congestion_threshold = se->conn.max_background; -+ } -+ if (!se->conn.congestion_threshold) { -+ se->conn.congestion_threshold = se->conn.max_background * 3 / 4; -+ } -+ -+ outarg.max_background = se->conn.max_background; -+ outarg.congestion_threshold = se->conn.congestion_threshold; -+ } -+ if (se->conn.proto_minor >= 23) { -+ outarg.time_gran = se->conn.time_gran; -+ } -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, -+ outarg.minor); -+ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -+ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", -+ outarg.max_readahead); -+ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -+ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", -+ outarg.max_background); -+ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -+ outarg.congestion_threshold); -+ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); -+ } -+ if (arg->minor < 5) { -+ outargsize = FUSE_COMPAT_INIT_OUT_SIZE; -+ } else if (arg->minor < 23) { -+ outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; -+ } -+ -+ send_reply_ok(req, &outarg, outargsize); - } - - static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_session *se = req->se; -+ struct fuse_session *se = req->se; - -- (void) nodeid; -- (void) inarg; -+ (void)nodeid; -+ (void)inarg; - -- se->got_destroy = 1; -- if (se->op.destroy) -- se->op.destroy(se->userdata); -+ se->got_destroy = 1; -+ if (se->op.destroy) { -+ se->op.destroy(se->userdata); -+ } - -- send_reply_ok(req, NULL, 0); -+ send_reply_ok(req, NULL, 0); - } - - static void list_del_nreq(struct fuse_notify_req *nreq) - { -- struct fuse_notify_req *prev = nreq->prev; -- struct fuse_notify_req *next = nreq->next; -- prev->next = next; -- next->prev = prev; -+ struct fuse_notify_req *prev = nreq->prev; -+ struct fuse_notify_req *next = nreq->next; -+ prev->next = 
next; -+ next->prev = prev; - } - - static void list_add_nreq(struct fuse_notify_req *nreq, -- struct fuse_notify_req *next) -+ struct fuse_notify_req *next) - { -- struct fuse_notify_req *prev = next->prev; -- nreq->next = next; -- nreq->prev = prev; -- prev->next = nreq; -- next->prev = nreq; -+ struct fuse_notify_req *prev = next->prev; -+ nreq->next = next; -+ nreq->prev = prev; -+ prev->next = nreq; -+ next->prev = nreq; - } - - static void list_init_nreq(struct fuse_notify_req *nreq) - { -- nreq->next = nreq; -- nreq->prev = nreq; -+ nreq->next = nreq; -+ nreq->prev = nreq; - } - - static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg, const struct fuse_buf *buf) -+ const void *inarg, const struct fuse_buf *buf) - { -- struct fuse_session *se = req->se; -- struct fuse_notify_req *nreq; -- struct fuse_notify_req *head; -+ struct fuse_session *se = req->se; -+ struct fuse_notify_req *nreq; -+ struct fuse_notify_req *head; - -- pthread_mutex_lock(&se->lock); -- head = &se->notify_list; -- for (nreq = head->next; nreq != head; nreq = nreq->next) { -- if (nreq->unique == req->unique) { -- list_del_nreq(nreq); -- break; -- } -- } -- pthread_mutex_unlock(&se->lock); -+ pthread_mutex_lock(&se->lock); -+ head = &se->notify_list; -+ for (nreq = head->next; nreq != head; nreq = nreq->next) { -+ if (nreq->unique == req->unique) { -+ list_del_nreq(nreq); -+ break; -+ } -+ } -+ pthread_mutex_unlock(&se->lock); - -- if (nreq != head) -- nreq->reply(nreq, req, nodeid, inarg, buf); -+ if (nreq != head) { -+ nreq->reply(nreq, req, nodeid, inarg, buf); -+ } - } - - static int send_notify_iov(struct fuse_session *se, int notify_code, -- struct iovec *iov, int count) -+ struct iovec *iov, int count) - { -- struct fuse_out_header out; -+ struct fuse_out_header out; - -- if (!se->got_init) -- return -ENOTCONN; -+ if (!se->got_init) { -+ return -ENOTCONN; -+ } - -- out.unique = 0; -- out.error = notify_code; -- iov[0].iov_base = &out; -- iov[0].iov_len 
= sizeof(struct fuse_out_header); -+ out.unique = 0; -+ out.error = notify_code; -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); - -- return fuse_send_msg(se, NULL, iov, count); -+ return fuse_send_msg(se, NULL, iov, count); - } - - int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) - { -- if (ph != NULL) { -- struct fuse_notify_poll_wakeup_out outarg; -- struct iovec iov[2]; -+ if (ph != NULL) { -+ struct fuse_notify_poll_wakeup_out outarg; -+ struct iovec iov[2]; - -- outarg.kh = ph->kh; -+ outarg.kh = ph->kh; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); - -- return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); -- } else { -- return 0; -- } -+ return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); -+ } else { -+ return 0; -+ } - } - - int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, -- off_t off, off_t len) -+ off_t off, off_t len) - { -- struct fuse_notify_inval_inode_out outarg; -- struct iovec iov[2]; -+ struct fuse_notify_inval_inode_out outarg; -+ struct iovec iov[2]; -+ -+ if (!se) { -+ return -EINVAL; -+ } - -- if (!se) -- return -EINVAL; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { -+ return -ENOSYS; -+ } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) -- return -ENOSYS; -- -- outarg.ino = ino; -- outarg.off = off; -- outarg.len = len; -+ outarg.ino = ino; -+ outarg.off = off; -+ outarg.len = len; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); - -- return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); -+ return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); - } - - int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, -- const char *name, size_t namelen) -+ const char *name, size_t namelen) - { -- struct fuse_notify_inval_entry_out 
outarg; -- struct iovec iov[3]; -+ struct fuse_notify_inval_entry_out outarg; -+ struct iovec iov[3]; -+ -+ if (!se) { -+ return -EINVAL; -+ } - -- if (!se) -- return -EINVAL; -- -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) -- return -ENOSYS; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { -+ return -ENOSYS; -+ } - -- outarg.parent = parent; -- outarg.namelen = namelen; -- outarg.padding = 0; -+ outarg.parent = parent; -+ outarg.namelen = namelen; -+ outarg.padding = 0; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -- iov[2].iov_base = (void *)name; -- iov[2].iov_len = namelen + 1; -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ iov[2].iov_base = (void *)name; -+ iov[2].iov_len = namelen + 1; - -- return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); -+ return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); - } - --int fuse_lowlevel_notify_delete(struct fuse_session *se, -- fuse_ino_t parent, fuse_ino_t child, -- const char *name, size_t namelen) -+int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, -+ fuse_ino_t child, const char *name, -+ size_t namelen) - { -- struct fuse_notify_delete_out outarg; -- struct iovec iov[3]; -+ struct fuse_notify_delete_out outarg; -+ struct iovec iov[3]; - -- if (!se) -- return -EINVAL; -+ if (!se) { -+ return -EINVAL; -+ } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) -- return -ENOSYS; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) { -+ return -ENOSYS; -+ } - -- outarg.parent = parent; -- outarg.child = child; -- outarg.namelen = namelen; -- outarg.padding = 0; -+ outarg.parent = parent; -+ outarg.child = child; -+ outarg.namelen = namelen; -+ outarg.padding = 0; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -- iov[2].iov_base = (void *)name; -- iov[2].iov_len = namelen + 1; -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ iov[2].iov_base = 
(void *)name; -+ iov[2].iov_len = namelen + 1; - -- return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); -+ return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); - } - - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags) -+ off_t offset, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags) - { -- struct fuse_out_header out; -- struct fuse_notify_store_out outarg; -- struct iovec iov[3]; -- size_t size = fuse_buf_size(bufv); -- int res; -+ struct fuse_out_header out; -+ struct fuse_notify_store_out outarg; -+ struct iovec iov[3]; -+ size_t size = fuse_buf_size(bufv); -+ int res; - -- if (!se) -- return -EINVAL; -+ if (!se) { -+ return -EINVAL; -+ } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) -- return -ENOSYS; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { -+ return -ENOSYS; -+ } - -- out.unique = 0; -- out.error = FUSE_NOTIFY_STORE; -+ out.unique = 0; -+ out.error = FUSE_NOTIFY_STORE; - -- outarg.nodeid = ino; -- outarg.offset = offset; -- outarg.size = size; -- outarg.padding = 0; -+ outarg.nodeid = ino; -+ outarg.offset = offset; -+ outarg.size = size; -+ outarg.padding = 0; - -- iov[0].iov_base = &out; -- iov[0].iov_len = sizeof(out); -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(out); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); - -- res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); -- if (res > 0) -- res = -res; -+ res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); -+ if (res > 0) { -+ res = -res; -+ } - -- return res; -+ return res; - } - - struct fuse_retrieve_req { -- struct fuse_notify_req nreq; -- void *cookie; -+ struct fuse_notify_req nreq; -+ void *cookie; - }; - --static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, -- fuse_req_t req, fuse_ino_t ino, -- const void *inarg, -- const struct 
fuse_buf *ibuf) --{ -- struct fuse_session *se = req->se; -- struct fuse_retrieve_req *rreq = -- container_of(nreq, struct fuse_retrieve_req, nreq); -- const struct fuse_notify_retrieve_in *arg = inarg; -- struct fuse_bufvec bufv = { -- .buf[0] = *ibuf, -- .count = 1, -- }; -- -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) -- bufv.buf[0].mem = PARAM(arg); -- -- bufv.buf[0].size -= sizeof(struct fuse_in_header) + -- sizeof(struct fuse_notify_retrieve_in); -- -- if (bufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); -- fuse_reply_none(req); -- goto out; -- } -- bufv.buf[0].size = arg->size; -- -- if (se->op.retrieve_reply) { -- se->op.retrieve_reply(req, rreq->cookie, ino, -- arg->offset, &bufv); -- } else { -- fuse_reply_none(req); -- } -+static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, fuse_req_t req, -+ fuse_ino_t ino, const void *inarg, -+ const struct fuse_buf *ibuf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_retrieve_req *rreq = -+ container_of(nreq, struct fuse_retrieve_req, nreq); -+ const struct fuse_notify_retrieve_in *arg = inarg; -+ struct fuse_bufvec bufv = { -+ .buf[0] = *ibuf, -+ .count = 1, -+ }; -+ -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -+ bufv.buf[0].mem = PARAM(arg); -+ } -+ -+ bufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + sizeof(struct fuse_notify_retrieve_in); -+ -+ if (bufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); -+ fuse_reply_none(req); -+ goto out; -+ } -+ bufv.buf[0].size = arg->size; -+ -+ if (se->op.retrieve_reply) { -+ se->op.retrieve_reply(req, rreq->cookie, ino, arg->offset, &bufv); -+ } else { -+ fuse_reply_none(req); -+ } - out: -- free(rreq); -+ free(rreq); - } - - int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -- size_t size, off_t offset, void *cookie) -+ size_t size, off_t offset, void *cookie) - { -- struct fuse_notify_retrieve_out outarg; 
-- struct iovec iov[2]; -- struct fuse_retrieve_req *rreq; -- int err; -+ struct fuse_notify_retrieve_out outarg; -+ struct iovec iov[2]; -+ struct fuse_retrieve_req *rreq; -+ int err; - -- if (!se) -- return -EINVAL; -+ if (!se) { -+ return -EINVAL; -+ } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) -- return -ENOSYS; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { -+ return -ENOSYS; -+ } - -- rreq = malloc(sizeof(*rreq)); -- if (rreq == NULL) -- return -ENOMEM; -+ rreq = malloc(sizeof(*rreq)); -+ if (rreq == NULL) { -+ return -ENOMEM; -+ } - -- pthread_mutex_lock(&se->lock); -- rreq->cookie = cookie; -- rreq->nreq.unique = se->notify_ctr++; -- rreq->nreq.reply = fuse_ll_retrieve_reply; -- list_add_nreq(&rreq->nreq, &se->notify_list); -- pthread_mutex_unlock(&se->lock); -+ pthread_mutex_lock(&se->lock); -+ rreq->cookie = cookie; -+ rreq->nreq.unique = se->notify_ctr++; -+ rreq->nreq.reply = fuse_ll_retrieve_reply; -+ list_add_nreq(&rreq->nreq, &se->notify_list); -+ pthread_mutex_unlock(&se->lock); - -- outarg.notify_unique = rreq->nreq.unique; -- outarg.nodeid = ino; -- outarg.offset = offset; -- outarg.size = size; -- outarg.padding = 0; -+ outarg.notify_unique = rreq->nreq.unique; -+ outarg.nodeid = ino; -+ outarg.offset = offset; -+ outarg.size = size; -+ outarg.padding = 0; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); - -- err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); -- if (err) { -- pthread_mutex_lock(&se->lock); -- list_del_nreq(&rreq->nreq); -- pthread_mutex_unlock(&se->lock); -- free(rreq); -- } -+ err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); -+ if (err) { -+ pthread_mutex_lock(&se->lock); -+ list_del_nreq(&rreq->nreq); -+ pthread_mutex_unlock(&se->lock); -+ free(rreq); -+ } - -- return err; -+ return err; - } - - void *fuse_req_userdata(fuse_req_t req) - { -- return req->se->userdata; -+ return 
req->se->userdata; - } - - const struct fuse_ctx *fuse_req_ctx(fuse_req_t req) - { -- return &req->ctx; -+ return &req->ctx; - } - - void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, -- void *data) -+ void *data) - { -- pthread_mutex_lock(&req->lock); -- pthread_mutex_lock(&req->se->lock); -- req->u.ni.func = func; -- req->u.ni.data = data; -- pthread_mutex_unlock(&req->se->lock); -- if (req->interrupted && func) -- func(req, data); -- pthread_mutex_unlock(&req->lock); -+ pthread_mutex_lock(&req->lock); -+ pthread_mutex_lock(&req->se->lock); -+ req->u.ni.func = func; -+ req->u.ni.data = data; -+ pthread_mutex_unlock(&req->se->lock); -+ if (req->interrupted && func) { -+ func(req, data); -+ } -+ pthread_mutex_unlock(&req->lock); - } - - int fuse_req_interrupted(fuse_req_t req) - { -- int interrupted; -+ int interrupted; - -- pthread_mutex_lock(&req->se->lock); -- interrupted = req->interrupted; -- pthread_mutex_unlock(&req->se->lock); -+ pthread_mutex_lock(&req->se->lock); -+ interrupted = req->interrupted; -+ pthread_mutex_unlock(&req->se->lock); - -- return interrupted; -+ return interrupted; - } - - static struct { -- void (*func)(fuse_req_t, fuse_ino_t, const void *); -- const char *name; -+ void (*func)(fuse_req_t, fuse_ino_t, const void *); -+ const char *name; - } fuse_ll_ops[] = { -- [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, -- [FUSE_FORGET] = { do_forget, "FORGET" }, -- [FUSE_GETATTR] = { do_getattr, "GETATTR" }, -- [FUSE_SETATTR] = { do_setattr, "SETATTR" }, -- [FUSE_READLINK] = { do_readlink, "READLINK" }, -- [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, -- [FUSE_MKNOD] = { do_mknod, "MKNOD" }, -- [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, -- [FUSE_UNLINK] = { do_unlink, "UNLINK" }, -- [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, -- [FUSE_RENAME] = { do_rename, "RENAME" }, -- [FUSE_LINK] = { do_link, "LINK" }, -- [FUSE_OPEN] = { do_open, "OPEN" }, -- [FUSE_READ] = { do_read, "READ" }, -- [FUSE_WRITE] = { do_write, "WRITE" }, -- [FUSE_STATFS] = { 
do_statfs, "STATFS" }, -- [FUSE_RELEASE] = { do_release, "RELEASE" }, -- [FUSE_FSYNC] = { do_fsync, "FSYNC" }, -- [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, -- [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, -- [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, -- [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, -- [FUSE_FLUSH] = { do_flush, "FLUSH" }, -- [FUSE_INIT] = { do_init, "INIT" }, -- [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, -- [FUSE_READDIR] = { do_readdir, "READDIR" }, -- [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, -- [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, -- [FUSE_GETLK] = { do_getlk, "GETLK" }, -- [FUSE_SETLK] = { do_setlk, "SETLK" }, -- [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, -- [FUSE_ACCESS] = { do_access, "ACCESS" }, -- [FUSE_CREATE] = { do_create, "CREATE" }, -- [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, -- [FUSE_BMAP] = { do_bmap, "BMAP" }, -- [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, -- [FUSE_POLL] = { do_poll, "POLL" }, -- [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, -- [FUSE_DESTROY] = { do_destroy, "DESTROY" }, -- [FUSE_NOTIFY_REPLY] = { (void *) 1, "NOTIFY_REPLY" }, -- [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, -- [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS"}, -- [FUSE_RENAME2] = { do_rename2, "RENAME2" }, -- [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, -- [FUSE_LSEEK] = { do_lseek, "LSEEK" }, -+ [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, -+ [FUSE_FORGET] = { do_forget, "FORGET" }, -+ [FUSE_GETATTR] = { do_getattr, "GETATTR" }, -+ [FUSE_SETATTR] = { do_setattr, "SETATTR" }, -+ [FUSE_READLINK] = { do_readlink, "READLINK" }, -+ [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, -+ [FUSE_MKNOD] = { do_mknod, "MKNOD" }, -+ [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, -+ [FUSE_UNLINK] = { do_unlink, "UNLINK" }, -+ [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, -+ [FUSE_RENAME] = { do_rename, "RENAME" }, -+ [FUSE_LINK] = { do_link, "LINK" }, -+ [FUSE_OPEN] = { do_open, "OPEN" }, -+ 
[FUSE_READ] = { do_read, "READ" }, -+ [FUSE_WRITE] = { do_write, "WRITE" }, -+ [FUSE_STATFS] = { do_statfs, "STATFS" }, -+ [FUSE_RELEASE] = { do_release, "RELEASE" }, -+ [FUSE_FSYNC] = { do_fsync, "FSYNC" }, -+ [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, -+ [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, -+ [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, -+ [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, -+ [FUSE_FLUSH] = { do_flush, "FLUSH" }, -+ [FUSE_INIT] = { do_init, "INIT" }, -+ [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, -+ [FUSE_READDIR] = { do_readdir, "READDIR" }, -+ [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, -+ [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, -+ [FUSE_GETLK] = { do_getlk, "GETLK" }, -+ [FUSE_SETLK] = { do_setlk, "SETLK" }, -+ [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, -+ [FUSE_ACCESS] = { do_access, "ACCESS" }, -+ [FUSE_CREATE] = { do_create, "CREATE" }, -+ [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, -+ [FUSE_BMAP] = { do_bmap, "BMAP" }, -+ [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, -+ [FUSE_POLL] = { do_poll, "POLL" }, -+ [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, -+ [FUSE_DESTROY] = { do_destroy, "DESTROY" }, -+ [FUSE_NOTIFY_REPLY] = { (void *)1, "NOTIFY_REPLY" }, -+ [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, -+ [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" }, -+ [FUSE_RENAME2] = { do_rename2, "RENAME2" }, -+ [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, -+ [FUSE_LSEEK] = { do_lseek, "LSEEK" }, - }; - - #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) - - static const char *opname(enum fuse_opcode opcode) - { -- if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) -- return "???"; -- else -- return fuse_ll_ops[opcode].name; -+ if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) { -+ return "???"; -+ } else { -+ return fuse_ll_ops[opcode].name; -+ } - } - - void fuse_session_process_buf(struct fuse_session *se, -- const struct fuse_buf *buf) -+ 
const struct fuse_buf *buf) - { -- fuse_session_process_buf_int(se, buf, NULL); -+ fuse_session_process_buf_int(se, buf, NULL); - } - - void fuse_session_process_buf_int(struct fuse_session *se, -- const struct fuse_buf *buf, struct fuse_chan *ch) --{ -- struct fuse_in_header *in; -- const void *inarg; -- struct fuse_req *req; -- int err; -- -- in = buf->mem; -- -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, -- "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", -- (unsigned long long) in->unique, -- opname((enum fuse_opcode) in->opcode), in->opcode, -- (unsigned long long) in->nodeid, buf->size, in->pid); -- } -- -- req = fuse_ll_alloc_req(se); -- if (req == NULL) { -- struct fuse_out_header out = { -- .unique = in->unique, -- .error = -ENOMEM, -- }; -- struct iovec iov = { -- .iov_base = &out, -- .iov_len = sizeof(struct fuse_out_header), -- }; -- -- fuse_send_msg(se, ch, &iov, 1); -- return; -- } -- -- req->unique = in->unique; -- req->ctx.uid = in->uid; -- req->ctx.gid = in->gid; -- req->ctx.pid = in->pid; -- req->ch = ch; -- -- err = EIO; -- if (!se->got_init) { -- enum fuse_opcode expected; -- -- expected = se->cuse_data ? 
CUSE_INIT : FUSE_INIT; -- if (in->opcode != expected) -- goto reply_err; -- } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) -- goto reply_err; -- -- err = EACCES; -- /* Implement -o allow_root */ -- if (se->deny_others && in->uid != se->owner && in->uid != 0 && -- in->opcode != FUSE_INIT && in->opcode != FUSE_READ && -- in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && -- in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && -- in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && -- in->opcode != FUSE_NOTIFY_REPLY && -- in->opcode != FUSE_READDIRPLUS) -- goto reply_err; -- -- err = ENOSYS; -- if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) -- goto reply_err; -- if (in->opcode != FUSE_INTERRUPT) { -- struct fuse_req *intr; -- pthread_mutex_lock(&se->lock); -- intr = check_interrupt(se, req); -- list_add_req(req, &se->list); -- pthread_mutex_unlock(&se->lock); -- if (intr) -- fuse_reply_err(intr, EAGAIN); -- } -- -- inarg = (void *) &in[1]; -- if (in->opcode == FUSE_WRITE && se->op.write_buf) -- do_write_buf(req, in->nodeid, inarg, buf); -- else if (in->opcode == FUSE_NOTIFY_REPLY) -- do_notify_reply(req, in->nodeid, inarg, buf); -- else -- fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); -- -- return; -+ const struct fuse_buf *buf, -+ struct fuse_chan *ch) -+{ -+ struct fuse_in_header *in; -+ const void *inarg; -+ struct fuse_req *req; -+ int err; -+ -+ in = buf->mem; -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, " -+ "pid: %u\n", -+ (unsigned long long)in->unique, -+ opname((enum fuse_opcode)in->opcode), in->opcode, -+ (unsigned long long)in->nodeid, buf->size, in->pid); -+ } -+ -+ req = fuse_ll_alloc_req(se); -+ if (req == NULL) { -+ struct fuse_out_header out = { -+ .unique = in->unique, -+ .error = -ENOMEM, -+ }; -+ struct iovec iov = { -+ .iov_base = &out, -+ .iov_len = sizeof(struct fuse_out_header), -+ }; -+ -+ fuse_send_msg(se, ch, 
&iov, 1); -+ return; -+ } -+ -+ req->unique = in->unique; -+ req->ctx.uid = in->uid; -+ req->ctx.gid = in->gid; -+ req->ctx.pid = in->pid; -+ req->ch = ch; -+ -+ err = EIO; -+ if (!se->got_init) { -+ enum fuse_opcode expected; -+ -+ expected = se->cuse_data ? CUSE_INIT : FUSE_INIT; -+ if (in->opcode != expected) { -+ goto reply_err; -+ } -+ } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) { -+ goto reply_err; -+ } -+ -+ err = EACCES; -+ /* Implement -o allow_root */ -+ if (se->deny_others && in->uid != se->owner && in->uid != 0 && -+ in->opcode != FUSE_INIT && in->opcode != FUSE_READ && -+ in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && -+ in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && -+ in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && -+ in->opcode != FUSE_NOTIFY_REPLY && in->opcode != FUSE_READDIRPLUS) { -+ goto reply_err; -+ } -+ -+ err = ENOSYS; -+ if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) { -+ goto reply_err; -+ } -+ if (in->opcode != FUSE_INTERRUPT) { -+ struct fuse_req *intr; -+ pthread_mutex_lock(&se->lock); -+ intr = check_interrupt(se, req); -+ list_add_req(req, &se->list); -+ pthread_mutex_unlock(&se->lock); -+ if (intr) { -+ fuse_reply_err(intr, EAGAIN); -+ } -+ } -+ -+ inarg = (void *)&in[1]; -+ if (in->opcode == FUSE_WRITE && se->op.write_buf) { -+ do_write_buf(req, in->nodeid, inarg, buf); -+ } else if (in->opcode == FUSE_NOTIFY_REPLY) { -+ do_notify_reply(req, in->nodeid, inarg, buf); -+ } else { -+ fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); -+ } -+ -+ return; - - reply_err: -- fuse_reply_err(req, err); -+ fuse_reply_err(req, err); - } - --#define LL_OPTION(n,o,v) \ -- { n, offsetof(struct fuse_session, o), v } -+#define LL_OPTION(n, o, v) \ -+ { \ -+ n, offsetof(struct fuse_session, o), v \ -+ } - - static const struct fuse_opt fuse_ll_opts[] = { -- LL_OPTION("debug", debug, 1), -- LL_OPTION("-d", debug, 1), -- LL_OPTION("--debug", debug, 1), -- 
LL_OPTION("allow_root", deny_others, 1), -- FUSE_OPT_END -+ LL_OPTION("debug", debug, 1), LL_OPTION("-d", debug, 1), -+ LL_OPTION("--debug", debug, 1), LL_OPTION("allow_root", deny_others, 1), -+ FUSE_OPT_END - }; - - void fuse_lowlevel_version(void) - { -- printf("using FUSE kernel interface version %i.%i\n", -- FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); -+ printf("using FUSE kernel interface version %i.%i\n", FUSE_KERNEL_VERSION, -+ FUSE_KERNEL_MINOR_VERSION); - } - - void fuse_lowlevel_help(void) - { -- /* These are not all options, but the ones that are -- potentially of interest to an end-user */ -- printf( --" -o allow_root allow access by root\n" --); -+ /* -+ * These are not all options, but the ones that are -+ * potentially of interest to an end-user -+ */ -+ printf(" -o allow_root allow access by root\n"); - } - - void fuse_session_destroy(struct fuse_session *se) - { -- if (se->got_init && !se->got_destroy) { -- if (se->op.destroy) -- se->op.destroy(se->userdata); -- } -- pthread_mutex_destroy(&se->lock); -- free(se->cuse_data); -- if (se->fd != -1) -- close(se->fd); -- free(se); -+ if (se->got_init && !se->got_destroy) { -+ if (se->op.destroy) { -+ se->op.destroy(se->userdata); -+ } -+ } -+ pthread_mutex_destroy(&se->lock); -+ free(se->cuse_data); -+ if (se->fd != -1) { -+ close(se->fd); -+ } -+ free(se); - } - - - struct fuse_session *fuse_session_new(struct fuse_args *args, -- const struct fuse_lowlevel_ops *op, -- size_t op_size, void *userdata) --{ -- struct fuse_session *se; -- -- if (sizeof(struct fuse_lowlevel_ops) < op_size) { -- fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); -- op_size = sizeof(struct fuse_lowlevel_ops); -- } -- -- if (args->argc == 0) { -- fuse_log(FUSE_LOG_ERR, "fuse: empty argv passed to fuse_session_new().\n"); -- return NULL; -- } -- -- se = (struct fuse_session *) calloc(1, sizeof(struct fuse_session)); -- if (se == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: failed to 
allocate fuse object\n"); -- goto out1; -- } -- se->fd = -1; -- se->conn.max_write = UINT_MAX; -- se->conn.max_readahead = UINT_MAX; -- -- /* Parse options */ -- if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) -- goto out2; -- if(args->argc == 1 && -- args->argv[0][0] == '-') { -- fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " -- "will be ignored\n"); -- } else if (args->argc != 1) { -- int i; -- fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); -- for(i = 1; i < args->argc-1; i++) -- fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); -- fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); -- goto out4; -- } -- -- se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + -- FUSE_BUFFER_HEADER_SIZE; -- -- list_init_req(&se->list); -- list_init_req(&se->interrupts); -- list_init_nreq(&se->notify_list); -- se->notify_ctr = 1; -- fuse_mutex_init(&se->lock); -- -- memcpy(&se->op, op, op_size); -- se->owner = getuid(); -- se->userdata = userdata; -- -- return se; -+ const struct fuse_lowlevel_ops *op, -+ size_t op_size, void *userdata) -+{ -+ struct fuse_session *se; -+ -+ if (sizeof(struct fuse_lowlevel_ops) < op_size) { -+ fuse_log( -+ FUSE_LOG_ERR, -+ "fuse: warning: library too old, some operations may not work\n"); -+ op_size = sizeof(struct fuse_lowlevel_ops); -+ } -+ -+ if (args->argc == 0) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: empty argv passed to fuse_session_new().\n"); -+ return NULL; -+ } -+ -+ se = (struct fuse_session *)calloc(1, sizeof(struct fuse_session)); -+ if (se == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n"); -+ goto out1; -+ } -+ se->fd = -1; -+ se->conn.max_write = UINT_MAX; -+ se->conn.max_readahead = UINT_MAX; -+ -+ /* Parse options */ -+ if (fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) { -+ goto out2; -+ } -+ if (args->argc == 1 && args->argv[0][0] == '-') { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: warning: argv[0] looks like an option, but " -+ "will be ignored\n"); -+ } else if 
(args->argc != 1) { -+ int i; -+ fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); -+ for (i = 1; i < args->argc - 1; i++) { -+ fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); -+ } -+ fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); -+ goto out4; -+ } -+ -+ se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE; -+ -+ list_init_req(&se->list); -+ list_init_req(&se->interrupts); -+ list_init_nreq(&se->notify_list); -+ se->notify_ctr = 1; -+ fuse_mutex_init(&se->lock); -+ -+ memcpy(&se->op, op, op_size); -+ se->owner = getuid(); -+ se->userdata = userdata; -+ -+ return se; - - out4: -- fuse_opt_free_args(args); -+ fuse_opt_free_args(args); - out2: -- free(se); -+ free(se); - out1: -- return NULL; -+ return NULL; - } - - int fuse_session_mount(struct fuse_session *se, const char *mountpoint) - { -- int fd; -- -- /* -- * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos -- * would ensue. -- */ -- do { -- fd = open("/dev/null", O_RDWR); -- if (fd > 2) -- close(fd); -- } while (fd >= 0 && fd <= 2); -- -- /* -- * To allow FUSE daemons to run without privileges, the caller may open -- * /dev/fuse before launching the file system and pass on the file -- * descriptor by specifying /dev/fd/N as the mount point. Note that the -- * parent process takes care of performing the mount in this case. -- */ -- fd = fuse_mnt_parse_fuse_fd(mountpoint); -- if (fd != -1) { -- if (fcntl(fd, F_GETFD) == -1) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: Invalid file descriptor /dev/fd/%u\n", -- fd); -- return -1; -- } -- se->fd = fd; -- return 0; -- } -- -- /* Open channel */ -- fd = fuse_kern_mount(mountpoint, se->mo); -- if (fd == -1) -- return -1; -- se->fd = fd; -- -- /* Save mountpoint */ -- se->mountpoint = strdup(mountpoint); -- if (se->mountpoint == NULL) -- goto error_out; -- -- return 0; -+ int fd; -+ -+ /* -+ * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos -+ * would ensue. 
-+ */ -+ do { -+ fd = open("/dev/null", O_RDWR); -+ if (fd > 2) { -+ close(fd); -+ } -+ } while (fd >= 0 && fd <= 2); -+ -+ /* -+ * To allow FUSE daemons to run without privileges, the caller may open -+ * /dev/fuse before launching the file system and pass on the file -+ * descriptor by specifying /dev/fd/N as the mount point. Note that the -+ * parent process takes care of performing the mount in this case. -+ */ -+ fd = fuse_mnt_parse_fuse_fd(mountpoint); -+ if (fd != -1) { -+ if (fcntl(fd, F_GETFD) == -1) { -+ fuse_log(FUSE_LOG_ERR, "fuse: Invalid file descriptor /dev/fd/%u\n", -+ fd); -+ return -1; -+ } -+ se->fd = fd; -+ return 0; -+ } -+ -+ /* Open channel */ -+ fd = fuse_kern_mount(mountpoint, se->mo); -+ if (fd == -1) { -+ return -1; -+ } -+ se->fd = fd; -+ -+ /* Save mountpoint */ -+ se->mountpoint = strdup(mountpoint); -+ if (se->mountpoint == NULL) { -+ goto error_out; -+ } -+ -+ return 0; - - error_out: -- fuse_kern_unmount(mountpoint, fd); -- return -1; -+ fuse_kern_unmount(mountpoint, fd); -+ return -1; - } - - int fuse_session_fd(struct fuse_session *se) - { -- return se->fd; -+ return se->fd; - } - - void fuse_session_unmount(struct fuse_session *se) -@@ -2384,61 +2519,66 @@ void fuse_session_unmount(struct fuse_session *se) - #ifdef linux - int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) - { -- char *buf; -- size_t bufsize = 1024; -- char path[128]; -- int ret; -- int fd; -- unsigned long pid = req->ctx.pid; -- char *s; -+ char *buf; -+ size_t bufsize = 1024; -+ char path[128]; -+ int ret; -+ int fd; -+ unsigned long pid = req->ctx.pid; -+ char *s; - -- sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); -+ sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); - - retry: -- buf = malloc(bufsize); -- if (buf == NULL) -- return -ENOMEM; -- -- ret = -EIO; -- fd = open(path, O_RDONLY); -- if (fd == -1) -- goto out_free; -- -- ret = read(fd, buf, bufsize); -- close(fd); -- if (ret < 0) { -- ret = -EIO; -- goto out_free; -- } -- -- if 
((size_t)ret == bufsize) { -- free(buf); -- bufsize *= 4; -- goto retry; -- } -- -- ret = -EIO; -- s = strstr(buf, "\nGroups:"); -- if (s == NULL) -- goto out_free; -- -- s += 8; -- ret = 0; -- while (1) { -- char *end; -- unsigned long val = strtoul(s, &end, 0); -- if (end == s) -- break; -- -- s = end; -- if (ret < size) -- list[ret] = val; -- ret++; -- } -+ buf = malloc(bufsize); -+ if (buf == NULL) { -+ return -ENOMEM; -+ } -+ -+ ret = -EIO; -+ fd = open(path, O_RDONLY); -+ if (fd == -1) { -+ goto out_free; -+ } -+ -+ ret = read(fd, buf, bufsize); -+ close(fd); -+ if (ret < 0) { -+ ret = -EIO; -+ goto out_free; -+ } -+ -+ if ((size_t)ret == bufsize) { -+ free(buf); -+ bufsize *= 4; -+ goto retry; -+ } -+ -+ ret = -EIO; -+ s = strstr(buf, "\nGroups:"); -+ if (s == NULL) { -+ goto out_free; -+ } -+ -+ s += 8; -+ ret = 0; -+ while (1) { -+ char *end; -+ unsigned long val = strtoul(s, &end, 0); -+ if (end == s) { -+ break; -+ } -+ -+ s = end; -+ if (ret < size) { -+ list[ret] = val; -+ } -+ ret++; -+ } - - out_free: -- free(buf); -- return ret; -+ free(buf); -+ return ret; - } - #else /* linux */ - /* -@@ -2446,23 +2586,25 @@ out_free: - */ - int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) - { -- (void) req; (void) size; (void) list; -- return -ENOSYS; -+ (void)req; -+ (void)size; -+ (void)list; -+ return -ENOSYS; - } - #endif - - void fuse_session_exit(struct fuse_session *se) - { -- se->exited = 1; -+ se->exited = 1; - } - - void fuse_session_reset(struct fuse_session *se) - { -- se->exited = 0; -- se->error = 0; -+ se->exited = 0; -+ se->error = 0; - } - - int fuse_session_exited(struct fuse_session *se) - { -- return se->exited; -+ return se->exited; - } -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 6b1adfc..adb9054 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1,15 +1,16 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- 
This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. -+ */ - - #ifndef FUSE_LOWLEVEL_H_ - #define FUSE_LOWLEVEL_H_ - --/** @file -+/** -+ * @file - * - * Low level API - * -@@ -24,16 +25,16 @@ - - #include "fuse_common.h" - --#include - #include --#include - #include - #include -+#include - #include -+#include - --/* ----------------------------------------------------------- * -- * Miscellaneous definitions * -- * ----------------------------------------------------------- */ -+/* -+ * Miscellaneous definitions -+ */ - - /** The node ID of the root inode */ - #define FUSE_ROOT_ID 1 -@@ -53,47 +54,54 @@ struct fuse_session; - - /** Directory entry parameters supplied to fuse_reply_entry() */ - struct fuse_entry_param { -- /** Unique inode number -- * -- * In lookup, zero means negative entry (from version 2.5) -- * Returning ENOENT also means negative entry, but by setting zero -- * ino the kernel may cache negative entries for entry_timeout -- * seconds. -- */ -- fuse_ino_t ino; -- -- /** Generation number for this entry. -- * -- * If the file system will be exported over NFS, the -- * ino/generation pairs need to be unique over the file -- * system's lifetime (rather than just the mount time). So if -- * the file system reuses an inode after it has been deleted, -- * it must assign a new, previously unused generation number -- * to the inode at the same time. -- * -- */ -- uint64_t generation; -- -- /** Inode attributes. -- * -- * Even if attr_timeout == 0, attr must be correct. For example, -- * for open(), FUSE uses attr.st_size from lookup() to determine -- * how many bytes to request. If this value is not correct, -- * incorrect data will be returned. 
-- */ -- struct stat attr; -- -- /** Validity timeout (in seconds) for inode attributes. If -- attributes only change as a result of requests that come -- through the kernel, this should be set to a very large -- value. */ -- double attr_timeout; -- -- /** Validity timeout (in seconds) for the name. If directory -- entries are changed/deleted only as a result of requests -- that come through the kernel, this should be set to a very -- large value. */ -- double entry_timeout; -+ /** -+ * Unique inode number -+ * -+ * In lookup, zero means negative entry (from version 2.5) -+ * Returning ENOENT also means negative entry, but by setting zero -+ * ino the kernel may cache negative entries for entry_timeout -+ * seconds. -+ */ -+ fuse_ino_t ino; -+ -+ /** -+ * Generation number for this entry. -+ * -+ * If the file system will be exported over NFS, the -+ * ino/generation pairs need to be unique over the file -+ * system's lifetime (rather than just the mount time). So if -+ * the file system reuses an inode after it has been deleted, -+ * it must assign a new, previously unused generation number -+ * to the inode at the same time. -+ * -+ */ -+ uint64_t generation; -+ -+ /** -+ * Inode attributes. -+ * -+ * Even if attr_timeout == 0, attr must be correct. For example, -+ * for open(), FUSE uses attr.st_size from lookup() to determine -+ * how many bytes to request. If this value is not correct, -+ * incorrect data will be returned. -+ */ -+ struct stat attr; -+ -+ /** -+ * Validity timeout (in seconds) for inode attributes. If -+ * attributes only change as a result of requests that come -+ * through the kernel, this should be set to a very large -+ * value. -+ */ -+ double attr_timeout; -+ -+ /** -+ * Validity timeout (in seconds) for the name. If directory -+ * entries are changed/deleted only as a result of requests -+ * that come through the kernel, this should be set to a very -+ * large value. 
-+ */ -+ double entry_timeout; - }; - - /** -@@ -105,38 +113,38 @@ struct fuse_entry_param { - * there is no valid uid/pid/gid that could be reported. - */ - struct fuse_ctx { -- /** User ID of the calling process */ -- uid_t uid; -+ /** User ID of the calling process */ -+ uid_t uid; - -- /** Group ID of the calling process */ -- gid_t gid; -+ /** Group ID of the calling process */ -+ gid_t gid; - -- /** Thread ID of the calling process */ -- pid_t pid; -+ /** Thread ID of the calling process */ -+ pid_t pid; - -- /** Umask of the calling process */ -- mode_t umask; -+ /** Umask of the calling process */ -+ mode_t umask; - }; - - struct fuse_forget_data { -- fuse_ino_t ino; -- uint64_t nlookup; -+ fuse_ino_t ino; -+ uint64_t nlookup; - }; - - /* 'to_set' flags in setattr */ --#define FUSE_SET_ATTR_MODE (1 << 0) --#define FUSE_SET_ATTR_UID (1 << 1) --#define FUSE_SET_ATTR_GID (1 << 2) --#define FUSE_SET_ATTR_SIZE (1 << 3) --#define FUSE_SET_ATTR_ATIME (1 << 4) --#define FUSE_SET_ATTR_MTIME (1 << 5) --#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) --#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) --#define FUSE_SET_ATTR_CTIME (1 << 10) -- --/* ----------------------------------------------------------- * -- * Request methods and replies * -- * ----------------------------------------------------------- */ -+#define FUSE_SET_ATTR_MODE (1 << 0) -+#define FUSE_SET_ATTR_UID (1 << 1) -+#define FUSE_SET_ATTR_GID (1 << 2) -+#define FUSE_SET_ATTR_SIZE (1 << 3) -+#define FUSE_SET_ATTR_ATIME (1 << 4) -+#define FUSE_SET_ATTR_MTIME (1 << 5) -+#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) -+#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) -+#define FUSE_SET_ATTR_CTIME (1 << 10) -+ -+/* -+ * Request methods and replies -+ */ - - /** - * Low level filesystem operations -@@ -166,1075 +174,1069 @@ struct fuse_forget_data { - * this file will not be called. 
- */ - struct fuse_lowlevel_ops { -- /** -- * Initialize filesystem -- * -- * This function is called when libfuse establishes -- * communication with the FUSE kernel module. The file system -- * should use this module to inspect and/or modify the -- * connection parameters provided in the `conn` structure. -- * -- * Note that some parameters may be overwritten by options -- * passed to fuse_session_new() which take precedence over the -- * values set in this handler. -- * -- * There's no reply to this function -- * -- * @param userdata the user data passed to fuse_session_new() -- */ -- void (*init) (void *userdata, struct fuse_conn_info *conn); -- -- /** -- * Clean up filesystem. -- * -- * Called on filesystem exit. When this method is called, the -- * connection to the kernel may be gone already, so that eg. calls -- * to fuse_lowlevel_notify_* will fail. -- * -- * There's no reply to this function -- * -- * @param userdata the user data passed to fuse_session_new() -- */ -- void (*destroy) (void *userdata); -- -- /** -- * Look up a directory entry by name and get its attributes. -- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name the name to look up -- */ -- void (*lookup) (fuse_req_t req, fuse_ino_t parent, const char *name); -- -- /** -- * Forget about an inode -- * -- * This function is called when the kernel removes an inode -- * from its internal caches. -- * -- * The inode's lookup count increases by one for every call to -- * fuse_reply_entry and fuse_reply_create. The nlookup parameter -- * indicates by how much the lookup count should be decreased. -- * -- * Inodes with a non-zero lookup count may receive request from -- * the kernel even after calls to unlink, rmdir or (when -- * overwriting an existing file) rename. 
Filesystems must handle -- * such requests properly and it is recommended to defer removal -- * of the inode until the lookup count reaches zero. Calls to -- * unlink, rmdir or rename will be followed closely by forget -- * unless the file or directory is open, in which case the -- * kernel issues forget only after the release or releasedir -- * calls. -- * -- * Note that if a file system will be exported over NFS the -- * inodes lifetime must extend even beyond forget. See the -- * generation field in struct fuse_entry_param above. -- * -- * On unmount the lookup count for all inodes implicitly drops -- * to zero. It is not guaranteed that the file system will -- * receive corresponding forget messages for the affected -- * inodes. -- * -- * Valid replies: -- * fuse_reply_none -- * -- * @param req request handle -- * @param ino the inode number -- * @param nlookup the number of lookups to forget -- */ -- void (*forget) (fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); -- -- /** -- * Get file attributes. -- * -- * If writeback caching is enabled, the kernel may have a -- * better idea of a file's length than the FUSE file system -- * (eg if there has been a write that extended the file size, -- * but that has not yet been passed to the filesystem.n -- * -- * In this case, the st_size value provided by the file system -- * will be ignored. -- * -- * Valid replies: -- * fuse_reply_attr -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi for future use, currently always NULL -- */ -- void (*getattr) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Set file attributes -- * -- * In the 'attr' argument only members indicated by the 'to_set' -- * bitmask contain valid values. Other members contain undefined -- * values. -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits if the file -- * size or owner is being changed. 
-- * -- * If the setattr was invoked from the ftruncate() system call -- * under Linux kernel versions 2.6.15 or later, the fi->fh will -- * contain the value set by the open method or will be undefined -- * if the open method didn't set any value. Otherwise (not -- * ftruncate call, or kernel version earlier than 2.6.15) the fi -- * parameter will be NULL. -- * -- * Valid replies: -- * fuse_reply_attr -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param attr the attributes -- * @param to_set bit mask of attributes which should be set -- * @param fi file information, or NULL -- */ -- void (*setattr) (fuse_req_t req, fuse_ino_t ino, struct stat *attr, -- int to_set, struct fuse_file_info *fi); -- -- /** -- * Read symbolic link -- * -- * Valid replies: -- * fuse_reply_readlink -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- */ -- void (*readlink) (fuse_req_t req, fuse_ino_t ino); -- -- /** -- * Create file node -- * -- * Create a regular file, character device, block device, fifo or -- * socket node. 
-- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to create -- * @param mode file type and mode with which to create the new file -- * @param rdev the device number (only valid if created file is a device) -- */ -- void (*mknod) (fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode, dev_t rdev); -- -- /** -- * Create a directory -- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to create -- * @param mode with which to create the new file -- */ -- void (*mkdir) (fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode); -- -- /** -- * Remove a file -- * -- * If the file's inode's lookup count is non-zero, the file -- * system is expected to postpone any removal of the inode -- * until the lookup count reaches zero (see description of the -- * forget function). -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to remove -- */ -- void (*unlink) (fuse_req_t req, fuse_ino_t parent, const char *name); -- -- /** -- * Remove a directory -- * -- * If the directory's inode's lookup count is non-zero, the -- * file system is expected to postpone any removal of the -- * inode until the lookup count reaches zero (see description -- * of the forget function). 
-- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to remove -- */ -- void (*rmdir) (fuse_req_t req, fuse_ino_t parent, const char *name); -- -- /** -- * Create a symbolic link -- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param link the contents of the symbolic link -- * @param parent inode number of the parent directory -- * @param name to create -- */ -- void (*symlink) (fuse_req_t req, const char *link, fuse_ino_t parent, -- const char *name); -- -- /** Rename a file -- * -- * If the target exists it should be atomically replaced. If -- * the target's inode's lookup count is non-zero, the file -- * system is expected to postpone any removal of the inode -- * until the lookup count reaches zero (see description of the -- * forget function). -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EINVAL, i.e. all -- * future bmap requests will fail with EINVAL without being -- * send to the filesystem process. -- * -- * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -- * RENAME_NOREPLACE is specified, the filesystem must not -- * overwrite *newname* if it exists and return an error -- * instead. If `RENAME_EXCHANGE` is specified, the filesystem -- * must atomically exchange the two files, i.e. both must -- * exist and neither may be deleted. 
-- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the old parent directory -- * @param name old name -- * @param newparent inode number of the new parent directory -- * @param newname new name -- */ -- void (*rename) (fuse_req_t req, fuse_ino_t parent, const char *name, -- fuse_ino_t newparent, const char *newname, -- unsigned int flags); -- -- /** -- * Create a hard link -- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the old inode number -- * @param newparent inode number of the new parent directory -- * @param newname new name to create -- */ -- void (*link) (fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, -- const char *newname); -- -- /** -- * Open a file -- * -- * Open flags are available in fi->flags. The following rules -- * apply. -- * -- * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -- * filtered out / handled by the kernel. -- * -- * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used -- * by the filesystem to check if the operation is -- * permitted. If the ``-o default_permissions`` mount -- * option is given, this check is already done by the -- * kernel before calling open() and may thus be omitted by -- * the filesystem. -- * -- * - When writeback caching is enabled, the kernel may send -- * read requests even for files opened with O_WRONLY. The -- * filesystem should be prepared to handle this. -- * -- * - When writeback caching is disabled, the filesystem is -- * expected to properly handle the O_APPEND flag and ensure -- * that each write is appending to the end of the file. -- * -- * - When writeback caching is enabled, the kernel will -- * handle O_APPEND. However, unless all changes to the file -- * come through the kernel this will not work reliably. 
The -- * filesystem should thus either ignore the O_APPEND flag -- * (and let the kernel handle it), or return an error -- * (indicating that reliably O_APPEND is not available). -- * -- * Filesystem may store an arbitrary file handle (pointer, -- * index, etc) in fi->fh, and use this in other all other file -- * operations (read, write, flush, release, fsync). -- * -- * Filesystem may also implement stateless file I/O and not store -- * anything in fi->fh. -- * -- * There are also some flags (direct_io, keep_cache) which the -- * filesystem may set in fi, to change the way the file is opened. -- * See fuse_file_info structure in for more details. -- * -- * If this request is answered with an error code of ENOSYS -- * and FUSE_CAP_NO_OPEN_SUPPORT is set in -- * `fuse_conn_info.capable`, this is treated as success and -- * future calls to open and release will also succeed without being -- * sent to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_open -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- */ -- void (*open) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Read data -- * -- * Read should send exactly the number of bytes requested except -- * on EOF or error, otherwise the rest of the data will be -- * substituted with zeroes. An exception to this is when the file -- * has been opened in 'direct_io' mode, in which case the return -- * value of the read system call will reflect the return value of -- * this operation. -- * -- * fi->fh will contain the value set by the open method, or will -- * be undefined if the open method didn't set any value. 
-- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_iov -- * fuse_reply_data -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param size number of bytes to read -- * @param off offset to read from -- * @param fi file information -- */ -- void (*read) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -- struct fuse_file_info *fi); -- -- /** -- * Write data -- * -- * Write should return exactly the number of bytes requested -- * except on error. An exception to this is when the file has -- * been opened in 'direct_io' mode, in which case the return value -- * of the write system call will reflect the return value of this -- * operation. -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- * -- * fi->fh will contain the value set by the open method, or will -- * be undefined if the open method didn't set any value. -- * -- * Valid replies: -- * fuse_reply_write -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param buf data to write -- * @param size number of bytes to write -- * @param off offset to write to -- * @param fi file information -- */ -- void (*write) (fuse_req_t req, fuse_ino_t ino, const char *buf, -- size_t size, off_t off, struct fuse_file_info *fi); -- -- /** -- * Flush method -- * -- * This is called on each close() of the opened file. -- * -- * Since file descriptors can be duplicated (dup, dup2, fork), for -- * one open call there may be many flush calls. -- * -- * Filesystems shouldn't assume that flush will always be called -- * after some writes, or that if will be called at all. -- * -- * fi->fh will contain the value set by the open method, or will -- * be undefined if the open method didn't set any value. -- * -- * NOTE: the name of the method is misleading, since (unlike -- * fsync) the filesystem is not forced to flush pending writes. 
-- * One reason to flush data is if the filesystem wants to return -- * write errors during close. However, such use is non-portable -- * because POSIX does not require [close] to wait for delayed I/O to -- * complete. -- * -- * If the filesystem supports file locking operations (setlk, -- * getlk) it should remove all locks belonging to 'fi->owner'. -- * -- * If this request is answered with an error code of ENOSYS, -- * this is treated as success and future calls to flush() will -- * succeed automatically without being send to the filesystem -- * process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * -- * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -- */ -- void (*flush) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Release an open file -- * -- * Release is called when there are no more references to an open -- * file: all file descriptors are closed and all memory mappings -- * are unmapped. -- * -- * For every open call there will be exactly one release call (unless -- * the filesystem is force-unmounted). -- * -- * The filesystem may reply with an error, but error values are -- * not returned to close() or munmap() which triggered the -- * release. -- * -- * fi->fh will contain the value set by the open method, or will -- * be undefined if the open method didn't set any value. -- * fi->flags will contain the same flags as for open. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- */ -- void (*release) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Synchronize file contents -- * -- * If the datasync parameter is non-zero, then only the user data -- * should be flushed, not the meta data. 
-- * -- * If this request is answered with an error code of ENOSYS, -- * this is treated as success and future calls to fsync() will -- * succeed automatically without being send to the filesystem -- * process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param datasync flag indicating if only data should be flushed -- * @param fi file information -- */ -- void (*fsync) (fuse_req_t req, fuse_ino_t ino, int datasync, -- struct fuse_file_info *fi); -- -- /** -- * Open a directory -- * -- * Filesystem may store an arbitrary file handle (pointer, index, -- * etc) in fi->fh, and use this in other all other directory -- * stream operations (readdir, releasedir, fsyncdir). -- * -- * If this request is answered with an error code of ENOSYS and -- * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, -- * this is treated as success and future calls to opendir and -- * releasedir will also succeed without being sent to the filesystem -- * process. In addition, the kernel will cache readdir results -- * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. -- * -- * Valid replies: -- * fuse_reply_open -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- */ -- void (*opendir) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Read directory -- * -- * Send a buffer filled using fuse_add_direntry(), with size not -- * exceeding the requested size. Send an empty buffer on end of -- * stream. -- * -- * fi->fh will contain the value set by the opendir method, or -- * will be undefined if the opendir method didn't set any value. -- * -- * Returning a directory entry from readdir() does not affect -- * its lookup count. -- * -- * If off_t is non-zero, then it will correspond to one of the off_t -- * values that was previously returned by readdir() for the same -- * directory handle. 
In this case, readdir() should skip over entries -- * coming before the position defined by the off_t value. If entries -- * are added or removed while the directory handle is open, they filesystem -- * may still include the entries that have been removed, and may not -- * report the entries that have been created. However, addition or -- * removal of entries must never cause readdir() to skip over unrelated -- * entries or to report them more than once. This means -- * that off_t can not be a simple index that enumerates the entries -- * that have been returned but must contain sufficient information to -- * uniquely determine the next directory entry to return even when the -- * set of entries is changing. -- * -- * The function does not have to report the '.' and '..' -- * entries, but is allowed to do so. Note that, if readdir does -- * not return '.' or '..', they will not be implicitly returned, -- * and this behavior is observable by the caller. -- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_data -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param size maximum number of bytes to send -- * @param off offset to continue reading the directory stream -- * @param fi file information -- */ -- void (*readdir) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -- struct fuse_file_info *fi); -- -- /** -- * Release an open directory -- * -- * For every opendir call there will be exactly one releasedir -- * call (unless the filesystem is force-unmounted). -- * -- * fi->fh will contain the value set by the opendir method, or -- * will be undefined if the opendir method didn't set any value. 
-- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- */ -- void (*releasedir) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Synchronize directory contents -- * -- * If the datasync parameter is non-zero, then only the directory -- * contents should be flushed, not the meta data. -- * -- * fi->fh will contain the value set by the opendir method, or -- * will be undefined if the opendir method didn't set any value. -- * -- * If this request is answered with an error code of ENOSYS, -- * this is treated as success and future calls to fsyncdir() will -- * succeed automatically without being send to the filesystem -- * process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param datasync flag indicating if only data should be flushed -- * @param fi file information -- */ -- void (*fsyncdir) (fuse_req_t req, fuse_ino_t ino, int datasync, -- struct fuse_file_info *fi); -- -- /** -- * Get file system statistics -- * -- * Valid replies: -- * fuse_reply_statfs -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number, zero means "undefined" -- */ -- void (*statfs) (fuse_req_t req, fuse_ino_t ino); -- -- /** -- * Set an extended attribute -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future setxattr() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_err -- */ -- void (*setxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, -- const char *value, size_t size, int flags); -- -- /** -- * Get an extended attribute -- * -- * If size is zero, the size of the value should be sent with -- * fuse_reply_xattr. 
-- * -- * If the size is non-zero, and the value fits in the buffer, the -- * value should be sent with fuse_reply_buf. -- * -- * If the size is too small for the value, the ERANGE error should -- * be sent. -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future getxattr() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_data -- * fuse_reply_xattr -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param name of the extended attribute -- * @param size maximum size of the value to send -- */ -- void (*getxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, -- size_t size); -- -- /** -- * List extended attribute names -- * -- * If size is zero, the total size of the attribute list should be -- * sent with fuse_reply_xattr. -- * -- * If the size is non-zero, and the null character separated -- * attribute list fits in the buffer, the list should be sent with -- * fuse_reply_buf. -- * -- * If the size is too small for the list, the ERANGE error should -- * be sent. -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future listxattr() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_data -- * fuse_reply_xattr -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param size maximum size of the list to send -- */ -- void (*listxattr) (fuse_req_t req, fuse_ino_t ino, size_t size); -- -- /** -- * Remove an extended attribute -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all -- * future removexattr() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param name of the extended attribute -- */ -- void (*removexattr) (fuse_req_t req, fuse_ino_t ino, const char *name); -- -- /** -- * Check file access permissions -- * -- * This will be called for the access() and chdir() system -- * calls. If the 'default_permissions' mount option is given, -- * this method is not called. -- * -- * This method is not called under Linux kernel versions 2.4.x -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent success, i.e. this and all future access() -- * requests will succeed without being send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param mask requested access mode -- */ -- void (*access) (fuse_req_t req, fuse_ino_t ino, int mask); -- -- /** -- * Create and open a file -- * -- * If the file does not exist, first create it with the specified -- * mode, and then open it. -- * -- * See the description of the open handler for more -- * information. -- * -- * If this method is not implemented or under Linux kernel -- * versions earlier than 2.6.15, the mknod() and open() methods -- * will be called instead. -- * -- * If this request is answered with an error code of ENOSYS, the handler -- * is treated as not implemented (i.e., for this and future requests the -- * mknod() and open() handlers will be called instead). 
-- * -- * Valid replies: -- * fuse_reply_create -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to create -- * @param mode file type and mode with which to create the new file -- * @param fi file information -- */ -- void (*create) (fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode, struct fuse_file_info *fi); -- -- /** -- * Test for a POSIX file lock -- * -- * Valid replies: -- * fuse_reply_lock -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * @param lock the region/type to test -- */ -- void (*getlk) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi, struct flock *lock); -- -- /** -- * Acquire, modify or release a POSIX file lock -- * -- * For POSIX threads (NPTL) there's a 1-1 relation between pid and -- * owner, but otherwise this is not always the case. For checking -- * lock ownership, 'fi->owner' must be used. The l_pid field in -- * 'struct flock' should only be used to fill in this field in -- * getlk(). -- * -- * Note: if the locking methods are not implemented, the kernel -- * will still allow file locking to work locally. Hence these are -- * only interesting for network filesystems and similar. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * @param lock the region/type to set -- * @param sleep locking operation may sleep -- */ -- void (*setlk) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi, -- struct flock *lock, int sleep); -- -- /** -- * Map block index within file to block index within device -- * -- * Note: This makes sense only for block device backed filesystems -- * mounted with the 'blkdev' option -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure, i.e. 
all future bmap() requests will -- * fail with the same error code without being send to the filesystem -- * process. -- * -- * Valid replies: -- * fuse_reply_bmap -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param blocksize unit of block index -- * @param idx block index within file -- */ -- void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize, -- uint64_t idx); -- -- /** -- * Ioctl -- * -- * Note: For unrestricted ioctls (not allowed for FUSE -- * servers), data in and out areas can be discovered by giving -- * iovs and setting FUSE_IOCTL_RETRY in *flags*. For -- * restricted ioctls, kernel prepares in/out data area -- * according to the information encoded in cmd. -- * -- * Valid replies: -- * fuse_reply_ioctl_retry -- * fuse_reply_ioctl -- * fuse_reply_ioctl_iov -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param cmd ioctl command -- * @param arg ioctl argument -- * @param fi file information -- * @param flags for FUSE_IOCTL_* flags -- * @param in_buf data fetched from the caller -- * @param in_bufsz number of fetched bytes -- * @param out_bufsz maximum size of output data -- * -- * Note : the unsigned long request submitted by the application -- * is truncated to 32 bits. -- */ -- void (*ioctl) (fuse_req_t req, fuse_ino_t ino, unsigned int cmd, -- void *arg, struct fuse_file_info *fi, unsigned flags, -- const void *in_buf, size_t in_bufsz, size_t out_bufsz); -- -- /** -- * Poll for IO readiness -- * -- * Note: If ph is non-NULL, the client should notify -- * when IO readiness events occur by calling -- * fuse_lowlevel_notify_poll() with the specified ph. -- * -- * Regardless of the number of times poll with a non-NULL ph -- * is received, single notification is enough to clear all. -- * Notifying more times incurs overhead but doesn't harm -- * correctness. 
-- * -- * The callee is responsible for destroying ph with -- * fuse_pollhandle_destroy() when no longer in use. -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as success (with a kernel-defined default poll-mask) and -- * future calls to pull() will succeed the same way without being send -- * to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_poll -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * @param ph poll handle to be used for notification -- */ -- void (*poll) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -- struct fuse_pollhandle *ph); -- -- /** -- * Write data made available in a buffer -- * -- * This is a more generic version of the ->write() method. If -- * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the -- * kernel supports splicing from the fuse device, then the -- * data will be made available in pipe for supporting zero -- * copy data transfer. -- * -- * buf->count is guaranteed to be one (and thus buf->idx is -- * always zero). The write_buf handler must ensure that -- * bufv->off is correctly updated (reflecting the number of -- * bytes read from bufv->buf[0]). -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. 
-- * -- * Valid replies: -- * fuse_reply_write -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param bufv buffer containing the data -- * @param off offset to write to -- * @param fi file information -- */ -- void (*write_buf) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_bufvec *bufv, off_t off, -- struct fuse_file_info *fi); -- -- /** -- * Callback function for the retrieve request -- * -- * Valid replies: -- * fuse_reply_none -- * -- * @param req request handle -- * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() -- * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() -- * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() -- * @param bufv the buffer containing the returned data -- */ -- void (*retrieve_reply) (fuse_req_t req, void *cookie, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv); -- -- /** -- * Forget about multiple inodes -- * -- * See description of the forget function for more -- * information. -- * -- * Valid replies: -- * fuse_reply_none -- * -- * @param req request handle -- */ -- void (*forget_multi) (fuse_req_t req, size_t count, -- struct fuse_forget_data *forgets); -- -- /** -- * Acquire, modify or release a BSD file lock -- * -- * Note: if the locking methods are not implemented, the kernel -- * will still allow file locking to work locally. Hence these are -- * only interesting for network filesystems and similar. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * @param op the locking operation, see flock(2) -- */ -- void (*flock) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi, int op); -- -- /** -- * Allocate requested space. If this function returns success then -- * subsequent writes to the specified range shall not fail due to the lack -- * of free space on the file system storage media. 
-- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future fallocate() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param offset starting point for allocated region -- * @param length size of allocated region -- * @param mode determines the operation to be performed on the given range, -- * see fallocate(2) -- */ -- void (*fallocate) (fuse_req_t req, fuse_ino_t ino, int mode, -- off_t offset, off_t length, struct fuse_file_info *fi); -- -- /** -- * Read directory with attributes -- * -- * Send a buffer filled using fuse_add_direntry_plus(), with size not -- * exceeding the requested size. Send an empty buffer on end of -- * stream. -- * -- * fi->fh will contain the value set by the opendir method, or -- * will be undefined if the opendir method didn't set any value. -- * -- * In contrast to readdir() (which does not affect the lookup counts), -- * the lookup count of every entry returned by readdirplus(), except "." -- * and "..", is incremented by one. -- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_data -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param size maximum number of bytes to send -- * @param off offset to continue reading the directory stream -- * @param fi file information -- */ -- void (*readdirplus) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -- struct fuse_file_info *fi); -- -- /** -- * Copy a range of data from one file to another -- * -- * Performs an optimized copy between two file descriptors without the -- * additional cost of transferring data through the FUSE kernel module -- * to user space (glibc) and then back into the FUSE filesystem again. 
-- * -- * In case this method is not implemented, glibc falls back to reading -- * data from the source and writing to the destination. Effectively -- * doing an inefficient copy of the data. -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future copy_file_range() requests will fail with EOPNOTSUPP without -- * being send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_write -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino_in the inode number or the source file -- * @param off_in starting point from were the data should be read -- * @param fi_in file information of the source file -- * @param ino_out the inode number or the destination file -- * @param off_out starting point where the data should be written -- * @param fi_out file information of the destination file -- * @param len maximum size of the data to copy -- * @param flags passed along with the copy_file_range() syscall -- */ -- void (*copy_file_range) (fuse_req_t req, fuse_ino_t ino_in, -- off_t off_in, struct fuse_file_info *fi_in, -- fuse_ino_t ino_out, off_t off_out, -- struct fuse_file_info *fi_out, size_t len, -- int flags); -- -- /** -- * Find next data or hole after the specified offset -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure, i.e. all future lseek() requests will -- * fail with the same error code without being send to the filesystem -- * process. 
-- * -- * Valid replies: -- * fuse_reply_lseek -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param off offset to start search from -- * @param whence either SEEK_DATA or SEEK_HOLE -- * @param fi file information -- */ -- void (*lseek) (fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -- struct fuse_file_info *fi); -+ /** -+ * Initialize filesystem -+ * -+ * This function is called when libfuse establishes -+ * communication with the FUSE kernel module. The file system -+ * should use this module to inspect and/or modify the -+ * connection parameters provided in the `conn` structure. -+ * -+ * Note that some parameters may be overwritten by options -+ * passed to fuse_session_new() which take precedence over the -+ * values set in this handler. -+ * -+ * There's no reply to this function -+ * -+ * @param userdata the user data passed to fuse_session_new() -+ */ -+ void (*init)(void *userdata, struct fuse_conn_info *conn); -+ -+ /** -+ * Clean up filesystem. -+ * -+ * Called on filesystem exit. When this method is called, the -+ * connection to the kernel may be gone already, so that eg. calls -+ * to fuse_lowlevel_notify_* will fail. -+ * -+ * There's no reply to this function -+ * -+ * @param userdata the user data passed to fuse_session_new() -+ */ -+ void (*destroy)(void *userdata); -+ -+ /** -+ * Look up a directory entry by name and get its attributes. -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name the name to look up -+ */ -+ void (*lookup)(fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Forget about an inode -+ * -+ * This function is called when the kernel removes an inode -+ * from its internal caches. -+ * -+ * The inode's lookup count increases by one for every call to -+ * fuse_reply_entry and fuse_reply_create. 
The nlookup parameter -+ * indicates by how much the lookup count should be decreased. -+ * -+ * Inodes with a non-zero lookup count may receive request from -+ * the kernel even after calls to unlink, rmdir or (when -+ * overwriting an existing file) rename. Filesystems must handle -+ * such requests properly and it is recommended to defer removal -+ * of the inode until the lookup count reaches zero. Calls to -+ * unlink, rmdir or rename will be followed closely by forget -+ * unless the file or directory is open, in which case the -+ * kernel issues forget only after the release or releasedir -+ * calls. -+ * -+ * Note that if a file system will be exported over NFS the -+ * inodes lifetime must extend even beyond forget. See the -+ * generation field in struct fuse_entry_param above. -+ * -+ * On unmount the lookup count for all inodes implicitly drops -+ * to zero. It is not guaranteed that the file system will -+ * receive corresponding forget messages for the affected -+ * inodes. -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param nlookup the number of lookups to forget -+ */ -+ void (*forget)(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); -+ -+ /** -+ * Get file attributes. -+ * -+ * If writeback caching is enabled, the kernel may have a -+ * better idea of a file's length than the FUSE file system -+ * (eg if there has been a write that extended the file size, -+ * but that has not yet been passed to the filesystem.n -+ * -+ * In this case, the st_size value provided by the file system -+ * will be ignored. 
-+ * -+ * Valid replies: -+ * fuse_reply_attr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi for future use, currently always NULL -+ */ -+ void (*getattr)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Set file attributes -+ * -+ * In the 'attr' argument only members indicated by the 'to_set' -+ * bitmask contain valid values. Other members contain undefined -+ * values. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits if the file -+ * size or owner is being changed. -+ * -+ * If the setattr was invoked from the ftruncate() system call -+ * under Linux kernel versions 2.6.15 or later, the fi->fh will -+ * contain the value set by the open method or will be undefined -+ * if the open method didn't set any value. Otherwise (not -+ * ftruncate call, or kernel version earlier than 2.6.15) the fi -+ * parameter will be NULL. -+ * -+ * Valid replies: -+ * fuse_reply_attr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param attr the attributes -+ * @param to_set bit mask of attributes which should be set -+ * @param fi file information, or NULL -+ */ -+ void (*setattr)(fuse_req_t req, fuse_ino_t ino, struct stat *attr, -+ int to_set, struct fuse_file_info *fi); -+ -+ /** -+ * Read symbolic link -+ * -+ * Valid replies: -+ * fuse_reply_readlink -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ */ -+ void (*readlink)(fuse_req_t req, fuse_ino_t ino); -+ -+ /** -+ * Create file node -+ * -+ * Create a regular file, character device, block device, fifo or -+ * socket node. 
-+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode file type and mode with which to create the new file -+ * @param rdev the device number (only valid if created file is a device) -+ */ -+ void (*mknod)(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, dev_t rdev); -+ -+ /** -+ * Create a directory -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode with which to create the new file -+ */ -+ void (*mkdir)(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode); -+ -+ /** -+ * Remove a file -+ * -+ * If the file's inode's lookup count is non-zero, the file -+ * system is expected to postpone any removal of the inode -+ * until the lookup count reaches zero (see description of the -+ * forget function). -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to remove -+ */ -+ void (*unlink)(fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Remove a directory -+ * -+ * If the directory's inode's lookup count is non-zero, the -+ * file system is expected to postpone any removal of the -+ * inode until the lookup count reaches zero (see description -+ * of the forget function). 
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to remove -+ */ -+ void (*rmdir)(fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Create a symbolic link -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param link the contents of the symbolic link -+ * @param parent inode number of the parent directory -+ * @param name to create -+ */ -+ void (*symlink)(fuse_req_t req, const char *link, fuse_ino_t parent, -+ const char *name); -+ -+ /** -+ * Rename a file -+ * -+ * If the target exists it should be atomically replaced. If -+ * the target's inode's lookup count is non-zero, the file -+ * system is expected to postpone any removal of the inode -+ * until the lookup count reaches zero (see description of the -+ * forget function). -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EINVAL, i.e. all -+ * future bmap requests will fail with EINVAL without being -+ * send to the filesystem process. -+ * -+ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -+ * RENAME_NOREPLACE is specified, the filesystem must not -+ * overwrite *newname* if it exists and return an error -+ * instead. If `RENAME_EXCHANGE` is specified, the filesystem -+ * must atomically exchange the two files, i.e. both must -+ * exist and neither may be deleted. 
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the old parent directory -+ * @param name old name -+ * @param newparent inode number of the new parent directory -+ * @param newname new name -+ */ -+ void (*rename)(fuse_req_t req, fuse_ino_t parent, const char *name, -+ fuse_ino_t newparent, const char *newname, -+ unsigned int flags); -+ -+ /** -+ * Create a hard link -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the old inode number -+ * @param newparent inode number of the new parent directory -+ * @param newname new name to create -+ */ -+ void (*link)(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, -+ const char *newname); -+ -+ /** -+ * Open a file -+ * -+ * Open flags are available in fi->flags. The following rules -+ * apply. -+ * -+ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -+ * filtered out / handled by the kernel. -+ * -+ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used -+ * by the filesystem to check if the operation is -+ * permitted. If the ``-o default_permissions`` mount -+ * option is given, this check is already done by the -+ * kernel before calling open() and may thus be omitted by -+ * the filesystem. -+ * -+ * - When writeback caching is enabled, the kernel may send -+ * read requests even for files opened with O_WRONLY. The -+ * filesystem should be prepared to handle this. -+ * -+ * - When writeback caching is disabled, the filesystem is -+ * expected to properly handle the O_APPEND flag and ensure -+ * that each write is appending to the end of the file. -+ * -+ * - When writeback caching is enabled, the kernel will -+ * handle O_APPEND. However, unless all changes to the file -+ * come through the kernel this will not work reliably. 
The -+ * filesystem should thus either ignore the O_APPEND flag -+ * (and let the kernel handle it), or return an error -+ * (indicating that reliable O_APPEND is not available). -+ * -+ * Filesystem may store an arbitrary file handle (pointer, -+ * index, etc) in fi->fh, and use this in all other file -+ * operations (read, write, flush, release, fsync). -+ * -+ * Filesystem may also implement stateless file I/O and not store -+ * anything in fi->fh. -+ * -+ * There are also some flags (direct_io, keep_cache) which the -+ * filesystem may set in fi, to change the way the file is opened. -+ * See fuse_file_info structure in <fuse_common.h> for more details. -+ * -+ * If this request is answered with an error code of ENOSYS -+ * and FUSE_CAP_NO_OPEN_SUPPORT is set in -+ * `fuse_conn_info.capable`, this is treated as success and -+ * future calls to open and release will also succeed without being -+ * sent to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_open -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*open)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Read data -+ * -+ * Read should send exactly the number of bytes requested except -+ * on EOF or error, otherwise the rest of the data will be -+ * substituted with zeroes. An exception to this is when the file -+ * has been opened in 'direct_io' mode, in which case the return -+ * value of the read system call will reflect the return value of -+ * this operation. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value.
-+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_iov -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size number of bytes to read -+ * @param off offset to read from -+ * @param fi file information -+ */ -+ void (*read)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Write data -+ * -+ * Write should return exactly the number of bytes requested -+ * except on error. An exception to this is when the file has -+ * been opened in 'direct_io' mode, in which case the return value -+ * of the write system call will reflect the return value of this -+ * operation. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param buf data to write -+ * @param size number of bytes to write -+ * @param off offset to write to -+ * @param fi file information -+ */ -+ void (*write)(fuse_req_t req, fuse_ino_t ino, const char *buf, size_t size, -+ off_t off, struct fuse_file_info *fi); -+ -+ /** -+ * Flush method -+ * -+ * This is called on each close() of the opened file. -+ * -+ * Since file descriptors can be duplicated (dup, dup2, fork), for -+ * one open call there may be many flush calls. -+ * -+ * Filesystems shouldn't assume that flush will always be called -+ * after some writes, or that it will be called at all. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * -+ * NOTE: the name of the method is misleading, since (unlike -+ * fsync) the filesystem is not forced to flush pending writes.
-+ * One reason to flush data is if the filesystem wants to return -+ * write errors during close. However, such use is non-portable -+ * because POSIX does not require [close] to wait for delayed I/O to -+ * complete. -+ * -+ * If the filesystem supports file locking operations (setlk, -+ * getlk) it should remove all locks belonging to 'fi->owner'. -+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to flush() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * -+ * [close]: -+ * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -+ */ -+ void (*flush)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Release an open file -+ * -+ * Release is called when there are no more references to an open -+ * file: all file descriptors are closed and all memory mappings -+ * are unmapped. -+ * -+ * For every open call there will be exactly one release call (unless -+ * the filesystem is force-unmounted). -+ * -+ * The filesystem may reply with an error, but error values are -+ * not returned to close() or munmap() which triggered the -+ * release. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * fi->flags will contain the same flags as for open. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*release)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Synchronize file contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data. 
-+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to fsync() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param datasync flag indicating if only data should be flushed -+ * @param fi file information -+ */ -+ void (*fsync)(fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Open a directory -+ * -+ * Filesystem may store an arbitrary file handle (pointer, index, -+ * etc) in fi->fh, and use this in other all other directory -+ * stream operations (readdir, releasedir, fsyncdir). -+ * -+ * If this request is answered with an error code of ENOSYS and -+ * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, -+ * this is treated as success and future calls to opendir and -+ * releasedir will also succeed without being sent to the filesystem -+ * process. In addition, the kernel will cache readdir results -+ * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. -+ * -+ * Valid replies: -+ * fuse_reply_open -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*opendir)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Read directory -+ * -+ * Send a buffer filled using fuse_add_direntry(), with size not -+ * exceeding the requested size. Send an empty buffer on end of -+ * stream. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * Returning a directory entry from readdir() does not affect -+ * its lookup count. -+ * -+ * If off_t is non-zero, then it will correspond to one of the off_t -+ * values that was previously returned by readdir() for the same -+ * directory handle. 
In this case, readdir() should skip over entries -+ * coming before the position defined by the off_t value. If entries -+ * are added or removed while the directory handle is open, the filesystem -+ * may still include the entries that have been removed, and may not -+ * report the entries that have been created. However, addition or -+ * removal of entries must never cause readdir() to skip over unrelated -+ * entries or to report them more than once. This means -+ * that off_t can not be a simple index that enumerates the entries -+ * that have been returned but must contain sufficient information to -+ * uniquely determine the next directory entry to return even when the -+ * set of entries is changing. -+ * -+ * The function does not have to report the '.' and '..' -+ * entries, but is allowed to do so. Note that, if readdir does -+ * not return '.' or '..', they will not be implicitly returned, -+ * and this behavior is observable by the caller. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum number of bytes to send -+ * @param off offset to continue reading the directory stream -+ * @param fi file information -+ */ -+ void (*readdir)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Release an open directory -+ * -+ * For every opendir call there will be exactly one releasedir -+ * call (unless the filesystem is force-unmounted). -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value.
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*releasedir)(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Synchronize directory contents -+ * -+ * If the datasync parameter is non-zero, then only the directory -+ * contents should be flushed, not the meta data. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to fsyncdir() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param datasync flag indicating if only data should be flushed -+ * @param fi file information -+ */ -+ void (*fsyncdir)(fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Get file system statistics -+ * -+ * Valid replies: -+ * fuse_reply_statfs -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number, zero means "undefined" -+ */ -+ void (*statfs)(fuse_req_t req, fuse_ino_t ino); -+ -+ /** -+ * Set an extended attribute -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future setxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ */ -+ void (*setxattr)(fuse_req_t req, fuse_ino_t ino, const char *name, -+ const char *value, size_t size, int flags); -+ -+ /** -+ * Get an extended attribute -+ * -+ * If size is zero, the size of the value should be sent with -+ * fuse_reply_xattr. 
-+ * -+ * If the size is non-zero, and the value fits in the buffer, the -+ * value should be sent with fuse_reply_buf. -+ * -+ * If the size is too small for the value, the ERANGE error should -+ * be sent. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future getxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_xattr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param name of the extended attribute -+ * @param size maximum size of the value to send -+ */ -+ void (*getxattr)(fuse_req_t req, fuse_ino_t ino, const char *name, -+ size_t size); -+ -+ /** -+ * List extended attribute names -+ * -+ * If size is zero, the total size of the attribute list should be -+ * sent with fuse_reply_xattr. -+ * -+ * If the size is non-zero, and the null character separated -+ * attribute list fits in the buffer, the list should be sent with -+ * fuse_reply_buf. -+ * -+ * If the size is too small for the list, the ERANGE error should -+ * be sent. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future listxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_xattr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum size of the list to send -+ */ -+ void (*listxattr)(fuse_req_t req, fuse_ino_t ino, size_t size); -+ -+ /** -+ * Remove an extended attribute -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all -+ * future removexattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param name of the extended attribute -+ */ -+ void (*removexattr)(fuse_req_t req, fuse_ino_t ino, const char *name); -+ -+ /** -+ * Check file access permissions -+ * -+ * This will be called for the access() and chdir() system -+ * calls. If the 'default_permissions' mount option is given, -+ * this method is not called. -+ * -+ * This method is not called under Linux kernel versions 2.4.x -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent success, i.e. this and all future access() -+ * requests will succeed without being send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param mask requested access mode -+ */ -+ void (*access)(fuse_req_t req, fuse_ino_t ino, int mask); -+ -+ /** -+ * Create and open a file -+ * -+ * If the file does not exist, first create it with the specified -+ * mode, and then open it. -+ * -+ * See the description of the open handler for more -+ * information. -+ * -+ * If this method is not implemented or under Linux kernel -+ * versions earlier than 2.6.15, the mknod() and open() methods -+ * will be called instead. -+ * -+ * If this request is answered with an error code of ENOSYS, the handler -+ * is treated as not implemented (i.e., for this and future requests the -+ * mknod() and open() handlers will be called instead). 
-+ * -+ * Valid replies: -+ * fuse_reply_create -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode file type and mode with which to create the new file -+ * @param fi file information -+ */ -+ void (*create)(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, struct fuse_file_info *fi); -+ -+ /** -+ * Test for a POSIX file lock -+ * -+ * Valid replies: -+ * fuse_reply_lock -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param lock the region/type to test -+ */ -+ void (*getlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct flock *lock); -+ -+ /** -+ * Acquire, modify or release a POSIX file lock -+ * -+ * For POSIX threads (NPTL) there's a 1-1 relation between pid and -+ * owner, but otherwise this is not always the case. For checking -+ * lock ownership, 'fi->owner' must be used. The l_pid field in -+ * 'struct flock' should only be used to fill in this field in -+ * getlk(). -+ * -+ * Note: if the locking methods are not implemented, the kernel -+ * will still allow file locking to work locally. Hence these are -+ * only interesting for network filesystems and similar. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param lock the region/type to set -+ * @param sleep locking operation may sleep -+ */ -+ void (*setlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct flock *lock, int sleep); -+ -+ /** -+ * Map block index within file to block index within device -+ * -+ * Note: This makes sense only for block device backed filesystems -+ * mounted with the 'blkdev' option -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure, i.e. 
all future bmap() requests will -+ * fail with the same error code without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_bmap -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param blocksize unit of block index -+ * @param idx block index within file -+ */ -+ void (*bmap)(fuse_req_t req, fuse_ino_t ino, size_t blocksize, -+ uint64_t idx); -+ -+ /** -+ * Ioctl -+ * -+ * Note: For unrestricted ioctls (not allowed for FUSE -+ * servers), data in and out areas can be discovered by giving -+ * iovs and setting FUSE_IOCTL_RETRY in *flags*. For -+ * restricted ioctls, kernel prepares in/out data area -+ * according to the information encoded in cmd. -+ * -+ * Valid replies: -+ * fuse_reply_ioctl_retry -+ * fuse_reply_ioctl -+ * fuse_reply_ioctl_iov -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param cmd ioctl command -+ * @param arg ioctl argument -+ * @param fi file information -+ * @param flags for FUSE_IOCTL_* flags -+ * @param in_buf data fetched from the caller -+ * @param in_bufsz number of fetched bytes -+ * @param out_bufsz maximum size of output data -+ * -+ * Note : the unsigned long request submitted by the application -+ * is truncated to 32 bits. -+ */ -+ void (*ioctl)(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg, -+ struct fuse_file_info *fi, unsigned flags, const void *in_buf, -+ size_t in_bufsz, size_t out_bufsz); -+ -+ /** -+ * Poll for IO readiness -+ * -+ * Note: If ph is non-NULL, the client should notify -+ * when IO readiness events occur by calling -+ * fuse_lowlevel_notify_poll() with the specified ph. -+ * -+ * Regardless of the number of times poll with a non-NULL ph -+ * is received, single notification is enough to clear all. -+ * Notifying more times incurs overhead but doesn't harm -+ * correctness. 
-+ * -+ * The callee is responsible for destroying ph with -+ * fuse_pollhandle_destroy() when no longer in use. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as success (with a kernel-defined default poll-mask) and -+ * future calls to poll() will succeed the same way without being sent -+ * to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_poll -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param ph poll handle to be used for notification -+ */ -+ void (*poll)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct fuse_pollhandle *ph); -+ -+ /** -+ * Write data made available in a buffer -+ * -+ * This is a more generic version of the ->write() method. If -+ * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the -+ * kernel supports splicing from the fuse device, then the -+ * data will be made available in pipe for supporting zero -+ * copy data transfer. -+ * -+ * buf->count is guaranteed to be one (and thus buf->idx is -+ * always zero). The write_buf handler must ensure that -+ * bufv->off is correctly updated (reflecting the number of -+ * bytes read from bufv->buf[0]). -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits.
-+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param bufv buffer containing the data -+ * @param off offset to write to -+ * @param fi file information -+ */ -+ void (*write_buf)(fuse_req_t req, fuse_ino_t ino, struct fuse_bufvec *bufv, -+ off_t off, struct fuse_file_info *fi); -+ -+ /** -+ * Callback function for the retrieve request -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() -+ * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() -+ * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() -+ * @param bufv the buffer containing the returned data -+ */ -+ void (*retrieve_reply)(fuse_req_t req, void *cookie, fuse_ino_t ino, -+ off_t offset, struct fuse_bufvec *bufv); -+ -+ /** -+ * Forget about multiple inodes -+ * -+ * See description of the forget function for more -+ * information. -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ */ -+ void (*forget_multi)(fuse_req_t req, size_t count, -+ struct fuse_forget_data *forgets); -+ -+ /** -+ * Acquire, modify or release a BSD file lock -+ * -+ * Note: if the locking methods are not implemented, the kernel -+ * will still allow file locking to work locally. Hence these are -+ * only interesting for network filesystems and similar. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param op the locking operation, see flock(2) -+ */ -+ void (*flock)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ int op); -+ -+ /** -+ * Allocate requested space. If this function returns success then -+ * subsequent writes to the specified range shall not fail due to the lack -+ * of free space on the file system storage media. 
-+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future fallocate() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param offset starting point for allocated region -+ * @param length size of allocated region -+ * @param mode determines the operation to be performed on the given range, -+ * see fallocate(2) -+ */ -+ void (*fallocate)(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, -+ off_t length, struct fuse_file_info *fi); -+ -+ /** -+ * Read directory with attributes -+ * -+ * Send a buffer filled using fuse_add_direntry_plus(), with size not -+ * exceeding the requested size. Send an empty buffer on end of -+ * stream. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * In contrast to readdir() (which does not affect the lookup counts), -+ * the lookup count of every entry returned by readdirplus(), except "." -+ * and "..", is incremented by one. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum number of bytes to send -+ * @param off offset to continue reading the directory stream -+ * @param fi file information -+ */ -+ void (*readdirplus)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Copy a range of data from one file to another -+ * -+ * Performs an optimized copy between two file descriptors without the -+ * additional cost of transferring data through the FUSE kernel module -+ * to user space (glibc) and then back into the FUSE filesystem again. 
-+ * -+ * In case this method is not implemented, glibc falls back to reading -+ * data from the source and writing to the destination. Effectively -+ * doing an inefficient copy of the data. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future copy_file_range() requests will fail with EOPNOTSUPP without -+ * being sent to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino_in the inode number of the source file -+ * @param off_in starting point from where the data should be read -+ * @param fi_in file information of the source file -+ * @param ino_out the inode number of the destination file -+ * @param off_out starting point where the data should be written -+ * @param fi_out file information of the destination file -+ * @param len maximum size of the data to copy -+ * @param flags passed along with the copy_file_range() syscall -+ */ -+ void (*copy_file_range)(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, -+ struct fuse_file_info *fi_in, fuse_ino_t ino_out, -+ off_t off_out, struct fuse_file_info *fi_out, -+ size_t len, int flags); -+ -+ /** -+ * Find next data or hole after the specified offset -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure, i.e. all future lseek() requests will -+ * fail with the same error code without being sent to the filesystem -+ * process.
-+ * -+ * Valid replies: -+ * fuse_reply_lseek -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param off offset to start search from -+ * @param whence either SEEK_DATA or SEEK_HOLE -+ * @param fi file information -+ */ -+ void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -+ struct fuse_file_info *fi); - }; - - /** -@@ -1305,7 +1307,7 @@ int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); - * @return zero for success, -errno for failure to send reply - */ - int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, -- const struct fuse_file_info *fi); -+ const struct fuse_file_info *fi); - - /** - * Reply with attributes -@@ -1315,11 +1317,11 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, - * - * @param req request handle - * @param attr the attributes -- * @param attr_timeout validity timeout (in seconds) for the attributes -+ * @param attr_timeout validity timeout (in seconds) for the attributes - * @return zero for success, -errno for failure to send reply - */ - int fuse_reply_attr(fuse_req_t req, const struct stat *attr, -- double attr_timeout); -+ double attr_timeout); - - /** - * Reply with the contents of a symbolic link -@@ -1417,7 +1419,7 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); - * @return zero for success, -errno for failure to send reply - */ - int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags); -+ enum fuse_buf_copy_flags flags); - - /** - * Reply with data vector -@@ -1480,9 +1482,9 @@ int fuse_reply_lock(fuse_req_t req, const struct flock *lock); - */ - int fuse_reply_bmap(fuse_req_t req, uint64_t idx); - --/* ----------------------------------------------------------- * -- * Filling a buffer in readdir * -- * ----------------------------------------------------------- */ -+/* -+ * Filling a buffer in readdir -+ */ - - /** - * Add a directory entry to 
the buffer -@@ -1512,8 +1514,7 @@ int fuse_reply_bmap(fuse_req_t req, uint64_t idx); - * @return the space needed for the entry - */ - size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, -- const char *name, const struct stat *stbuf, -- off_t off); -+ const char *name, const struct stat *stbuf, off_t off); - - /** - * Add a directory entry to the buffer with the attributes -@@ -1529,8 +1530,8 @@ size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, - * @return the space needed for the entry - */ - size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, -- const char *name, -- const struct fuse_entry_param *e, off_t off); -+ const char *name, -+ const struct fuse_entry_param *e, off_t off); - - /** - * Reply to ask for data fetch and output buffer preparation. ioctl -@@ -1547,9 +1548,9 @@ size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, - * @param out_count number of entries in out_iov - * @return zero for success, -errno for failure to send reply - */ --int fuse_reply_ioctl_retry(fuse_req_t req, -- const struct iovec *in_iov, size_t in_count, -- const struct iovec *out_iov, size_t out_count); -+int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, -+ size_t in_count, const struct iovec *out_iov, -+ size_t out_count); - - /** - * Reply to finish ioctl -@@ -1576,7 +1577,7 @@ int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size); - * @param count the size of vector - */ - int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, -- int count); -+ int count); - - /** - * Reply with poll result event mask -@@ -1598,9 +1599,9 @@ int fuse_reply_poll(fuse_req_t req, unsigned revents); - */ - int fuse_reply_lseek(fuse_req_t req, off_t off); - --/* ----------------------------------------------------------- * -- * Notification * -- * ----------------------------------------------------------- */ -+/* -+ * Notification -+ */ - - /** - * Notify 
IO readiness event -@@ -1635,7 +1636,7 @@ int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph); - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, -- off_t off, off_t len); -+ off_t off, off_t len); - - /** - * Notify to invalidate parent attributes and the dentry matching -@@ -1663,7 +1664,7 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, -- const char *name, size_t namelen); -+ const char *name, size_t namelen); - - /** - * This function behaves like fuse_lowlevel_notify_inval_entry() with -@@ -1693,9 +1694,9 @@ int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, - * @param namelen strlen() of file name - * @return zero for success, -errno for failure - */ --int fuse_lowlevel_notify_delete(struct fuse_session *se, -- fuse_ino_t parent, fuse_ino_t child, -- const char *name, size_t namelen); -+int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, -+ fuse_ino_t child, const char *name, -+ size_t namelen); - - /** - * Store data to the kernel buffers -@@ -1723,8 +1724,8 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags); -+ off_t offset, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags); - /** - * Retrieve data from the kernel buffers - * -@@ -1755,12 +1756,12 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -- size_t size, off_t offset, void *cookie); -+ size_t size, off_t offset, void 
*cookie); - - --/* ----------------------------------------------------------- * -- * Utility functions * -- * ----------------------------------------------------------- */ -+/* -+ * Utility functions -+ */ - - /** - * Get the userdata from the request -@@ -1822,7 +1823,7 @@ typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data); - * @param data user data passed to the callback function - */ - void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, -- void *data); -+ void *data); - - /** - * Check if a request has already been interrupted -@@ -1833,9 +1834,9 @@ void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, - int fuse_req_interrupted(fuse_req_t req); - - --/* ----------------------------------------------------------- * -- * Inquiry functions * -- * ----------------------------------------------------------- */ -+/* -+ * Inquiry functions -+ */ - - /** - * Print low-level version information to stdout. -@@ -1854,18 +1855,18 @@ void fuse_lowlevel_help(void); - */ - void fuse_cmdline_help(void); - --/* ----------------------------------------------------------- * -- * Filesystem setup & teardown * -- * ----------------------------------------------------------- */ -+/* -+ * Filesystem setup & teardown -+ */ - - struct fuse_cmdline_opts { -- int foreground; -- int debug; -- int nodefault_subtype; -- char *mountpoint; -- int show_version; -- int show_help; -- unsigned int max_idle_threads; -+ int foreground; -+ int debug; -+ int nodefault_subtype; -+ char *mountpoint; -+ int show_version; -+ int show_help; -+ unsigned int max_idle_threads; - }; - - /** -@@ -1886,8 +1887,7 @@ struct fuse_cmdline_opts { - * @param opts output argument for parsed options - * @return 0 on success, -1 on failure - */ --int fuse_parse_cmdline(struct fuse_args *args, -- struct fuse_cmdline_opts *opts); -+int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts); - - /** - * Create a low level session. 
-@@ -1918,8 +1918,8 @@ int fuse_parse_cmdline(struct fuse_args *args, - * @return the fuse session on success, NULL on failure - **/ - struct fuse_session *fuse_session_new(struct fuse_args *args, -- const struct fuse_lowlevel_ops *op, -- size_t op_size, void *userdata); -+ const struct fuse_lowlevel_ops *op, -+ size_t op_size, void *userdata); - - /** - * Mount a FUSE file system. -@@ -2014,9 +2014,9 @@ void fuse_session_unmount(struct fuse_session *se); - */ - void fuse_session_destroy(struct fuse_session *se); - --/* ----------------------------------------------------------- * -- * Custom event loop support * -- * ----------------------------------------------------------- */ -+/* -+ * Custom event loop support -+ */ - - /** - * Return file descriptor for communication with kernel. -@@ -2043,7 +2043,7 @@ int fuse_session_fd(struct fuse_session *se); - * @param buf the fuse_buf containing the request - */ - void fuse_session_process_buf(struct fuse_session *se, -- const struct fuse_buf *buf); -+ const struct fuse_buf *buf); - - /** - * Read a raw request from the kernel into the supplied buffer. -diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h -index 2f6663e..f252baa 100644 ---- a/tools/virtiofsd/fuse_misc.h -+++ b/tools/virtiofsd/fuse_misc.h -@@ -1,18 +1,18 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ - - #include - - /* -- Versioned symbols cannot be used in some cases because it -- - confuse the dynamic linker in uClibc -- - not supported on MacOSX (in MachO binary format) --*/ -+ * Versioned symbols cannot be used in some cases because it -+ * - confuse the dynamic linker in uClibc -+ * - not supported on MacOSX (in MachO binary format) -+ */ - #if (!defined(__UCLIBC__) && !defined(__APPLE__)) - #define FUSE_SYMVER(x) __asm__(x) - #else -@@ -25,11 +25,11 @@ - /* Is this hack still needed? */ - static inline void fuse_mutex_init(pthread_mutex_t *mut) - { -- pthread_mutexattr_t attr; -- pthread_mutexattr_init(&attr); -- pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); -- pthread_mutex_init(mut, &attr); -- pthread_mutexattr_destroy(&attr); -+ pthread_mutexattr_t attr; -+ pthread_mutexattr_init(&attr); -+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); -+ pthread_mutex_init(mut, &attr); -+ pthread_mutexattr_destroy(&attr); - } - #endif - -diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c -index 93066b9..edd36f4 100644 ---- a/tools/virtiofsd/fuse_opt.c -+++ b/tools/virtiofsd/fuse_opt.c -@@ -1,423 +1,450 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- Implementation of option parsing routines (dealing with `struct -- fuse_args`). -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * Implementation of option parsing routines (dealing with `struct -+ * fuse_args`). -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ - -+#include "fuse_opt.h" - #include "config.h" - #include "fuse_i.h" --#include "fuse_opt.h" - #include "fuse_misc.h" - -+#include - #include - #include - #include --#include - - struct fuse_opt_context { -- void *data; -- const struct fuse_opt *opt; -- fuse_opt_proc_t proc; -- int argctr; -- int argc; -- char **argv; -- struct fuse_args outargs; -- char *opts; -- int nonopt; -+ void *data; -+ const struct fuse_opt *opt; -+ fuse_opt_proc_t proc; -+ int argctr; -+ int argc; -+ char **argv; -+ struct fuse_args outargs; -+ char *opts; -+ int nonopt; - }; - - void fuse_opt_free_args(struct fuse_args *args) - { -- if (args) { -- if (args->argv && args->allocated) { -- int i; -- for (i = 0; i < args->argc; i++) -- free(args->argv[i]); -- free(args->argv); -- } -- args->argc = 0; -- args->argv = NULL; -- args->allocated = 0; -- } -+ if (args) { -+ if (args->argv && args->allocated) { -+ int i; -+ for (i = 0; i < args->argc; i++) { -+ free(args->argv[i]); -+ } -+ free(args->argv); -+ } -+ args->argc = 0; -+ args->argv = NULL; -+ args->allocated = 0; -+ } - } - - static int alloc_failed(void) - { -- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -- return -1; -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; - } - - int fuse_opt_add_arg(struct fuse_args *args, const char *arg) - { -- char **newargv; -- char *newarg; -- -- assert(!args->argv || args->allocated); -- -- newarg = strdup(arg); -- if (!newarg) -- return alloc_failed(); -- -- newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *)); -- if (!newargv) { -- free(newarg); -- return alloc_failed(); -- } -- -- args->argv = newargv; -- args->allocated = 1; -- args->argv[args->argc++] = newarg; -- args->argv[args->argc] = NULL; -- return 0; -+ char **newargv; -+ char *newarg; -+ -+ assert(!args->argv || args->allocated); -+ -+ newarg = strdup(arg); -+ if (!newarg) { -+ return alloc_failed(); -+ } -+ -+ newargv = realloc(args->argv, 
(args->argc + 2) * sizeof(char *)); -+ if (!newargv) { -+ free(newarg); -+ return alloc_failed(); -+ } -+ -+ args->argv = newargv; -+ args->allocated = 1; -+ args->argv[args->argc++] = newarg; -+ args->argv[args->argc] = NULL; -+ return 0; - } - - static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos, -- const char *arg) -+ const char *arg) - { -- assert(pos <= args->argc); -- if (fuse_opt_add_arg(args, arg) == -1) -- return -1; -- -- if (pos != args->argc - 1) { -- char *newarg = args->argv[args->argc - 1]; -- memmove(&args->argv[pos + 1], &args->argv[pos], -- sizeof(char *) * (args->argc - pos - 1)); -- args->argv[pos] = newarg; -- } -- return 0; -+ assert(pos <= args->argc); -+ if (fuse_opt_add_arg(args, arg) == -1) { -+ return -1; -+ } -+ -+ if (pos != args->argc - 1) { -+ char *newarg = args->argv[args->argc - 1]; -+ memmove(&args->argv[pos + 1], &args->argv[pos], -+ sizeof(char *) * (args->argc - pos - 1)); -+ args->argv[pos] = newarg; -+ } -+ return 0; - } - - int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg) - { -- return fuse_opt_insert_arg_common(args, pos, arg); -+ return fuse_opt_insert_arg_common(args, pos, arg); - } - - static int next_arg(struct fuse_opt_context *ctx, const char *opt) - { -- if (ctx->argctr + 1 >= ctx->argc) { -- fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt); -- return -1; -- } -- ctx->argctr++; -- return 0; -+ if (ctx->argctr + 1 >= ctx->argc) { -+ fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt); -+ return -1; -+ } -+ ctx->argctr++; -+ return 0; - } - - static int add_arg(struct fuse_opt_context *ctx, const char *arg) - { -- return fuse_opt_add_arg(&ctx->outargs, arg); -+ return fuse_opt_add_arg(&ctx->outargs, arg); - } - - static int add_opt_common(char **opts, const char *opt, int esc) - { -- unsigned oldlen = *opts ? 
strlen(*opts) : 0; -- char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); -- -- if (!d) -- return alloc_failed(); -- -- *opts = d; -- if (oldlen) { -- d += oldlen; -- *d++ = ','; -- } -- -- for (; *opt; opt++) { -- if (esc && (*opt == ',' || *opt == '\\')) -- *d++ = '\\'; -- *d++ = *opt; -- } -- *d = '\0'; -- -- return 0; -+ unsigned oldlen = *opts ? strlen(*opts) : 0; -+ char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); -+ -+ if (!d) { -+ return alloc_failed(); -+ } -+ -+ *opts = d; -+ if (oldlen) { -+ d += oldlen; -+ *d++ = ','; -+ } -+ -+ for (; *opt; opt++) { -+ if (esc && (*opt == ',' || *opt == '\\')) { -+ *d++ = '\\'; -+ } -+ *d++ = *opt; -+ } -+ *d = '\0'; -+ -+ return 0; - } - - int fuse_opt_add_opt(char **opts, const char *opt) - { -- return add_opt_common(opts, opt, 0); -+ return add_opt_common(opts, opt, 0); - } - - int fuse_opt_add_opt_escaped(char **opts, const char *opt) - { -- return add_opt_common(opts, opt, 1); -+ return add_opt_common(opts, opt, 1); - } - - static int add_opt(struct fuse_opt_context *ctx, const char *opt) - { -- return add_opt_common(&ctx->opts, opt, 1); -+ return add_opt_common(&ctx->opts, opt, 1); - } - - static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key, -- int iso) -+ int iso) - { -- if (key == FUSE_OPT_KEY_DISCARD) -- return 0; -- -- if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { -- int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); -- if (res == -1 || !res) -- return res; -- } -- if (iso) -- return add_opt(ctx, arg); -- else -- return add_arg(ctx, arg); -+ if (key == FUSE_OPT_KEY_DISCARD) { -+ return 0; -+ } -+ -+ if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { -+ int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); -+ if (res == -1 || !res) { -+ return res; -+ } -+ } -+ if (iso) { -+ return add_opt(ctx, arg); -+ } else { -+ return add_arg(ctx, arg); -+ } - } - - static int match_template(const char *t, const char *arg, unsigned *sepp) - { -- int arglen = strlen(arg); -- 
const char *sep = strchr(t, '='); -- sep = sep ? sep : strchr(t, ' '); -- if (sep && (!sep[1] || sep[1] == '%')) { -- int tlen = sep - t; -- if (sep[0] == '=') -- tlen ++; -- if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { -- *sepp = sep - t; -- return 1; -- } -- } -- if (strcmp(t, arg) == 0) { -- *sepp = 0; -- return 1; -- } -- return 0; -+ int arglen = strlen(arg); -+ const char *sep = strchr(t, '='); -+ sep = sep ? sep : strchr(t, ' '); -+ if (sep && (!sep[1] || sep[1] == '%')) { -+ int tlen = sep - t; -+ if (sep[0] == '=') { -+ tlen++; -+ } -+ if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { -+ *sepp = sep - t; -+ return 1; -+ } -+ } -+ if (strcmp(t, arg) == 0) { -+ *sepp = 0; -+ return 1; -+ } -+ return 0; - } - - static const struct fuse_opt *find_opt(const struct fuse_opt *opt, -- const char *arg, unsigned *sepp) -+ const char *arg, unsigned *sepp) - { -- for (; opt && opt->templ; opt++) -- if (match_template(opt->templ, arg, sepp)) -- return opt; -- return NULL; -+ for (; opt && opt->templ; opt++) { -+ if (match_template(opt->templ, arg, sepp)) { -+ return opt; -+ } -+ } -+ return NULL; - } - - int fuse_opt_match(const struct fuse_opt *opts, const char *opt) - { -- unsigned dummy; -- return find_opt(opts, opt, &dummy) ? 1 : 0; -+ unsigned dummy; -+ return find_opt(opts, opt, &dummy) ? 
1 : 0; - } - - static int process_opt_param(void *var, const char *format, const char *param, -- const char *arg) -+ const char *arg) - { -- assert(format[0] == '%'); -- if (format[1] == 's') { -- char **s = var; -- char *copy = strdup(param); -- if (!copy) -- return alloc_failed(); -- -- free(*s); -- *s = copy; -- } else { -- if (sscanf(param, format, var) != 1) { -- fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", arg); -- return -1; -- } -- } -- return 0; -+ assert(format[0] == '%'); -+ if (format[1] == 's') { -+ char **s = var; -+ char *copy = strdup(param); -+ if (!copy) { -+ return alloc_failed(); -+ } -+ -+ free(*s); -+ *s = copy; -+ } else { -+ if (sscanf(param, format, var) != 1) { -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", -+ arg); -+ return -1; -+ } -+ } -+ return 0; - } - --static int process_opt(struct fuse_opt_context *ctx, -- const struct fuse_opt *opt, unsigned sep, -- const char *arg, int iso) -+static int process_opt(struct fuse_opt_context *ctx, const struct fuse_opt *opt, -+ unsigned sep, const char *arg, int iso) - { -- if (opt->offset == -1U) { -- if (call_proc(ctx, arg, opt->value, iso) == -1) -- return -1; -- } else { -- void *var = (char *)ctx->data + opt->offset; -- if (sep && opt->templ[sep + 1]) { -- const char *param = arg + sep; -- if (opt->templ[sep] == '=') -- param ++; -- if (process_opt_param(var, opt->templ + sep + 1, -- param, arg) == -1) -- return -1; -- } else -- *(int *)var = opt->value; -- } -- return 0; -+ if (opt->offset == -1U) { -+ if (call_proc(ctx, arg, opt->value, iso) == -1) { -+ return -1; -+ } -+ } else { -+ void *var = (char *)ctx->data + opt->offset; -+ if (sep && opt->templ[sep + 1]) { -+ const char *param = arg + sep; -+ if (opt->templ[sep] == '=') { -+ param++; -+ } -+ if (process_opt_param(var, opt->templ + sep + 1, param, arg) == -+ -1) { -+ return -1; -+ } -+ } else { -+ *(int *)var = opt->value; -+ } -+ } -+ return 0; - } - - static int process_opt_sep_arg(struct 
fuse_opt_context *ctx, -- const struct fuse_opt *opt, unsigned sep, -- const char *arg, int iso) -+ const struct fuse_opt *opt, unsigned sep, -+ const char *arg, int iso) - { -- int res; -- char *newarg; -- char *param; -- -- if (next_arg(ctx, arg) == -1) -- return -1; -- -- param = ctx->argv[ctx->argctr]; -- newarg = malloc(sep + strlen(param) + 1); -- if (!newarg) -- return alloc_failed(); -- -- memcpy(newarg, arg, sep); -- strcpy(newarg + sep, param); -- res = process_opt(ctx, opt, sep, newarg, iso); -- free(newarg); -- -- return res; -+ int res; -+ char *newarg; -+ char *param; -+ -+ if (next_arg(ctx, arg) == -1) { -+ return -1; -+ } -+ -+ param = ctx->argv[ctx->argctr]; -+ newarg = malloc(sep + strlen(param) + 1); -+ if (!newarg) { -+ return alloc_failed(); -+ } -+ -+ memcpy(newarg, arg, sep); -+ strcpy(newarg + sep, param); -+ res = process_opt(ctx, opt, sep, newarg, iso); -+ free(newarg); -+ -+ return res; - } - - static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso) - { -- unsigned sep; -- const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); -- if (opt) { -- for (; opt; opt = find_opt(opt + 1, arg, &sep)) { -- int res; -- if (sep && opt->templ[sep] == ' ' && !arg[sep]) -- res = process_opt_sep_arg(ctx, opt, sep, arg, -- iso); -- else -- res = process_opt(ctx, opt, sep, arg, iso); -- if (res == -1) -- return -1; -- } -- return 0; -- } else -- return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); -+ unsigned sep; -+ const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); -+ if (opt) { -+ for (; opt; opt = find_opt(opt + 1, arg, &sep)) { -+ int res; -+ if (sep && opt->templ[sep] == ' ' && !arg[sep]) { -+ res = process_opt_sep_arg(ctx, opt, sep, arg, iso); -+ } else { -+ res = process_opt(ctx, opt, sep, arg, iso); -+ } -+ if (res == -1) { -+ return -1; -+ } -+ } -+ return 0; -+ } else { -+ return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); -+ } - } - - static int process_real_option_group(struct fuse_opt_context *ctx, char 
*opts) - { -- char *s = opts; -- char *d = s; -- int end = 0; -- -- while (!end) { -- if (*s == '\0') -- end = 1; -- if (*s == ',' || end) { -- int res; -- -- *d = '\0'; -- res = process_gopt(ctx, opts, 1); -- if (res == -1) -- return -1; -- d = opts; -- } else { -- if (s[0] == '\\' && s[1] != '\0') { -- s++; -- if (s[0] >= '0' && s[0] <= '3' && -- s[1] >= '0' && s[1] <= '7' && -- s[2] >= '0' && s[2] <= '7') { -- *d++ = (s[0] - '0') * 0100 + -- (s[1] - '0') * 0010 + -- (s[2] - '0'); -- s += 2; -- } else { -- *d++ = *s; -- } -- } else { -- *d++ = *s; -- } -- } -- s++; -- } -- -- return 0; -+ char *s = opts; -+ char *d = s; -+ int end = 0; -+ -+ while (!end) { -+ if (*s == '\0') { -+ end = 1; -+ } -+ if (*s == ',' || end) { -+ int res; -+ -+ *d = '\0'; -+ res = process_gopt(ctx, opts, 1); -+ if (res == -1) { -+ return -1; -+ } -+ d = opts; -+ } else { -+ if (s[0] == '\\' && s[1] != '\0') { -+ s++; -+ if (s[0] >= '0' && s[0] <= '3' && s[1] >= '0' && s[1] <= '7' && -+ s[2] >= '0' && s[2] <= '7') { -+ *d++ = (s[0] - '0') * 0100 + (s[1] - '0') * 0010 + -+ (s[2] - '0'); -+ s += 2; -+ } else { -+ *d++ = *s; -+ } -+ } else { -+ *d++ = *s; -+ } -+ } -+ s++; -+ } -+ -+ return 0; - } - - static int process_option_group(struct fuse_opt_context *ctx, const char *opts) - { -- int res; -- char *copy = strdup(opts); -- -- if (!copy) { -- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -- return -1; -- } -- res = process_real_option_group(ctx, copy); -- free(copy); -- return res; -+ int res; -+ char *copy = strdup(opts); -+ -+ if (!copy) { -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; -+ } -+ res = process_real_option_group(ctx, copy); -+ free(copy); -+ return res; - } - - static int process_one(struct fuse_opt_context *ctx, const char *arg) - { -- if (ctx->nonopt || arg[0] != '-') -- return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); -- else if (arg[1] == 'o') { -- if (arg[2]) -- return process_option_group(ctx, arg + 2); -- else { -- if 
(next_arg(ctx, arg) == -1) -- return -1; -- -- return process_option_group(ctx, -- ctx->argv[ctx->argctr]); -- } -- } else if (arg[1] == '-' && !arg[2]) { -- if (add_arg(ctx, arg) == -1) -- return -1; -- ctx->nonopt = ctx->outargs.argc; -- return 0; -- } else -- return process_gopt(ctx, arg, 0); -+ if (ctx->nonopt || arg[0] != '-') { -+ return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); -+ } else if (arg[1] == 'o') { -+ if (arg[2]) { -+ return process_option_group(ctx, arg + 2); -+ } else { -+ if (next_arg(ctx, arg) == -1) { -+ return -1; -+ } -+ -+ return process_option_group(ctx, ctx->argv[ctx->argctr]); -+ } -+ } else if (arg[1] == '-' && !arg[2]) { -+ if (add_arg(ctx, arg) == -1) { -+ return -1; -+ } -+ ctx->nonopt = ctx->outargs.argc; -+ return 0; -+ } else { -+ return process_gopt(ctx, arg, 0); -+ } - } - - static int opt_parse(struct fuse_opt_context *ctx) - { -- if (ctx->argc) { -- if (add_arg(ctx, ctx->argv[0]) == -1) -- return -1; -- } -- -- for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) -- if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) -- return -1; -- -- if (ctx->opts) { -- if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || -- fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) -- return -1; -- } -- -- /* If option separator ("--") is the last argument, remove it */ -- if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && -- strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { -- free(ctx->outargs.argv[ctx->outargs.argc - 1]); -- ctx->outargs.argv[--ctx->outargs.argc] = NULL; -- } -- -- return 0; -+ if (ctx->argc) { -+ if (add_arg(ctx, ctx->argv[0]) == -1) { -+ return -1; -+ } -+ } -+ -+ for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) { -+ if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) { -+ return -1; -+ } -+ } -+ -+ if (ctx->opts) { -+ if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || -+ fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) { -+ return -1; -+ } -+ } -+ -+ /* If 
option separator ("--") is the last argument, remove it */ -+ if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && -+ strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { -+ free(ctx->outargs.argv[ctx->outargs.argc - 1]); -+ ctx->outargs.argv[--ctx->outargs.argc] = NULL; -+ } -+ -+ return 0; - } - - int fuse_opt_parse(struct fuse_args *args, void *data, -- const struct fuse_opt opts[], fuse_opt_proc_t proc) -+ const struct fuse_opt opts[], fuse_opt_proc_t proc) - { -- int res; -- struct fuse_opt_context ctx = { -- .data = data, -- .opt = opts, -- .proc = proc, -- }; -- -- if (!args || !args->argv || !args->argc) -- return 0; -- -- ctx.argc = args->argc; -- ctx.argv = args->argv; -- -- res = opt_parse(&ctx); -- if (res != -1) { -- struct fuse_args tmp = *args; -- *args = ctx.outargs; -- ctx.outargs = tmp; -- } -- free(ctx.opts); -- fuse_opt_free_args(&ctx.outargs); -- return res; -+ int res; -+ struct fuse_opt_context ctx = { -+ .data = data, -+ .opt = opts, -+ .proc = proc, -+ }; -+ -+ if (!args || !args->argv || !args->argc) { -+ return 0; -+ } -+ -+ ctx.argc = args->argc; -+ ctx.argv = args->argv; -+ -+ res = opt_parse(&ctx); -+ if (res != -1) { -+ struct fuse_args tmp = *args; -+ *args = ctx.outargs; -+ ctx.outargs = tmp; -+ } -+ free(ctx.opts); -+ fuse_opt_free_args(&ctx.outargs); -+ return res; - } -diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h -index 6910255..8f59b4d 100644 ---- a/tools/virtiofsd/fuse_opt.h -+++ b/tools/virtiofsd/fuse_opt.h -@@ -1,10 +1,10 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. 
-+ */ - - #ifndef FUSE_OPT_H_ - #define FUSE_OPT_H_ -@@ -37,7 +37,7 @@ - * - * - 'offsetof(struct foo, member)' actions i) and iii) - * -- * - -1 action ii) -+ * - -1 action ii) - * - * The 'offsetof()' macro is defined in the header. - * -@@ -48,7 +48,7 @@ - * - * The types of templates are: - * -- * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only -+ * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only - * themselves. Invalid values are "--" and anything beginning - * with "-o" - * -@@ -71,58 +71,67 @@ - * freed. - */ - struct fuse_opt { -- /** Matching template and optional parameter formatting */ -- const char *templ; -+ /** Matching template and optional parameter formatting */ -+ const char *templ; - -- /** -- * Offset of variable within 'data' parameter of fuse_opt_parse() -- * or -1 -- */ -- unsigned long offset; -+ /** -+ * Offset of variable within 'data' parameter of fuse_opt_parse() -+ * or -1 -+ */ -+ unsigned long offset; - -- /** -- * Value to set the variable to, or to be passed as 'key' to the -- * processing function. Ignored if template has a format -- */ -- int value; -+ /** -+ * Value to set the variable to, or to be passed as 'key' to the -+ * processing function. Ignored if template has a format -+ */ -+ int value; - }; - - /** -- * Key option. In case of a match, the processing function will be -+ * Key option. In case of a match, the processing function will be - * called with the specified key. - */ --#define FUSE_OPT_KEY(templ, key) { templ, -1U, key } -+#define FUSE_OPT_KEY(templ, key) \ -+ { \ -+ templ, -1U, key \ -+ } - - /** -- * Last option. An array of 'struct fuse_opt' must end with a NULL -+ * Last option. 
An array of 'struct fuse_opt' must end with a NULL - * template value - */ --#define FUSE_OPT_END { NULL, 0, 0 } -+#define FUSE_OPT_END \ -+ { \ -+ NULL, 0, 0 \ -+ } - - /** - * Argument list - */ - struct fuse_args { -- /** Argument count */ -- int argc; -+ /** Argument count */ -+ int argc; - -- /** Argument vector. NULL terminated */ -- char **argv; -+ /** Argument vector. NULL terminated */ -+ char **argv; - -- /** Is 'argv' allocated? */ -- int allocated; -+ /** Is 'argv' allocated? */ -+ int allocated; - }; - - /** - * Initializer for 'struct fuse_args' - */ --#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 } -+#define FUSE_ARGS_INIT(argc, argv) \ -+ { \ -+ argc, argv, 0 \ -+ } - - /** - * Key value passed to the processing function if an option did not - * match any template - */ --#define FUSE_OPT_KEY_OPT -1 -+#define FUSE_OPT_KEY_OPT -1 - - /** - * Key value passed to the processing function for all non-options -@@ -130,7 +139,7 @@ struct fuse_args { - * Non-options are the arguments beginning with a character other than - * '-' or all arguments after the special '--' option - */ --#define FUSE_OPT_KEY_NONOPT -2 -+#define FUSE_OPT_KEY_NONOPT -2 - - /** - * Special key value for options to keep -@@ -174,7 +183,7 @@ struct fuse_args { - * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept - */ - typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, -- struct fuse_args *outargs); -+ struct fuse_args *outargs); - - /** - * Option parsing function -@@ -197,7 +206,7 @@ typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, - * @return -1 on error, 0 on success - */ - int fuse_opt_parse(struct fuse_args *args, void *data, -- const struct fuse_opt opts[], fuse_opt_proc_t proc); -+ const struct fuse_opt opts[], fuse_opt_proc_t proc); - - /** - * Add an option to a comma separated option list -diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c -index 4271947..19d6791 100644 ---- 
a/tools/virtiofsd/fuse_signals.c -+++ b/tools/virtiofsd/fuse_signals.c -@@ -1,91 +1,95 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- Utility functions for setting signal handlers. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * Utility functions for setting signal handlers. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB -+ */ - - #include "config.h" --#include "fuse_lowlevel.h" - #include "fuse_i.h" -+#include "fuse_lowlevel.h" - --#include --#include - #include -+#include - #include -+#include - - static struct fuse_session *fuse_instance; - - static void exit_handler(int sig) - { -- if (fuse_instance) { -- fuse_session_exit(fuse_instance); -- if(sig <= 0) { -- fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); -- abort(); -- } -- fuse_instance->error = sig; -- } -+ if (fuse_instance) { -+ fuse_session_exit(fuse_instance); -+ if (sig <= 0) { -+ fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); -+ abort(); -+ } -+ fuse_instance->error = sig; -+ } - } - - static void do_nothing(int sig) - { -- (void) sig; -+ (void)sig; - } - - static int set_one_signal_handler(int sig, void (*handler)(int), int remove) - { -- struct sigaction sa; -- struct sigaction old_sa; -+ struct sigaction sa; -+ struct sigaction old_sa; - -- memset(&sa, 0, sizeof(struct sigaction)); -- sa.sa_handler = remove ? SIG_DFL : handler; -- sigemptyset(&(sa.sa_mask)); -- sa.sa_flags = 0; -+ memset(&sa, 0, sizeof(struct sigaction)); -+ sa.sa_handler = remove ? 
SIG_DFL : handler; -+ sigemptyset(&(sa.sa_mask)); -+ sa.sa_flags = 0; - -- if (sigaction(sig, NULL, &old_sa) == -1) { -- perror("fuse: cannot get old signal handler"); -- return -1; -- } -+ if (sigaction(sig, NULL, &old_sa) == -1) { -+ perror("fuse: cannot get old signal handler"); -+ return -1; -+ } - -- if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && -- sigaction(sig, &sa, NULL) == -1) { -- perror("fuse: cannot set signal handler"); -- return -1; -- } -- return 0; -+ if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && -+ sigaction(sig, &sa, NULL) == -1) { -+ perror("fuse: cannot set signal handler"); -+ return -1; -+ } -+ return 0; - } - - int fuse_set_signal_handlers(struct fuse_session *se) - { -- /* If we used SIG_IGN instead of the do_nothing function, -- then we would be unable to tell if we set SIG_IGN (and -- thus should reset to SIG_DFL in fuse_remove_signal_handlers) -- or if it was already set to SIG_IGN (and should be left -- untouched. */ -- if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || -- set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || -- set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || -- set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) -- return -1; -+ /* -+ * If we used SIG_IGN instead of the do_nothing function, -+ * then we would be unable to tell if we set SIG_IGN (and -+ * thus should reset to SIG_DFL in fuse_remove_signal_handlers) -+ * or if it was already set to SIG_IGN (and should be left -+ * untouched. 
-+ */ -+ if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) { -+ return -1; -+ } - -- fuse_instance = se; -- return 0; -+ fuse_instance = se; -+ return 0; - } - - void fuse_remove_signal_handlers(struct fuse_session *se) - { -- if (fuse_instance != se) -- fuse_log(FUSE_LOG_ERR, -- "fuse: fuse_remove_signal_handlers: unknown session\n"); -- else -- fuse_instance = NULL; -+ if (fuse_instance != se) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: fuse_remove_signal_handlers: unknown session\n"); -+ } else { -+ fuse_instance = NULL; -+ } - -- set_one_signal_handler(SIGHUP, exit_handler, 1); -- set_one_signal_handler(SIGINT, exit_handler, 1); -- set_one_signal_handler(SIGTERM, exit_handler, 1); -- set_one_signal_handler(SIGPIPE, do_nothing, 1); -+ set_one_signal_handler(SIGHUP, exit_handler, 1); -+ set_one_signal_handler(SIGINT, exit_handler, 1); -+ set_one_signal_handler(SIGTERM, exit_handler, 1); -+ set_one_signal_handler(SIGPIPE, do_nothing, 1); - } -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5a2e64c..5711dd2 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -1,297 +1,309 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * Helper functions to create (simple) standalone programs. With the -+ * aid of these functions it should be possible to create full FUSE -+ * file system by implementing nothing but the request handlers. - -- Helper functions to create (simple) standalone programs. With the -- aid of these functions it should be possible to create full FUSE -- file system by implementing nothing but the request handlers. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. 
--*/ -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. -+ */ - - #include "config.h" - #include "fuse_i.h" -+#include "fuse_lowlevel.h" - #include "fuse_misc.h" - #include "fuse_opt.h" --#include "fuse_lowlevel.h" - #include "mount_util.h" - -+#include -+#include -+#include - #include - #include --#include --#include - #include --#include --#include - #include -+#include - --#define FUSE_HELPER_OPT(t, p) \ -- { t, offsetof(struct fuse_cmdline_opts, p), 1 } -+#define FUSE_HELPER_OPT(t, p) \ -+ { \ -+ t, offsetof(struct fuse_cmdline_opts, p), 1 \ -+ } - - static const struct fuse_opt fuse_helper_opts[] = { -- FUSE_HELPER_OPT("-h", show_help), -- FUSE_HELPER_OPT("--help", show_help), -- FUSE_HELPER_OPT("-V", show_version), -- FUSE_HELPER_OPT("--version", show_version), -- FUSE_HELPER_OPT("-d", debug), -- FUSE_HELPER_OPT("debug", debug), -- FUSE_HELPER_OPT("-d", foreground), -- FUSE_HELPER_OPT("debug", foreground), -- FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), -- FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), -- FUSE_HELPER_OPT("-f", foreground), -- FUSE_HELPER_OPT("fsname=", nodefault_subtype), -- FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), -- FUSE_HELPER_OPT("subtype=", nodefault_subtype), -- FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), -- FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), -- FUSE_OPT_END -+ FUSE_HELPER_OPT("-h", show_help), -+ FUSE_HELPER_OPT("--help", show_help), -+ FUSE_HELPER_OPT("-V", show_version), -+ FUSE_HELPER_OPT("--version", show_version), -+ FUSE_HELPER_OPT("-d", debug), -+ FUSE_HELPER_OPT("debug", debug), -+ FUSE_HELPER_OPT("-d", foreground), -+ FUSE_HELPER_OPT("debug", foreground), -+ FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), -+ FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), -+ FUSE_HELPER_OPT("-f", foreground), -+ FUSE_HELPER_OPT("fsname=", nodefault_subtype), -+ FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), -+ FUSE_HELPER_OPT("subtype=", nodefault_subtype), -+ FUSE_OPT_KEY("subtype=", 
FUSE_OPT_KEY_KEEP), -+ FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), -+ FUSE_OPT_END - }; - - struct fuse_conn_info_opts { -- int atomic_o_trunc; -- int no_remote_posix_lock; -- int no_remote_flock; -- int splice_write; -- int splice_move; -- int splice_read; -- int no_splice_write; -- int no_splice_move; -- int no_splice_read; -- int auto_inval_data; -- int no_auto_inval_data; -- int no_readdirplus; -- int no_readdirplus_auto; -- int async_dio; -- int no_async_dio; -- int writeback_cache; -- int no_writeback_cache; -- int async_read; -- int sync_read; -- unsigned max_write; -- unsigned max_readahead; -- unsigned max_background; -- unsigned congestion_threshold; -- unsigned time_gran; -- int set_max_write; -- int set_max_readahead; -- int set_max_background; -- int set_congestion_threshold; -- int set_time_gran; -+ int atomic_o_trunc; -+ int no_remote_posix_lock; -+ int no_remote_flock; -+ int splice_write; -+ int splice_move; -+ int splice_read; -+ int no_splice_write; -+ int no_splice_move; -+ int no_splice_read; -+ int auto_inval_data; -+ int no_auto_inval_data; -+ int no_readdirplus; -+ int no_readdirplus_auto; -+ int async_dio; -+ int no_async_dio; -+ int writeback_cache; -+ int no_writeback_cache; -+ int async_read; -+ int sync_read; -+ unsigned max_write; -+ unsigned max_readahead; -+ unsigned max_background; -+ unsigned congestion_threshold; -+ unsigned time_gran; -+ int set_max_write; -+ int set_max_readahead; -+ int set_max_background; -+ int set_congestion_threshold; -+ int set_time_gran; - }; - --#define CONN_OPTION(t, p, v) \ -- { t, offsetof(struct fuse_conn_info_opts, p), v } -+#define CONN_OPTION(t, p, v) \ -+ { \ -+ t, offsetof(struct fuse_conn_info_opts, p), v \ -+ } - static const struct fuse_opt conn_info_opt_spec[] = { -- CONN_OPTION("max_write=%u", max_write, 0), -- CONN_OPTION("max_write=", set_max_write, 1), -- CONN_OPTION("max_readahead=%u", max_readahead, 0), -- CONN_OPTION("max_readahead=", set_max_readahead, 1), -- 
CONN_OPTION("max_background=%u", max_background, 0), -- CONN_OPTION("max_background=", set_max_background, 1), -- CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), -- CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), -- CONN_OPTION("sync_read", sync_read, 1), -- CONN_OPTION("async_read", async_read, 1), -- CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), -- CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), -- CONN_OPTION("no_remote_lock", no_remote_flock, 1), -- CONN_OPTION("no_remote_flock", no_remote_flock, 1), -- CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), -- CONN_OPTION("splice_write", splice_write, 1), -- CONN_OPTION("no_splice_write", no_splice_write, 1), -- CONN_OPTION("splice_move", splice_move, 1), -- CONN_OPTION("no_splice_move", no_splice_move, 1), -- CONN_OPTION("splice_read", splice_read, 1), -- CONN_OPTION("no_splice_read", no_splice_read, 1), -- CONN_OPTION("auto_inval_data", auto_inval_data, 1), -- CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), -- CONN_OPTION("readdirplus=no", no_readdirplus, 1), -- CONN_OPTION("readdirplus=yes", no_readdirplus, 0), -- CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), -- CONN_OPTION("readdirplus=auto", no_readdirplus, 0), -- CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), -- CONN_OPTION("async_dio", async_dio, 1), -- CONN_OPTION("no_async_dio", no_async_dio, 1), -- CONN_OPTION("writeback_cache", writeback_cache, 1), -- CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), -- CONN_OPTION("time_gran=%u", time_gran, 0), -- CONN_OPTION("time_gran=", set_time_gran, 1), -- FUSE_OPT_END -+ CONN_OPTION("max_write=%u", max_write, 0), -+ CONN_OPTION("max_write=", set_max_write, 1), -+ CONN_OPTION("max_readahead=%u", max_readahead, 0), -+ CONN_OPTION("max_readahead=", set_max_readahead, 1), -+ CONN_OPTION("max_background=%u", max_background, 0), -+ CONN_OPTION("max_background=", set_max_background, 1), -+ 
CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), -+ CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), -+ CONN_OPTION("sync_read", sync_read, 1), -+ CONN_OPTION("async_read", async_read, 1), -+ CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), -+ CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), -+ CONN_OPTION("no_remote_lock", no_remote_flock, 1), -+ CONN_OPTION("no_remote_flock", no_remote_flock, 1), -+ CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), -+ CONN_OPTION("splice_write", splice_write, 1), -+ CONN_OPTION("no_splice_write", no_splice_write, 1), -+ CONN_OPTION("splice_move", splice_move, 1), -+ CONN_OPTION("no_splice_move", no_splice_move, 1), -+ CONN_OPTION("splice_read", splice_read, 1), -+ CONN_OPTION("no_splice_read", no_splice_read, 1), -+ CONN_OPTION("auto_inval_data", auto_inval_data, 1), -+ CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), -+ CONN_OPTION("readdirplus=no", no_readdirplus, 1), -+ CONN_OPTION("readdirplus=yes", no_readdirplus, 0), -+ CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), -+ CONN_OPTION("readdirplus=auto", no_readdirplus, 0), -+ CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), -+ CONN_OPTION("async_dio", async_dio, 1), -+ CONN_OPTION("no_async_dio", no_async_dio, 1), -+ CONN_OPTION("writeback_cache", writeback_cache, 1), -+ CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), -+ CONN_OPTION("time_gran=%u", time_gran, 0), -+ CONN_OPTION("time_gran=", set_time_gran, 1), -+ FUSE_OPT_END - }; - - - void fuse_cmdline_help(void) - { -- printf(" -h --help print help\n" -- " -V --version print version\n" -- " -d -o debug enable debug output (implies -f)\n" -- " -f foreground operation\n" -- " -o max_idle_threads the maximum number of idle worker threads\n" -- " allowed (default: 10)\n"); -+ printf( -+ " -h --help print help\n" -+ " -V --version print version\n" -+ " -d -o debug enable debug output (implies -f)\n" -+ " -f foreground operation\n" -+ " -o 
max_idle_threads the maximum number of idle worker threads\n" -+ " allowed (default: 10)\n"); - } - - static int fuse_helper_opt_proc(void *data, const char *arg, int key, -- struct fuse_args *outargs) -+ struct fuse_args *outargs) - { -- (void) outargs; -- struct fuse_cmdline_opts *opts = data; -- -- switch (key) { -- case FUSE_OPT_KEY_NONOPT: -- if (!opts->mountpoint) { -- if (fuse_mnt_parse_fuse_fd(arg) != -1) { -- return fuse_opt_add_opt(&opts->mountpoint, arg); -- } -- -- char mountpoint[PATH_MAX] = ""; -- if (realpath(arg, mountpoint) == NULL) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: bad mount point `%s': %s\n", -- arg, strerror(errno)); -- return -1; -- } -- return fuse_opt_add_opt(&opts->mountpoint, mountpoint); -- } else { -- fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -- return -1; -- } -- -- default: -- /* Pass through unknown options */ -- return 1; -- } -+ (void)outargs; -+ struct fuse_cmdline_opts *opts = data; -+ -+ switch (key) { -+ case FUSE_OPT_KEY_NONOPT: -+ if (!opts->mountpoint) { -+ if (fuse_mnt_parse_fuse_fd(arg) != -1) { -+ return fuse_opt_add_opt(&opts->mountpoint, arg); -+ } -+ -+ char mountpoint[PATH_MAX] = ""; -+ if (realpath(arg, mountpoint) == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: bad mount point `%s': %s\n", arg, -+ strerror(errno)); -+ return -1; -+ } -+ return fuse_opt_add_opt(&opts->mountpoint, mountpoint); -+ } else { -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -+ return -1; -+ } -+ -+ default: -+ /* Pass through unknown options */ -+ return 1; -+ } - } - --int fuse_parse_cmdline(struct fuse_args *args, -- struct fuse_cmdline_opts *opts) -+int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) - { -- memset(opts, 0, sizeof(struct fuse_cmdline_opts)); -+ memset(opts, 0, sizeof(struct fuse_cmdline_opts)); - -- opts->max_idle_threads = 10; -+ opts->max_idle_threads = 10; - -- if (fuse_opt_parse(args, opts, fuse_helper_opts, -- fuse_helper_opt_proc) == -1) -- return -1; -+ 
if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == -+ -1) { -+ return -1; -+ } - -- return 0; -+ return 0; - } - - - int fuse_daemonize(int foreground) - { -- if (!foreground) { -- int nullfd; -- int waiter[2]; -- char completed; -- -- if (pipe(waiter)) { -- perror("fuse_daemonize: pipe"); -- return -1; -- } -- -- /* -- * demonize current process by forking it and killing the -- * parent. This makes current process as a child of 'init'. -- */ -- switch(fork()) { -- case -1: -- perror("fuse_daemonize: fork"); -- return -1; -- case 0: -- break; -- default: -- (void) read(waiter[0], &completed, sizeof(completed)); -- _exit(0); -- } -- -- if (setsid() == -1) { -- perror("fuse_daemonize: setsid"); -- return -1; -- } -- -- (void) chdir("/"); -- -- nullfd = open("/dev/null", O_RDWR, 0); -- if (nullfd != -1) { -- (void) dup2(nullfd, 0); -- (void) dup2(nullfd, 1); -- (void) dup2(nullfd, 2); -- if (nullfd > 2) -- close(nullfd); -- } -- -- /* Propagate completion of daemon initialization */ -- completed = 1; -- (void) write(waiter[1], &completed, sizeof(completed)); -- close(waiter[0]); -- close(waiter[1]); -- } else { -- (void) chdir("/"); -- } -- return 0; -+ if (!foreground) { -+ int nullfd; -+ int waiter[2]; -+ char completed; -+ -+ if (pipe(waiter)) { -+ perror("fuse_daemonize: pipe"); -+ return -1; -+ } -+ -+ /* -+ * demonize current process by forking it and killing the -+ * parent. This makes current process as a child of 'init'. 
-+ */ -+ switch (fork()) { -+ case -1: -+ perror("fuse_daemonize: fork"); -+ return -1; -+ case 0: -+ break; -+ default: -+ (void)read(waiter[0], &completed, sizeof(completed)); -+ _exit(0); -+ } -+ -+ if (setsid() == -1) { -+ perror("fuse_daemonize: setsid"); -+ return -1; -+ } -+ -+ (void)chdir("/"); -+ -+ nullfd = open("/dev/null", O_RDWR, 0); -+ if (nullfd != -1) { -+ (void)dup2(nullfd, 0); -+ (void)dup2(nullfd, 1); -+ (void)dup2(nullfd, 2); -+ if (nullfd > 2) { -+ close(nullfd); -+ } -+ } -+ -+ /* Propagate completion of daemon initialization */ -+ completed = 1; -+ (void)write(waiter[1], &completed, sizeof(completed)); -+ close(waiter[0]); -+ close(waiter[1]); -+ } else { -+ (void)chdir("/"); -+ } -+ return 0; - } - - void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, -- struct fuse_conn_info *conn) -+ struct fuse_conn_info *conn) - { -- if(opts->set_max_write) -- conn->max_write = opts->max_write; -- if(opts->set_max_background) -- conn->max_background = opts->max_background; -- if(opts->set_congestion_threshold) -- conn->congestion_threshold = opts->congestion_threshold; -- if(opts->set_time_gran) -- conn->time_gran = opts->time_gran; -- if(opts->set_max_readahead) -- conn->max_readahead = opts->max_readahead; -- --#define LL_ENABLE(cond,cap) \ -- if (cond) conn->want |= (cap) --#define LL_DISABLE(cond,cap) \ -- if (cond) conn->want &= ~(cap) -- -- LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); -- LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); -- -- LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); -- LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); -- -- LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); -- LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); -- -- LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -- LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -- -- LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); -- LL_DISABLE(opts->no_readdirplus_auto, 
FUSE_CAP_READDIRPLUS_AUTO); -- -- LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); -- LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); -- -- LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -- LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -- -- LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); -- LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); -- -- LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); -- LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); -+ if (opts->set_max_write) { -+ conn->max_write = opts->max_write; -+ } -+ if (opts->set_max_background) { -+ conn->max_background = opts->max_background; -+ } -+ if (opts->set_congestion_threshold) { -+ conn->congestion_threshold = opts->congestion_threshold; -+ } -+ if (opts->set_time_gran) { -+ conn->time_gran = opts->time_gran; -+ } -+ if (opts->set_max_readahead) { -+ conn->max_readahead = opts->max_readahead; -+ } -+ -+#define LL_ENABLE(cond, cap) \ -+ if (cond) \ -+ conn->want |= (cap) -+#define LL_DISABLE(cond, cap) \ -+ if (cond) \ -+ conn->want &= ~(cap) -+ -+ LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); -+ LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); -+ -+ LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); -+ LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); -+ -+ LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); -+ LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); -+ -+ LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -+ LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -+ -+ LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); -+ LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO); -+ -+ LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); -+ LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); -+ -+ LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -+ LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -+ -+ LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); -+ 
LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); -+ -+ LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); -+ LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); - } - --struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) -+struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args) - { -- struct fuse_conn_info_opts *opts; -- -- opts = calloc(1, sizeof(struct fuse_conn_info_opts)); -- if(opts == NULL) { -- fuse_log(FUSE_LOG_ERR, "calloc failed\n"); -- return NULL; -- } -- if(fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { -- free(opts); -- return NULL; -- } -- return opts; -+ struct fuse_conn_info_opts *opts; -+ -+ opts = calloc(1, sizeof(struct fuse_conn_info_opts)); -+ if (opts == NULL) { -+ fuse_log(FUSE_LOG_ERR, "calloc failed\n"); -+ return NULL; -+ } -+ if (fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { -+ free(opts); -+ return NULL; -+ } -+ return opts; - } -diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h -index 7c5f561..0b98275 100644 ---- a/tools/virtiofsd/passthrough_helpers.h -+++ b/tools/virtiofsd/passthrough_helpers.h -@@ -28,23 +28,24 @@ - * operation - */ - static int mknod_wrapper(int dirfd, const char *path, const char *link, -- int mode, dev_t rdev) -+ int mode, dev_t rdev) - { -- int res; -+ int res; - -- if (S_ISREG(mode)) { -- res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); -- if (res >= 0) -- res = close(res); -- } else if (S_ISDIR(mode)) { -- res = mkdirat(dirfd, path, mode); -- } else if (S_ISLNK(mode) && link != NULL) { -- res = symlinkat(link, dirfd, path); -- } else if (S_ISFIFO(mode)) { -- res = mkfifoat(dirfd, path, mode); -- } else { -- res = mknodat(dirfd, path, mode, rdev); -- } -+ if (S_ISREG(mode)) { -+ res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); -+ if (res >= 0) { -+ res = close(res); -+ } -+ } else if (S_ISDIR(mode)) { -+ res = mkdirat(dirfd, path, mode); -+ } else if 
(S_ISLNK(mode) && link != NULL) { -+ res = symlinkat(link, dirfd, path); -+ } else if (S_ISFIFO(mode)) { -+ res = mkfifoat(dirfd, path, mode); -+ } else { -+ res = mknodat(dirfd, path, mode, rdev); -+ } - -- return res; -+ return res; - } -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e5f7115..c5850ef 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1,12 +1,12 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU GPLv2. -- See the file COPYING. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU GPLv2. -+ * See the file COPYING. -+ */ - --/** @file -+/* - * - * This file system mirrors the existing file system hierarchy of the - * system, starting at the root file system. This is implemented by -@@ -28,7 +28,8 @@ - * - * Compile with: - * -- * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o passthrough_ll -+ * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o -+ * passthrough_ll - * - * ## Source code ## - * \include passthrough_ll.c -@@ -39,1299 +40,1365 @@ - - #include "config.h" - --#include --#include --#include --#include --#include --#include --#include --#include --#include - #include -+#include - #include -+#include - #include -+#include - #include -+#include -+#include -+#include -+#include -+#include - #include - #include -+#include - - #include "passthrough_helpers.h" - --/* We are re-using pointers to our `struct lo_inode` and `struct -- lo_dirp` elements as inodes. This means that we must be able to -- store uintptr_t values in a fuse_ino_t variable. The following -- incantation checks this condition at compile time. 
*/ --#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus -+/* -+ * We are re-using pointers to our `struct lo_inode` and `struct -+ * lo_dirp` elements as inodes. This means that we must be able to -+ * store uintptr_t values in a fuse_ino_t variable. The following -+ * incantation checks this condition at compile time. -+ */ -+#if defined(__GNUC__) && \ -+ (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ -+ !defined __cplusplus - _Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), -- "fuse_ino_t too small to hold uintptr_t values!"); -+ "fuse_ino_t too small to hold uintptr_t values!"); - #else --struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct \ -- { unsigned _uintptr_to_must_hold_fuse_ino_t: -- ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 1 : -1); }; -+struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { -+ unsigned _uintptr_to_must_hold_fuse_ino_t -+ : ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 1 : -1); -+}; - #endif - - struct lo_inode { -- struct lo_inode *next; /* protected by lo->mutex */ -- struct lo_inode *prev; /* protected by lo->mutex */ -- int fd; -- bool is_symlink; -- ino_t ino; -- dev_t dev; -- uint64_t refcount; /* protected by lo->mutex */ -+ struct lo_inode *next; /* protected by lo->mutex */ -+ struct lo_inode *prev; /* protected by lo->mutex */ -+ int fd; -+ bool is_symlink; -+ ino_t ino; -+ dev_t dev; -+ uint64_t refcount; /* protected by lo->mutex */ - }; - - enum { -- CACHE_NEVER, -- CACHE_NORMAL, -- CACHE_ALWAYS, -+ CACHE_NEVER, -+ CACHE_NORMAL, -+ CACHE_ALWAYS, - }; - - struct lo_data { -- pthread_mutex_t mutex; -- int debug; -- int writeback; -- int flock; -- int xattr; -- const char *source; -- double timeout; -- int cache; -- int timeout_set; -- struct lo_inode root; /* protected by lo->mutex */ -+ pthread_mutex_t mutex; -+ int debug; -+ int writeback; -+ int flock; -+ int xattr; -+ const char *source; -+ double timeout; -+ int cache; -+ int timeout_set; -+ 
struct lo_inode root; /* protected by lo->mutex */ - }; - - static const struct fuse_opt lo_opts[] = { -- { "writeback", -- offsetof(struct lo_data, writeback), 1 }, -- { "no_writeback", -- offsetof(struct lo_data, writeback), 0 }, -- { "source=%s", -- offsetof(struct lo_data, source), 0 }, -- { "flock", -- offsetof(struct lo_data, flock), 1 }, -- { "no_flock", -- offsetof(struct lo_data, flock), 0 }, -- { "xattr", -- offsetof(struct lo_data, xattr), 1 }, -- { "no_xattr", -- offsetof(struct lo_data, xattr), 0 }, -- { "timeout=%lf", -- offsetof(struct lo_data, timeout), 0 }, -- { "timeout=", -- offsetof(struct lo_data, timeout_set), 1 }, -- { "cache=never", -- offsetof(struct lo_data, cache), CACHE_NEVER }, -- { "cache=auto", -- offsetof(struct lo_data, cache), CACHE_NORMAL }, -- { "cache=always", -- offsetof(struct lo_data, cache), CACHE_ALWAYS }, -- -- FUSE_OPT_END -+ { "writeback", offsetof(struct lo_data, writeback), 1 }, -+ { "no_writeback", offsetof(struct lo_data, writeback), 0 }, -+ { "source=%s", offsetof(struct lo_data, source), 0 }, -+ { "flock", offsetof(struct lo_data, flock), 1 }, -+ { "no_flock", offsetof(struct lo_data, flock), 0 }, -+ { "xattr", offsetof(struct lo_data, xattr), 1 }, -+ { "no_xattr", offsetof(struct lo_data, xattr), 0 }, -+ { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, -+ { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, -+ { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, -+ { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, -+ { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, -+ -+ FUSE_OPT_END - }; - - static struct lo_data *lo_data(fuse_req_t req) - { -- return (struct lo_data *) fuse_req_userdata(req); -+ return (struct lo_data *)fuse_req_userdata(req); - } - - static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - { -- if (ino == FUSE_ROOT_ID) -- return &lo_data(req)->root; -- else -- return (struct lo_inode *) (uintptr_t) ino; -+ if (ino == 
FUSE_ROOT_ID) { -+ return &lo_data(req)->root; -+ } else { -+ return (struct lo_inode *)(uintptr_t)ino; -+ } - } - - static int lo_fd(fuse_req_t req, fuse_ino_t ino) - { -- return lo_inode(req, ino)->fd; -+ return lo_inode(req, ino)->fd; - } - - static bool lo_debug(fuse_req_t req) - { -- return lo_data(req)->debug != 0; -+ return lo_data(req)->debug != 0; - } - --static void lo_init(void *userdata, -- struct fuse_conn_info *conn) -+static void lo_init(void *userdata, struct fuse_conn_info *conn) - { -- struct lo_data *lo = (struct lo_data*) userdata; -- -- if(conn->capable & FUSE_CAP_EXPORT_SUPPORT) -- conn->want |= FUSE_CAP_EXPORT_SUPPORT; -- -- if (lo->writeback && -- conn->capable & FUSE_CAP_WRITEBACK_CACHE) { -- if (lo->debug) -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); -- conn->want |= FUSE_CAP_WRITEBACK_CACHE; -- } -- if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -- if (lo->debug) -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -- conn->want |= FUSE_CAP_FLOCK_LOCKS; -- } -+ struct lo_data *lo = (struct lo_data *)userdata; -+ -+ if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) { -+ conn->want |= FUSE_CAP_EXPORT_SUPPORT; -+ } -+ -+ if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { -+ if (lo->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); -+ } -+ conn->want |= FUSE_CAP_WRITEBACK_CACHE; -+ } -+ if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -+ if (lo->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -+ } -+ conn->want |= FUSE_CAP_FLOCK_LOCKS; -+ } - } - - static void lo_getattr(fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi) -+ struct fuse_file_info *fi) - { -- int res; -- struct stat buf; -- struct lo_data *lo = lo_data(req); -+ int res; -+ struct stat buf; -+ struct lo_data *lo = lo_data(req); - -- (void) fi; -+ (void)fi; - -- res = fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1) -- 
return (void) fuse_reply_err(req, errno); -+ res = -+ fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } - -- fuse_reply_attr(req, &buf, lo->timeout); -+ fuse_reply_attr(req, &buf, lo->timeout); - } - - static int utimensat_empty_nofollow(struct lo_inode *inode, -- const struct timespec *tv) -+ const struct timespec *tv) - { -- int res; -- char procname[64]; -- -- if (inode->is_symlink) { -- res = utimensat(inode->fd, "", tv, -- AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1 && errno == EINVAL) { -- /* Sorry, no race free way to set times on symlink. */ -- errno = EPERM; -- } -- return res; -- } -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -- -- return utimensat(AT_FDCWD, procname, tv, 0); -+ int res; -+ char procname[64]; -+ -+ if (inode->is_symlink) { -+ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1 && errno == EINVAL) { -+ /* Sorry, no race free way to set times on symlink. */ -+ errno = EPERM; -+ } -+ return res; -+ } -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ return utimensat(AT_FDCWD, procname, tv, 0); - } - - static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, -- int valid, struct fuse_file_info *fi) -+ int valid, struct fuse_file_info *fi) - { -- int saverr; -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- int ifd = inode->fd; -- int res; -- -- if (valid & FUSE_SET_ATTR_MODE) { -- if (fi) { -- res = fchmod(fi->fh, attr->st_mode); -- } else { -- sprintf(procname, "/proc/self/fd/%i", ifd); -- res = chmod(procname, attr->st_mode); -- } -- if (res == -1) -- goto out_err; -- } -- if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { -- uid_t uid = (valid & FUSE_SET_ATTR_UID) ? -- attr->st_uid : (uid_t) -1; -- gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
-- attr->st_gid : (gid_t) -1; -- -- res = fchownat(ifd, "", uid, gid, -- AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1) -- goto out_err; -- } -- if (valid & FUSE_SET_ATTR_SIZE) { -- if (fi) { -- res = ftruncate(fi->fh, attr->st_size); -- } else { -- sprintf(procname, "/proc/self/fd/%i", ifd); -- res = truncate(procname, attr->st_size); -- } -- if (res == -1) -- goto out_err; -- } -- if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { -- struct timespec tv[2]; -- -- tv[0].tv_sec = 0; -- tv[1].tv_sec = 0; -- tv[0].tv_nsec = UTIME_OMIT; -- tv[1].tv_nsec = UTIME_OMIT; -- -- if (valid & FUSE_SET_ATTR_ATIME_NOW) -- tv[0].tv_nsec = UTIME_NOW; -- else if (valid & FUSE_SET_ATTR_ATIME) -- tv[0] = attr->st_atim; -- -- if (valid & FUSE_SET_ATTR_MTIME_NOW) -- tv[1].tv_nsec = UTIME_NOW; -- else if (valid & FUSE_SET_ATTR_MTIME) -- tv[1] = attr->st_mtim; -- -- if (fi) -- res = futimens(fi->fh, tv); -- else -- res = utimensat_empty_nofollow(inode, tv); -- if (res == -1) -- goto out_err; -- } -- -- return lo_getattr(req, ino, fi); -+ int saverr; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ int ifd = inode->fd; -+ int res; -+ -+ if (valid & FUSE_SET_ATTR_MODE) { -+ if (fi) { -+ res = fchmod(fi->fh, attr->st_mode); -+ } else { -+ sprintf(procname, "/proc/self/fd/%i", ifd); -+ res = chmod(procname, attr->st_mode); -+ } -+ if (res == -1) { -+ goto out_err; -+ } -+ } -+ if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { -+ uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1; -+ gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
attr->st_gid : (gid_t)-1; -+ -+ res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ goto out_err; -+ } -+ } -+ if (valid & FUSE_SET_ATTR_SIZE) { -+ if (fi) { -+ res = ftruncate(fi->fh, attr->st_size); -+ } else { -+ sprintf(procname, "/proc/self/fd/%i", ifd); -+ res = truncate(procname, attr->st_size); -+ } -+ if (res == -1) { -+ goto out_err; -+ } -+ } -+ if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { -+ struct timespec tv[2]; -+ -+ tv[0].tv_sec = 0; -+ tv[1].tv_sec = 0; -+ tv[0].tv_nsec = UTIME_OMIT; -+ tv[1].tv_nsec = UTIME_OMIT; -+ -+ if (valid & FUSE_SET_ATTR_ATIME_NOW) { -+ tv[0].tv_nsec = UTIME_NOW; -+ } else if (valid & FUSE_SET_ATTR_ATIME) { -+ tv[0] = attr->st_atim; -+ } -+ -+ if (valid & FUSE_SET_ATTR_MTIME_NOW) { -+ tv[1].tv_nsec = UTIME_NOW; -+ } else if (valid & FUSE_SET_ATTR_MTIME) { -+ tv[1] = attr->st_mtim; -+ } -+ -+ if (fi) { -+ res = futimens(fi->fh, tv); -+ } else { -+ res = utimensat_empty_nofollow(inode, tv); -+ } -+ if (res == -1) { -+ goto out_err; -+ } -+ } -+ -+ return lo_getattr(req, ino, fi); - - out_err: -- saverr = errno; -- fuse_reply_err(req, saverr); -+ saverr = errno; -+ fuse_reply_err(req, saverr); - } - - static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - { -- struct lo_inode *p; -- struct lo_inode *ret = NULL; -- -- pthread_mutex_lock(&lo->mutex); -- for (p = lo->root.next; p != &lo->root; p = p->next) { -- if (p->ino == st->st_ino && p->dev == st->st_dev) { -- assert(p->refcount > 0); -- ret = p; -- ret->refcount++; -- break; -- } -- } -- pthread_mutex_unlock(&lo->mutex); -- return ret; -+ struct lo_inode *p; -+ struct lo_inode *ret = NULL; -+ -+ pthread_mutex_lock(&lo->mutex); -+ for (p = lo->root.next; p != &lo->root; p = p->next) { -+ if (p->ino == st->st_ino && p->dev == st->st_dev) { -+ assert(p->refcount > 0); -+ ret = p; -+ ret->refcount++; -+ break; -+ } -+ } -+ pthread_mutex_unlock(&lo->mutex); -+ return ret; - } - - static int 
lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, -- struct fuse_entry_param *e) -+ struct fuse_entry_param *e) - { -- int newfd; -- int res; -- int saverr; -- struct lo_data *lo = lo_data(req); -- struct lo_inode *inode; -- -- memset(e, 0, sizeof(*e)); -- e->attr_timeout = lo->timeout; -- e->entry_timeout = lo->timeout; -- -- newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); -- if (newfd == -1) -- goto out_err; -- -- res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1) -- goto out_err; -- -- inode = lo_find(lo_data(req), &e->attr); -- if (inode) { -- close(newfd); -- newfd = -1; -- } else { -- struct lo_inode *prev, *next; -- -- saverr = ENOMEM; -- inode = calloc(1, sizeof(struct lo_inode)); -- if (!inode) -- goto out_err; -- -- inode->is_symlink = S_ISLNK(e->attr.st_mode); -- inode->refcount = 1; -- inode->fd = newfd; -- inode->ino = e->attr.st_ino; -- inode->dev = e->attr.st_dev; -- -- pthread_mutex_lock(&lo->mutex); -- prev = &lo->root; -- next = prev->next; -- next->prev = inode; -- inode->next = next; -- inode->prev = prev; -- prev->next = inode; -- pthread_mutex_unlock(&lo->mutex); -- } -- e->ino = (uintptr_t) inode; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long) parent, name, (unsigned long long) e->ino); -- -- return 0; -+ int newfd; -+ int res; -+ int saverr; -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode; -+ -+ memset(e, 0, sizeof(*e)); -+ e->attr_timeout = lo->timeout; -+ e->entry_timeout = lo->timeout; -+ -+ newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); -+ if (newfd == -1) { -+ goto out_err; -+ } -+ -+ res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ goto out_err; -+ } -+ -+ inode = lo_find(lo_data(req), &e->attr); -+ if (inode) { -+ close(newfd); -+ newfd = -1; -+ } else { -+ struct lo_inode *prev, *next; -+ -+ saverr = ENOMEM; -+ inode = calloc(1, 
sizeof(struct lo_inode)); -+ if (!inode) { -+ goto out_err; -+ } -+ -+ inode->is_symlink = S_ISLNK(e->attr.st_mode); -+ inode->refcount = 1; -+ inode->fd = newfd; -+ inode->ino = e->attr.st_ino; -+ inode->dev = e->attr.st_dev; -+ -+ pthread_mutex_lock(&lo->mutex); -+ prev = &lo->root; -+ next = prev->next; -+ next->prev = inode; -+ inode->next = next; -+ inode->prev = prev; -+ prev->next = inode; -+ pthread_mutex_unlock(&lo->mutex); -+ } -+ e->ino = (uintptr_t)inode; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long)parent, name, (unsigned long long)e->ino); -+ } -+ -+ return 0; - - out_err: -- saverr = errno; -- if (newfd != -1) -- close(newfd); -- return saverr; -+ saverr = errno; -+ if (newfd != -1) { -+ close(newfd); -+ } -+ return saverr; - } - - static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - { -- struct fuse_entry_param e; -- int err; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", -- parent, name); -- -- err = lo_do_lookup(req, parent, name, &e); -- if (err) -- fuse_reply_err(req, err); -- else -- fuse_reply_entry(req, &e); -+ struct fuse_entry_param e; -+ int err; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", -+ parent, name); -+ } -+ -+ err = lo_do_lookup(req, parent, name, &e); -+ if (err) { -+ fuse_reply_err(req, err); -+ } else { -+ fuse_reply_entry(req, &e); -+ } - } - - static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, -- const char *name, mode_t mode, dev_t rdev, -- const char *link) -+ const char *name, mode_t mode, dev_t rdev, -+ const char *link) - { -- int res; -- int saverr; -- struct lo_inode *dir = lo_inode(req, parent); -- struct fuse_entry_param e; -+ int res; -+ int saverr; -+ struct lo_inode *dir = lo_inode(req, parent); -+ struct fuse_entry_param e; - -- saverr = ENOMEM; -+ saverr = ENOMEM; - -- res = mknod_wrapper(dir->fd, name, link, mode, 
rdev); -+ res = mknod_wrapper(dir->fd, name, link, mode, rdev); - -- saverr = errno; -- if (res == -1) -- goto out; -+ saverr = errno; -+ if (res == -1) { -+ goto out; -+ } - -- saverr = lo_do_lookup(req, parent, name, &e); -- if (saverr) -- goto out; -+ saverr = lo_do_lookup(req, parent, name, &e); -+ if (saverr) { -+ goto out; -+ } - -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long) parent, name, (unsigned long long) e.ino); -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long)parent, name, (unsigned long long)e.ino); -+ } - -- fuse_reply_entry(req, &e); -- return; -+ fuse_reply_entry(req, &e); -+ return; - - out: -- fuse_reply_err(req, saverr); -+ fuse_reply_err(req, saverr); - } - --static void lo_mknod(fuse_req_t req, fuse_ino_t parent, -- const char *name, mode_t mode, dev_t rdev) -+static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, dev_t rdev) - { -- lo_mknod_symlink(req, parent, name, mode, rdev, NULL); -+ lo_mknod_symlink(req, parent, name, mode, rdev, NULL); - } - - static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode) -+ mode_t mode) - { -- lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); -+ lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); - } - --static void lo_symlink(fuse_req_t req, const char *link, -- fuse_ino_t parent, const char *name) -+static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, -+ const char *name) - { -- lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); -+ lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); - } - - static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, -- const char *name) -+ const char *name) - { -- int res; -- char procname[64]; -+ int res; -+ char procname[64]; - -- if (inode->is_symlink) { -- res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); -- if (res == -1 && (errno == ENOENT || 
errno == EINVAL)) { -- /* Sorry, no race free way to hard-link a symlink. */ -- errno = EPERM; -- } -- return res; -- } -+ if (inode->is_symlink) { -+ res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); -+ if (res == -1 && (errno == ENOENT || errno == EINVAL)) { -+ /* Sorry, no race free way to hard-link a symlink. */ -+ errno = EPERM; -+ } -+ return res; -+ } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); - -- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); -+ return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); - } - - static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, -- const char *name) -+ const char *name) - { -- int res; -- struct lo_data *lo = lo_data(req); -- struct lo_inode *inode = lo_inode(req, ino); -- struct fuse_entry_param e; -- int saverr; -- -- memset(&e, 0, sizeof(struct fuse_entry_param)); -- e.attr_timeout = lo->timeout; -- e.entry_timeout = lo->timeout; -- -- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); -- if (res == -1) -- goto out_err; -- -- res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1) -- goto out_err; -- -- pthread_mutex_lock(&lo->mutex); -- inode->refcount++; -- pthread_mutex_unlock(&lo->mutex); -- e.ino = (uintptr_t) inode; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long) parent, name, -- (unsigned long long) e.ino); -- -- fuse_reply_entry(req, &e); -- return; -+ int res; -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode = lo_inode(req, ino); -+ struct fuse_entry_param e; -+ int saverr; -+ -+ memset(&e, 0, sizeof(struct fuse_entry_param)); -+ e.attr_timeout = lo->timeout; -+ e.entry_timeout = lo->timeout; -+ -+ res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); -+ if (res == -1) { -+ goto out_err; -+ } -+ -+ res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if 
(res == -1) { -+ goto out_err; -+ } -+ -+ pthread_mutex_lock(&lo->mutex); -+ inode->refcount++; -+ pthread_mutex_unlock(&lo->mutex); -+ e.ino = (uintptr_t)inode; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long)parent, name, (unsigned long long)e.ino); -+ } -+ -+ fuse_reply_entry(req, &e); -+ return; - - out_err: -- saverr = errno; -- fuse_reply_err(req, saverr); -+ saverr = errno; -+ fuse_reply_err(req, saverr); - } - - static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) - { -- int res; -+ int res; - -- res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); -+ res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); - -- fuse_reply_err(req, res == -1 ? errno : 0); -+ fuse_reply_err(req, res == -1 ? errno : 0); - } - - static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, -- fuse_ino_t newparent, const char *newname, -- unsigned int flags) -+ fuse_ino_t newparent, const char *newname, -+ unsigned int flags) - { -- int res; -+ int res; - -- if (flags) { -- fuse_reply_err(req, EINVAL); -- return; -- } -+ if (flags) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - -- res = renameat(lo_fd(req, parent), name, -- lo_fd(req, newparent), newname); -+ res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); - -- fuse_reply_err(req, res == -1 ? errno : 0); -+ fuse_reply_err(req, res == -1 ? errno : 0); - } - - static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - { -- int res; -+ int res; - -- res = unlinkat(lo_fd(req, parent), name, 0); -+ res = unlinkat(lo_fd(req, parent), name, 0); - -- fuse_reply_err(req, res == -1 ? errno : 0); -+ fuse_reply_err(req, res == -1 ? 
errno : 0); - } - - static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) - { -- if (!inode) -- return; -- -- pthread_mutex_lock(&lo->mutex); -- assert(inode->refcount >= n); -- inode->refcount -= n; -- if (!inode->refcount) { -- struct lo_inode *prev, *next; -- -- prev = inode->prev; -- next = inode->next; -- next->prev = prev; -- prev->next = next; -- -- pthread_mutex_unlock(&lo->mutex); -- close(inode->fd); -- free(inode); -- -- } else { -- pthread_mutex_unlock(&lo->mutex); -- } -+ if (!inode) { -+ return; -+ } -+ -+ pthread_mutex_lock(&lo->mutex); -+ assert(inode->refcount >= n); -+ inode->refcount -= n; -+ if (!inode->refcount) { -+ struct lo_inode *prev, *next; -+ -+ prev = inode->prev; -+ next = inode->next; -+ next->prev = prev; -+ prev->next = next; -+ -+ pthread_mutex_unlock(&lo->mutex); -+ close(inode->fd); -+ free(inode); -+ -+ } else { -+ pthread_mutex_unlock(&lo->mutex); -+ } - } - - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { -- struct lo_data *lo = lo_data(req); -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode = lo_inode(req, ino); - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -- (unsigned long long) ino, -- (unsigned long long) inode->refcount, -- (unsigned long long) nlookup); -- } -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -+ (unsigned long long)ino, (unsigned long long)inode->refcount, -+ (unsigned long long)nlookup); -+ } - -- unref_inode(lo, inode, nlookup); -+ unref_inode(lo, inode, nlookup); - } - - static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { -- lo_forget_one(req, ino, nlookup); -- fuse_reply_none(req); -+ lo_forget_one(req, ino, nlookup); -+ fuse_reply_none(req); - } - - static void lo_forget_multi(fuse_req_t req, size_t count, -- struct fuse_forget_data *forgets) -+ struct fuse_forget_data *forgets) - { -- int 
i; -+ int i; - -- for (i = 0; i < count; i++) -- lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); -- fuse_reply_none(req); -+ for (i = 0; i < count; i++) { -+ lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); -+ } -+ fuse_reply_none(req); - } - - static void lo_readlink(fuse_req_t req, fuse_ino_t ino) - { -- char buf[PATH_MAX + 1]; -- int res; -+ char buf[PATH_MAX + 1]; -+ int res; - -- res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); -- if (res == -1) -- return (void) fuse_reply_err(req, errno); -+ res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); -+ if (res == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } - -- if (res == sizeof(buf)) -- return (void) fuse_reply_err(req, ENAMETOOLONG); -+ if (res == sizeof(buf)) { -+ return (void)fuse_reply_err(req, ENAMETOOLONG); -+ } - -- buf[res] = '\0'; -+ buf[res] = '\0'; - -- fuse_reply_readlink(req, buf); -+ fuse_reply_readlink(req, buf); - } - - struct lo_dirp { -- DIR *dp; -- struct dirent *entry; -- off_t offset; -+ DIR *dp; -+ struct dirent *entry; -+ off_t offset; - }; - - static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) - { -- return (struct lo_dirp *) (uintptr_t) fi->fh; -+ return (struct lo_dirp *)(uintptr_t)fi->fh; - } - --static void lo_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+static void lo_opendir(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi) - { -- int error = ENOMEM; -- struct lo_data *lo = lo_data(req); -- struct lo_dirp *d; -- int fd; -- -- d = calloc(1, sizeof(struct lo_dirp)); -- if (d == NULL) -- goto out_err; -- -- fd = openat(lo_fd(req, ino), ".", O_RDONLY); -- if (fd == -1) -- goto out_errno; -- -- d->dp = fdopendir(fd); -- if (d->dp == NULL) -- goto out_errno; -- -- d->offset = 0; -- d->entry = NULL; -- -- fi->fh = (uintptr_t) d; -- if (lo->cache == CACHE_ALWAYS) -- fi->keep_cache = 1; -- fuse_reply_open(req, fi); -- return; -+ int error = ENOMEM; -+ struct lo_data *lo = lo_data(req); -+ struct lo_dirp *d; -+ int 
fd; -+ -+ d = calloc(1, sizeof(struct lo_dirp)); -+ if (d == NULL) { -+ goto out_err; -+ } -+ -+ fd = openat(lo_fd(req, ino), ".", O_RDONLY); -+ if (fd == -1) { -+ goto out_errno; -+ } -+ -+ d->dp = fdopendir(fd); -+ if (d->dp == NULL) { -+ goto out_errno; -+ } -+ -+ d->offset = 0; -+ d->entry = NULL; -+ -+ fi->fh = (uintptr_t)d; -+ if (lo->cache == CACHE_ALWAYS) { -+ fi->keep_cache = 1; -+ } -+ fuse_reply_open(req, fi); -+ return; - - out_errno: -- error = errno; -+ error = errno; - out_err: -- if (d) { -- if (fd != -1) -- close(fd); -- free(d); -- } -- fuse_reply_err(req, error); -+ if (d) { -+ if (fd != -1) { -+ close(fd); -+ } -+ free(d); -+ } -+ fuse_reply_err(req, error); - } - - static int is_dot_or_dotdot(const char *name) - { -- return name[0] == '.' && (name[1] == '\0' || -- (name[1] == '.' && name[2] == '\0')); -+ return name[0] == '.' && -+ (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); - } - - static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, -- off_t offset, struct fuse_file_info *fi, int plus) -+ off_t offset, struct fuse_file_info *fi, int plus) - { -- struct lo_dirp *d = lo_dirp(fi); -- char *buf; -- char *p; -- size_t rem = size; -- int err; -- -- (void) ino; -- -- buf = calloc(1, size); -- if (!buf) { -- err = ENOMEM; -- goto error; -- } -- p = buf; -- -- if (offset != d->offset) { -- seekdir(d->dp, offset); -- d->entry = NULL; -- d->offset = offset; -- } -- while (1) { -- size_t entsize; -- off_t nextoff; -- const char *name; -- -- if (!d->entry) { -- errno = 0; -- d->entry = readdir(d->dp); -- if (!d->entry) { -- if (errno) { // Error -- err = errno; -- goto error; -- } else { // End of stream -- break; -- } -- } -- } -- nextoff = d->entry->d_off; -- name = d->entry->d_name; -- fuse_ino_t entry_ino = 0; -- if (plus) { -- struct fuse_entry_param e; -- if (is_dot_or_dotdot(name)) { -- e = (struct fuse_entry_param) { -- .attr.st_ino = d->entry->d_ino, -- .attr.st_mode = d->entry->d_type << 12, -- }; -- } else { -- 
err = lo_do_lookup(req, ino, name, &e); -- if (err) -- goto error; -- entry_ino = e.ino; -- } -- -- entsize = fuse_add_direntry_plus(req, p, rem, name, -- &e, nextoff); -- } else { -- struct stat st = { -- .st_ino = d->entry->d_ino, -- .st_mode = d->entry->d_type << 12, -- }; -- entsize = fuse_add_direntry(req, p, rem, name, -- &st, nextoff); -- } -- if (entsize > rem) { -- if (entry_ino != 0) -- lo_forget_one(req, entry_ino, 1); -- break; -- } -- -- p += entsize; -- rem -= entsize; -- -- d->entry = NULL; -- d->offset = nextoff; -- } -+ struct lo_dirp *d = lo_dirp(fi); -+ char *buf; -+ char *p; -+ size_t rem = size; -+ int err; -+ -+ (void)ino; -+ -+ buf = calloc(1, size); -+ if (!buf) { -+ err = ENOMEM; -+ goto error; -+ } -+ p = buf; -+ -+ if (offset != d->offset) { -+ seekdir(d->dp, offset); -+ d->entry = NULL; -+ d->offset = offset; -+ } -+ while (1) { -+ size_t entsize; -+ off_t nextoff; -+ const char *name; -+ -+ if (!d->entry) { -+ errno = 0; -+ d->entry = readdir(d->dp); -+ if (!d->entry) { -+ if (errno) { /* Error */ -+ err = errno; -+ goto error; -+ } else { /* End of stream */ -+ break; -+ } -+ } -+ } -+ nextoff = d->entry->d_off; -+ name = d->entry->d_name; -+ fuse_ino_t entry_ino = 0; -+ if (plus) { -+ struct fuse_entry_param e; -+ if (is_dot_or_dotdot(name)) { -+ e = (struct fuse_entry_param){ -+ .attr.st_ino = d->entry->d_ino, -+ .attr.st_mode = d->entry->d_type << 12, -+ }; -+ } else { -+ err = lo_do_lookup(req, ino, name, &e); -+ if (err) { -+ goto error; -+ } -+ entry_ino = e.ino; -+ } -+ -+ entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); -+ } else { -+ struct stat st = { -+ .st_ino = d->entry->d_ino, -+ .st_mode = d->entry->d_type << 12, -+ }; -+ entsize = fuse_add_direntry(req, p, rem, name, &st, nextoff); -+ } -+ if (entsize > rem) { -+ if (entry_ino != 0) { -+ lo_forget_one(req, entry_ino, 1); -+ } -+ break; -+ } -+ -+ p += entsize; -+ rem -= entsize; -+ -+ d->entry = NULL; -+ d->offset = nextoff; -+ } - - err = 0; - error: 
-- // If there's an error, we can only signal it if we haven't stored -- // any entries yet - otherwise we'd end up with wrong lookup -- // counts for the entries that are already in the buffer. So we -- // return what we've collected until that point. -- if (err && rem == size) -- fuse_reply_err(req, err); -- else -- fuse_reply_buf(req, buf, size - rem); -+ /* -+ * If there's an error, we can only signal it if we haven't stored -+ * any entries yet - otherwise we'd end up with wrong lookup -+ * counts for the entries that are already in the buffer. So we -+ * return what we've collected until that point. -+ */ -+ if (err && rem == size) { -+ fuse_reply_err(req, err); -+ } else { -+ fuse_reply_buf(req, buf, size - rem); -+ } - free(buf); - } - - static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, -- off_t offset, struct fuse_file_info *fi) -+ off_t offset, struct fuse_file_info *fi) - { -- lo_do_readdir(req, ino, size, offset, fi, 0); -+ lo_do_readdir(req, ino, size, offset, fi, 0); - } - - static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, -- off_t offset, struct fuse_file_info *fi) -+ off_t offset, struct fuse_file_info *fi) - { -- lo_do_readdir(req, ino, size, offset, fi, 1); -+ lo_do_readdir(req, ino, size, offset, fi, 1); - } - --static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi) - { -- struct lo_dirp *d = lo_dirp(fi); -- (void) ino; -- closedir(d->dp); -- free(d); -- fuse_reply_err(req, 0); -+ struct lo_dirp *d = lo_dirp(fi); -+ (void)ino; -+ closedir(d->dp); -+ free(d); -+ fuse_reply_err(req, 0); - } - - static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode, struct fuse_file_info *fi) -+ mode_t mode, struct fuse_file_info *fi) - { -- int fd; -- struct lo_data *lo = lo_data(req); -- struct fuse_entry_param e; -- int err; -- -- if (lo_debug(req)) -- 
fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", -- parent, name); -- -- fd = openat(lo_fd(req, parent), name, -- (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); -- if (fd == -1) -- return (void) fuse_reply_err(req, errno); -- -- fi->fh = fd; -- if (lo->cache == CACHE_NEVER) -- fi->direct_io = 1; -- else if (lo->cache == CACHE_ALWAYS) -- fi->keep_cache = 1; -- -- err = lo_do_lookup(req, parent, name, &e); -- if (err) -- fuse_reply_err(req, err); -- else -- fuse_reply_create(req, &e, fi); -+ int fd; -+ struct lo_data *lo = lo_data(req); -+ struct fuse_entry_param e; -+ int err; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", -+ parent, name); -+ } -+ -+ fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, -+ mode); -+ if (fd == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } -+ -+ fi->fh = fd; -+ if (lo->cache == CACHE_NEVER) { -+ fi->direct_io = 1; -+ } else if (lo->cache == CACHE_ALWAYS) { -+ fi->keep_cache = 1; -+ } -+ -+ err = lo_do_lookup(req, parent, name, &e); -+ if (err) { -+ fuse_reply_err(req, err); -+ } else { -+ fuse_reply_create(req, &e, fi); -+ } - } - - static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, -- struct fuse_file_info *fi) -+ struct fuse_file_info *fi) - { -- int res; -- int fd = dirfd(lo_dirp(fi)->dp); -- (void) ino; -- if (datasync) -- res = fdatasync(fd); -- else -- res = fsync(fd); -- fuse_reply_err(req, res == -1 ? errno : 0); -+ int res; -+ int fd = dirfd(lo_dirp(fi)->dp); -+ (void)ino; -+ if (datasync) { -+ res = fdatasync(fd); -+ } else { -+ res = fsync(fd); -+ } -+ fuse_reply_err(req, res == -1 ? 
errno : 0); - } - - static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { -- int fd; -- char buf[64]; -- struct lo_data *lo = lo_data(req); -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", -- ino, fi->flags); -- -- /* With writeback cache, kernel may send read requests even -- when userspace opened write-only */ -- if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -- fi->flags &= ~O_ACCMODE; -- fi->flags |= O_RDWR; -- } -- -- /* With writeback cache, O_APPEND is handled by the kernel. -- This breaks atomicity (since the file may change in the -- underlying filesystem, so that the kernel's idea of the -- end of the file isn't accurate anymore). In this example, -- we just accept that. A more rigorous filesystem may want -- to return an error here */ -- if (lo->writeback && (fi->flags & O_APPEND)) -- fi->flags &= ~O_APPEND; -- -- sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); -- fd = open(buf, fi->flags & ~O_NOFOLLOW); -- if (fd == -1) -- return (void) fuse_reply_err(req, errno); -- -- fi->fh = fd; -- if (lo->cache == CACHE_NEVER) -- fi->direct_io = 1; -- else if (lo->cache == CACHE_ALWAYS) -- fi->keep_cache = 1; -- fuse_reply_open(req, fi); -+ int fd; -+ char buf[64]; -+ struct lo_data *lo = lo_data(req); -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, -+ fi->flags); -+ } -+ -+ /* -+ * With writeback cache, kernel may send read requests even -+ * when userspace opened write-only -+ */ -+ if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -+ fi->flags &= ~O_ACCMODE; -+ fi->flags |= O_RDWR; -+ } -+ -+ /* -+ * With writeback cache, O_APPEND is handled by the kernel. -+ * This breaks atomicity (since the file may change in the -+ * underlying filesystem, so that the kernel's idea of the -+ * end of the file isn't accurate anymore). In this example, -+ * we just accept that. 
A more rigorous filesystem may want -+ * to return an error here -+ */ -+ if (lo->writeback && (fi->flags & O_APPEND)) { -+ fi->flags &= ~O_APPEND; -+ } -+ -+ sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); -+ fd = open(buf, fi->flags & ~O_NOFOLLOW); -+ if (fd == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } -+ -+ fi->fh = fd; -+ if (lo->cache == CACHE_NEVER) { -+ fi->direct_io = 1; -+ } else if (lo->cache == CACHE_ALWAYS) { -+ fi->keep_cache = 1; -+ } -+ fuse_reply_open(req, fi); - } - --static void lo_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+static void lo_release(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi) - { -- (void) ino; -+ (void)ino; - -- close(fi->fh); -- fuse_reply_err(req, 0); -+ close(fi->fh); -+ fuse_reply_err(req, 0); - } - - static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { -- int res; -- (void) ino; -- res = close(dup(fi->fh)); -- fuse_reply_err(req, res == -1 ? errno : 0); -+ int res; -+ (void)ino; -+ res = close(dup(fi->fh)); -+ fuse_reply_err(req, res == -1 ? errno : 0); - } - - static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, -- struct fuse_file_info *fi) -+ struct fuse_file_info *fi) - { -- int res; -- (void) ino; -- if (datasync) -- res = fdatasync(fi->fh); -- else -- res = fsync(fi->fh); -- fuse_reply_err(req, res == -1 ? errno : 0); -+ int res; -+ (void)ino; -+ if (datasync) { -+ res = fdatasync(fi->fh); -+ } else { -+ res = fsync(fi->fh); -+ } -+ fuse_reply_err(req, res == -1 ? 
errno : 0); - } - --static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, -- off_t offset, struct fuse_file_info *fi) -+static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, -+ struct fuse_file_info *fi) - { -- struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); -+ struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); - -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_read(ino=%" PRIu64 ", size=%zd, " -- "off=%lu)\n", ino, size, (unsigned long) offset); -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_read(ino=%" PRIu64 ", size=%zd, " -+ "off=%lu)\n", -+ ino, size, (unsigned long)offset); -+ } - -- buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -- buf.buf[0].fd = fi->fh; -- buf.buf[0].pos = offset; -+ buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -+ buf.buf[0].fd = fi->fh; -+ buf.buf[0].pos = offset; - -- fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); -+ fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); - } - - static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, -- struct fuse_bufvec *in_buf, off_t off, -- struct fuse_file_info *fi) -+ struct fuse_bufvec *in_buf, off_t off, -+ struct fuse_file_info *fi) - { -- (void) ino; -- ssize_t res; -- struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); -- -- out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -- out_buf.buf[0].fd = fi->fh; -- out_buf.buf[0].pos = off; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", -- ino, out_buf.buf[0].size, (unsigned long) off); -- -- res = fuse_buf_copy(&out_buf, in_buf, 0); -- if(res < 0) -- fuse_reply_err(req, -res); -- else -- fuse_reply_write(req, (size_t) res); -+ (void)ino; -+ ssize_t res; -+ struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); -+ -+ out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -+ out_buf.buf[0].fd = fi->fh; -+ out_buf.buf[0].pos = off; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, -+ 
"lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, -+ out_buf.buf[0].size, (unsigned long)off); -+ } -+ -+ res = fuse_buf_copy(&out_buf, in_buf, 0); -+ if (res < 0) { -+ fuse_reply_err(req, -res); -+ } else { -+ fuse_reply_write(req, (size_t)res); -+ } - } - - static void lo_statfs(fuse_req_t req, fuse_ino_t ino) - { -- int res; -- struct statvfs stbuf; -- -- res = fstatvfs(lo_fd(req, ino), &stbuf); -- if (res == -1) -- fuse_reply_err(req, errno); -- else -- fuse_reply_statfs(req, &stbuf); -+ int res; -+ struct statvfs stbuf; -+ -+ res = fstatvfs(lo_fd(req, ino), &stbuf); -+ if (res == -1) { -+ fuse_reply_err(req, errno); -+ } else { -+ fuse_reply_statfs(req, &stbuf); -+ } - } - --static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, -- off_t offset, off_t length, struct fuse_file_info *fi) -+static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, -+ off_t length, struct fuse_file_info *fi) - { -- int err = EOPNOTSUPP; -- (void) ino; -+ int err = EOPNOTSUPP; -+ (void)ino; - - #ifdef HAVE_FALLOCATE -- err = fallocate(fi->fh, mode, offset, length); -- if (err < 0) -- err = errno; -+ err = fallocate(fi->fh, mode, offset, length); -+ if (err < 0) { -+ err = errno; -+ } - - #elif defined(HAVE_POSIX_FALLOCATE) -- if (mode) { -- fuse_reply_err(req, EOPNOTSUPP); -- return; -- } -+ if (mode) { -+ fuse_reply_err(req, EOPNOTSUPP); -+ return; -+ } - -- err = posix_fallocate(fi->fh, offset, length); -+ err = posix_fallocate(fi->fh, offset, length); - #endif - -- fuse_reply_err(req, err); -+ fuse_reply_err(req, err); - } - - static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -- int op) -+ int op) - { -- int res; -- (void) ino; -+ int res; -+ (void)ino; - -- res = flock(fi->fh, op); -+ res = flock(fi->fh, op); - -- fuse_reply_err(req, res == -1 ? errno : 0); -+ fuse_reply_err(req, res == -1 ? 
errno : 0); - } - - static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, -- size_t size) -+ size_t size) - { -- char *value = NULL; -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- ssize_t ret; -- int saverr; -- -- saverr = ENOSYS; -- if (!lo_data(req)->xattr) -- goto out; -- -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", -- ino, name, size); -- } -- -- if (inode->is_symlink) { -- /* Sorry, no race free way to getxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -- -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -- -- if (size) { -- value = malloc(size); -- if (!value) -- goto out_err; -- -- ret = getxattr(procname, name, value, size); -- if (ret == -1) -- goto out_err; -- saverr = 0; -- if (ret == 0) -- goto out; -- -- fuse_reply_buf(req, value, ret); -- } else { -- ret = getxattr(procname, name, NULL, 0); -- if (ret == -1) -- goto out_err; -- -- fuse_reply_xattr(req, ret); -- } -+ char *value = NULL; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) { -+ goto out; -+ } -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name, -+ size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to getxattr on symlink. 
*/ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ if (size) { -+ value = malloc(size); -+ if (!value) { -+ goto out_err; -+ } -+ -+ ret = getxattr(procname, name, value, size); -+ if (ret == -1) { -+ goto out_err; -+ } -+ saverr = 0; -+ if (ret == 0) { -+ goto out; -+ } -+ -+ fuse_reply_buf(req, value, ret); -+ } else { -+ ret = getxattr(procname, name, NULL, 0); -+ if (ret == -1) { -+ goto out_err; -+ } -+ -+ fuse_reply_xattr(req, ret); -+ } - out_free: -- free(value); -- return; -+ free(value); -+ return; - - out_err: -- saverr = errno; -+ saverr = errno; - out: -- fuse_reply_err(req, saverr); -- goto out_free; -+ fuse_reply_err(req, saverr); -+ goto out_free; - } - - static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - { -- char *value = NULL; -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- ssize_t ret; -- int saverr; -- -- saverr = ENOSYS; -- if (!lo_data(req)->xattr) -- goto out; -- -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", -- ino, size); -- } -- -- if (inode->is_symlink) { -- /* Sorry, no race free way to listxattr on symlink. 
*/ -- saverr = EPERM; -- goto out; -- } -- -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -- -- if (size) { -- value = malloc(size); -- if (!value) -- goto out_err; -- -- ret = listxattr(procname, value, size); -- if (ret == -1) -- goto out_err; -- saverr = 0; -- if (ret == 0) -- goto out; -- -- fuse_reply_buf(req, value, ret); -- } else { -- ret = listxattr(procname, NULL, 0); -- if (ret == -1) -- goto out_err; -- -- fuse_reply_xattr(req, ret); -- } -+ char *value = NULL; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) { -+ goto out; -+ } -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", -+ ino, size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to listxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ if (size) { -+ value = malloc(size); -+ if (!value) { -+ goto out_err; -+ } -+ -+ ret = listxattr(procname, value, size); -+ if (ret == -1) { -+ goto out_err; -+ } -+ saverr = 0; -+ if (ret == 0) { -+ goto out; -+ } -+ -+ fuse_reply_buf(req, value, ret); -+ } else { -+ ret = listxattr(procname, NULL, 0); -+ if (ret == -1) { -+ goto out_err; -+ } -+ -+ fuse_reply_xattr(req, ret); -+ } - out_free: -- free(value); -- return; -+ free(value); -+ return; - - out_err: -- saverr = errno; -+ saverr = errno; - out: -- fuse_reply_err(req, saverr); -- goto out_free; -+ fuse_reply_err(req, saverr); -+ goto out_free; - } - - static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, -- const char *value, size_t size, int flags) -+ const char *value, size_t size, int flags) - { -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- ssize_t ret; -- int saverr; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; - -- saverr = ENOSYS; -- if 
(!lo_data(req)->xattr) -- goto out; -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) { -+ goto out; -+ } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", -- ino, name, value, size); -- } -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", -+ ino, name, value, size); -+ } - -- if (inode->is_symlink) { -- /* Sorry, no race free way to setxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to setxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); - -- ret = setxattr(procname, name, value, size, flags); -- saverr = ret == -1 ? errno : 0; -+ ret = setxattr(procname, name, value, size, flags); -+ saverr = ret == -1 ? errno : 0; - - out: -- fuse_reply_err(req, saverr); -+ fuse_reply_err(req, saverr); - } - - static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - { -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- ssize_t ret; -- int saverr; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; - -- saverr = ENOSYS; -- if (!lo_data(req)->xattr) -- goto out; -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) { -+ goto out; -+ } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", -- ino, name); -- } -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", -+ ino, name); -+ } - -- if (inode->is_symlink) { -- /* Sorry, no race free way to setxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to setxattr on symlink. 
*/ -+ saverr = EPERM; -+ goto out; -+ } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); - -- ret = removexattr(procname, name); -- saverr = ret == -1 ? errno : 0; -+ ret = removexattr(procname, name); -+ saverr = ret == -1 ? errno : 0; - - out: -- fuse_reply_err(req, saverr); -+ fuse_reply_err(req, saverr); - } - - #ifdef HAVE_COPY_FILE_RANGE - static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, -- struct fuse_file_info *fi_in, -- fuse_ino_t ino_out, off_t off_out, -- struct fuse_file_info *fi_out, size_t len, -- int flags) -+ struct fuse_file_info *fi_in, fuse_ino_t ino_out, -+ off_t off_out, struct fuse_file_info *fi_out, -+ size_t len, int flags) - { -- ssize_t res; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " -- "off=%lu, ino=%" PRIu64 "/fd=%lu, " -- "off=%lu, size=%zd, flags=0x%x)\n", -- ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, -- len, flags); -- -- res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, -- flags); -- if (res < 0) -- fuse_reply_err(req, -errno); -- else -- fuse_reply_write(req, res); -+ ssize_t res; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " -+ "off=%lu, ino=%" PRIu64 "/fd=%lu, " -+ "off=%lu, size=%zd, flags=0x%x)\n", -+ ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, len, -+ flags); -+ -+ res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, flags); -+ if (res < 0) { -+ fuse_reply_err(req, -errno); -+ } else { -+ fuse_reply_write(req, res); -+ } - } - #endif - - static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -- struct fuse_file_info *fi) -+ struct fuse_file_info *fi) - { -- off_t res; -- -- (void)ino; -- res = lseek(fi->fh, off, whence); -- if (res != -1) -- fuse_reply_lseek(req, res); -- else -- fuse_reply_err(req, errno); -+ off_t res; -+ -+ (void)ino; -+ res = 
lseek(fi->fh, off, whence); -+ if (res != -1) { -+ fuse_reply_lseek(req, res); -+ } else { -+ fuse_reply_err(req, errno); -+ } - } - - static struct fuse_lowlevel_ops lo_oper = { -- .init = lo_init, -- .lookup = lo_lookup, -- .mkdir = lo_mkdir, -- .mknod = lo_mknod, -- .symlink = lo_symlink, -- .link = lo_link, -- .unlink = lo_unlink, -- .rmdir = lo_rmdir, -- .rename = lo_rename, -- .forget = lo_forget, -- .forget_multi = lo_forget_multi, -- .getattr = lo_getattr, -- .setattr = lo_setattr, -- .readlink = lo_readlink, -- .opendir = lo_opendir, -- .readdir = lo_readdir, -- .readdirplus = lo_readdirplus, -- .releasedir = lo_releasedir, -- .fsyncdir = lo_fsyncdir, -- .create = lo_create, -- .open = lo_open, -- .release = lo_release, -- .flush = lo_flush, -- .fsync = lo_fsync, -- .read = lo_read, -- .write_buf = lo_write_buf, -- .statfs = lo_statfs, -- .fallocate = lo_fallocate, -- .flock = lo_flock, -- .getxattr = lo_getxattr, -- .listxattr = lo_listxattr, -- .setxattr = lo_setxattr, -- .removexattr = lo_removexattr, -+ .init = lo_init, -+ .lookup = lo_lookup, -+ .mkdir = lo_mkdir, -+ .mknod = lo_mknod, -+ .symlink = lo_symlink, -+ .link = lo_link, -+ .unlink = lo_unlink, -+ .rmdir = lo_rmdir, -+ .rename = lo_rename, -+ .forget = lo_forget, -+ .forget_multi = lo_forget_multi, -+ .getattr = lo_getattr, -+ .setattr = lo_setattr, -+ .readlink = lo_readlink, -+ .opendir = lo_opendir, -+ .readdir = lo_readdir, -+ .readdirplus = lo_readdirplus, -+ .releasedir = lo_releasedir, -+ .fsyncdir = lo_fsyncdir, -+ .create = lo_create, -+ .open = lo_open, -+ .release = lo_release, -+ .flush = lo_flush, -+ .fsync = lo_fsync, -+ .read = lo_read, -+ .write_buf = lo_write_buf, -+ .statfs = lo_statfs, -+ .fallocate = lo_fallocate, -+ .flock = lo_flock, -+ .getxattr = lo_getxattr, -+ .listxattr = lo_listxattr, -+ .setxattr = lo_setxattr, -+ .removexattr = lo_removexattr, - #ifdef HAVE_COPY_FILE_RANGE -- .copy_file_range = lo_copy_file_range, -+ .copy_file_range = lo_copy_file_range, - 
#endif -- .lseek = lo_lseek, -+ .lseek = lo_lseek, - }; - - int main(int argc, char *argv[]) - { -- struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -- struct fuse_session *se; -- struct fuse_cmdline_opts opts; -- struct lo_data lo = { .debug = 0, -- .writeback = 0 }; -- int ret = -1; -- -- /* Don't mask creation mode, kernel already did that */ -- umask(0); -- -- pthread_mutex_init(&lo.mutex, NULL); -- lo.root.next = lo.root.prev = &lo.root; -- lo.root.fd = -1; -- lo.cache = CACHE_NORMAL; -- -- if (fuse_parse_cmdline(&args, &opts) != 0) -- return 1; -- if (opts.show_help) { -- printf("usage: %s [options] \n\n", argv[0]); -- fuse_cmdline_help(); -- fuse_lowlevel_help(); -- ret = 0; -- goto err_out1; -- } else if (opts.show_version) { -- fuse_lowlevel_version(); -- ret = 0; -- goto err_out1; -- } -- -- if(opts.mountpoint == NULL) { -- printf("usage: %s [options] \n", argv[0]); -- printf(" %s --help\n", argv[0]); -- ret = 1; -- goto err_out1; -- } -- -- if (fuse_opt_parse(&args, &lo, lo_opts, NULL)== -1) -- return 1; -- -- lo.debug = opts.debug; -- lo.root.refcount = 2; -- if (lo.source) { -- struct stat stat; -- int res; -- -- res = lstat(lo.source, &stat); -- if (res == -1) { -- fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", -- lo.source); -- exit(1); -- } -- if (!S_ISDIR(stat.st_mode)) { -- fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); -- exit(1); -- } -- -- } else { -- lo.source = "/"; -- } -- lo.root.is_symlink = false; -- if (!lo.timeout_set) { -- switch (lo.cache) { -- case CACHE_NEVER: -- lo.timeout = 0.0; -- break; -- -- case CACHE_NORMAL: -- lo.timeout = 1.0; -- break; -- -- case CACHE_ALWAYS: -- lo.timeout = 86400.0; -- break; -- } -- } else if (lo.timeout < 0) { -- fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", -- lo.timeout); -- exit(1); -- } -- -- lo.root.fd = open(lo.source, O_PATH); -- if (lo.root.fd == -1) { -- fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", -- lo.source); -- exit(1); -- } -- -- se = 
fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); -- if (se == NULL) -- goto err_out1; -- -- if (fuse_set_signal_handlers(se) != 0) -- goto err_out2; -- -- if (fuse_session_mount(se, opts.mountpoint) != 0) -- goto err_out3; -- -- fuse_daemonize(opts.foreground); -- -- /* Block until ctrl+c or fusermount -u */ -- if (opts.singlethread) -- ret = fuse_session_loop(se); -- else -- ret = fuse_session_loop_mt(se, opts.clone_fd); -- -- fuse_session_unmount(se); -+ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -+ struct fuse_session *se; -+ struct fuse_cmdline_opts opts; -+ struct lo_data lo = { .debug = 0, .writeback = 0 }; -+ int ret = -1; -+ -+ /* Don't mask creation mode, kernel already did that */ -+ umask(0); -+ -+ pthread_mutex_init(&lo.mutex, NULL); -+ lo.root.next = lo.root.prev = &lo.root; -+ lo.root.fd = -1; -+ lo.cache = CACHE_NORMAL; -+ -+ if (fuse_parse_cmdline(&args, &opts) != 0) { -+ return 1; -+ } -+ if (opts.show_help) { -+ printf("usage: %s [options] \n\n", argv[0]); -+ fuse_cmdline_help(); -+ fuse_lowlevel_help(); -+ ret = 0; -+ goto err_out1; -+ } else if (opts.show_version) { -+ fuse_lowlevel_version(); -+ ret = 0; -+ goto err_out1; -+ } -+ -+ if (opts.mountpoint == NULL) { -+ printf("usage: %s [options] \n", argv[0]); -+ printf(" %s --help\n", argv[0]); -+ ret = 1; -+ goto err_out1; -+ } -+ -+ if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { -+ return 1; -+ } -+ -+ lo.debug = opts.debug; -+ lo.root.refcount = 2; -+ if (lo.source) { -+ struct stat stat; -+ int res; -+ -+ res = lstat(lo.source, &stat); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", -+ lo.source); -+ exit(1); -+ } -+ if (!S_ISDIR(stat.st_mode)) { -+ fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); -+ exit(1); -+ } -+ -+ } else { -+ lo.source = "/"; -+ } -+ lo.root.is_symlink = false; -+ if (!lo.timeout_set) { -+ switch (lo.cache) { -+ case CACHE_NEVER: -+ lo.timeout = 0.0; -+ break; -+ -+ case CACHE_NORMAL: -+ lo.timeout = 
1.0; -+ break; -+ -+ case CACHE_ALWAYS: -+ lo.timeout = 86400.0; -+ break; -+ } -+ } else if (lo.timeout < 0) { -+ fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout); -+ exit(1); -+ } -+ -+ lo.root.fd = open(lo.source, O_PATH); -+ if (lo.root.fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); -+ exit(1); -+ } -+ -+ se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); -+ if (se == NULL) { -+ goto err_out1; -+ } -+ -+ if (fuse_set_signal_handlers(se) != 0) { -+ goto err_out2; -+ } -+ -+ if (fuse_session_mount(se, opts.mountpoint) != 0) { -+ goto err_out3; -+ } -+ -+ fuse_daemonize(opts.foreground); -+ -+ /* Block until ctrl+c or fusermount -u */ -+ if (opts.singlethread) { -+ ret = fuse_session_loop(se); -+ } else { -+ ret = fuse_session_loop_mt(se, opts.clone_fd); -+ } -+ -+ fuse_session_unmount(se); - err_out3: -- fuse_remove_signal_handlers(se); -+ fuse_remove_signal_handlers(se); - err_out2: -- fuse_session_destroy(se); -+ fuse_session_destroy(se); - err_out1: -- free(opts.mountpoint); -- fuse_opt_free_args(&args); -+ free(opts.mountpoint); -+ fuse_opt_free_args(&args); - -- if (lo.root.fd >= 0) -- close(lo.root.fd); -+ if (lo.root.fd >= 0) { -+ close(lo.root.fd); -+ } - -- return ret ? 1 : 0; -+ return ret ? 1 : 0; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Handle-hard-reboot.patch b/SOURCES/kvm-virtiofsd-Handle-hard-reboot.patch deleted file mode 100644 index 8888030..0000000 --- a/SOURCES/kvm-virtiofsd-Handle-hard-reboot.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 616407b06517361ce444dcc0960aeaf55b52da33 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:41 +0100 -Subject: [PATCH 070/116] virtiofsd: Handle hard reboot -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-67-dgilbert@redhat.com> -Patchwork-id: 93521 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 066/112] virtiofsd: Handle hard reboot -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Handle a - mount - hard reboot (without unmount) - mount - -we get another 'init' which FUSE doesn't normally expect. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e8556f49098b5d95634e592d79a97f761b76c96e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 16 +++++++++++++++- - 1 file changed, 15 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 7d742b5..65f91da 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2433,7 +2433,21 @@ void fuse_session_process_buf_int(struct fuse_session *se, - goto reply_err; - } - } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) { -- goto reply_err; -+ if (fuse_lowlevel_is_virtio(se)) { -+ /* -+ * TODO: This is after a hard reboot typically, we need to do -+ * a destroy, but we can't reply to this request yet so -+ * we can't use do_destroy -+ */ -+ fuse_log(FUSE_LOG_DEBUG, "%s: reinit\n", __func__); -+ se->got_destroy = 1; -+ se->got_init = 0; -+ if (se->op.destroy) { -+ se->op.destroy(se->userdata); -+ } -+ } else { -+ goto reply_err; -+ } - } - - err = EACCES; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Handle-reinit.patch b/SOURCES/kvm-virtiofsd-Handle-reinit.patch deleted file mode 100644 index 3f9577b..0000000 --- a/SOURCES/kvm-virtiofsd-Handle-reinit.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 485adfa1aa1b3e2d1449edf5c42d6ec396cbfb5d Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:40 +0100 -Subject: [PATCH 069/116] virtiofsd: Handle reinit -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-66-dgilbert@redhat.com> -Patchwork-id: 93520 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 065/112] virtiofsd: Handle reinit -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Allow init->destroy->init for mount->umount->mount - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit c806d6435fe95fd54b379920aca2f4e3ea1f3258) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index a7a1968..7d742b5 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2028,6 +2028,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, - } - - se->got_init = 1; -+ se->got_destroy = 0; - if (se->op.init) { - se->op.init(se->userdata, &se->conn); - } -@@ -2130,6 +2131,7 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, - (void)iter; - - se->got_destroy = 1; -+ se->got_init = 0; - if (se->op.destroy) { - se->op.destroy(se->userdata); - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Keep-track-of-replies.patch b/SOURCES/kvm-virtiofsd-Keep-track-of-replies.patch deleted file mode 100644 index 18be3e0..0000000 --- a/SOURCES/kvm-virtiofsd-Keep-track-of-replies.patch +++ /dev/null @@ -1,116 +0,0 @@ -From c818a1cb603cad07aa5c49ce808aa09435667c7c Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:04 +0100 -Subject: [PATCH 033/116] virtiofsd: Keep track of replies -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-30-dgilbert@redhat.com> -Patchwork-id: 93481 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 029/112] virtiofsd: Keep track of replies -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Keep track of whether we sent a reply to a request; this is a bit -paranoid but it means: - a) We should always recycle an element even if there was an error - in the request - b) Never try and send two replies on one queue element - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 2f65e69a7f22da8d20c747f34f339ebb40a0634f) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 23 ++++++++++++++++++++--- - 1 file changed, 20 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 05d0e29..f1adeb6 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -44,6 +44,7 @@ struct fv_QueueInfo { - - /* The element for the command currently being processed */ - VuVirtqElement *qe; -+ bool reply_sent; - }; - - /* -@@ -178,6 +179,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - { - VuVirtqElement *elem; - VuVirtq *q; -+ int ret = 0; - - assert(count >= 1); - assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); -@@ -191,6 +193,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - assert(out->unique); - /* For virtio we always have ch */ - assert(ch); -+ assert(!ch->qi->reply_sent); - elem = ch->qi->qe; - q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; - -@@ -208,19 +211,23 @@ int 
virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - if (in_len < sizeof(struct fuse_out_header)) { - fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", - __func__, elem->index); -- return -E2BIG; -+ ret = -E2BIG; -+ goto err; - } - if (in_len < tosend_len) { - fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", - __func__, elem->index, tosend_len); -- return -E2BIG; -+ ret = -E2BIG; -+ goto err; - } - - copy_iov(iov, count, in_sg, in_num, tosend_len); - vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); - vu_queue_notify(&se->virtio_dev->dev, q); -+ ch->qi->reply_sent = true; - -- return 0; -+err: -+ return ret; - } - - /* Thread function for individual queues, created when a queue is 'started' */ -@@ -296,6 +303,9 @@ static void *fv_queue_thread(void *opaque) - break; - } - -+ qi->qe = elem; -+ qi->reply_sent = false; -+ - if (!fbuf.mem) { - fbuf.mem = malloc(se->bufsize); - assert(fbuf.mem); -@@ -331,6 +341,13 @@ static void *fv_queue_thread(void *opaque) - /* TODO: Add checks for fuse_session_exited */ - fuse_session_process_buf_int(se, &fbuf, &ch); - -+ if (!qi->reply_sent) { -+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", -+ __func__, elem->index); -+ /* I think we've still got to recycle the element */ -+ vu_queue_push(dev, q, elem, 0); -+ vu_queue_notify(dev, q); -+ } - qi->qe = NULL; - free(elem); - elem = NULL; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch b/SOURCES/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch deleted file mode 100644 index 5e054f3..0000000 --- a/SOURCES/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch +++ /dev/null @@ -1,143 +0,0 @@ -From b37344c38b866c7e7fb773b4a3172a39306bac7e Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:42 +0100 -Subject: [PATCH 071/116] virtiofsd: Kill threads when queues are stopped -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-68-dgilbert@redhat.com> -Patchwork-id: 93522 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 067/112] virtiofsd: Kill threads when queues are stopped -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Kill the threads we've started when the queues get stopped. - -Signed-off-by: Dr. David Alan Gilbert -With improvements by: -Signed-off-by: Eryu Guan -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 10477ac47fc57d00a84802ff97c15450cd8021c1) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 51 +++++++++++++++++++++++++++++++++++++------ - 1 file changed, 44 insertions(+), 7 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 872968f..7a8774a 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -41,6 +41,7 @@ struct fv_QueueInfo { - /* Our queue index, corresponds to array position */ - int qidx; - int kick_fd; -+ int kill_fd; /* For killing the thread */ - - /* The element for the command currently being processed */ - VuVirtqElement *qe; -@@ -412,14 +413,17 @@ static void *fv_queue_thread(void *opaque) - fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, - qi->qidx, qi->kick_fd); - while (1) { -- struct pollfd pf[1]; -+ struct pollfd pf[2]; - pf[0].fd = qi->kick_fd; - pf[0].events = POLLIN; - pf[0].revents = 0; -+ pf[1].fd = qi->kill_fd; -+ pf[1].events = POLLIN; -+ pf[1].revents = 0; - - fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__, - qi->qidx); -- int poll_res = ppoll(pf, 1, 
NULL, NULL); -+ int poll_res = ppoll(pf, 2, NULL, NULL); - - if (poll_res == -1) { - if (errno == EINTR) { -@@ -430,12 +434,23 @@ static void *fv_queue_thread(void *opaque) - fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n"); - break; - } -- assert(poll_res == 1); -+ assert(poll_res >= 1); - if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { - fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n", - __func__, pf[0].revents, qi->qidx); - break; - } -+ if (pf[1].revents & (POLLERR | POLLHUP | POLLNVAL)) { -+ fuse_log(FUSE_LOG_ERR, -+ "%s: Unexpected poll revents %x Queue %d killfd\n", -+ __func__, pf[1].revents, qi->qidx); -+ break; -+ } -+ if (pf[1].revents) { -+ fuse_log(FUSE_LOG_INFO, "%s: kill event on queue %d - quitting\n", -+ __func__, qi->qidx); -+ break; -+ } - assert(pf[0].revents & POLLIN); - fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__, - qi->qidx); -@@ -589,6 +604,28 @@ out: - return NULL; - } - -+static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) -+{ -+ int ret; -+ struct fv_QueueInfo *ourqi; -+ -+ assert(qidx < vud->nqueues); -+ ourqi = vud->qi[qidx]; -+ -+ /* Kill the thread */ -+ if (eventfd_write(ourqi->kill_fd, 1)) { -+ fuse_log(FUSE_LOG_ERR, "Eventfd_write for queue %d: %s\n", -+ qidx, strerror(errno)); -+ } -+ ret = pthread_join(ourqi->thread, NULL); -+ if (ret) { -+ fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n", -+ __func__, qidx, ret); -+ } -+ close(ourqi->kill_fd); -+ ourqi->kick_fd = -1; -+} -+ - /* Callback from libvhost-user on start or stop of a queue */ - static void fv_queue_set_started(VuDev *dev, int qidx, bool started) - { -@@ -633,16 +670,16 @@ static void fv_queue_set_started(VuDev *dev, int qidx, bool started) - } - ourqi = vud->qi[qidx]; - ourqi->kick_fd = dev->vq[qidx].kick_fd; -+ -+ ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE); -+ assert(ourqi->kill_fd != -1); - if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { - 
fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", - __func__, qidx); - assert(0); - } - } else { -- /* TODO: Kill the thread */ -- assert(qidx < vud->nqueues); -- ourqi = vud->qi[qidx]; -- ourqi->kick_fd = -1; -+ fv_queue_cleanup_thread(vud, qidx); - } - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch b/SOURCES/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch deleted file mode 100644 index 98211cb..0000000 --- a/SOURCES/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch +++ /dev/null @@ -1,96 +0,0 @@ -From f09f13f9a001a50ee3465c165f4bbaf870fcadb9 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:53 +0100 -Subject: [PATCH 022/116] virtiofsd: Make fsync work even if only inode is - passed in -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-19-dgilbert@redhat.com> -Patchwork-id: 93472 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 018/112] virtiofsd: Make fsync work even if only inode is passed in -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -If caller has not sent file handle in request, then using inode, retrieve -the fd opened using O_PATH and use that to open file again and issue -fsync. This will be needed when dax_flush() calls fsync. At that time -we only have inode information (and not file). - -Signed-off-by: Vivek Goyal -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 1b209805f8159c3f4d89ddb9390a5f64887cebff) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 6 +++++- - tools/virtiofsd/passthrough_ll.c | 28 ++++++++++++++++++++++++++-- - 2 files changed, 31 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 514d79c..8552cfb 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1075,7 +1075,11 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - fi.fh = arg->fh; - - if (req->se->op.fsync) { -- req->se->op.fsync(req, nodeid, datasync, &fi); -+ if (fi.fh == (uint64_t)-1) { -+ req->se->op.fsync(req, nodeid, datasync, NULL); -+ } else { -+ req->se->op.fsync(req, nodeid, datasync, &fi); -+ } - } else { - fuse_reply_err(req, ENOSYS); - } -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 6c4da18..26ac870 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -903,10 +903,34 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - { - int res; - (void)ino; -+ int fd; -+ char *buf; -+ -+ fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, -+ (void *)fi); -+ -+ if (!fi) { -+ res = asprintf(&buf, "/proc/self/fd/%i", lo_fd(req, ino)); -+ if (res == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } -+ -+ fd = open(buf, O_RDWR); -+ free(buf); -+ if (fd == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } -+ } else { -+ fd = fi->fh; -+ } -+ - if (datasync) { -- res = fdatasync(fi->fh); -+ res = fdatasync(fd); - } else { -- res = fsync(fi->fh); -+ res = fsync(fd); -+ } -+ if (!fi) { -+ close(fd); - } - fuse_reply_err(req, res == -1 ? 
errno : 0); - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch b/SOURCES/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch deleted file mode 100644 index 2c9874d..0000000 --- a/SOURCES/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch +++ /dev/null @@ -1,257 +0,0 @@ -From a96042f05eaf494fbe26a9cbd940f5f815f782f9 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:56 +0100 -Subject: [PATCH 025/116] virtiofsd: Open vhost connection instead of mounting -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-22-dgilbert@redhat.com> -Patchwork-id: 93476 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 021/112] virtiofsd: Open vhost connection instead of mounting -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -When run with vhost-user options we conect to the QEMU instead -via a socket. Start this off by creating the socket. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit d14bf584dd965821e80d14c16d9292a464b1ab85) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 7 ++-- - tools/virtiofsd/fuse_lowlevel.c | 55 ++++------------------------ - tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_virtio.h | 23 ++++++++++++ - 4 files changed, 114 insertions(+), 50 deletions(-) - create mode 100644 tools/virtiofsd/fuse_virtio.c - create mode 100644 tools/virtiofsd/fuse_virtio.h - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 26b1a7d..82d6ac7 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -6,9 +6,10 @@ - * See the file COPYING.LIB - */ - --#define FUSE_USE_VERSION 31 -- -+#ifndef FUSE_I_H -+#define FUSE_I_H - -+#define FUSE_USE_VERSION 31 - #include "fuse.h" - #include "fuse_lowlevel.h" - -@@ -101,3 +102,5 @@ void fuse_session_process_buf_int(struct fuse_session *se, - - /* room needed in buffer to accommodate header */ - #define FUSE_BUFFER_HEADER_SIZE 0x1000 -+ -+#endif -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 17e8718..5df124e 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -14,6 +14,7 @@ - #include "standard-headers/linux/fuse.h" - #include "fuse_misc.h" - #include "fuse_opt.h" -+#include "fuse_virtio.h" - - #include - #include -@@ -2202,6 +2203,11 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - goto out4; - } - -+ if (!se->vu_socket_path) { -+ fprintf(stderr, "fuse: missing -o vhost_user_socket option\n"); -+ goto out4; -+ } -+ - se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE; - - list_init_req(&se->list); -@@ -2224,54 +2230,7 @@ out1: - - int fuse_session_mount(struct fuse_session *se) - { -- int fd; -- -- /* -- * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos -- * would ensue. 
-- */ -- do { -- fd = open("/dev/null", O_RDWR); -- if (fd > 2) { -- close(fd); -- } -- } while (fd >= 0 && fd <= 2); -- -- /* -- * To allow FUSE daemons to run without privileges, the caller may open -- * /dev/fuse before launching the file system and pass on the file -- * descriptor by specifying /dev/fd/N as the mount point. Note that the -- * parent process takes care of performing the mount in this case. -- */ -- fd = fuse_mnt_parse_fuse_fd(mountpoint); -- if (fd != -1) { -- if (fcntl(fd, F_GETFD) == -1) { -- fuse_log(FUSE_LOG_ERR, "fuse: Invalid file descriptor /dev/fd/%u\n", -- fd); -- return -1; -- } -- se->fd = fd; -- return 0; -- } -- -- /* Open channel */ -- fd = fuse_kern_mount(mountpoint, se->mo); -- if (fd == -1) { -- return -1; -- } -- se->fd = fd; -- -- /* Save mountpoint */ -- se->mountpoint = strdup(mountpoint); -- if (se->mountpoint == NULL) { -- goto error_out; -- } -- -- return 0; -- --error_out: -- fuse_kern_unmount(mountpoint, fd); -- return -1; -+ return virtio_session_mount(se); - } - - int fuse_session_fd(struct fuse_session *se) -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -new file mode 100644 -index 0000000..cbef6ff ---- /dev/null -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -0,0 +1,79 @@ -+/* -+ * virtio-fs glue for FUSE -+ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates -+ * -+ * Authors: -+ * Dave Gilbert -+ * -+ * Implements the glue between libfuse and libvhost-user -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ -+ -+#include "fuse_i.h" -+#include "standard-headers/linux/fuse.h" -+#include "fuse_misc.h" -+#include "fuse_opt.h" -+#include "fuse_virtio.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* From spec */ -+struct virtio_fs_config { -+ char tag[36]; -+ uint32_t num_queues; -+}; -+ -+int virtio_session_mount(struct fuse_session *se) -+{ -+ struct sockaddr_un un; -+ mode_t old_umask; -+ -+ if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) { -+ fuse_log(FUSE_LOG_ERR, "Socket path too long\n"); -+ return -1; -+ } -+ -+ se->fd = -1; -+ -+ /* -+ * Create the Unix socket to communicate with qemu -+ * based on QEMU's vhost-user-bridge -+ */ -+ unlink(se->vu_socket_path); -+ strcpy(un.sun_path, se->vu_socket_path); -+ size_t addr_len = sizeof(un); -+ -+ int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0); -+ if (listen_sock == -1) { -+ fuse_log(FUSE_LOG_ERR, "vhost socket creation: %m\n"); -+ return -1; -+ } -+ un.sun_family = AF_UNIX; -+ -+ /* -+ * Unfortunately bind doesn't let you set the mask on the socket, -+ * so set umask to 077 and restore it later. -+ */ -+ old_umask = umask(0077); -+ if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) { -+ fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n"); -+ umask(old_umask); -+ return -1; -+ } -+ umask(old_umask); -+ -+ if (listen(listen_sock, 1) == -1) { -+ fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n"); -+ return -1; -+ } -+ -+ return -1; -+} -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -new file mode 100644 -index 0000000..8f2edb6 ---- /dev/null -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -0,0 +1,23 @@ -+/* -+ * virtio-fs glue for FUSE -+ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates -+ * -+ * Authors: -+ * Dave Gilbert -+ * -+ * Implements the glue between libfuse and libvhost-user -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ -+ -+#ifndef FUSE_VIRTIO_H -+#define FUSE_VIRTIO_H -+ -+#include "fuse_i.h" -+ -+struct fuse_session; -+ -+int virtio_session_mount(struct fuse_session *se); -+ -+#endif --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch b/SOURCES/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch deleted file mode 100644 index 8d8de78..0000000 --- a/SOURCES/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch +++ /dev/null @@ -1,76 +0,0 @@ -From ade3dcad8a907d281549b341a8908851e36ba458 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:31 +0100 -Subject: [PATCH 060/116] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-57-dgilbert@redhat.com> -Patchwork-id: 93505 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 056/112] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -Caller can set FUSE_WRITE_KILL_PRIV in write_flags. Parse it and pass it -to the filesystem. - -Signed-off-by: Vivek Goyal -Reviewed-by: Misono Tomohiro -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit f779bc5265e7e7abb13a03d4bfbc74151afc15c2) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_common.h | 6 +++++- - tools/virtiofsd/fuse_lowlevel.c | 4 +++- - 2 files changed, 8 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index f8f6433..686c42c 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -93,8 +93,12 @@ struct fuse_file_info { - */ - unsigned int cache_readdir:1; - -+ /* Indicates that suid/sgid bits should be removed upon write */ -+ unsigned int kill_priv:1; -+ -+ - /** Padding. Reserved for future use*/ -- unsigned int padding:25; -+ unsigned int padding:24; - unsigned int padding2:32; - - /* -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 02e1d83..2d6dc5a 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1142,6 +1142,7 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; -+ fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV); - - fi.lock_owner = arg->lock_owner; - fi.flags = arg->flags; -@@ -1177,7 +1178,8 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, - fi.lock_owner = arg->lock_owner; - fi.flags = arg->flags; - fi.fh = arg->fh; -- fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; -+ fi.writepage = !!(arg->write_flags & FUSE_WRITE_CACHE); -+ fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV); - - if (ibufv->count == 1) { - assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch b/SOURCES/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch deleted file mode 100644 index 7d095c9..0000000 --- a/SOURCES/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch +++ /dev/null @@ -1,140 +0,0 @@ 
-From d5986c804f05070a07dfe702f7c66357daaa1ab6 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:20 +0100 -Subject: [PATCH 049/116] virtiofsd: Pass write iov's all the way through -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-46-dgilbert@redhat.com> -Patchwork-id: 93497 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 045/112] virtiofsd: Pass write iov's all the way through -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Pass the write iov pointing to guest RAM all the way through rather -than copying the data. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Xiao Yang -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e17f7a580e2c599330ad3a6946be615ca2fe97d9) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++---- - 1 file changed, 73 insertions(+), 6 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index fd588a4..872968f 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -454,6 +454,10 @@ static void *fv_queue_thread(void *opaque) - __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); - - while (1) { -+ bool allocated_bufv = false; -+ struct fuse_bufvec bufv; -+ struct fuse_bufvec *pbufv; -+ - /* - * An element contains one request and the space to send our - * response They're spread over multiple descriptors in a -@@ -495,14 +499,76 @@ static void *fv_queue_thread(void *opaque) - __func__, elem->index); - assert(0); /* TODO */ - } -- copy_from_iov(&fbuf, out_num, out_sg); -- fbuf.size = out_len; -+ /* Copy just the first element and look at it */ -+ copy_from_iov(&fbuf, 1, out_sg); -+ -+ if (out_num > 2 && -+ out_sg[0].iov_len == sizeof(struct 
fuse_in_header) && -+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && -+ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { -+ /* -+ * For a write we don't actually need to copy the -+ * data, we can just do it straight out of guest memory -+ * but we must still copy the headers in case the guest -+ * was nasty and changed them while we were using them. -+ */ -+ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); -+ -+ /* copy the fuse_write_in header after the fuse_in_header */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; -+ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; -+ -+ /* Allocate the bufv, with space for the rest of the iov */ -+ allocated_bufv = true; -+ pbufv = malloc(sizeof(struct fuse_bufvec) + -+ sizeof(struct fuse_buf) * (out_num - 2)); -+ if (!pbufv) { -+ vu_queue_unpop(dev, q, elem, 0); -+ free(elem); -+ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", -+ __func__); -+ goto out; -+ } -+ -+ pbufv->count = 1; -+ pbufv->buf[0] = fbuf; -+ -+ size_t iovindex, pbufvindex; -+ iovindex = 2; /* 2 headers, separate iovs */ -+ pbufvindex = 1; /* 2 headers, 1 fusebuf */ -+ -+ for (; iovindex < out_num; iovindex++, pbufvindex++) { -+ pbufv->count++; -+ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ -+ pbufv->buf[pbufvindex].flags = 0; -+ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; -+ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; -+ } -+ } else { -+ /* Normal (non fast write) path */ -+ -+ /* Copy the rest of the buffer */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, out_num - 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; -+ fbuf.size = out_len; - -- /* TODO! Endianness of header */ -+ /* TODO! 
Endianness of header */ - -- /* TODO: Add checks for fuse_session_exited */ -- struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 }; -- fuse_session_process_buf_int(se, &bufv, &ch); -+ /* TODO: Add checks for fuse_session_exited */ -+ bufv.buf[0] = fbuf; -+ bufv.count = 1; -+ pbufv = &bufv; -+ } -+ pbufv->idx = 0; -+ pbufv->off = 0; -+ fuse_session_process_buf_int(se, pbufv, &ch); -+ -+ if (allocated_bufv) { -+ free(pbufv); -+ } - - if (!qi->reply_sent) { - fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", -@@ -516,6 +582,7 @@ static void *fv_queue_thread(void *opaque) - elem = NULL; - } - } -+out: - pthread_mutex_destroy(&ch.lock); - free(fbuf.mem); - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch b/SOURCES/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch deleted file mode 100644 index 834ced1..0000000 --- a/SOURCES/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch +++ /dev/null @@ -1,168 +0,0 @@ -From 9e4320eec5204da851ac95fb7a7e6520c9ccee7d Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:19 +0100 -Subject: [PATCH 048/116] virtiofsd: Plumb fuse_bufvec through to do_write_buf -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-45-dgilbert@redhat.com> -Patchwork-id: 93499 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 044/112] virtiofsd: Plumb fuse_bufvec through to do_write_buf -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Let fuse_session_process_buf_int take a fuse_bufvec * instead of a -fuse_buf; and then through to do_write_buf - where in the best -case it can pass that straight through to op.write_buf without copying -(other than skipping a header). - -Signed-off-by: Dr. 
David Alan Gilbert -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 469f9d2fc405b0508e6cf1b4b5bbcadfc82064e5) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 2 +- - tools/virtiofsd/fuse_lowlevel.c | 61 +++++++++++++++++++++++++++-------------- - tools/virtiofsd/fuse_virtio.c | 3 +- - 3 files changed, 44 insertions(+), 22 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 45995f3..a20854f 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -100,7 +100,7 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, - void fuse_free_req(fuse_req_t req); - - void fuse_session_process_buf_int(struct fuse_session *se, -- const struct fuse_buf *buf, -+ struct fuse_bufvec *bufv, - struct fuse_chan *ch); - - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 95f4db8..7e10995 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1004,11 +1004,12 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - - static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, -- const struct fuse_buf *ibuf) -+ struct fuse_bufvec *ibufv) - { - struct fuse_session *se = req->se; -- struct fuse_bufvec bufv = { -- .buf[0] = *ibuf, -+ struct fuse_bufvec *pbufv = ibufv; -+ struct fuse_bufvec tmpbufv = { -+ .buf[0] = ibufv->buf[0], - .count = 1, - }; - struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -@@ -1018,22 +1019,31 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, - fi.fh = arg->fh; - fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; - -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -- bufv.buf[0].mem = PARAM(arg); -- } -- -- bufv.buf[0].size -= -- sizeof(struct fuse_in_header) + sizeof(struct 
fuse_write_in); -- if (bufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); -- fuse_reply_err(req, EIO); -- return; -+ if (ibufv->count == 1) { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ if (!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)) { -+ tmpbufv.buf[0].mem = PARAM(arg); -+ } -+ tmpbufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); -+ if (tmpbufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: do_write_buf: buffer size too small\n"); -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ tmpbufv.buf[0].size = arg->size; -+ pbufv = &tmpbufv; -+ } else { -+ /* -+ * Input bufv contains the headers in the first element -+ * and the data in the rest, we need to skip that first element -+ */ -+ ibufv->buf[0].size = 0; - } -- bufv.buf[0].size = arg->size; - -- se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); -+ se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); - } - - static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -@@ -2024,13 +2034,24 @@ static const char *opname(enum fuse_opcode opcode) - void fuse_session_process_buf(struct fuse_session *se, - const struct fuse_buf *buf) - { -- fuse_session_process_buf_int(se, buf, NULL); -+ struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 }; -+ fuse_session_process_buf_int(se, &bufv, NULL); - } - -+/* -+ * Restriction: -+ * bufv is normally a single entry buffer, except for a write -+ * where (if it's in memory) then the bufv may be multiple entries, -+ * where the first entry contains all headers and subsequent entries -+ * contain data -+ * bufv shall not use any offsets etc to make the data anything -+ * other than contiguous starting from 0. 
-+ */ - void fuse_session_process_buf_int(struct fuse_session *se, -- const struct fuse_buf *buf, -+ struct fuse_bufvec *bufv, - struct fuse_chan *ch) - { -+ const struct fuse_buf *buf = bufv->buf; - struct fuse_in_header *in; - const void *inarg; - struct fuse_req *req; -@@ -2108,7 +2129,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, - - inarg = (void *)&in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) { -- do_write_buf(req, in->nodeid, inarg, buf); -+ do_write_buf(req, in->nodeid, inarg, bufv); - } else { - fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); - } -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 635f877..fd588a4 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -501,7 +501,8 @@ static void *fv_queue_thread(void *opaque) - /* TODO! Endianness of header */ - - /* TODO: Add checks for fuse_session_exited */ -- fuse_session_process_buf_int(se, &fbuf, &ch); -+ struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 }; -+ fuse_session_process_buf_int(se, &bufv, &ch); - - if (!qi->reply_sent) { - fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Poll-kick_fd-for-queue.patch b/SOURCES/kvm-virtiofsd-Poll-kick_fd-for-queue.patch deleted file mode 100644 index d7c6c0a..0000000 --- a/SOURCES/kvm-virtiofsd-Poll-kick_fd-for-queue.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 083b944fac29bc3115a19eb38e176f6b23f04938 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:01 +0100 -Subject: [PATCH 030/116] virtiofsd: Poll kick_fd for queue -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-27-dgilbert@redhat.com> -Patchwork-id: 93483 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 026/112] virtiofsd: Poll kick_fd for queue -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -In the queue thread poll the kick_fd we're passed. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 5dcd1f56141378226d33dc3df68ec57913e0aa04) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 40 +++++++++++++++++++++++++++++++++++++++- - 1 file changed, 39 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 2a94bb3..05e7258 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -100,13 +101,50 @@ static void fv_panic(VuDev *dev, const char *err) - exit(EXIT_FAILURE); - } - -+/* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { - struct fv_QueueInfo *qi = opaque; - fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, - qi->qidx, qi->kick_fd); - while (1) { -- /* TODO */ -+ struct pollfd pf[1]; -+ pf[0].fd = qi->kick_fd; -+ pf[0].events = POLLIN; -+ pf[0].revents = 0; -+ -+ fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__, -+ qi->qidx); -+ int poll_res = ppoll(pf, 1, NULL, NULL); -+ -+ if (poll_res == -1) { -+ if (errno == EINTR) { -+ fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n", -+ __func__); -+ continue; -+ } -+ fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n"); -+ break; -+ } -+ assert(poll_res == 1); -+ if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { -+ fuse_log(FUSE_LOG_ERR, "%s: 
Unexpected poll revents %x Queue %d\n", -+ __func__, pf[0].revents, qi->qidx); -+ break; -+ } -+ assert(pf[0].revents & POLLIN); -+ fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__, -+ qi->qidx); -+ -+ eventfd_t evalue; -+ if (eventfd_read(qi->kick_fd, &evalue)) { -+ fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); -+ break; -+ } -+ if (qi->virtio_dev->se->debug) { -+ fprintf(stderr, "%s: Queue %d gave evalue: %zx\n", __func__, -+ qi->qidx, (size_t)evalue); -+ } - } - - return NULL; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch b/SOURCES/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch deleted file mode 100644 index d4e1ea1..0000000 --- a/SOURCES/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch +++ /dev/null @@ -1,144 +0,0 @@ -From ab336e3aea97d76c1b2ac725d19b4518f47dd8f0 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:59 +0100 -Subject: [PATCH 088/116] virtiofsd: Prevent multiply running with same - vhost_user_socket -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-85-dgilbert@redhat.com> -Patchwork-id: 93541 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 084/112] virtiofsd: Prevent multiply running with same vhost_user_socket -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Masayoshi Mizuma - -virtiofsd can run multiply even if the vhost_user_socket is same path. - - ]# ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu -o source=/tmp/share & - [1] 244965 - virtio_session_mount: Waiting for vhost-user socket connection... - ]# ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu -o source=/tmp/share & - [2] 244966 - virtio_session_mount: Waiting for vhost-user socket connection... 
- ]# - -The user will get confused about the situation and maybe the cause of the -unexpected problem. So it's better to prevent the multiple running. - -Create a regular file under localstatedir directory to exclude the -vhost_user_socket. To create and lock the file, use qemu_write_pidfile() -because the API has some sanity checks and file lock. - -Signed-off-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert - Applied fixes from Stefan's review and moved osdep include -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 96814800d2b49d18737c36e021c387697ec40c62) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 1 + - tools/virtiofsd/fuse_virtio.c | 49 ++++++++++++++++++++++++++++++++++++++++- - 2 files changed, 49 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 440508a..aac282f 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -18,6 +18,7 @@ - - #include - #include -+#include - #include - #include - #include -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index e7bd772..b7948de 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -13,11 +13,12 @@ - - #include "qemu/osdep.h" - #include "qemu/iov.h" --#include "fuse_virtio.h" -+#include "qapi/error.h" - #include "fuse_i.h" - #include "standard-headers/linux/fuse.h" - #include "fuse_misc.h" - #include "fuse_opt.h" -+#include "fuse_virtio.h" - - #include - #include -@@ -743,6 +744,42 @@ int virtio_loop(struct fuse_session *se) - return 0; - } - -+static void strreplace(char *s, char old, char new) -+{ -+ for (; *s; ++s) { -+ if (*s == old) { -+ *s = new; -+ } -+ } -+} -+ -+static bool fv_socket_lock(struct fuse_session *se) -+{ -+ g_autofree gchar *sk_name = NULL; -+ g_autofree gchar *pidfile = NULL; -+ g_autofree gchar *dir = NULL; -+ 
Error *local_err = NULL; -+ -+ dir = qemu_get_local_state_pathname("run/virtiofsd"); -+ -+ if (g_mkdir_with_parents(dir, S_IRWXU) < 0) { -+ fuse_log(FUSE_LOG_ERR, "%s: Failed to create directory %s: %s", -+ __func__, dir, strerror(errno)); -+ return false; -+ } -+ -+ sk_name = g_strdup(se->vu_socket_path); -+ strreplace(sk_name, '/', '.'); -+ pidfile = g_strdup_printf("%s/%s.pid", dir, sk_name); -+ -+ if (!qemu_write_pidfile(pidfile, &local_err)) { -+ error_report_err(local_err); -+ return false; -+ } -+ -+ return true; -+} -+ - static int fv_create_listen_socket(struct fuse_session *se) - { - struct sockaddr_un un; -@@ -758,6 +795,16 @@ static int fv_create_listen_socket(struct fuse_session *se) - return -1; - } - -+ if (!strlen(se->vu_socket_path)) { -+ fuse_log(FUSE_LOG_ERR, "Socket path is empty\n"); -+ return -1; -+ } -+ -+ /* Check the vu_socket_path is already used */ -+ if (!fv_socket_lock(se)) { -+ return -1; -+ } -+ - /* - * Create the Unix socket to communicate with qemu - * based on QEMU's vhost-user-bridge --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch b/SOURCES/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch deleted file mode 100644 index f30f23a..0000000 --- a/SOURCES/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch +++ /dev/null @@ -1,945 +0,0 @@ -From e7c1ad608117b21f80c762f5505a66b21c56e9d3 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:40 +0100 -Subject: [PATCH 009/116] virtiofsd: Pull in kernel's fuse.h -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-6-dgilbert@redhat.com> -Patchwork-id: 93460 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 005/112] virtiofsd: Pull in kernel's fuse.h -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. 
David Alan Gilbert" - -Update scripts/update-linux-headers.sh to add fuse.h and -use it to pull in fuse.h from the kernel; from v5.5-rc1 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit a62a9e192bc5f0aa0bc076b51db5a069add87c78) -Signed-off-by: Miroslav Rezanina ---- - include/standard-headers/linux/fuse.h | 891 ++++++++++++++++++++++++++++++++++ - scripts/update-linux-headers.sh | 1 + - 2 files changed, 892 insertions(+) - create mode 100644 include/standard-headers/linux/fuse.h - -diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h -new file mode 100644 -index 0000000..f4df0a4 ---- /dev/null -+++ b/include/standard-headers/linux/fuse.h -@@ -0,0 +1,891 @@ -+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ -+/* -+ This file defines the kernel interface of FUSE -+ Copyright (C) 2001-2008 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU GPL. -+ See the file COPYING. -+ -+ This -- and only this -- header file may also be distributed under -+ the terms of the BSD Licence as follows: -+ -+ Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ 1. Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ 2. Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. 
-+ -+ THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE -+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ SUCH DAMAGE. -+*/ -+ -+/* -+ * This file defines the kernel interface of FUSE -+ * -+ * Protocol changelog: -+ * -+ * 7.1: -+ * - add the following messages: -+ * FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK, -+ * FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE, -+ * FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR, -+ * FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR, -+ * FUSE_RELEASEDIR -+ * - add padding to messages to accommodate 32-bit servers on 64-bit kernels -+ * -+ * 7.2: -+ * - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags -+ * - add FUSE_FSYNCDIR message -+ * -+ * 7.3: -+ * - add FUSE_ACCESS message -+ * - add FUSE_CREATE message -+ * - add filehandle to fuse_setattr_in -+ * -+ * 7.4: -+ * - add frsize to fuse_kstatfs -+ * - clean up request size limit checking -+ * -+ * 7.5: -+ * - add flags and max_write to fuse_init_out -+ * -+ * 7.6: -+ * - add max_readahead to fuse_init_in and fuse_init_out -+ * -+ * 7.7: -+ * - add FUSE_INTERRUPT message -+ * - add POSIX file lock support -+ * -+ * 7.8: -+ * - add lock_owner and flags fields to fuse_release_in -+ * - add FUSE_BMAP message -+ * - add FUSE_DESTROY message -+ * -+ * 7.9: -+ * - new 
fuse_getattr_in input argument of GETATTR -+ * - add lk_flags in fuse_lk_in -+ * - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in -+ * - add blksize field to fuse_attr -+ * - add file flags field to fuse_read_in and fuse_write_in -+ * - Add ATIME_NOW and MTIME_NOW flags to fuse_setattr_in -+ * -+ * 7.10 -+ * - add nonseekable open flag -+ * -+ * 7.11 -+ * - add IOCTL message -+ * - add unsolicited notification support -+ * - add POLL message and NOTIFY_POLL notification -+ * -+ * 7.12 -+ * - add umask flag to input argument of create, mknod and mkdir -+ * - add notification messages for invalidation of inodes and -+ * directory entries -+ * -+ * 7.13 -+ * - make max number of background requests and congestion threshold -+ * tunables -+ * -+ * 7.14 -+ * - add splice support to fuse device -+ * -+ * 7.15 -+ * - add store notify -+ * - add retrieve notify -+ * -+ * 7.16 -+ * - add BATCH_FORGET request -+ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct -+ * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' -+ * - add FUSE_IOCTL_32BIT flag -+ * -+ * 7.17 -+ * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK -+ * -+ * 7.18 -+ * - add FUSE_IOCTL_DIR flag -+ * - add FUSE_NOTIFY_DELETE -+ * -+ * 7.19 -+ * - add FUSE_FALLOCATE -+ * -+ * 7.20 -+ * - add FUSE_AUTO_INVAL_DATA -+ * -+ * 7.21 -+ * - add FUSE_READDIRPLUS -+ * - send the requested events in POLL request -+ * -+ * 7.22 -+ * - add FUSE_ASYNC_DIO -+ * -+ * 7.23 -+ * - add FUSE_WRITEBACK_CACHE -+ * - add time_gran to fuse_init_out -+ * - add reserved space to fuse_init_out -+ * - add FATTR_CTIME -+ * - add ctime and ctimensec to fuse_setattr_in -+ * - add FUSE_RENAME2 request -+ * - add FUSE_NO_OPEN_SUPPORT flag -+ * -+ * 7.24 -+ * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support -+ * -+ * 7.25 -+ * - add FUSE_PARALLEL_DIROPS -+ * -+ * 7.26 -+ * - add FUSE_HANDLE_KILLPRIV -+ * - add FUSE_POSIX_ACL -+ * -+ * 7.27 -+ * - add FUSE_ABORT_ERROR -+ * -+ * 7.28 -+ * - 
add FUSE_COPY_FILE_RANGE -+ * - add FOPEN_CACHE_DIR -+ * - add FUSE_MAX_PAGES, add max_pages to init_out -+ * - add FUSE_CACHE_SYMLINKS -+ * -+ * 7.29 -+ * - add FUSE_NO_OPENDIR_SUPPORT flag -+ * -+ * 7.30 -+ * - add FUSE_EXPLICIT_INVAL_DATA -+ * - add FUSE_IOCTL_COMPAT_X32 -+ * -+ * 7.31 -+ * - add FUSE_WRITE_KILL_PRIV flag -+ * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING -+ * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag -+ */ -+ -+#ifndef _LINUX_FUSE_H -+#define _LINUX_FUSE_H -+ -+#include -+ -+/* -+ * Version negotiation: -+ * -+ * Both the kernel and userspace send the version they support in the -+ * INIT request and reply respectively. -+ * -+ * If the major versions match then both shall use the smallest -+ * of the two minor versions for communication. -+ * -+ * If the kernel supports a larger major version, then userspace shall -+ * reply with the major version it supports, ignore the rest of the -+ * INIT message and expect a new INIT message from the kernel with a -+ * matching major version. -+ * -+ * If the library supports a larger major version, then it shall fall -+ * back to the major protocol version sent by the kernel for -+ * communication and reply with that major version (and an arbitrary -+ * supported minor version). 
-+ */ -+ -+/** Version number of this interface */ -+#define FUSE_KERNEL_VERSION 7 -+ -+/** Minor version number of this interface */ -+#define FUSE_KERNEL_MINOR_VERSION 31 -+ -+/** The node ID of the root inode */ -+#define FUSE_ROOT_ID 1 -+ -+/* Make sure all structures are padded to 64bit boundary, so 32bit -+ userspace works under 64bit kernels */ -+ -+struct fuse_attr { -+ uint64_t ino; -+ uint64_t size; -+ uint64_t blocks; -+ uint64_t atime; -+ uint64_t mtime; -+ uint64_t ctime; -+ uint32_t atimensec; -+ uint32_t mtimensec; -+ uint32_t ctimensec; -+ uint32_t mode; -+ uint32_t nlink; -+ uint32_t uid; -+ uint32_t gid; -+ uint32_t rdev; -+ uint32_t blksize; -+ uint32_t padding; -+}; -+ -+struct fuse_kstatfs { -+ uint64_t blocks; -+ uint64_t bfree; -+ uint64_t bavail; -+ uint64_t files; -+ uint64_t ffree; -+ uint32_t bsize; -+ uint32_t namelen; -+ uint32_t frsize; -+ uint32_t padding; -+ uint32_t spare[6]; -+}; -+ -+struct fuse_file_lock { -+ uint64_t start; -+ uint64_t end; -+ uint32_t type; -+ uint32_t pid; /* tgid */ -+}; -+ -+/** -+ * Bitmasks for fuse_setattr_in.valid -+ */ -+#define FATTR_MODE (1 << 0) -+#define FATTR_UID (1 << 1) -+#define FATTR_GID (1 << 2) -+#define FATTR_SIZE (1 << 3) -+#define FATTR_ATIME (1 << 4) -+#define FATTR_MTIME (1 << 5) -+#define FATTR_FH (1 << 6) -+#define FATTR_ATIME_NOW (1 << 7) -+#define FATTR_MTIME_NOW (1 << 8) -+#define FATTR_LOCKOWNER (1 << 9) -+#define FATTR_CTIME (1 << 10) -+ -+/** -+ * Flags returned by the OPEN request -+ * -+ * FOPEN_DIRECT_IO: bypass page cache for this open file -+ * FOPEN_KEEP_CACHE: don't invalidate the data cache on open -+ * FOPEN_NONSEEKABLE: the file is not seekable -+ * FOPEN_CACHE_DIR: allow caching this directory -+ * FOPEN_STREAM: the file is stream-like (no file position at all) -+ */ -+#define FOPEN_DIRECT_IO (1 << 0) -+#define FOPEN_KEEP_CACHE (1 << 1) -+#define FOPEN_NONSEEKABLE (1 << 2) -+#define FOPEN_CACHE_DIR (1 << 3) -+#define FOPEN_STREAM (1 << 4) -+ -+/** -+ * INIT 
request/reply flags -+ * -+ * FUSE_ASYNC_READ: asynchronous read requests -+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks -+ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported) -+ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem -+ * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." -+ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB -+ * FUSE_DONT_MASK: don't apply umask to file mode on create operations -+ * FUSE_SPLICE_WRITE: kernel supports splice write on the device -+ * FUSE_SPLICE_MOVE: kernel supports splice move on the device -+ * FUSE_SPLICE_READ: kernel supports splice read on the device -+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks -+ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories -+ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages -+ * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) -+ * FUSE_READDIRPLUS_AUTO: adaptive readdirplus -+ * FUSE_ASYNC_DIO: asynchronous direct I/O submission -+ * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes -+ * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens -+ * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir -+ * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc -+ * FUSE_POSIX_ACL: filesystem supports posix acls -+ * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED -+ * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages -+ * FUSE_CACHE_SYMLINKS: cache READLINK responses -+ * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir -+ * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request -+ * FUSE_MAP_ALIGNMENT: map_alignment field is valid -+ */ -+#define FUSE_ASYNC_READ (1 << 0) -+#define FUSE_POSIX_LOCKS (1 << 1) -+#define FUSE_FILE_OPS (1 << 2) -+#define FUSE_ATOMIC_O_TRUNC (1 << 3) -+#define FUSE_EXPORT_SUPPORT (1 << 4) -+#define 
FUSE_BIG_WRITES (1 << 5) -+#define FUSE_DONT_MASK (1 << 6) -+#define FUSE_SPLICE_WRITE (1 << 7) -+#define FUSE_SPLICE_MOVE (1 << 8) -+#define FUSE_SPLICE_READ (1 << 9) -+#define FUSE_FLOCK_LOCKS (1 << 10) -+#define FUSE_HAS_IOCTL_DIR (1 << 11) -+#define FUSE_AUTO_INVAL_DATA (1 << 12) -+#define FUSE_DO_READDIRPLUS (1 << 13) -+#define FUSE_READDIRPLUS_AUTO (1 << 14) -+#define FUSE_ASYNC_DIO (1 << 15) -+#define FUSE_WRITEBACK_CACHE (1 << 16) -+#define FUSE_NO_OPEN_SUPPORT (1 << 17) -+#define FUSE_PARALLEL_DIROPS (1 << 18) -+#define FUSE_HANDLE_KILLPRIV (1 << 19) -+#define FUSE_POSIX_ACL (1 << 20) -+#define FUSE_ABORT_ERROR (1 << 21) -+#define FUSE_MAX_PAGES (1 << 22) -+#define FUSE_CACHE_SYMLINKS (1 << 23) -+#define FUSE_NO_OPENDIR_SUPPORT (1 << 24) -+#define FUSE_EXPLICIT_INVAL_DATA (1 << 25) -+#define FUSE_MAP_ALIGNMENT (1 << 26) -+ -+/** -+ * CUSE INIT request/reply flags -+ * -+ * CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl -+ */ -+#define CUSE_UNRESTRICTED_IOCTL (1 << 0) -+ -+/** -+ * Release flags -+ */ -+#define FUSE_RELEASE_FLUSH (1 << 0) -+#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) -+ -+/** -+ * Getattr flags -+ */ -+#define FUSE_GETATTR_FH (1 << 0) -+ -+/** -+ * Lock flags -+ */ -+#define FUSE_LK_FLOCK (1 << 0) -+ -+/** -+ * WRITE flags -+ * -+ * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed -+ * FUSE_WRITE_LOCKOWNER: lock_owner field is valid -+ * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits -+ */ -+#define FUSE_WRITE_CACHE (1 << 0) -+#define FUSE_WRITE_LOCKOWNER (1 << 1) -+#define FUSE_WRITE_KILL_PRIV (1 << 2) -+ -+/** -+ * Read flags -+ */ -+#define FUSE_READ_LOCKOWNER (1 << 1) -+ -+/** -+ * Ioctl flags -+ * -+ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine -+ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed -+ * FUSE_IOCTL_RETRY: retry with new iovecs -+ * FUSE_IOCTL_32BIT: 32bit ioctl -+ * FUSE_IOCTL_DIR: is a directory -+ * FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine 
(64bit time_t) -+ * -+ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs -+ */ -+#define FUSE_IOCTL_COMPAT (1 << 0) -+#define FUSE_IOCTL_UNRESTRICTED (1 << 1) -+#define FUSE_IOCTL_RETRY (1 << 2) -+#define FUSE_IOCTL_32BIT (1 << 3) -+#define FUSE_IOCTL_DIR (1 << 4) -+#define FUSE_IOCTL_COMPAT_X32 (1 << 5) -+ -+#define FUSE_IOCTL_MAX_IOV 256 -+ -+/** -+ * Poll flags -+ * -+ * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify -+ */ -+#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) -+ -+/** -+ * Fsync flags -+ * -+ * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata -+ */ -+#define FUSE_FSYNC_FDATASYNC (1 << 0) -+ -+enum fuse_opcode { -+ FUSE_LOOKUP = 1, -+ FUSE_FORGET = 2, /* no reply */ -+ FUSE_GETATTR = 3, -+ FUSE_SETATTR = 4, -+ FUSE_READLINK = 5, -+ FUSE_SYMLINK = 6, -+ FUSE_MKNOD = 8, -+ FUSE_MKDIR = 9, -+ FUSE_UNLINK = 10, -+ FUSE_RMDIR = 11, -+ FUSE_RENAME = 12, -+ FUSE_LINK = 13, -+ FUSE_OPEN = 14, -+ FUSE_READ = 15, -+ FUSE_WRITE = 16, -+ FUSE_STATFS = 17, -+ FUSE_RELEASE = 18, -+ FUSE_FSYNC = 20, -+ FUSE_SETXATTR = 21, -+ FUSE_GETXATTR = 22, -+ FUSE_LISTXATTR = 23, -+ FUSE_REMOVEXATTR = 24, -+ FUSE_FLUSH = 25, -+ FUSE_INIT = 26, -+ FUSE_OPENDIR = 27, -+ FUSE_READDIR = 28, -+ FUSE_RELEASEDIR = 29, -+ FUSE_FSYNCDIR = 30, -+ FUSE_GETLK = 31, -+ FUSE_SETLK = 32, -+ FUSE_SETLKW = 33, -+ FUSE_ACCESS = 34, -+ FUSE_CREATE = 35, -+ FUSE_INTERRUPT = 36, -+ FUSE_BMAP = 37, -+ FUSE_DESTROY = 38, -+ FUSE_IOCTL = 39, -+ FUSE_POLL = 40, -+ FUSE_NOTIFY_REPLY = 41, -+ FUSE_BATCH_FORGET = 42, -+ FUSE_FALLOCATE = 43, -+ FUSE_READDIRPLUS = 44, -+ FUSE_RENAME2 = 45, -+ FUSE_LSEEK = 46, -+ FUSE_COPY_FILE_RANGE = 47, -+ FUSE_SETUPMAPPING = 48, -+ FUSE_REMOVEMAPPING = 49, -+ -+ /* CUSE specific operations */ -+ CUSE_INIT = 4096, -+ -+ /* Reserved opcodes: helpful to detect structure endian-ness */ -+ CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */ -+ FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */ -+}; -+ -+enum fuse_notify_code { -+ FUSE_NOTIFY_POLL = 1, -+ 
FUSE_NOTIFY_INVAL_INODE = 2, -+ FUSE_NOTIFY_INVAL_ENTRY = 3, -+ FUSE_NOTIFY_STORE = 4, -+ FUSE_NOTIFY_RETRIEVE = 5, -+ FUSE_NOTIFY_DELETE = 6, -+ FUSE_NOTIFY_CODE_MAX, -+}; -+ -+/* The read buffer is required to be at least 8k, but may be much larger */ -+#define FUSE_MIN_READ_BUFFER 8192 -+ -+#define FUSE_COMPAT_ENTRY_OUT_SIZE 120 -+ -+struct fuse_entry_out { -+ uint64_t nodeid; /* Inode ID */ -+ uint64_t generation; /* Inode generation: nodeid:gen must -+ be unique for the fs's lifetime */ -+ uint64_t entry_valid; /* Cache timeout for the name */ -+ uint64_t attr_valid; /* Cache timeout for the attributes */ -+ uint32_t entry_valid_nsec; -+ uint32_t attr_valid_nsec; -+ struct fuse_attr attr; -+}; -+ -+struct fuse_forget_in { -+ uint64_t nlookup; -+}; -+ -+struct fuse_forget_one { -+ uint64_t nodeid; -+ uint64_t nlookup; -+}; -+ -+struct fuse_batch_forget_in { -+ uint32_t count; -+ uint32_t dummy; -+}; -+ -+struct fuse_getattr_in { -+ uint32_t getattr_flags; -+ uint32_t dummy; -+ uint64_t fh; -+}; -+ -+#define FUSE_COMPAT_ATTR_OUT_SIZE 96 -+ -+struct fuse_attr_out { -+ uint64_t attr_valid; /* Cache timeout for the attributes */ -+ uint32_t attr_valid_nsec; -+ uint32_t dummy; -+ struct fuse_attr attr; -+}; -+ -+#define FUSE_COMPAT_MKNOD_IN_SIZE 8 -+ -+struct fuse_mknod_in { -+ uint32_t mode; -+ uint32_t rdev; -+ uint32_t umask; -+ uint32_t padding; -+}; -+ -+struct fuse_mkdir_in { -+ uint32_t mode; -+ uint32_t umask; -+}; -+ -+struct fuse_rename_in { -+ uint64_t newdir; -+}; -+ -+struct fuse_rename2_in { -+ uint64_t newdir; -+ uint32_t flags; -+ uint32_t padding; -+}; -+ -+struct fuse_link_in { -+ uint64_t oldnodeid; -+}; -+ -+struct fuse_setattr_in { -+ uint32_t valid; -+ uint32_t padding; -+ uint64_t fh; -+ uint64_t size; -+ uint64_t lock_owner; -+ uint64_t atime; -+ uint64_t mtime; -+ uint64_t ctime; -+ uint32_t atimensec; -+ uint32_t mtimensec; -+ uint32_t ctimensec; -+ uint32_t mode; -+ uint32_t unused4; -+ uint32_t uid; -+ uint32_t gid; -+ uint32_t unused5; 
-+}; -+ -+struct fuse_open_in { -+ uint32_t flags; -+ uint32_t unused; -+}; -+ -+struct fuse_create_in { -+ uint32_t flags; -+ uint32_t mode; -+ uint32_t umask; -+ uint32_t padding; -+}; -+ -+struct fuse_open_out { -+ uint64_t fh; -+ uint32_t open_flags; -+ uint32_t padding; -+}; -+ -+struct fuse_release_in { -+ uint64_t fh; -+ uint32_t flags; -+ uint32_t release_flags; -+ uint64_t lock_owner; -+}; -+ -+struct fuse_flush_in { -+ uint64_t fh; -+ uint32_t unused; -+ uint32_t padding; -+ uint64_t lock_owner; -+}; -+ -+struct fuse_read_in { -+ uint64_t fh; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t read_flags; -+ uint64_t lock_owner; -+ uint32_t flags; -+ uint32_t padding; -+}; -+ -+#define FUSE_COMPAT_WRITE_IN_SIZE 24 -+ -+struct fuse_write_in { -+ uint64_t fh; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t write_flags; -+ uint64_t lock_owner; -+ uint32_t flags; -+ uint32_t padding; -+}; -+ -+struct fuse_write_out { -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+#define FUSE_COMPAT_STATFS_SIZE 48 -+ -+struct fuse_statfs_out { -+ struct fuse_kstatfs st; -+}; -+ -+struct fuse_fsync_in { -+ uint64_t fh; -+ uint32_t fsync_flags; -+ uint32_t padding; -+}; -+ -+struct fuse_setxattr_in { -+ uint32_t size; -+ uint32_t flags; -+}; -+ -+struct fuse_getxattr_in { -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+struct fuse_getxattr_out { -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+struct fuse_lk_in { -+ uint64_t fh; -+ uint64_t owner; -+ struct fuse_file_lock lk; -+ uint32_t lk_flags; -+ uint32_t padding; -+}; -+ -+struct fuse_lk_out { -+ struct fuse_file_lock lk; -+}; -+ -+struct fuse_access_in { -+ uint32_t mask; -+ uint32_t padding; -+}; -+ -+struct fuse_init_in { -+ uint32_t major; -+ uint32_t minor; -+ uint32_t max_readahead; -+ uint32_t flags; -+}; -+ -+#define FUSE_COMPAT_INIT_OUT_SIZE 8 -+#define FUSE_COMPAT_22_INIT_OUT_SIZE 24 -+ -+struct fuse_init_out { -+ uint32_t major; -+ uint32_t minor; -+ uint32_t max_readahead; -+ uint32_t flags; -+ uint16_t 
max_background; -+ uint16_t congestion_threshold; -+ uint32_t max_write; -+ uint32_t time_gran; -+ uint16_t max_pages; -+ uint16_t map_alignment; -+ uint32_t unused[8]; -+}; -+ -+#define CUSE_INIT_INFO_MAX 4096 -+ -+struct cuse_init_in { -+ uint32_t major; -+ uint32_t minor; -+ uint32_t unused; -+ uint32_t flags; -+}; -+ -+struct cuse_init_out { -+ uint32_t major; -+ uint32_t minor; -+ uint32_t unused; -+ uint32_t flags; -+ uint32_t max_read; -+ uint32_t max_write; -+ uint32_t dev_major; /* chardev major */ -+ uint32_t dev_minor; /* chardev minor */ -+ uint32_t spare[10]; -+}; -+ -+struct fuse_interrupt_in { -+ uint64_t unique; -+}; -+ -+struct fuse_bmap_in { -+ uint64_t block; -+ uint32_t blocksize; -+ uint32_t padding; -+}; -+ -+struct fuse_bmap_out { -+ uint64_t block; -+}; -+ -+struct fuse_ioctl_in { -+ uint64_t fh; -+ uint32_t flags; -+ uint32_t cmd; -+ uint64_t arg; -+ uint32_t in_size; -+ uint32_t out_size; -+}; -+ -+struct fuse_ioctl_iovec { -+ uint64_t base; -+ uint64_t len; -+}; -+ -+struct fuse_ioctl_out { -+ int32_t result; -+ uint32_t flags; -+ uint32_t in_iovs; -+ uint32_t out_iovs; -+}; -+ -+struct fuse_poll_in { -+ uint64_t fh; -+ uint64_t kh; -+ uint32_t flags; -+ uint32_t events; -+}; -+ -+struct fuse_poll_out { -+ uint32_t revents; -+ uint32_t padding; -+}; -+ -+struct fuse_notify_poll_wakeup_out { -+ uint64_t kh; -+}; -+ -+struct fuse_fallocate_in { -+ uint64_t fh; -+ uint64_t offset; -+ uint64_t length; -+ uint32_t mode; -+ uint32_t padding; -+}; -+ -+struct fuse_in_header { -+ uint32_t len; -+ uint32_t opcode; -+ uint64_t unique; -+ uint64_t nodeid; -+ uint32_t uid; -+ uint32_t gid; -+ uint32_t pid; -+ uint32_t padding; -+}; -+ -+struct fuse_out_header { -+ uint32_t len; -+ int32_t error; -+ uint64_t unique; -+}; -+ -+struct fuse_dirent { -+ uint64_t ino; -+ uint64_t off; -+ uint32_t namelen; -+ uint32_t type; -+ char name[]; -+}; -+ -+#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) -+#define FUSE_DIRENT_ALIGN(x) \ -+ (((x) + 
sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) -+#define FUSE_DIRENT_SIZE(d) \ -+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) -+ -+struct fuse_direntplus { -+ struct fuse_entry_out entry_out; -+ struct fuse_dirent dirent; -+}; -+ -+#define FUSE_NAME_OFFSET_DIRENTPLUS \ -+ offsetof(struct fuse_direntplus, dirent.name) -+#define FUSE_DIRENTPLUS_SIZE(d) \ -+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) -+ -+struct fuse_notify_inval_inode_out { -+ uint64_t ino; -+ int64_t off; -+ int64_t len; -+}; -+ -+struct fuse_notify_inval_entry_out { -+ uint64_t parent; -+ uint32_t namelen; -+ uint32_t padding; -+}; -+ -+struct fuse_notify_delete_out { -+ uint64_t parent; -+ uint64_t child; -+ uint32_t namelen; -+ uint32_t padding; -+}; -+ -+struct fuse_notify_store_out { -+ uint64_t nodeid; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+struct fuse_notify_retrieve_out { -+ uint64_t notify_unique; -+ uint64_t nodeid; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+/* Matches the size of fuse_write_in */ -+struct fuse_notify_retrieve_in { -+ uint64_t dummy1; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t dummy2; -+ uint64_t dummy3; -+ uint64_t dummy4; -+}; -+ -+/* Device ioctls: */ -+#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) -+ -+struct fuse_lseek_in { -+ uint64_t fh; -+ uint64_t offset; -+ uint32_t whence; -+ uint32_t padding; -+}; -+ -+struct fuse_lseek_out { -+ uint64_t offset; -+}; -+ -+struct fuse_copy_file_range_in { -+ uint64_t fh_in; -+ uint64_t off_in; -+ uint64_t nodeid_out; -+ uint64_t fh_out; -+ uint64_t off_out; -+ uint64_t len; -+ uint64_t flags; -+}; -+ -+#endif /* _LINUX_FUSE_H */ -diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh -index f76d773..29c27f4 100755 ---- a/scripts/update-linux-headers.sh -+++ b/scripts/update-linux-headers.sh -@@ -186,6 +186,7 @@ rm -rf "$output/include/standard-headers/linux" - mkdir -p 
"$output/include/standard-headers/linux" - for i in "$tmpdir"/include/linux/*virtio*.h \ - "$tmpdir/include/linux/qemu_fw_cfg.h" \ -+ "$tmpdir/include/linux/fuse.h" \ - "$tmpdir/include/linux/input.h" \ - "$tmpdir/include/linux/input-event-codes.h" \ - "$tmpdir/include/linux/pci_regs.h" \ --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Pull-in-upstream-headers.patch b/SOURCES/kvm-virtiofsd-Pull-in-upstream-headers.patch deleted file mode 100644 index 78784fb..0000000 --- a/SOURCES/kvm-virtiofsd-Pull-in-upstream-headers.patch +++ /dev/null @@ -1,4911 +0,0 @@ -From 434b51e5c2fce756906dec4803900397bc98ad72 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:39 +0100 -Subject: [PATCH 008/116] virtiofsd: Pull in upstream headers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-5-dgilbert@redhat.com> -Patchwork-id: 93457 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 004/112] virtiofsd: Pull in upstream headers -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Pull in headers fromlibfuse's upstream fuse-3.8.0 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit ee46c78901eb7fa78e328e04c0494ad6d207238b) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse.h | 1275 ++++++++++++++++++++ - tools/virtiofsd/fuse_common.h | 823 +++++++++++++ - tools/virtiofsd/fuse_i.h | 139 +++ - tools/virtiofsd/fuse_log.h | 82 ++ - tools/virtiofsd/fuse_lowlevel.h | 2089 +++++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_misc.h | 59 + - tools/virtiofsd/fuse_opt.h | 271 +++++ - tools/virtiofsd/passthrough_helpers.h | 76 ++ - 8 files changed, 4814 insertions(+) - create mode 100644 tools/virtiofsd/fuse.h - create mode 100644 tools/virtiofsd/fuse_common.h - create mode 100644 tools/virtiofsd/fuse_i.h - create mode 100644 tools/virtiofsd/fuse_log.h - create mode 100644 tools/virtiofsd/fuse_lowlevel.h - create mode 100644 tools/virtiofsd/fuse_misc.h - create mode 100644 tools/virtiofsd/fuse_opt.h - create mode 100644 tools/virtiofsd/passthrough_helpers.h - -diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h -new file mode 100644 -index 0000000..883f6e5 ---- /dev/null -+++ b/tools/virtiofsd/fuse.h -@@ -0,0 +1,1275 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. -+*/ -+ -+#ifndef FUSE_H_ -+#define FUSE_H_ -+ -+/** @file -+ * -+ * This file defines the library interface of FUSE -+ * -+ * IMPORTANT: you should define FUSE_USE_VERSION before including this header. -+ */ -+ -+#include "fuse_common.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* ----------------------------------------------------------- * -+ * Basic FUSE API * -+ * ----------------------------------------------------------- */ -+ -+/** Handle for a FUSE filesystem */ -+struct fuse; -+ -+/** -+ * Readdir flags, passed to ->readdir() -+ */ -+enum fuse_readdir_flags { -+ /** -+ * "Plus" mode. 
-+ * -+ * The kernel wants to prefill the inode cache during readdir. The -+ * filesystem may honour this by filling in the attributes and setting -+ * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also -+ * just ignore this flag completely. -+ */ -+ FUSE_READDIR_PLUS = (1 << 0), -+}; -+ -+enum fuse_fill_dir_flags { -+ /** -+ * "Plus" mode: all file attributes are valid -+ * -+ * The attributes are used by the kernel to prefill the inode cache -+ * during a readdir. -+ * -+ * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set -+ * and vice versa. -+ */ -+ FUSE_FILL_DIR_PLUS = (1 << 1), -+}; -+ -+/** Function to add an entry in a readdir() operation -+ * -+ * The *off* parameter can be any non-zero value that enables the -+ * filesystem to identify the current point in the directory -+ * stream. It does not need to be the actual physical position. A -+ * value of zero is reserved to indicate that seeking in directories -+ * is not supported. -+ * -+ * @param buf the buffer passed to the readdir() operation -+ * @param name the file name of the directory entry -+ * @param stat file attributes, can be NULL -+ * @param off offset of the next entry or zero -+ * @param flags fill flags -+ * @return 1 if buffer is full, zero otherwise -+ */ -+typedef int (*fuse_fill_dir_t) (void *buf, const char *name, -+ const struct stat *stbuf, off_t off, -+ enum fuse_fill_dir_flags flags); -+/** -+ * Configuration of the high-level API -+ * -+ * This structure is initialized from the arguments passed to -+ * fuse_new(), and then passed to the file system's init() handler -+ * which should ensure that the configuration is compatible with the -+ * file system implementation. -+ */ -+struct fuse_config { -+ /** -+ * If `set_gid` is non-zero, the st_gid attribute of each file -+ * is overwritten with the value of `gid`. 
-+ */ -+ int set_gid; -+ unsigned int gid; -+ -+ /** -+ * If `set_uid` is non-zero, the st_uid attribute of each file -+ * is overwritten with the value of `uid`. -+ */ -+ int set_uid; -+ unsigned int uid; -+ -+ /** -+ * If `set_mode` is non-zero, the any permissions bits set in -+ * `umask` are unset in the st_mode attribute of each file. -+ */ -+ int set_mode; -+ unsigned int umask; -+ -+ /** -+ * The timeout in seconds for which name lookups will be -+ * cached. -+ */ -+ double entry_timeout; -+ -+ /** -+ * The timeout in seconds for which a negative lookup will be -+ * cached. This means, that if file did not exist (lookup -+ * retuned ENOENT), the lookup will only be redone after the -+ * timeout, and the file/directory will be assumed to not -+ * exist until then. A value of zero means that negative -+ * lookups are not cached. -+ */ -+ double negative_timeout; -+ -+ /** -+ * The timeout in seconds for which file/directory attributes -+ * (as returned by e.g. the `getattr` handler) are cached. -+ */ -+ double attr_timeout; -+ -+ /** -+ * Allow requests to be interrupted -+ */ -+ int intr; -+ -+ /** -+ * Specify which signal number to send to the filesystem when -+ * a request is interrupted. The default is hardcoded to -+ * USR1. -+ */ -+ int intr_signal; -+ -+ /** -+ * Normally, FUSE assigns inodes to paths only for as long as -+ * the kernel is aware of them. With this option inodes are -+ * instead remembered for at least this many seconds. This -+ * will require more memory, but may be necessary when using -+ * applications that make use of inode numbers. -+ * -+ * A number of -1 means that inodes will be remembered for the -+ * entire life-time of the file-system process. -+ */ -+ int remember; -+ -+ /** -+ * The default behavior is that if an open file is deleted, -+ * the file is renamed to a hidden file (.fuse_hiddenXXX), and -+ * only removed when the file is finally released. 
This -+ * relieves the filesystem implementation of having to deal -+ * with this problem. This option disables the hiding -+ * behavior, and files are removed immediately in an unlink -+ * operation (or in a rename operation which overwrites an -+ * existing file). -+ * -+ * It is recommended that you not use the hard_remove -+ * option. When hard_remove is set, the following libc -+ * functions fail on unlinked files (returning errno of -+ * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), -+ * ftruncate(2), fstat(2), fchmod(2), fchown(2) -+ */ -+ int hard_remove; -+ -+ /** -+ * Honor the st_ino field in the functions getattr() and -+ * fill_dir(). This value is used to fill in the st_ino field -+ * in the stat(2), lstat(2), fstat(2) functions and the d_ino -+ * field in the readdir(2) function. The filesystem does not -+ * have to guarantee uniqueness, however some applications -+ * rely on this value being unique for the whole filesystem. -+ * -+ * Note that this does *not* affect the inode that libfuse -+ * and the kernel use internally (also called the "nodeid"). -+ */ -+ int use_ino; -+ -+ /** -+ * If use_ino option is not given, still try to fill in the -+ * d_ino field in readdir(2). If the name was previously -+ * looked up, and is still in the cache, the inode number -+ * found there will be used. Otherwise it will be set to -1. -+ * If use_ino option is given, this option is ignored. -+ */ -+ int readdir_ino; -+ -+ /** -+ * This option disables the use of page cache (file content cache) -+ * in the kernel for this filesystem. This has several affects: -+ * -+ * 1. Each read(2) or write(2) system call will initiate one -+ * or more read or write operations, data will not be -+ * cached in the kernel. -+ * -+ * 2. The return value of the read() and write() system calls -+ * will correspond to the return values of the read and -+ * write operations. This is useful for example if the -+ * file size is not known in advance (before reading it). 
-+ * -+ * Internally, enabling this option causes fuse to set the -+ * `direct_io` field of `struct fuse_file_info` - overwriting -+ * any value that was put there by the file system. -+ */ -+ int direct_io; -+ -+ /** -+ * This option disables flushing the cache of the file -+ * contents on every open(2). This should only be enabled on -+ * filesystems where the file data is never changed -+ * externally (not through the mounted FUSE filesystem). Thus -+ * it is not suitable for network filesystems and other -+ * intermediate filesystems. -+ * -+ * NOTE: if this option is not specified (and neither -+ * direct_io) data is still cached after the open(2), so a -+ * read(2) system call will not always initiate a read -+ * operation. -+ * -+ * Internally, enabling this option causes fuse to set the -+ * `keep_cache` field of `struct fuse_file_info` - overwriting -+ * any value that was put there by the file system. -+ */ -+ int kernel_cache; -+ -+ /** -+ * This option is an alternative to `kernel_cache`. Instead of -+ * unconditionally keeping cached data, the cached data is -+ * invalidated on open(2) if if the modification time or the -+ * size of the file has changed since it was last opened. -+ */ -+ int auto_cache; -+ -+ /** -+ * The timeout in seconds for which file attributes are cached -+ * for the purpose of checking if auto_cache should flush the -+ * file data on open. -+ */ -+ int ac_attr_timeout_set; -+ double ac_attr_timeout; -+ -+ /** -+ * If this option is given the file-system handlers for the -+ * following operations will not receive path information: -+ * read, write, flush, release, fsync, readdir, releasedir, -+ * fsyncdir, lock, ioctl and poll. -+ * -+ * For the truncate, getattr, chmod, chown and utimens -+ * operations the path will be provided only if the struct -+ * fuse_file_info argument is NULL. -+ */ -+ int nullpath_ok; -+ -+ /** -+ * The remaining options are used by libfuse internally and -+ * should not be touched. 
-+ */ -+ int show_help; -+ char *modules; -+ int debug; -+}; -+ -+ -+/** -+ * The file system operations: -+ * -+ * Most of these should work very similarly to the well known UNIX -+ * file system operations. A major exception is that instead of -+ * returning an error in 'errno', the operation should return the -+ * negated error value (-errno) directly. -+ * -+ * All methods are optional, but some are essential for a useful -+ * filesystem (e.g. getattr). Open, flush, release, fsync, opendir, -+ * releasedir, fsyncdir, access, create, truncate, lock, init and -+ * destroy are special purpose methods, without which a full featured -+ * filesystem can still be implemented. -+ * -+ * In general, all methods are expected to perform any necessary -+ * permission checking. However, a filesystem may delegate this task -+ * to the kernel by passing the `default_permissions` mount option to -+ * `fuse_new()`. In this case, methods will only be called if -+ * the kernel's permission check has succeeded. -+ * -+ * Almost all operations take a path which can be of any length. -+ */ -+struct fuse_operations { -+ /** Get file attributes. -+ * -+ * Similar to stat(). The 'st_dev' and 'st_blksize' fields are -+ * ignored. The 'st_ino' field is ignored except if the 'use_ino' -+ * mount option is given. In that case it is passed to userspace, -+ * but libfuse and the kernel will still assign a different -+ * inode for internal use (called the "nodeid"). -+ * -+ * `fi` will always be NULL if the file is not currently open, but -+ * may also be NULL if the file is open. -+ */ -+ int (*getattr) (const char *, struct stat *, struct fuse_file_info *fi); -+ -+ /** Read the target of a symbolic link -+ * -+ * The buffer should be filled with a null terminated string. The -+ * buffer size argument includes the space for the terminating -+ * null character. If the linkname is too long to fit in the -+ * buffer, it should be truncated. The return value should be 0 -+ * for success. 
-+ */ -+ int (*readlink) (const char *, char *, size_t); -+ -+ /** Create a file node -+ * -+ * This is called for creation of all non-directory, non-symlink -+ * nodes. If the filesystem defines a create() method, then for -+ * regular files that will be called instead. -+ */ -+ int (*mknod) (const char *, mode_t, dev_t); -+ -+ /** Create a directory -+ * -+ * Note that the mode argument may not have the type specification -+ * bits set, i.e. S_ISDIR(mode) can be false. To obtain the -+ * correct directory type bits use mode|S_IFDIR -+ * */ -+ int (*mkdir) (const char *, mode_t); -+ -+ /** Remove a file */ -+ int (*unlink) (const char *); -+ -+ /** Remove a directory */ -+ int (*rmdir) (const char *); -+ -+ /** Create a symbolic link */ -+ int (*symlink) (const char *, const char *); -+ -+ /** Rename a file -+ * -+ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -+ * RENAME_NOREPLACE is specified, the filesystem must not -+ * overwrite *newname* if it exists and return an error -+ * instead. If `RENAME_EXCHANGE` is specified, the filesystem -+ * must atomically exchange the two files, i.e. both must -+ * exist and neither may be deleted. -+ */ -+ int (*rename) (const char *, const char *, unsigned int flags); -+ -+ /** Create a hard link to a file */ -+ int (*link) (const char *, const char *); -+ -+ /** Change the permission bits of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ */ -+ int (*chmod) (const char *, mode_t, struct fuse_file_info *fi); -+ -+ /** Change the owner and group of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. 
-+ */ -+ int (*chown) (const char *, uid_t, gid_t, struct fuse_file_info *fi); -+ -+ /** Change the size of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*truncate) (const char *, off_t, struct fuse_file_info *fi); -+ -+ /** Open a file -+ * -+ * Open flags are available in fi->flags. The following rules -+ * apply. -+ * -+ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -+ * filtered out / handled by the kernel. -+ * -+ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) -+ * should be used by the filesystem to check if the operation is -+ * permitted. If the ``-o default_permissions`` mount option is -+ * given, this check is already done by the kernel before calling -+ * open() and may thus be omitted by the filesystem. -+ * -+ * - When writeback caching is enabled, the kernel may send -+ * read requests even for files opened with O_WRONLY. The -+ * filesystem should be prepared to handle this. -+ * -+ * - When writeback caching is disabled, the filesystem is -+ * expected to properly handle the O_APPEND flag and ensure -+ * that each write is appending to the end of the file. -+ * -+ * - When writeback caching is enabled, the kernel will -+ * handle O_APPEND. However, unless all changes to the file -+ * come through the kernel this will not work reliably. The -+ * filesystem should thus either ignore the O_APPEND flag -+ * (and let the kernel handle it), or return an error -+ * (indicating that reliably O_APPEND is not available). -+ * -+ * Filesystem may store an arbitrary file handle (pointer, -+ * index, etc) in fi->fh, and use this in other all other file -+ * operations (read, write, flush, release, fsync). -+ * -+ * Filesystem may also implement stateless file I/O and not store -+ * anything in fi->fh. 
-+ * -+ * There are also some flags (direct_io, keep_cache) which the -+ * filesystem may set in fi, to change the way the file is opened. -+ * See fuse_file_info structure in for more details. -+ * -+ * If this request is answered with an error code of ENOSYS -+ * and FUSE_CAP_NO_OPEN_SUPPORT is set in -+ * `fuse_conn_info.capable`, this is treated as success and -+ * future calls to open will also succeed without being send -+ * to the filesystem process. -+ * -+ */ -+ int (*open) (const char *, struct fuse_file_info *); -+ -+ /** Read data from an open file -+ * -+ * Read should return exactly the number of bytes requested except -+ * on EOF or error, otherwise the rest of the data will be -+ * substituted with zeroes. An exception to this is when the -+ * 'direct_io' mount option is specified, in which case the return -+ * value of the read system call will reflect the return value of -+ * this operation. -+ */ -+ int (*read) (const char *, char *, size_t, off_t, -+ struct fuse_file_info *); -+ -+ /** Write data to an open file -+ * -+ * Write should return exactly the number of bytes requested -+ * except on error. An exception to this is when the 'direct_io' -+ * mount option is specified (see read operation). -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*write) (const char *, const char *, size_t, off_t, -+ struct fuse_file_info *); -+ -+ /** Get file system statistics -+ * -+ * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored -+ */ -+ int (*statfs) (const char *, struct statvfs *); -+ -+ /** Possibly flush cached data -+ * -+ * BIG NOTE: This is not equivalent to fsync(). It's not a -+ * request to sync dirty data. -+ * -+ * Flush is called on each close() of a file descriptor, as opposed to -+ * release which is called on the close of the last file descriptor for -+ * a file. 
Under Linux, errors returned by flush() will be passed to -+ * userspace as errors from close(), so flush() is a good place to write -+ * back any cached dirty data. However, many applications ignore errors -+ * on close(), and on non-Linux systems, close() may succeed even if flush() -+ * returns an error. For these reasons, filesystems should not assume -+ * that errors returned by flush will ever be noticed or even -+ * delivered. -+ * -+ * NOTE: The flush() method may be called more than once for each -+ * open(). This happens if more than one file descriptor refers to an -+ * open file handle, e.g. due to dup(), dup2() or fork() calls. It is -+ * not possible to determine if a flush is final, so each flush should -+ * be treated equally. Multiple write-flush sequences are relatively -+ * rare, so this shouldn't be a problem. -+ * -+ * Filesystems shouldn't assume that flush will be called at any -+ * particular point. It may be called more times than expected, or not -+ * at all. -+ * -+ * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -+ */ -+ int (*flush) (const char *, struct fuse_file_info *); -+ -+ /** Release an open file -+ * -+ * Release is called when there are no more references to an open -+ * file: all file descriptors are closed and all memory mappings -+ * are unmapped. -+ * -+ * For every open() call there will be exactly one release() call -+ * with the same flags and file handle. It is possible to -+ * have a file opened more than once, in which case only the last -+ * release will mean, that no more reads/writes will happen on the -+ * file. The return value of release is ignored. -+ */ -+ int (*release) (const char *, struct fuse_file_info *); -+ -+ /** Synchronize file contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data. 
-+ */ -+ int (*fsync) (const char *, int, struct fuse_file_info *); -+ -+ /** Set extended attributes */ -+ int (*setxattr) (const char *, const char *, const char *, size_t, int); -+ -+ /** Get extended attributes */ -+ int (*getxattr) (const char *, const char *, char *, size_t); -+ -+ /** List extended attributes */ -+ int (*listxattr) (const char *, char *, size_t); -+ -+ /** Remove extended attributes */ -+ int (*removexattr) (const char *, const char *); -+ -+ /** Open directory -+ * -+ * Unless the 'default_permissions' mount option is given, -+ * this method should check if opendir is permitted for this -+ * directory. Optionally opendir may also return an arbitrary -+ * filehandle in the fuse_file_info structure, which will be -+ * passed to readdir, releasedir and fsyncdir. -+ */ -+ int (*opendir) (const char *, struct fuse_file_info *); -+ -+ /** Read directory -+ * -+ * The filesystem may choose between two modes of operation: -+ * -+ * 1) The readdir implementation ignores the offset parameter, and -+ * passes zero to the filler function's offset. The filler -+ * function will not return '1' (unless an error happens), so the -+ * whole directory is read in a single readdir operation. -+ * -+ * 2) The readdir implementation keeps track of the offsets of the -+ * directory entries. It uses the offset parameter and always -+ * passes non-zero offset to the filler function. When the buffer -+ * is full (or an error happens) the filler function will return -+ * '1'. 
-+ */ -+ int (*readdir) (const char *, void *, fuse_fill_dir_t, off_t, -+ struct fuse_file_info *, enum fuse_readdir_flags); -+ -+ /** Release directory -+ */ -+ int (*releasedir) (const char *, struct fuse_file_info *); -+ -+ /** Synchronize directory contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data -+ */ -+ int (*fsyncdir) (const char *, int, struct fuse_file_info *); -+ -+ /** -+ * Initialize filesystem -+ * -+ * The return value will passed in the `private_data` field of -+ * `struct fuse_context` to all file operations, and as a -+ * parameter to the destroy() method. It overrides the initial -+ * value provided to fuse_main() / fuse_new(). -+ */ -+ void *(*init) (struct fuse_conn_info *conn, -+ struct fuse_config *cfg); -+ -+ /** -+ * Clean up filesystem -+ * -+ * Called on filesystem exit. -+ */ -+ void (*destroy) (void *private_data); -+ -+ /** -+ * Check file access permissions -+ * -+ * This will be called for the access() system call. If the -+ * 'default_permissions' mount option is given, this method is not -+ * called. -+ * -+ * This method is not called under Linux kernel versions 2.4.x -+ */ -+ int (*access) (const char *, int); -+ -+ /** -+ * Create and open a file -+ * -+ * If the file does not exist, first create it with the specified -+ * mode, and then open it. -+ * -+ * If this method is not implemented or under Linux kernel -+ * versions earlier than 2.6.15, the mknod() and open() methods -+ * will be called instead. -+ */ -+ int (*create) (const char *, mode_t, struct fuse_file_info *); -+ -+ /** -+ * Perform POSIX file locking operation -+ * -+ * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. -+ * -+ * For the meaning of fields in 'struct flock' see the man page -+ * for fcntl(2). The l_whence field will always be set to -+ * SEEK_SET. -+ * -+ * For checking lock ownership, the 'fuse_file_info->owner' -+ * argument must be used. 
-+ * -+ * For F_GETLK operation, the library will first check currently -+ * held locks, and if a conflicting lock is found it will return -+ * information without calling this method. This ensures, that -+ * for local locks the l_pid field is correctly filled in. The -+ * results may not be accurate in case of race conditions and in -+ * the presence of hard links, but it's unlikely that an -+ * application would rely on accurate GETLK results in these -+ * cases. If a conflicting lock is not found, this method will be -+ * called, and the filesystem may fill out l_pid by a meaningful -+ * value, or it may leave this field zero. -+ * -+ * For F_SETLK and F_SETLKW the l_pid field will be set to the pid -+ * of the process performing the locking operation. -+ * -+ * Note: if this method is not implemented, the kernel will still -+ * allow file locking to work locally. Hence it is only -+ * interesting for network filesystems and similar. -+ */ -+ int (*lock) (const char *, struct fuse_file_info *, int cmd, -+ struct flock *); -+ -+ /** -+ * Change the access and modification times of a file with -+ * nanosecond resolution -+ * -+ * This supersedes the old utime() interface. New applications -+ * should use this. -+ * -+ * `fi` will always be NULL if the file is not currently open, but -+ * may also be NULL if the file is open. -+ * -+ * See the utimensat(2) man page for details. -+ */ -+ int (*utimens) (const char *, const struct timespec tv[2], -+ struct fuse_file_info *fi); -+ -+ /** -+ * Map block index within file to block index within device -+ * -+ * Note: This makes sense only for block device backed filesystems -+ * mounted with the 'blkdev' option -+ */ -+ int (*bmap) (const char *, size_t blocksize, uint64_t *idx); -+ -+ /** -+ * Ioctl -+ * -+ * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in -+ * 64bit environment. The size and direction of data is -+ * determined by _IOC_*() decoding of cmd.
For _IOC_NONE, -+ * data will be NULL, for _IOC_WRITE data is out area, for -+ * _IOC_READ in area and if both are set in/out area. In all -+ * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. -+ * -+ * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a -+ * directory file handle. -+ * -+ * Note : the unsigned long request submitted by the application -+ * is truncated to 32 bits. -+ */ -+ int (*ioctl) (const char *, unsigned int cmd, void *arg, -+ struct fuse_file_info *, unsigned int flags, void *data); -+ -+ /** -+ * Poll for IO readiness events -+ * -+ * Note: If ph is non-NULL, the client should notify -+ * when IO readiness events occur by calling -+ * fuse_notify_poll() with the specified ph. -+ * -+ * Regardless of the number of times poll with a non-NULL ph -+ * is received, single notification is enough to clear all. -+ * Notifying more times incurs overhead but doesn't harm -+ * correctness. -+ * -+ * The callee is responsible for destroying ph with -+ * fuse_pollhandle_destroy() when no longer in use. -+ */ -+ int (*poll) (const char *, struct fuse_file_info *, -+ struct fuse_pollhandle *ph, unsigned *reventsp); -+ -+ /** Write contents of buffer to an open file -+ * -+ * Similar to the write() method, but data is supplied in a -+ * generic buffer. Use fuse_buf_copy() to transfer data to -+ * the destination. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*write_buf) (const char *, struct fuse_bufvec *buf, off_t off, -+ struct fuse_file_info *); -+ -+ /** Store data from an open file in a buffer -+ * -+ * Similar to the read() method, but data is stored and -+ * returned in a generic buffer. -+ * -+ * No actual copying of data has to take place, the source -+ * file descriptor may simply be stored in the buffer for -+ * later data transfer. -+ * -+ * The buffer must be allocated dynamically and stored at the -+ * location pointed to by bufp. 
If the buffer contains memory -+ * regions, they too must be allocated using malloc(). The -+ * allocated memory will be freed by the caller. -+ */ -+ int (*read_buf) (const char *, struct fuse_bufvec **bufp, -+ size_t size, off_t off, struct fuse_file_info *); -+ /** -+ * Perform BSD file locking operation -+ * -+ * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN -+ * -+ * Nonblocking requests will be indicated by ORing LOCK_NB to -+ * the above operations -+ * -+ * For more information see the flock(2) manual page. -+ * -+ * Additionally fi->owner will be set to a value unique to -+ * this open file. This same value will be supplied to -+ * ->release() when the file is released. -+ * -+ * Note: if this method is not implemented, the kernel will still -+ * allow file locking to work locally. Hence it is only -+ * interesting for network filesystems and similar. -+ */ -+ int (*flock) (const char *, struct fuse_file_info *, int op); -+ -+ /** -+ * Allocates space for an open file -+ * -+ * This function ensures that required space is allocated for specified -+ * file. If this function returns success then any subsequent write -+ * request to specified range is guaranteed not to fail because of lack -+ * of space on the file system media. -+ */ -+ int (*fallocate) (const char *, int, off_t, off_t, -+ struct fuse_file_info *); -+ -+ /** -+ * Copy a range of data from one file to another -+ * -+ * Performs an optimized copy between two file descriptors without the -+ * additional cost of transferring data through the FUSE kernel module -+ * to user space (glibc) and then back into the FUSE filesystem again. -+ * -+ * In case this method is not implemented, glibc falls back to reading -+ * data from the source and writing to the destination. Effectively -+ * doing an inefficient copy of the data. 
-+ */ -+ ssize_t (*copy_file_range) (const char *path_in, -+ struct fuse_file_info *fi_in, -+ off_t offset_in, const char *path_out, -+ struct fuse_file_info *fi_out, -+ off_t offset_out, size_t size, int flags); -+ -+ /** -+ * Find next data or hole after the specified offset -+ */ -+ off_t (*lseek) (const char *, off_t off, int whence, struct fuse_file_info *); -+}; -+ -+/** Extra context that may be needed by some filesystems -+ * -+ * The uid, gid and pid fields are not filled in case of a writepage -+ * operation. -+ */ -+struct fuse_context { -+ /** Pointer to the fuse object */ -+ struct fuse *fuse; -+ -+ /** User ID of the calling process */ -+ uid_t uid; -+ -+ /** Group ID of the calling process */ -+ gid_t gid; -+ -+ /** Process ID of the calling thread */ -+ pid_t pid; -+ -+ /** Private filesystem data */ -+ void *private_data; -+ -+ /** Umask of the calling process */ -+ mode_t umask; -+}; -+ -+/** -+ * Main function of FUSE. -+ * -+ * This is for the lazy. This is all that has to be called from the -+ * main() function. -+ * -+ * This function does the following: -+ * - parses command line options, and handles --help and -+ * --version -+ * - installs signal handlers for INT, HUP, TERM and PIPE -+ * - registers an exit handler to unmount the filesystem on program exit -+ * - creates a fuse handle -+ * - registers the operations -+ * - calls either the single-threaded or the multi-threaded event loop -+ * -+ * Most file systems will have to parse some file-system specific -+ * arguments before calling this function. It is recommended to do -+ * this with fuse_opt_parse() and a processing function that passes -+ * through any unknown options (this can also be achieved by just -+ * passing NULL as the processing function). That way, the remaining -+ * options can be passed directly to fuse_main(). -+ * -+ * fuse_main() accepts all options that can be passed to -+ * fuse_parse_cmdline(), fuse_new(), or fuse_session_new(). 
-+ * -+ * Option parsing skips argv[0], which is assumed to contain the -+ * program name. This element must always be present and is used to -+ * construct a basic ``usage: `` message for the --help -+ * output. argv[0] may also be set to the empty string. In this case -+ * the usage message is suppressed. This can be used by file systems -+ * to print their own usage line first. See hello.c for an example of -+ * how to do this. -+ * -+ * Note: this is currently implemented as a macro. -+ * -+ * The following error codes may be returned from fuse_main(): -+ * 1: Invalid option arguments -+ * 2: No mount point specified -+ * 3: FUSE setup failed -+ * 4: Mounting failed -+ * 5: Failed to daemonize (detach from session) -+ * 6: Failed to set up signal handlers -+ * 7: An error occurred during the life of the file system -+ * -+ * @param argc the argument counter passed to the main() function -+ * @param argv the argument vector passed to the main() function -+ * @param op the file system operation -+ * @param private_data Initial value for the `private_data` -+ * field of `struct fuse_context`. May be overridden by the -+ * `struct fuse_operations.init` handler. -+ * @return 0 on success, nonzero on failure -+ * -+ * Example usage, see hello.c -+ */ -+/* -+ int fuse_main(int argc, char *argv[], const struct fuse_operations *op, -+ void *private_data); -+*/ -+#define fuse_main(argc, argv, op, private_data) \ -+ fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) -+ -+/* ----------------------------------------------------------- * -+ * More detailed API * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Print available options (high- and low-level) to stdout. This is -+ * not an exhaustive list, but includes only those options that may be -+ * of interest to an end-user of a file system.
-+ * -+ * The function looks at the argument vector only to determine if -+ * there are additional modules to be loaded (module=foo option), -+ * and attempts to call their help functions as well. -+ * -+ * @param args the argument vector. -+ */ -+void fuse_lib_help(struct fuse_args *args); -+ -+/** -+ * Create a new FUSE filesystem. -+ * -+ * This function accepts most file-system independent mount options -+ * (like context, nodev, ro - see mount(8)), as well as the -+ * FUSE-specific mount options from mount.fuse(8). -+ * -+ * If the --help option is specified, the function writes a help text -+ * to stdout and returns NULL. -+ * -+ * Option parsing skips argv[0], which is assumed to contain the -+ * program name. This element must always be present and is used to -+ * construct a basic ``usage: `` message for the --help output. If -+ * argv[0] is set to the empty string, no usage message is included in -+ * the --help output. -+ * -+ * If an unknown option is passed in, an error message is written to -+ * stderr and the function returns NULL. -+ * -+ * @param args argument vector -+ * @param op the filesystem operations -+ * @param op_size the size of the fuse_operations structure -+ * @param private_data Initial value for the `private_data` -+ * field of `struct fuse_context`. May be overridden by the -+ * `struct fuse_operations.init` handler. -+ * @return the created FUSE handle -+ */ -+#if FUSE_USE_VERSION == 30 -+struct fuse *fuse_new_30(struct fuse_args *args, const struct fuse_operations *op, -+ size_t op_size, void *private_data); -+#define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data) -+#else -+struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op, -+ size_t op_size, void *private_data); -+#endif -+ -+/** -+ * Mount a FUSE file system. -+ * -+ * @param mountpoint the mount point path -+ * @param f the FUSE handle -+ * -+ * @return 0 on success, -1 on failure. 
-+ **/ -+int fuse_mount(struct fuse *f, const char *mountpoint); -+ -+/** -+ * Unmount a FUSE file system. -+ * -+ * See fuse_session_unmount() for additional information. -+ * -+ * @param f the FUSE handle -+ **/ -+void fuse_unmount(struct fuse *f); -+ -+/** -+ * Destroy the FUSE handle. -+ * -+ * NOTE: This function does not unmount the filesystem. If this is -+ * needed, call fuse_unmount() before calling this function. -+ * -+ * @param f the FUSE handle -+ */ -+void fuse_destroy(struct fuse *f); -+ -+/** -+ * FUSE event loop. -+ * -+ * Requests from the kernel are processed, and the appropriate -+ * operations are called. -+ * -+ * For a description of the return value and the conditions when the -+ * event loop exits, refer to the documentation of -+ * fuse_session_loop(). -+ * -+ * @param f the FUSE handle -+ * @return see fuse_session_loop() -+ * -+ * See also: fuse_loop_mt() -+ */ -+int fuse_loop(struct fuse *f); -+ -+/** -+ * Flag session as terminated -+ * -+ * This function will cause any running event loops to exit on -+ * the next opportunity. -+ * -+ * @param f the FUSE handle -+ */ -+void fuse_exit(struct fuse *f); -+ -+/** -+ * FUSE event loop with multiple threads -+ * -+ * Requests from the kernel are processed, and the appropriate -+ * operations are called. Requests are processed in parallel by -+ * distributing them between multiple threads. -+ * -+ * For a description of the return value and the conditions when the -+ * event loop exits, refer to the documentation of -+ * fuse_session_loop(). -+ * -+ * Note: using fuse_loop() instead of fuse_loop_mt() means you are running in -+ * single-threaded mode, and that you will not have to worry about reentrancy, -+ * though you will have to worry about recursive lookups. In single-threaded -+ * mode, FUSE will wait for one callback to return before calling another.
-+ * -+ * Enabling multiple threads, by using fuse_loop_mt(), will cause FUSE to make -+ * multiple simultaneous calls into the various callback functions given by your -+ * fuse_operations record. -+ * -+ * If you are using multiple threads, you can enjoy all the parallel execution -+ * and interactive response benefits of threads, and you get to enjoy all the -+ * benefits of race conditions and locking bugs, too. Ensure that any code used -+ * in the callback function of fuse_operations is also thread-safe. -+ * -+ * @param f the FUSE handle -+ * @param config loop configuration -+ * @return see fuse_session_loop() -+ * -+ * See also: fuse_loop() -+ */ -+#if FUSE_USE_VERSION < 32 -+int fuse_loop_mt_31(struct fuse *f, int clone_fd); -+#define fuse_loop_mt(f, clone_fd) fuse_loop_mt_31(f, clone_fd) -+#else -+int fuse_loop_mt(struct fuse *f, struct fuse_loop_config *config); -+#endif -+ -+/** -+ * Get the current context -+ * -+ * The context is only valid for the duration of a filesystem -+ * operation, and thus must not be stored and used later. -+ * -+ * @return the context -+ */ -+struct fuse_context *fuse_get_context(void); -+ -+/** -+ * Get the current supplementary group IDs for the current request -+ * -+ * Similar to the getgroups(2) system call, except the return value is -+ * always the total number of group IDs, even if it is larger than the -+ * specified size. -+ * -+ * The current fuse kernel module in linux (as of 2.6.30) doesn't pass -+ * the group list to userspace, hence this function needs to parse -+ * "/proc/$TID/task/$TID/status" to get the group IDs. -+ * -+ * This feature may not be supported on all operating systems. In -+ * such a case this function will return -ENOSYS. 
-+ * -+ * @param size size of given array -+ * @param list array of group IDs to be filled in -+ * @return the total number of supplementary group IDs or -errno on failure -+ */ -+int fuse_getgroups(int size, gid_t list[]); -+ -+/** -+ * Check if the current request has already been interrupted -+ * -+ * @return 1 if the request has been interrupted, 0 otherwise -+ */ -+int fuse_interrupted(void); -+ -+/** -+ * Invalidates cache for the given path. -+ * -+ * This calls fuse_lowlevel_notify_inval_inode internally. -+ * -+ * @return 0 on successful invalidation, negative error value otherwise. -+ * This routine may return -ENOENT to indicate that there was -+ * no entry to be invalidated, e.g., because the path has not -+ * been seen before or has been forgotten; this should not be -+ * considered to be an error. -+ */ -+int fuse_invalidate_path(struct fuse *f, const char *path); -+ -+/** -+ * The real main function -+ * -+ * Do not call this directly, use fuse_main() -+ */ -+int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, -+ size_t op_size, void *private_data); -+ -+/** -+ * Start the cleanup thread when using option "remember". -+ * -+ * This is done automatically by fuse_loop_mt() -+ * @param fuse struct fuse pointer for fuse instance -+ * @return 0 on success and -1 on error -+ */ -+int fuse_start_cleanup_thread(struct fuse *fuse); -+ -+/** -+ * Stop the cleanup thread when using option "remember". 
-+ * -+ * This is done automatically by fuse_loop_mt() -+ * @param fuse struct fuse pointer for fuse instance -+ */ -+void fuse_stop_cleanup_thread(struct fuse *fuse); -+ -+/** -+ * Iterate over cache removing stale entries -+ * use in conjunction with "-oremember" -+ * -+ * NOTE: This is already done for the standard sessions -+ * -+ * @param fuse struct fuse pointer for fuse instance -+ * @return the number of seconds until the next cleanup -+ */ -+int fuse_clean_cache(struct fuse *fuse); -+ -+/* -+ * Stacking API -+ */ -+ -+/** -+ * Fuse filesystem object -+ * -+ * This is opaque object represents a filesystem layer -+ */ -+struct fuse_fs; -+ -+/* -+ * These functions call the relevant filesystem operation, and return -+ * the result. -+ * -+ * If the operation is not defined, they return -ENOSYS, with the -+ * exception of fuse_fs_open, fuse_fs_release, fuse_fs_opendir, -+ * fuse_fs_releasedir and fuse_fs_statfs, which return 0. -+ */ -+ -+int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, -+ struct fuse_file_info *fi); -+int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, -+ const char *newpath, unsigned int flags); -+int fuse_fs_unlink(struct fuse_fs *fs, const char *path); -+int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); -+int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, -+ const char *path); -+int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); -+int fuse_fs_release(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_open(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, -+ off_t off, struct fuse_file_info *fi); -+int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, -+ struct fuse_bufvec **bufp, size_t size, off_t off, -+ struct fuse_file_info *fi); -+int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, -+ size_t size, 
off_t off, struct fuse_file_info *fi); -+int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, -+ struct fuse_bufvec *buf, off_t off, -+ struct fuse_file_info *fi); -+int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, -+ struct fuse_file_info *fi); -+int fuse_fs_flush(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf); -+int fuse_fs_opendir(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, -+ fuse_fill_dir_t filler, off_t off, -+ struct fuse_file_info *fi, enum fuse_readdir_flags flags); -+int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, -+ struct fuse_file_info *fi); -+int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, -+ struct fuse_file_info *fi); -+int fuse_fs_lock(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi, int cmd, struct flock *lock); -+int fuse_fs_flock(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi, int op); -+int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, -+ struct fuse_file_info *fi); -+int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, -+ struct fuse_file_info *fi); -+int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, -+ struct fuse_file_info *fi); -+int fuse_fs_utimens(struct fuse_fs *fs, const char *path, -+ const struct timespec tv[2], struct fuse_file_info *fi); -+int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask); -+int fuse_fs_readlink(struct fuse_fs *fs, const char *path, char *buf, -+ size_t len); -+int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, -+ dev_t rdev); -+int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode); -+int 
fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, -+ const char *value, size_t size, int flags); -+int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, -+ char *value, size_t size); -+int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, -+ size_t size); -+int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, -+ const char *name); -+int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, -+ uint64_t *idx); -+int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, -+ void *arg, struct fuse_file_info *fi, unsigned int flags, -+ void *data); -+int fuse_fs_poll(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi, struct fuse_pollhandle *ph, -+ unsigned *reventsp); -+int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, -+ off_t offset, off_t length, struct fuse_file_info *fi); -+ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, -+ struct fuse_file_info *fi_in, off_t off_in, -+ const char *path_out, -+ struct fuse_file_info *fi_out, off_t off_out, -+ size_t len, int flags); -+off_t fuse_fs_lseek(struct fuse_fs *fs, const char *path, off_t off, int whence, -+ struct fuse_file_info *fi); -+void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, -+ struct fuse_config *cfg); -+void fuse_fs_destroy(struct fuse_fs *fs); -+ -+int fuse_notify_poll(struct fuse_pollhandle *ph); -+ -+/** -+ * Create a new fuse filesystem object -+ * -+ * This is usually called from the factory of a fuse module to create -+ * a new instance of a filesystem. -+ * -+ * @param op the filesystem operations -+ * @param op_size the size of the fuse_operations structure -+ * @param private_data Initial value for the `private_data` -+ * field of `struct fuse_context`. May be overridden by the -+ * `struct fuse_operations.init` handler. 
-+ * @return a new filesystem object -+ */ -+struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, -+ void *private_data); -+ -+/** -+ * Factory for creating filesystem objects -+ * -+ * The function may use and remove options from 'args' that belong -+ * to this module. -+ * -+ * For now the 'fs' vector always contains exactly one filesystem. -+ * This is the filesystem which will be below the newly created -+ * filesystem in the stack. -+ * -+ * @param args the command line arguments -+ * @param fs NULL terminated filesystem object vector -+ * @return the new filesystem object -+ */ -+typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, -+ struct fuse_fs *fs[]); -+/** -+ * Register filesystem module -+ * -+ * If the "-omodules=*name*_:..." option is present, filesystem -+ * objects are created and pushed onto the stack with the *factory_* -+ * function. -+ * -+ * @param name_ the name of this filesystem module -+ * @param factory_ the factory function for this filesystem module -+ */ -+#define FUSE_REGISTER_MODULE(name_, factory_) \ -+ fuse_module_factory_t fuse_module_ ## name_ ## _factory = factory_ -+ -+/** Get session from fuse object */ -+struct fuse_session *fuse_get_session(struct fuse *f); -+ -+/** -+ * Open a FUSE file descriptor and set up the mount for the given -+ * mountpoint and flags. 
-+ * -+ * @param mountpoint reference to the mount in the file system -+ * @param options mount options -+ * @return the FUSE file descriptor or -1 upon error -+ */ -+int fuse_open_channel(const char *mountpoint, const char *options); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* FUSE_H_ */ -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -new file mode 100644 -index 0000000..2d686b2 ---- /dev/null -+++ b/tools/virtiofsd/fuse_common.h -@@ -0,0 +1,823 @@ -+/* FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. -+*/ -+ -+/** @file */ -+ -+#if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_) -+#error "Never include <fuse_common.h> directly; use <fuse.h> or <fuse_lowlevel.h> instead." -+#endif -+ -+#ifndef FUSE_COMMON_H_ -+#define FUSE_COMMON_H_ -+ -+#include "fuse_opt.h" -+#include "fuse_log.h" -+#include <stdint.h> -+#include <sys/types.h> -+ -+/** Major version of FUSE library interface */ -+#define FUSE_MAJOR_VERSION 3 -+ -+/** Minor version of FUSE library interface */ -+#define FUSE_MINOR_VERSION 2 -+ -+#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) -+#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/** -+ * Information about an open file. -+ * -+ * File Handles are created by the open, opendir, and create methods and closed -+ * by the release and releasedir methods. Multiple file handles may be -+ * concurrently open for the same file. Generally, a client will create one -+ * file handle per file descriptor, though in some cases multiple file -+ * descriptors can share a single file handle. -+ */ -+struct fuse_file_info { -+ /** Open flags. Available in open() and release() */ -+ int flags; -+ -+ /** In case of a write operation indicates if this was caused -+ by a delayed write from the page cache.
If so, then the -+ context's pid, uid, and gid fields will not be valid, and -+ the *fh* value may not match the *fh* value that would -+ have been sent with the corresponding individual write -+ requests if write caching had been disabled. */ -+ unsigned int writepage : 1; -+ -+ /** Can be filled in by open, to use direct I/O on this file. */ -+ unsigned int direct_io : 1; -+ -+ /** Can be filled in by open. It signals the kernel that any -+ currently cached file data (ie., data that the filesystem -+ provided the last time the file was open) need not be -+ invalidated. Has no effect when set in other contexts (in -+ particular it does nothing when set by opendir()). */ -+ unsigned int keep_cache : 1; -+ -+ /** Indicates a flush operation. Set in flush operation, also -+ maybe set in highlevel lock operation and lowlevel release -+ operation. */ -+ unsigned int flush : 1; -+ -+ /** Can be filled in by open, to indicate that the file is not -+ seekable. */ -+ unsigned int nonseekable : 1; -+ -+ /* Indicates that flock locks for this file should be -+ released. If set, lock_owner shall contain a valid value. -+ May only be set in ->release(). */ -+ unsigned int flock_release : 1; -+ -+ /** Can be filled in by opendir. It signals the kernel to -+ enable caching of entries returned by readdir(). Has no -+ effect when set in other contexts (in particular it does -+ nothing when set by open()). */ -+ unsigned int cache_readdir : 1; -+ -+ /** Padding. Reserved for future use*/ -+ unsigned int padding : 25; -+ unsigned int padding2 : 32; -+ -+ /** File handle id. May be filled in by filesystem in create, -+ * open, and opendir(). Available in most other file operations on the -+ * same file handle. */ -+ uint64_t fh; -+ -+ /** Lock owner id. Available in locking operations and flush */ -+ uint64_t lock_owner; -+ -+ /** Requested poll events. Available in ->poll. Only set on kernels -+ which support it. If unsupported, this field is set to zero. 
*/ -+ uint32_t poll_events; -+}; -+ -+/** -+ * Configuration parameters passed to fuse_session_loop_mt() and -+ * fuse_loop_mt(). -+ */ -+struct fuse_loop_config { -+ /** -+ * whether to use separate device fds for each thread -+ * (may increase performance) -+ */ -+ int clone_fd; -+ -+ /** -+ * The maximum number of available worker threads before they -+ * start to get deleted when they become idle. If not -+ * specified, the default is 10. -+ * -+ * Adjusting this has performance implications; a very small number -+ * of threads in the pool will cause a lot of thread creation and -+ * deletion overhead and performance may suffer. When set to 0, a new -+ * thread will be created to service every operation. -+ */ -+ unsigned int max_idle_threads; -+}; -+ -+/************************************************************************** -+ * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * -+ **************************************************************************/ -+ -+/** -+ * Indicates that the filesystem supports asynchronous read requests. -+ * -+ * If this capability is not requested/available, the kernel will -+ * ensure that there is at most one pending read request per -+ * file-handle at any time, and will attempt to order read requests by -+ * increasing offset. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_ASYNC_READ (1 << 0) -+ -+/** -+ * Indicates that the filesystem supports "remote" locking. -+ * -+ * This feature is enabled by default when supported by the kernel, -+ * and if getlk() and setlk() handlers are implemented. -+ */ -+#define FUSE_CAP_POSIX_LOCKS (1 << 1) -+ -+/** -+ * Indicates that the filesystem supports the O_TRUNC open flag. If -+ * disabled, and an application specifies O_TRUNC, fuse first calls -+ * truncate() and then open() with O_TRUNC filtered out. -+ * -+ * This feature is enabled by default when supported by the kernel. 
-+ */ -+#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) -+ -+/** -+ * Indicates that the filesystem supports lookups of "." and "..". -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) -+ -+/** -+ * Indicates that the kernel should not apply the umask to the -+ * file mode on create operations. -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_DONT_MASK (1 << 6) -+ -+/** -+ * Indicates that libfuse should try to use splice() when writing to -+ * the fuse device. This may improve performance. -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_SPLICE_WRITE (1 << 7) -+ -+/** -+ * Indicates that libfuse should try to move pages instead of copying when -+ * writing to / reading from the fuse device. This may improve performance. -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_SPLICE_MOVE (1 << 8) -+ -+/** -+ * Indicates that libfuse should try to use splice() when reading from -+ * the fuse device. This may improve performance. -+ * -+ * This feature is enabled by default when supported by the kernel and -+ * if the filesystem implements a write_buf() handler. -+ */ -+#define FUSE_CAP_SPLICE_READ (1 << 9) -+ -+/** -+ * If set, the calls to flock(2) will be emulated using POSIX locks and must -+ * then be handled by the filesystem's setlock() handler. -+ * -+ * If not set, flock(2) calls will be handled by the FUSE kernel module -+ * internally (so any access that does not go through the kernel cannot be taken -+ * into account). -+ * -+ * This feature is enabled by default when supported by the kernel and -+ * if the filesystem implements a flock() handler. -+ */ -+#define FUSE_CAP_FLOCK_LOCKS (1 << 10) -+ -+/** -+ * Indicates that the filesystem supports ioctl's on directories. -+ * -+ * This feature is enabled by default when supported by the kernel. 
-+ */ -+#define FUSE_CAP_IOCTL_DIR (1 << 11) -+ -+/** -+ * Traditionally, while a file is open the FUSE kernel module only -+ * asks the filesystem for an update of the file's attributes when a -+ * client attempts to read beyond EOF. This is unsuitable for -+ * e.g. network filesystems, where the file contents may change -+ * without the kernel knowing about it. -+ * -+ * If this flag is set, FUSE will check the validity of the attributes -+ * on every read. If the attributes are no longer valid (i.e., if the -+ * *attr_timeout* passed to fuse_reply_attr() or set in `struct -+ * fuse_entry_param` has passed), it will first issue a `getattr` -+ * request. If the new mtime differs from the previous value, any -+ * cached file *contents* will be invalidated as well. -+ * -+ * This flag should always be set when available. If all file changes -+ * go through the kernel, *attr_timeout* should be set to a very large -+ * number to avoid unnecessary getattr() calls. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) -+ -+/** -+ * Indicates that the filesystem supports readdirplus. -+ * -+ * This feature is enabled by default when supported by the kernel and if the -+ * filesystem implements a readdirplus() handler. -+ */ -+#define FUSE_CAP_READDIRPLUS (1 << 13) -+ -+/** -+ * Indicates that the filesystem supports adaptive readdirplus. -+ * -+ * If FUSE_CAP_READDIRPLUS is not set, this flag has no effect. -+ * -+ * If FUSE_CAP_READDIRPLUS is set and this flag is not set, the kernel -+ * will always issue readdirplus() requests to retrieve directory -+ * contents. -+ * -+ * If FUSE_CAP_READDIRPLUS is set and this flag is set, the kernel -+ * will issue both readdir() and readdirplus() requests, depending on -+ * how much information is expected to be required. 
-+ * -+ * As of Linux 4.20, the algorithm is as follows: when userspace -+ * starts to read directory entries, issue a READDIRPLUS request to -+ * the filesystem. If any entry attributes have been looked up by the -+ * time userspace requests the next batch of entries continue with -+ * READDIRPLUS, otherwise switch to plain READDIR. This will reasult -+ * in eg plain "ls" triggering READDIRPLUS first then READDIR after -+ * that because it doesn't do lookups. "ls -l" should result in all -+ * READDIRPLUS, except if dentries are already cached. -+ * -+ * This feature is enabled by default when supported by the kernel and -+ * if the filesystem implements both a readdirplus() and a readdir() -+ * handler. -+ */ -+#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) -+ -+/** -+ * Indicates that the filesystem supports asynchronous direct I/O submission. -+ * -+ * If this capability is not requested/available, the kernel will ensure that -+ * there is at most one pending read and one pending write request per direct -+ * I/O file-handle at any time. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_ASYNC_DIO (1 << 15) -+ -+/** -+ * Indicates that writeback caching should be enabled. This means that -+ * individual write request may be buffered and merged in the kernel -+ * before they are send to the filesystem. -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) -+ -+/** -+ * Indicates support for zero-message opens. If this flag is set in -+ * the `capable` field of the `fuse_conn_info` structure, then the -+ * filesystem may return `ENOSYS` from the open() handler to indicate -+ * success. Further attempts to open files will be handled in the -+ * kernel. (If this flag is not set, returning ENOSYS will be treated -+ * as an error and signaled to the caller). -+ * -+ * Setting (or unsetting) this flag in the `want` field has *no -+ * effect*. 
-+ */ -+#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) -+ -+/** -+ * Indicates support for parallel directory operations. If this flag -+ * is unset, the FUSE kernel module will ensure that lookup() and -+ * readdir() requests are never issued concurrently for the same -+ * directory. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) -+ -+/** -+ * Indicates support for POSIX ACLs. -+ * -+ * If this feature is enabled, the kernel will cache and have -+ * responsibility for enforcing ACLs. ACL will be stored as xattrs and -+ * passed to userspace, which is responsible for updating the ACLs in -+ * the filesystem, keeping the file mode in sync with the ACL, and -+ * ensuring inheritance of default ACLs when new filesystem nodes are -+ * created. Note that this requires that the file system is able to -+ * parse and interpret the xattr representation of ACLs. -+ * -+ * Enabling this feature implicitly turns on the -+ * ``default_permissions`` mount option (even if it was not passed to -+ * mount(2)). -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_POSIX_ACL (1 << 19) -+ -+/** -+ * Indicates that the filesystem is responsible for unsetting -+ * setuid and setgid bits when a file is written, truncated, or -+ * its owner is changed. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) -+ -+/** -+ * Indicates support for zero-message opendirs. If this flag is set in -+ * the `capable` field of the `fuse_conn_info` structure, then the filesystem -+ * may return `ENOSYS` from the opendir() handler to indicate success. Further -+ * opendir and releasedir messages will be handled in the kernel. (If this -+ * flag is not set, returning ENOSYS will be treated as an error and signalled -+ * to the caller.) -+ * -+ * Setting (or unsetting) this flag in the `want` field has *no effect*. 
-+ */ -+#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) -+ -+/** -+ * Ioctl flags -+ * -+ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine -+ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed -+ * FUSE_IOCTL_RETRY: retry with new iovecs -+ * FUSE_IOCTL_DIR: is a directory -+ * -+ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs -+ */ -+#define FUSE_IOCTL_COMPAT (1 << 0) -+#define FUSE_IOCTL_UNRESTRICTED (1 << 1) -+#define FUSE_IOCTL_RETRY (1 << 2) -+#define FUSE_IOCTL_DIR (1 << 4) -+ -+#define FUSE_IOCTL_MAX_IOV 256 -+ -+/** -+ * Connection information, passed to the ->init() method -+ * -+ * Some of the elements are read-write, these can be changed to -+ * indicate the value requested by the filesystem. The requested -+ * value must usually be smaller than the indicated value. -+ */ -+struct fuse_conn_info { -+ /** -+ * Major version of the protocol (read-only) -+ */ -+ unsigned proto_major; -+ -+ /** -+ * Minor version of the protocol (read-only) -+ */ -+ unsigned proto_minor; -+ -+ /** -+ * Maximum size of the write buffer -+ */ -+ unsigned max_write; -+ -+ /** -+ * Maximum size of read requests. A value of zero indicates no -+ * limit. However, even if the filesystem does not specify a -+ * limit, the maximum size of read requests will still be -+ * limited by the kernel. -+ * -+ * NOTE: For the time being, the maximum size of read requests -+ * must be set both here *and* passed to fuse_session_new() -+ * using the ``-o max_read=`` mount option. At some point -+ * in the future, specifying the mount option will no longer -+ * be necessary. -+ */ -+ unsigned max_read; -+ -+ /** -+ * Maximum readahead -+ */ -+ unsigned max_readahead; -+ -+ /** -+ * Capability flags that the kernel supports (read-only) -+ */ -+ unsigned capable; -+ -+ /** -+ * Capability flags that the filesystem wants to enable. -+ * -+ * libfuse attempts to initialize this field with -+ * reasonable default values before calling the init() handler. 
-+ */ -+ unsigned want; -+ -+ /** -+ * Maximum number of pending "background" requests. A -+ * background request is any type of request for which the -+ * total number is not limited by other means. As of kernel -+ * 4.8, only two types of requests fall into this category: -+ * -+ * 1. Read-ahead requests -+ * 2. Asynchronous direct I/O requests -+ * -+ * Read-ahead requests are generated (if max_readahead is -+ * non-zero) by the kernel to preemptively fill its caches -+ * when it anticipates that userspace will soon read more -+ * data. -+ * -+ * Asynchronous direct I/O requests are generated if -+ * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large -+ * direct I/O request. In this case the kernel will internally -+ * split it up into multiple smaller requests and submit them -+ * to the filesystem concurrently. -+ * -+ * Note that the following requests are *not* background -+ * requests: writeback requests (limited by the kernel's -+ * flusher algorithm), regular (i.e., synchronous and -+ * buffered) userspace read/write requests (limited to one per -+ * thread), asynchronous read requests (Linux's io_submit(2) -+ * call actually blocks, so these are also limited to one per -+ * thread). -+ */ -+ unsigned max_background; -+ -+ /** -+ * Kernel congestion threshold parameter. If the number of pending -+ * background requests exceeds this number, the FUSE kernel module will -+ * mark the filesystem as "congested". This instructs the kernel to -+ * expect that queued requests will take some time to complete, and to -+ * adjust its algorithms accordingly (e.g. by putting a waiting thread -+ * to sleep instead of using a busy-loop). -+ */ -+ unsigned congestion_threshold; -+ -+ /** -+ * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible -+ * for updating mtime and ctime when write requests are received. The -+ * updated values are passed to the filesystem with setattr() requests. 
-+ * However, if the filesystem does not support the full resolution of -+ * the kernel timestamps (nanoseconds), the mtime and ctime values used -+ * by kernel and filesystem will differ (and result in an apparent -+ * change of times after a cache flush). -+ * -+ * To prevent this problem, this variable can be used to inform the -+ * kernel about the timestamp granularity supported by the file-system. -+ * The value should be power of 10. The default is 1, i.e. full -+ * nano-second resolution. Filesystems supporting only second resolution -+ * should set this to 1000000000. -+ */ -+ unsigned time_gran; -+ -+ /** -+ * For future use. -+ */ -+ unsigned reserved[22]; -+}; -+ -+struct fuse_session; -+struct fuse_pollhandle; -+struct fuse_conn_info_opts; -+ -+/** -+ * This function parses several command-line options that can be used -+ * to override elements of struct fuse_conn_info. The pointer returned -+ * by this function should be passed to the -+ * fuse_apply_conn_info_opts() method by the file system's init() -+ * handler. -+ * -+ * Before using this function, think twice if you really want these -+ * parameters to be adjustable from the command line. In most cases, -+ * they should be determined by the file system internally. 
-+ * -+ * The following options are recognized: -+ * -+ * -o max_write=N sets conn->max_write -+ * -o max_readahead=N sets conn->max_readahead -+ * -o max_background=N sets conn->max_background -+ * -o congestion_threshold=N sets conn->congestion_threshold -+ * -o async_read sets FUSE_CAP_ASYNC_READ in conn->want -+ * -o sync_read unsets FUSE_CAP_ASYNC_READ in conn->want -+ * -o atomic_o_trunc sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want -+ * -o no_remote_lock Equivalent to -o no_remote_flock,no_remote_posix_lock -+ * -o no_remote_flock Unsets FUSE_CAP_FLOCK_LOCKS in conn->want -+ * -o no_remote_posix_lock Unsets FUSE_CAP_POSIX_LOCKS in conn->want -+ * -o [no_]splice_write (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want -+ * -o [no_]splice_move (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want -+ * -o [no_]splice_read (un-)sets FUSE_CAP_SPLICE_READ in conn->want -+ * -o [no_]auto_inval_data (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want -+ * -o readdirplus=no unsets FUSE_CAP_READDIRPLUS in conn->want -+ * -o readdirplus=yes sets FUSE_CAP_READDIRPLUS and unsets -+ * FUSE_CAP_READDIRPLUS_AUTO in conn->want -+ * -o readdirplus=auto sets FUSE_CAP_READDIRPLUS and -+ * FUSE_CAP_READDIRPLUS_AUTO in conn->want -+ * -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in conn->want -+ * -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want -+ * -o time_gran=N sets conn->time_gran -+ * -+ * Known options will be removed from *args*, unknown options will be -+ * passed through unchanged. -+ * -+ * @param args argument vector (input+output) -+ * @return parsed options -+ **/ -+struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); -+ -+/** -+ * This function applies the (parsed) parameters in *opts* to the -+ * *conn* pointer. It may modify the following fields: wants, -+ * max_write, max_readahead, congestion_threshold, max_background, -+ * time_gran. A field is only set (or unset) if the corresponding -+ * option has been explicitly set. 
-+ */ -+void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, -+ struct fuse_conn_info *conn); -+ -+/** -+ * Go into the background -+ * -+ * @param foreground if true, stay in the foreground -+ * @return 0 on success, -1 on failure -+ */ -+int fuse_daemonize(int foreground); -+ -+/** -+ * Get the version of the library -+ * -+ * @return the version -+ */ -+int fuse_version(void); -+ -+/** -+ * Get the full package version string of the library -+ * -+ * @return the package version -+ */ -+const char *fuse_pkgversion(void); -+ -+/** -+ * Destroy poll handle -+ * -+ * @param ph the poll handle -+ */ -+void fuse_pollhandle_destroy(struct fuse_pollhandle *ph); -+ -+/* ----------------------------------------------------------- * -+ * Data buffer * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Buffer flags -+ */ -+enum fuse_buf_flags { -+ /** -+ * Buffer contains a file descriptor -+ * -+ * If this flag is set, the .fd field is valid, otherwise the -+ * .mem fields is valid. -+ */ -+ FUSE_BUF_IS_FD = (1 << 1), -+ -+ /** -+ * Seek on the file descriptor -+ * -+ * If this flag is set then the .pos field is valid and is -+ * used to seek to the given offset before performing -+ * operation on file descriptor. -+ */ -+ FUSE_BUF_FD_SEEK = (1 << 2), -+ -+ /** -+ * Retry operation on file descriptor -+ * -+ * If this flag is set then retry operation on file descriptor -+ * until .size bytes have been copied or an error or EOF is -+ * detected. -+ */ -+ FUSE_BUF_FD_RETRY = (1 << 3), -+}; -+ -+/** -+ * Buffer copy flags -+ */ -+enum fuse_buf_copy_flags { -+ /** -+ * Don't use splice(2) -+ * -+ * Always fall back to using read and write instead of -+ * splice(2) to copy data from one file descriptor to another. -+ * -+ * If this flag is not set, then only fall back if splice is -+ * unavailable. 
-+ */ -+ FUSE_BUF_NO_SPLICE = (1 << 1), -+ -+ /** -+ * Force splice -+ * -+ * Always use splice(2) to copy data from one file descriptor -+ * to another. If splice is not available, return -EINVAL. -+ */ -+ FUSE_BUF_FORCE_SPLICE = (1 << 2), -+ -+ /** -+ * Try to move data with splice. -+ * -+ * If splice is used, try to move pages from the source to the -+ * destination instead of copying. See documentation of -+ * SPLICE_F_MOVE in splice(2) man page. -+ */ -+ FUSE_BUF_SPLICE_MOVE = (1 << 3), -+ -+ /** -+ * Don't block on the pipe when copying data with splice -+ * -+ * Makes the operations on the pipe non-blocking (if the pipe -+ * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) -+ * man page. -+ */ -+ FUSE_BUF_SPLICE_NONBLOCK= (1 << 4), -+}; -+ -+/** -+ * Single data buffer -+ * -+ * Generic data buffer for I/O, extended attributes, etc... Data may -+ * be supplied as a memory pointer or as a file descriptor -+ */ -+struct fuse_buf { -+ /** -+ * Size of data in bytes -+ */ -+ size_t size; -+ -+ /** -+ * Buffer flags -+ */ -+ enum fuse_buf_flags flags; -+ -+ /** -+ * Memory pointer -+ * -+ * Used unless FUSE_BUF_IS_FD flag is set. -+ */ -+ void *mem; -+ -+ /** -+ * File descriptor -+ * -+ * Used if FUSE_BUF_IS_FD flag is set. -+ */ -+ int fd; -+ -+ /** -+ * File position -+ * -+ * Used if FUSE_BUF_FD_SEEK flag is set. -+ */ -+ off_t pos; -+}; -+ -+/** -+ * Data buffer vector -+ * -+ * An array of data buffers, each containing a memory pointer or a -+ * file descriptor. -+ * -+ * Allocate dynamically to add more than one buffer. 
-+ */ -+struct fuse_bufvec { -+ /** -+ * Number of buffers in the array -+ */ -+ size_t count; -+ -+ /** -+ * Index of current buffer within the array -+ */ -+ size_t idx; -+ -+ /** -+ * Current offset within the current buffer -+ */ -+ size_t off; -+ -+ /** -+ * Array of buffers -+ */ -+ struct fuse_buf buf[1]; -+}; -+ -+/* Initialize bufvec with a single buffer of given size */ -+#define FUSE_BUFVEC_INIT(size__) \ -+ ((struct fuse_bufvec) { \ -+ /* .count= */ 1, \ -+ /* .idx = */ 0, \ -+ /* .off = */ 0, \ -+ /* .buf = */ { /* [0] = */ { \ -+ /* .size = */ (size__), \ -+ /* .flags = */ (enum fuse_buf_flags) 0, \ -+ /* .mem = */ NULL, \ -+ /* .fd = */ -1, \ -+ /* .pos = */ 0, \ -+ } } \ -+ } ) -+ -+/** -+ * Get total size of data in a fuse buffer vector -+ * -+ * @param bufv buffer vector -+ * @return size of data -+ */ -+size_t fuse_buf_size(const struct fuse_bufvec *bufv); -+ -+/** -+ * Copy data from one buffer vector to another -+ * -+ * @param dst destination buffer vector -+ * @param src source buffer vector -+ * @param flags flags controlling the copy -+ * @return actual number of bytes copied or -errno on error -+ */ -+ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, -+ enum fuse_buf_copy_flags flags); -+ -+/* ----------------------------------------------------------- * -+ * Signal handling * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Exit session on HUP, TERM and INT signals and ignore PIPE signal -+ * -+ * Stores session in a global variable. May only be called once per -+ * process until fuse_remove_signal_handlers() is called. -+ * -+ * Once either of the POSIX signals arrives, the signal handler calls -+ * fuse_session_exit(). 
-+ * -+ * @param se the session to exit -+ * @return 0 on success, -1 on failure -+ * -+ * See also: -+ * fuse_remove_signal_handlers() -+ */ -+int fuse_set_signal_handlers(struct fuse_session *se); -+ -+/** -+ * Restore default signal handlers -+ * -+ * Resets global session. After this fuse_set_signal_handlers() may -+ * be called again. -+ * -+ * @param se the same session as given in fuse_set_signal_handlers() -+ * -+ * See also: -+ * fuse_set_signal_handlers() -+ */ -+void fuse_remove_signal_handlers(struct fuse_session *se); -+ -+/* ----------------------------------------------------------- * -+ * Compatibility stuff * -+ * ----------------------------------------------------------- */ -+ -+#if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30 -+# error only API version 30 or greater is supported -+#endif -+ -+#ifdef __cplusplus -+} -+#endif -+ -+ -+/* -+ * This interface uses 64 bit off_t. -+ * -+ * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags! -+ */ -+ -+#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus -+_Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit"); -+#else -+struct _fuse_off_t_must_be_64bit_dummy_struct \ -+ { unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); }; -+#endif -+ -+#endif /* FUSE_COMMON_H_ */ -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -new file mode 100644 -index 0000000..d38b630 ---- /dev/null -+++ b/tools/virtiofsd/fuse_i.h -@@ -0,0 +1,139 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. 
-+ See the file COPYING.LIB -+*/ -+ -+#include "fuse.h" -+#include "fuse_lowlevel.h" -+ -+struct mount_opts; -+ -+struct fuse_req { -+ struct fuse_session *se; -+ uint64_t unique; -+ int ctr; -+ pthread_mutex_t lock; -+ struct fuse_ctx ctx; -+ struct fuse_chan *ch; -+ int interrupted; -+ unsigned int ioctl_64bit : 1; -+ union { -+ struct { -+ uint64_t unique; -+ } i; -+ struct { -+ fuse_interrupt_func_t func; -+ void *data; -+ } ni; -+ } u; -+ struct fuse_req *next; -+ struct fuse_req *prev; -+}; -+ -+struct fuse_notify_req { -+ uint64_t unique; -+ void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, -+ const void *, const struct fuse_buf *); -+ struct fuse_notify_req *next; -+ struct fuse_notify_req *prev; -+}; -+ -+struct fuse_session { -+ char *mountpoint; -+ volatile int exited; -+ int fd; -+ struct mount_opts *mo; -+ int debug; -+ int deny_others; -+ struct fuse_lowlevel_ops op; -+ int got_init; -+ struct cuse_data *cuse_data; -+ void *userdata; -+ uid_t owner; -+ struct fuse_conn_info conn; -+ struct fuse_req list; -+ struct fuse_req interrupts; -+ pthread_mutex_t lock; -+ int got_destroy; -+ pthread_key_t pipe_key; -+ int broken_splice_nonblock; -+ uint64_t notify_ctr; -+ struct fuse_notify_req notify_list; -+ size_t bufsize; -+ int error; -+}; -+ -+struct fuse_chan { -+ pthread_mutex_t lock; -+ int ctr; -+ int fd; -+}; -+ -+/** -+ * Filesystem module -+ * -+ * Filesystem modules are registered with the FUSE_REGISTER_MODULE() -+ * macro. 
-+ * -+ */ -+struct fuse_module { -+ char *name; -+ fuse_module_factory_t factory; -+ struct fuse_module *next; -+ struct fusemod_so *so; -+ int ctr; -+}; -+ -+/* ----------------------------------------------------------- * -+ * Channel interface (when using -o clone_fd) * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Obtain counted reference to the channel -+ * -+ * @param ch the channel -+ * @return the channel -+ */ -+struct fuse_chan *fuse_chan_get(struct fuse_chan *ch); -+ -+/** -+ * Drop counted reference to a channel -+ * -+ * @param ch the channel -+ */ -+void fuse_chan_put(struct fuse_chan *ch); -+ -+struct mount_opts *parse_mount_opts(struct fuse_args *args); -+void destroy_mount_opts(struct mount_opts *mo); -+void fuse_mount_version(void); -+unsigned get_max_read(struct mount_opts *o); -+void fuse_kern_unmount(const char *mountpoint, int fd); -+int fuse_kern_mount(const char *mountpoint, struct mount_opts *mo); -+ -+int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, -+ int count); -+void fuse_free_req(fuse_req_t req); -+ -+void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg); -+ -+int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg); -+ -+int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, -+ struct fuse_chan *ch); -+void fuse_session_process_buf_int(struct fuse_session *se, -+ const struct fuse_buf *buf, struct fuse_chan *ch); -+ -+struct fuse *fuse_new_31(struct fuse_args *args, const struct fuse_operations *op, -+ size_t op_size, void *private_data); -+int fuse_loop_mt_32(struct fuse *f, struct fuse_loop_config *config); -+int fuse_session_loop_mt_32(struct fuse_session *se, struct fuse_loop_config *config); -+ -+#define FUSE_MAX_MAX_PAGES 256 -+#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 -+ -+/* room needed in buffer to accommodate header */ -+#define FUSE_BUFFER_HEADER_SIZE 0x1000 -+ -diff --git 
a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h -new file mode 100644 -index 0000000..5e112e0 ---- /dev/null -+++ b/tools/virtiofsd/fuse_log.h -@@ -0,0 +1,82 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2019 Red Hat, Inc. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. -+*/ -+ -+#ifndef FUSE_LOG_H_ -+#define FUSE_LOG_H_ -+ -+/** @file -+ * -+ * This file defines the logging interface of FUSE -+ */ -+ -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/** -+ * Log severity level -+ * -+ * These levels correspond to syslog(2) log levels since they are widely used. -+ */ -+enum fuse_log_level { -+ FUSE_LOG_EMERG, -+ FUSE_LOG_ALERT, -+ FUSE_LOG_CRIT, -+ FUSE_LOG_ERR, -+ FUSE_LOG_WARNING, -+ FUSE_LOG_NOTICE, -+ FUSE_LOG_INFO, -+ FUSE_LOG_DEBUG -+}; -+ -+/** -+ * Log message handler function. -+ * -+ * This function must be thread-safe. It may be called from any libfuse -+ * function, including fuse_parse_cmdline() and other functions invoked before -+ * a FUSE filesystem is created. -+ * -+ * Install a custom log message handler function using fuse_set_log_func(). -+ * -+ * @param level log severity level -+ * @param fmt sprintf-style format string including newline -+ * @param ap format string arguments -+ */ -+typedef void (*fuse_log_func_t)(enum fuse_log_level level, -+ const char *fmt, va_list ap); -+ -+/** -+ * Install a custom log handler function. -+ * -+ * Log messages are emitted by libfuse functions to report errors and debug -+ * information. Messages are printed to stderr by default but this can be -+ * overridden by installing a custom log message handler function. -+ * -+ * The log message handler function is global and affects all FUSE filesystems -+ * created within this process. 
-+ * -+ * @param func a custom log message handler function or NULL to revert to -+ * the default -+ */ -+void fuse_set_log_func(fuse_log_func_t func); -+ -+/** -+ * Emit a log message -+ * -+ * @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc) -+ * @param fmt sprintf-style format string including newline -+ */ -+void fuse_log(enum fuse_log_level level, const char *fmt, ...); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* FUSE_LOG_H_ */ -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -new file mode 100644 -index 0000000..18c6363 ---- /dev/null -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -0,0 +1,2089 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. -+*/ -+ -+#ifndef FUSE_LOWLEVEL_H_ -+#define FUSE_LOWLEVEL_H_ -+ -+/** @file -+ * -+ * Low level API -+ * -+ * IMPORTANT: you should define FUSE_USE_VERSION before including this -+ * header. To use the newest API define it to 31 (recommended for any -+ * new application). 
-+ */ -+ -+#ifndef FUSE_USE_VERSION -+#error FUSE_USE_VERSION not defined -+#endif -+ -+#include "fuse_common.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* ----------------------------------------------------------- * -+ * Miscellaneous definitions * -+ * ----------------------------------------------------------- */ -+ -+/** The node ID of the root inode */ -+#define FUSE_ROOT_ID 1 -+ -+/** Inode number type */ -+typedef uint64_t fuse_ino_t; -+ -+/** Request pointer type */ -+typedef struct fuse_req *fuse_req_t; -+ -+/** -+ * Session -+ * -+ * This provides hooks for processing requests, and exiting -+ */ -+struct fuse_session; -+ -+/** Directory entry parameters supplied to fuse_reply_entry() */ -+struct fuse_entry_param { -+ /** Unique inode number -+ * -+ * In lookup, zero means negative entry (from version 2.5) -+ * Returning ENOENT also means negative entry, but by setting zero -+ * ino the kernel may cache negative entries for entry_timeout -+ * seconds. -+ */ -+ fuse_ino_t ino; -+ -+ /** Generation number for this entry. -+ * -+ * If the file system will be exported over NFS, the -+ * ino/generation pairs need to be unique over the file -+ * system's lifetime (rather than just the mount time). So if -+ * the file system reuses an inode after it has been deleted, -+ * it must assign a new, previously unused generation number -+ * to the inode at the same time. -+ * -+ */ -+ uint64_t generation; -+ -+ /** Inode attributes. -+ * -+ * Even if attr_timeout == 0, attr must be correct. For example, -+ * for open(), FUSE uses attr.st_size from lookup() to determine -+ * how many bytes to request. If this value is not correct, -+ * incorrect data will be returned. -+ */ -+ struct stat attr; -+ -+ /** Validity timeout (in seconds) for inode attributes. If -+ attributes only change as a result of requests that come -+ through the kernel, this should be set to a very large -+ value. 
*/ -+ double attr_timeout; -+ -+ /** Validity timeout (in seconds) for the name. If directory -+ entries are changed/deleted only as a result of requests -+ that come through the kernel, this should be set to a very -+ large value. */ -+ double entry_timeout; -+}; -+ -+/** -+ * Additional context associated with requests. -+ * -+ * Note that the reported client uid, gid and pid may be zero in some -+ * situations. For example, if the FUSE file system is running in a -+ * PID or user namespace but then accessed from outside the namespace, -+ * there is no valid uid/pid/gid that could be reported. -+ */ -+struct fuse_ctx { -+ /** User ID of the calling process */ -+ uid_t uid; -+ -+ /** Group ID of the calling process */ -+ gid_t gid; -+ -+ /** Thread ID of the calling process */ -+ pid_t pid; -+ -+ /** Umask of the calling process */ -+ mode_t umask; -+}; -+ -+struct fuse_forget_data { -+ fuse_ino_t ino; -+ uint64_t nlookup; -+}; -+ -+/* 'to_set' flags in setattr */ -+#define FUSE_SET_ATTR_MODE (1 << 0) -+#define FUSE_SET_ATTR_UID (1 << 1) -+#define FUSE_SET_ATTR_GID (1 << 2) -+#define FUSE_SET_ATTR_SIZE (1 << 3) -+#define FUSE_SET_ATTR_ATIME (1 << 4) -+#define FUSE_SET_ATTR_MTIME (1 << 5) -+#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) -+#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) -+#define FUSE_SET_ATTR_CTIME (1 << 10) -+ -+/* ----------------------------------------------------------- * -+ * Request methods and replies * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Low level filesystem operations -+ * -+ * Most of the methods (with the exception of init and destroy) -+ * receive a request handle (fuse_req_t) as their first argument. -+ * This handle must be passed to one of the specified reply functions. -+ * -+ * This may be done inside the method invocation, or after the call -+ * has returned. The request handle is valid until one of the reply -+ * functions is called. 
-+ * -+ * Other pointer arguments (name, fuse_file_info, etc) are not valid -+ * after the call has returned, so if they are needed later, their -+ * contents have to be copied. -+ * -+ * In general, all methods are expected to perform any necessary -+ * permission checking. However, a filesystem may delegate this task -+ * to the kernel by passing the `default_permissions` mount option to -+ * `fuse_session_new()`. In this case, methods will only be called if -+ * the kernel's permission check has succeeded. -+ * -+ * The filesystem sometimes needs to handle a return value of -ENOENT -+ * from the reply function, which means, that the request was -+ * interrupted, and the reply discarded. For example if -+ * fuse_reply_open() return -ENOENT means, that the release method for -+ * this file will not be called. -+ */ -+struct fuse_lowlevel_ops { -+ /** -+ * Initialize filesystem -+ * -+ * This function is called when libfuse establishes -+ * communication with the FUSE kernel module. The file system -+ * should use this module to inspect and/or modify the -+ * connection parameters provided in the `conn` structure. -+ * -+ * Note that some parameters may be overwritten by options -+ * passed to fuse_session_new() which take precedence over the -+ * values set in this handler. -+ * -+ * There's no reply to this function -+ * -+ * @param userdata the user data passed to fuse_session_new() -+ */ -+ void (*init) (void *userdata, struct fuse_conn_info *conn); -+ -+ /** -+ * Clean up filesystem. -+ * -+ * Called on filesystem exit. When this method is called, the -+ * connection to the kernel may be gone already, so that eg. calls -+ * to fuse_lowlevel_notify_* will fail. -+ * -+ * There's no reply to this function -+ * -+ * @param userdata the user data passed to fuse_session_new() -+ */ -+ void (*destroy) (void *userdata); -+ -+ /** -+ * Look up a directory entry by name and get its attributes. 
-+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name the name to look up -+ */ -+ void (*lookup) (fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Forget about an inode -+ * -+ * This function is called when the kernel removes an inode -+ * from its internal caches. -+ * -+ * The inode's lookup count increases by one for every call to -+ * fuse_reply_entry and fuse_reply_create. The nlookup parameter -+ * indicates by how much the lookup count should be decreased. -+ * -+ * Inodes with a non-zero lookup count may receive request from -+ * the kernel even after calls to unlink, rmdir or (when -+ * overwriting an existing file) rename. Filesystems must handle -+ * such requests properly and it is recommended to defer removal -+ * of the inode until the lookup count reaches zero. Calls to -+ * unlink, rmdir or rename will be followed closely by forget -+ * unless the file or directory is open, in which case the -+ * kernel issues forget only after the release or releasedir -+ * calls. -+ * -+ * Note that if a file system will be exported over NFS the -+ * inodes lifetime must extend even beyond forget. See the -+ * generation field in struct fuse_entry_param above. -+ * -+ * On unmount the lookup count for all inodes implicitly drops -+ * to zero. It is not guaranteed that the file system will -+ * receive corresponding forget messages for the affected -+ * inodes. -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param nlookup the number of lookups to forget -+ */ -+ void (*forget) (fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); -+ -+ /** -+ * Get file attributes. 
-+ * -+ * If writeback caching is enabled, the kernel may have a -+ * better idea of a file's length than the FUSE file system -+ * (eg if there has been a write that extended the file size, -+ * but that has not yet been passed to the filesystem.n -+ * -+ * In this case, the st_size value provided by the file system -+ * will be ignored. -+ * -+ * Valid replies: -+ * fuse_reply_attr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi for future use, currently always NULL -+ */ -+ void (*getattr) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Set file attributes -+ * -+ * In the 'attr' argument only members indicated by the 'to_set' -+ * bitmask contain valid values. Other members contain undefined -+ * values. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits if the file -+ * size or owner is being changed. -+ * -+ * If the setattr was invoked from the ftruncate() system call -+ * under Linux kernel versions 2.6.15 or later, the fi->fh will -+ * contain the value set by the open method or will be undefined -+ * if the open method didn't set any value. Otherwise (not -+ * ftruncate call, or kernel version earlier than 2.6.15) the fi -+ * parameter will be NULL. 
-+ * -+ * Valid replies: -+ * fuse_reply_attr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param attr the attributes -+ * @param to_set bit mask of attributes which should be set -+ * @param fi file information, or NULL -+ */ -+ void (*setattr) (fuse_req_t req, fuse_ino_t ino, struct stat *attr, -+ int to_set, struct fuse_file_info *fi); -+ -+ /** -+ * Read symbolic link -+ * -+ * Valid replies: -+ * fuse_reply_readlink -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ */ -+ void (*readlink) (fuse_req_t req, fuse_ino_t ino); -+ -+ /** -+ * Create file node -+ * -+ * Create a regular file, character device, block device, fifo or -+ * socket node. -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode file type and mode with which to create the new file -+ * @param rdev the device number (only valid if created file is a device) -+ */ -+ void (*mknod) (fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, dev_t rdev); -+ -+ /** -+ * Create a directory -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode with which to create the new file -+ */ -+ void (*mkdir) (fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode); -+ -+ /** -+ * Remove a file -+ * -+ * If the file's inode's lookup count is non-zero, the file -+ * system is expected to postpone any removal of the inode -+ * until the lookup count reaches zero (see description of the -+ * forget function). 
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to remove -+ */ -+ void (*unlink) (fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Remove a directory -+ * -+ * If the directory's inode's lookup count is non-zero, the -+ * file system is expected to postpone any removal of the -+ * inode until the lookup count reaches zero (see description -+ * of the forget function). -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to remove -+ */ -+ void (*rmdir) (fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Create a symbolic link -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param link the contents of the symbolic link -+ * @param parent inode number of the parent directory -+ * @param name to create -+ */ -+ void (*symlink) (fuse_req_t req, const char *link, fuse_ino_t parent, -+ const char *name); -+ -+ /** Rename a file -+ * -+ * If the target exists it should be atomically replaced. If -+ * the target's inode's lookup count is non-zero, the file -+ * system is expected to postpone any removal of the inode -+ * until the lookup count reaches zero (see description of the -+ * forget function). -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EINVAL, i.e. all -+ * future bmap requests will fail with EINVAL without being -+ * send to the filesystem process. -+ * -+ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -+ * RENAME_NOREPLACE is specified, the filesystem must not -+ * overwrite *newname* if it exists and return an error -+ * instead. If `RENAME_EXCHANGE` is specified, the filesystem -+ * must atomically exchange the two files, i.e. 
both must -+ * exist and neither may be deleted. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the old parent directory -+ * @param name old name -+ * @param newparent inode number of the new parent directory -+ * @param newname new name -+ */ -+ void (*rename) (fuse_req_t req, fuse_ino_t parent, const char *name, -+ fuse_ino_t newparent, const char *newname, -+ unsigned int flags); -+ -+ /** -+ * Create a hard link -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the old inode number -+ * @param newparent inode number of the new parent directory -+ * @param newname new name to create -+ */ -+ void (*link) (fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, -+ const char *newname); -+ -+ /** -+ * Open a file -+ * -+ * Open flags are available in fi->flags. The following rules -+ * apply. -+ * -+ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -+ * filtered out / handled by the kernel. -+ * -+ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used -+ * by the filesystem to check if the operation is -+ * permitted. If the ``-o default_permissions`` mount -+ * option is given, this check is already done by the -+ * kernel before calling open() and may thus be omitted by -+ * the filesystem. -+ * -+ * - When writeback caching is enabled, the kernel may send -+ * read requests even for files opened with O_WRONLY. The -+ * filesystem should be prepared to handle this. -+ * -+ * - When writeback caching is disabled, the filesystem is -+ * expected to properly handle the O_APPEND flag and ensure -+ * that each write is appending to the end of the file. -+ * -+ * - When writeback caching is enabled, the kernel will -+ * handle O_APPEND. However, unless all changes to the file -+ * come through the kernel this will not work reliably. 
The -+ * filesystem should thus either ignore the O_APPEND flag -+ * (and let the kernel handle it), or return an error -+ * (indicating that reliably O_APPEND is not available). -+ * -+ * Filesystem may store an arbitrary file handle (pointer, -+ * index, etc) in fi->fh, and use this in other all other file -+ * operations (read, write, flush, release, fsync). -+ * -+ * Filesystem may also implement stateless file I/O and not store -+ * anything in fi->fh. -+ * -+ * There are also some flags (direct_io, keep_cache) which the -+ * filesystem may set in fi, to change the way the file is opened. -+ * See fuse_file_info structure in for more details. -+ * -+ * If this request is answered with an error code of ENOSYS -+ * and FUSE_CAP_NO_OPEN_SUPPORT is set in -+ * `fuse_conn_info.capable`, this is treated as success and -+ * future calls to open and release will also succeed without being -+ * sent to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_open -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*open) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Read data -+ * -+ * Read should send exactly the number of bytes requested except -+ * on EOF or error, otherwise the rest of the data will be -+ * substituted with zeroes. An exception to this is when the file -+ * has been opened in 'direct_io' mode, in which case the return -+ * value of the read system call will reflect the return value of -+ * this operation. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. 
-+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_iov -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size number of bytes to read -+ * @param off offset to read from -+ * @param fi file information -+ */ -+ void (*read) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Write data -+ * -+ * Write should return exactly the number of bytes requested -+ * except on error. An exception to this is when the file has -+ * been opened in 'direct_io' mode, in which case the return value -+ * of the write system call will reflect the return value of this -+ * operation. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param buf data to write -+ * @param size number of bytes to write -+ * @param off offset to write to -+ * @param fi file information -+ */ -+ void (*write) (fuse_req_t req, fuse_ino_t ino, const char *buf, -+ size_t size, off_t off, struct fuse_file_info *fi); -+ -+ /** -+ * Flush method -+ * -+ * This is called on each close() of the opened file. -+ * -+ * Since file descriptors can be duplicated (dup, dup2, fork), for -+ * one open call there may be many flush calls. -+ * -+ * Filesystems shouldn't assume that flush will always be called -+ * after some writes, or that if will be called at all. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * -+ * NOTE: the name of the method is misleading, since (unlike -+ * fsync) the filesystem is not forced to flush pending writes. 
-+ * One reason to flush data is if the filesystem wants to return -+ * write errors during close. However, such use is non-portable -+ * because POSIX does not require [close] to wait for delayed I/O to -+ * complete. -+ * -+ * If the filesystem supports file locking operations (setlk, -+ * getlk) it should remove all locks belonging to 'fi->owner'. -+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to flush() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * -+ * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -+ */ -+ void (*flush) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Release an open file -+ * -+ * Release is called when there are no more references to an open -+ * file: all file descriptors are closed and all memory mappings -+ * are unmapped. -+ * -+ * For every open call there will be exactly one release call (unless -+ * the filesystem is force-unmounted). -+ * -+ * The filesystem may reply with an error, but error values are -+ * not returned to close() or munmap() which triggered the -+ * release. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * fi->flags will contain the same flags as for open. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*release) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Synchronize file contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data. 
-+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to fsync() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param datasync flag indicating if only data should be flushed -+ * @param fi file information -+ */ -+ void (*fsync) (fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Open a directory -+ * -+ * Filesystem may store an arbitrary file handle (pointer, index, -+ * etc) in fi->fh, and use this in other all other directory -+ * stream operations (readdir, releasedir, fsyncdir). -+ * -+ * If this request is answered with an error code of ENOSYS and -+ * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, -+ * this is treated as success and future calls to opendir and -+ * releasedir will also succeed without being sent to the filesystem -+ * process. In addition, the kernel will cache readdir results -+ * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. -+ * -+ * Valid replies: -+ * fuse_reply_open -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*opendir) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Read directory -+ * -+ * Send a buffer filled using fuse_add_direntry(), with size not -+ * exceeding the requested size. Send an empty buffer on end of -+ * stream. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * Returning a directory entry from readdir() does not affect -+ * its lookup count. -+ * -+ * If off_t is non-zero, then it will correspond to one of the off_t -+ * values that was previously returned by readdir() for the same -+ * directory handle. 
In this case, readdir() should skip over entries -+ * coming before the position defined by the off_t value. If entries -+ * are added or removed while the directory handle is open, they filesystem -+ * may still include the entries that have been removed, and may not -+ * report the entries that have been created. However, addition or -+ * removal of entries must never cause readdir() to skip over unrelated -+ * entries or to report them more than once. This means -+ * that off_t can not be a simple index that enumerates the entries -+ * that have been returned but must contain sufficient information to -+ * uniquely determine the next directory entry to return even when the -+ * set of entries is changing. -+ * -+ * The function does not have to report the '.' and '..' -+ * entries, but is allowed to do so. Note that, if readdir does -+ * not return '.' or '..', they will not be implicitly returned, -+ * and this behavior is observable by the caller. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum number of bytes to send -+ * @param off offset to continue reading the directory stream -+ * @param fi file information -+ */ -+ void (*readdir) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Release an open directory -+ * -+ * For every opendir call there will be exactly one releasedir -+ * call (unless the filesystem is force-unmounted). -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. 
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*releasedir) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Synchronize directory contents -+ * -+ * If the datasync parameter is non-zero, then only the directory -+ * contents should be flushed, not the meta data. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to fsyncdir() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param datasync flag indicating if only data should be flushed -+ * @param fi file information -+ */ -+ void (*fsyncdir) (fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Get file system statistics -+ * -+ * Valid replies: -+ * fuse_reply_statfs -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number, zero means "undefined" -+ */ -+ void (*statfs) (fuse_req_t req, fuse_ino_t ino); -+ -+ /** -+ * Set an extended attribute -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future setxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ */ -+ void (*setxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, -+ const char *value, size_t size, int flags); -+ -+ /** -+ * Get an extended attribute -+ * -+ * If size is zero, the size of the value should be sent with -+ * fuse_reply_xattr. 
-+ * -+ * If the size is non-zero, and the value fits in the buffer, the -+ * value should be sent with fuse_reply_buf. -+ * -+ * If the size is too small for the value, the ERANGE error should -+ * be sent. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future getxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_xattr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param name of the extended attribute -+ * @param size maximum size of the value to send -+ */ -+ void (*getxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, -+ size_t size); -+ -+ /** -+ * List extended attribute names -+ * -+ * If size is zero, the total size of the attribute list should be -+ * sent with fuse_reply_xattr. -+ * -+ * If the size is non-zero, and the null character separated -+ * attribute list fits in the buffer, the list should be sent with -+ * fuse_reply_buf. -+ * -+ * If the size is too small for the list, the ERANGE error should -+ * be sent. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future listxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_xattr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum size of the list to send -+ */ -+ void (*listxattr) (fuse_req_t req, fuse_ino_t ino, size_t size); -+ -+ /** -+ * Remove an extended attribute -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all -+ * future removexattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param name of the extended attribute -+ */ -+ void (*removexattr) (fuse_req_t req, fuse_ino_t ino, const char *name); -+ -+ /** -+ * Check file access permissions -+ * -+ * This will be called for the access() and chdir() system -+ * calls. If the 'default_permissions' mount option is given, -+ * this method is not called. -+ * -+ * This method is not called under Linux kernel versions 2.4.x -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent success, i.e. this and all future access() -+ * requests will succeed without being send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param mask requested access mode -+ */ -+ void (*access) (fuse_req_t req, fuse_ino_t ino, int mask); -+ -+ /** -+ * Create and open a file -+ * -+ * If the file does not exist, first create it with the specified -+ * mode, and then open it. -+ * -+ * See the description of the open handler for more -+ * information. -+ * -+ * If this method is not implemented or under Linux kernel -+ * versions earlier than 2.6.15, the mknod() and open() methods -+ * will be called instead. -+ * -+ * If this request is answered with an error code of ENOSYS, the handler -+ * is treated as not implemented (i.e., for this and future requests the -+ * mknod() and open() handlers will be called instead). 
-+ * -+ * Valid replies: -+ * fuse_reply_create -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode file type and mode with which to create the new file -+ * @param fi file information -+ */ -+ void (*create) (fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, struct fuse_file_info *fi); -+ -+ /** -+ * Test for a POSIX file lock -+ * -+ * Valid replies: -+ * fuse_reply_lock -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param lock the region/type to test -+ */ -+ void (*getlk) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi, struct flock *lock); -+ -+ /** -+ * Acquire, modify or release a POSIX file lock -+ * -+ * For POSIX threads (NPTL) there's a 1-1 relation between pid and -+ * owner, but otherwise this is not always the case. For checking -+ * lock ownership, 'fi->owner' must be used. The l_pid field in -+ * 'struct flock' should only be used to fill in this field in -+ * getlk(). -+ * -+ * Note: if the locking methods are not implemented, the kernel -+ * will still allow file locking to work locally. Hence these are -+ * only interesting for network filesystems and similar. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param lock the region/type to set -+ * @param sleep locking operation may sleep -+ */ -+ void (*setlk) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi, -+ struct flock *lock, int sleep); -+ -+ /** -+ * Map block index within file to block index within device -+ * -+ * Note: This makes sense only for block device backed filesystems -+ * mounted with the 'blkdev' option -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure, i.e. 
all future bmap() requests will -+ * fail with the same error code without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_bmap -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param blocksize unit of block index -+ * @param idx block index within file -+ */ -+ void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize, -+ uint64_t idx); -+ -+ /** -+ * Ioctl -+ * -+ * Note: For unrestricted ioctls (not allowed for FUSE -+ * servers), data in and out areas can be discovered by giving -+ * iovs and setting FUSE_IOCTL_RETRY in *flags*. For -+ * restricted ioctls, kernel prepares in/out data area -+ * according to the information encoded in cmd. -+ * -+ * Valid replies: -+ * fuse_reply_ioctl_retry -+ * fuse_reply_ioctl -+ * fuse_reply_ioctl_iov -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param cmd ioctl command -+ * @param arg ioctl argument -+ * @param fi file information -+ * @param flags for FUSE_IOCTL_* flags -+ * @param in_buf data fetched from the caller -+ * @param in_bufsz number of fetched bytes -+ * @param out_bufsz maximum size of output data -+ * -+ * Note : the unsigned long request submitted by the application -+ * is truncated to 32 bits. -+ */ -+ void (*ioctl) (fuse_req_t req, fuse_ino_t ino, unsigned int cmd, -+ void *arg, struct fuse_file_info *fi, unsigned flags, -+ const void *in_buf, size_t in_bufsz, size_t out_bufsz); -+ -+ /** -+ * Poll for IO readiness -+ * -+ * Note: If ph is non-NULL, the client should notify -+ * when IO readiness events occur by calling -+ * fuse_lowlevel_notify_poll() with the specified ph. -+ * -+ * Regardless of the number of times poll with a non-NULL ph -+ * is received, single notification is enough to clear all. -+ * Notifying more times incurs overhead but doesn't harm -+ * correctness. 
-+ * -+ * The callee is responsible for destroying ph with -+ * fuse_pollhandle_destroy() when no longer in use. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as success (with a kernel-defined default poll-mask) and -+ * future calls to pull() will succeed the same way without being send -+ * to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_poll -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param ph poll handle to be used for notification -+ */ -+ void (*poll) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct fuse_pollhandle *ph); -+ -+ /** -+ * Write data made available in a buffer -+ * -+ * This is a more generic version of the ->write() method. If -+ * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the -+ * kernel supports splicing from the fuse device, then the -+ * data will be made available in pipe for supporting zero -+ * copy data transfer. -+ * -+ * buf->count is guaranteed to be one (and thus buf->idx is -+ * always zero). The write_buf handler must ensure that -+ * bufv->off is correctly updated (reflecting the number of -+ * bytes read from bufv->buf[0]). -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. 
-+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param bufv buffer containing the data -+ * @param off offset to write to -+ * @param fi file information -+ */ -+ void (*write_buf) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_bufvec *bufv, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Callback function for the retrieve request -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() -+ * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() -+ * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() -+ * @param bufv the buffer containing the returned data -+ */ -+ void (*retrieve_reply) (fuse_req_t req, void *cookie, fuse_ino_t ino, -+ off_t offset, struct fuse_bufvec *bufv); -+ -+ /** -+ * Forget about multiple inodes -+ * -+ * See description of the forget function for more -+ * information. -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ */ -+ void (*forget_multi) (fuse_req_t req, size_t count, -+ struct fuse_forget_data *forgets); -+ -+ /** -+ * Acquire, modify or release a BSD file lock -+ * -+ * Note: if the locking methods are not implemented, the kernel -+ * will still allow file locking to work locally. Hence these are -+ * only interesting for network filesystems and similar. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param op the locking operation, see flock(2) -+ */ -+ void (*flock) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi, int op); -+ -+ /** -+ * Allocate requested space. If this function returns success then -+ * subsequent writes to the specified range shall not fail due to the lack -+ * of free space on the file system storage media. 
-+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future fallocate() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param offset starting point for allocated region -+ * @param length size of allocated region -+ * @param mode determines the operation to be performed on the given range, -+ * see fallocate(2) -+ */ -+ void (*fallocate) (fuse_req_t req, fuse_ino_t ino, int mode, -+ off_t offset, off_t length, struct fuse_file_info *fi); -+ -+ /** -+ * Read directory with attributes -+ * -+ * Send a buffer filled using fuse_add_direntry_plus(), with size not -+ * exceeding the requested size. Send an empty buffer on end of -+ * stream. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * In contrast to readdir() (which does not affect the lookup counts), -+ * the lookup count of every entry returned by readdirplus(), except "." -+ * and "..", is incremented by one. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum number of bytes to send -+ * @param off offset to continue reading the directory stream -+ * @param fi file information -+ */ -+ void (*readdirplus) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Copy a range of data from one file to another -+ * -+ * Performs an optimized copy between two file descriptors without the -+ * additional cost of transferring data through the FUSE kernel module -+ * to user space (glibc) and then back into the FUSE filesystem again. 
-+ * -+ * In case this method is not implemented, glibc falls back to reading -+ * data from the source and writing to the destination. Effectively -+ * doing an inefficient copy of the data. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future copy_file_range() requests will fail with EOPNOTSUPP without -+ * being send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino_in the inode number or the source file -+ * @param off_in starting point from were the data should be read -+ * @param fi_in file information of the source file -+ * @param ino_out the inode number or the destination file -+ * @param off_out starting point where the data should be written -+ * @param fi_out file information of the destination file -+ * @param len maximum size of the data to copy -+ * @param flags passed along with the copy_file_range() syscall -+ */ -+ void (*copy_file_range) (fuse_req_t req, fuse_ino_t ino_in, -+ off_t off_in, struct fuse_file_info *fi_in, -+ fuse_ino_t ino_out, off_t off_out, -+ struct fuse_file_info *fi_out, size_t len, -+ int flags); -+ -+ /** -+ * Find next data or hole after the specified offset -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure, i.e. all future lseek() requests will -+ * fail with the same error code without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_lseek -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param off offset to start search from -+ * @param whence either SEEK_DATA or SEEK_HOLE -+ * @param fi file information -+ */ -+ void (*lseek) (fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -+ struct fuse_file_info *fi); -+}; -+ -+/** -+ * Reply with an error code or success. 
-+ * -+ * Possible requests: -+ * all except forget -+ * -+ * Whereever possible, error codes should be chosen from the list of -+ * documented error conditions in the corresponding system calls -+ * manpage. -+ * -+ * An error code of ENOSYS is sometimes treated specially. This is -+ * indicated in the documentation of the affected handler functions. -+ * -+ * The following requests may be answered with a zero error code: -+ * unlink, rmdir, rename, flush, release, fsync, fsyncdir, setxattr, -+ * removexattr, setlk. -+ * -+ * @param req request handle -+ * @param err the positive error value, or zero for success -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_err(fuse_req_t req, int err); -+ -+/** -+ * Don't send reply -+ * -+ * Possible requests: -+ * forget -+ * forget_multi -+ * retrieve_reply -+ * -+ * @param req request handle -+ */ -+void fuse_reply_none(fuse_req_t req); -+ -+/** -+ * Reply with a directory entry -+ * -+ * Possible requests: -+ * lookup, mknod, mkdir, symlink, link -+ * -+ * Side effects: -+ * increments the lookup count on success -+ * -+ * @param req request handle -+ * @param e the entry parameters -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); -+ -+/** -+ * Reply with a directory entry and open parameters -+ * -+ * currently the following members of 'fi' are used: -+ * fh, direct_io, keep_cache -+ * -+ * Possible requests: -+ * create -+ * -+ * Side effects: -+ * increments the lookup count on success -+ * -+ * @param req request handle -+ * @param e the entry parameters -+ * @param fi file information -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, -+ const struct fuse_file_info *fi); -+ -+/** -+ * Reply with attributes -+ * -+ * Possible requests: -+ * getattr, setattr -+ * -+ * @param req request handle -+ 
* @param attr the attributes -+ * @param attr_timeout validity timeout (in seconds) for the attributes -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_attr(fuse_req_t req, const struct stat *attr, -+ double attr_timeout); -+ -+/** -+ * Reply with the contents of a symbolic link -+ * -+ * Possible requests: -+ * readlink -+ * -+ * @param req request handle -+ * @param link symbolic link contents -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_readlink(fuse_req_t req, const char *link); -+ -+/** -+ * Reply with open parameters -+ * -+ * currently the following members of 'fi' are used: -+ * fh, direct_io, keep_cache -+ * -+ * Possible requests: -+ * open, opendir -+ * -+ * @param req request handle -+ * @param fi file information -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *fi); -+ -+/** -+ * Reply with number of bytes written -+ * -+ * Possible requests: -+ * write -+ * -+ * @param req request handle -+ * @param count the number of bytes written -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_write(fuse_req_t req, size_t count); -+ -+/** -+ * Reply with data -+ * -+ * Possible requests: -+ * read, readdir, getxattr, listxattr -+ * -+ * @param req request handle -+ * @param buf buffer containing data -+ * @param size the size of data in bytes -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); -+ -+/** -+ * Reply with data copied/moved from buffer(s) -+ * -+ * Zero copy data transfer ("splicing") will be used under -+ * the following circumstances: -+ * -+ * 1. FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.want, and -+ * 2. the kernel supports splicing from the fuse device -+ * (FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.capable), and -+ * 3. 
*flags* does not contain FUSE_BUF_NO_SPLICE -+ * 4. The amount of data that is provided in file-descriptor backed -+ * buffers (i.e., buffers for which bufv[n].flags == FUSE_BUF_FD) -+ * is at least twice the page size. -+ * -+ * In order for SPLICE_F_MOVE to be used, the following additional -+ * conditions have to be fulfilled: -+ * -+ * 1. FUSE_CAP_SPLICE_MOVE is set in fuse_conn_info.want, and -+ * 2. the kernel supports it (i.e, FUSE_CAP_SPLICE_MOVE is set in -+ fuse_conn_info.capable), and -+ * 3. *flags* contains FUSE_BUF_SPLICE_MOVE -+ * -+ * Note that, if splice is used, the data is actually spliced twice: -+ * once into a temporary pipe (to prepend header data), and then again -+ * into the kernel. If some of the provided buffers are memory-backed, -+ * the data in them is copied in step one and spliced in step two. -+ * -+ * The FUSE_BUF_SPLICE_FORCE_SPLICE and FUSE_BUF_SPLICE_NONBLOCK flags -+ * are silently ignored. -+ * -+ * Possible requests: -+ * read, readdir, getxattr, listxattr -+ * -+ * Side effects: -+ * when used to return data from a readdirplus() (but not readdir()) -+ * call, increments the lookup count of each returned entry by one -+ * on success. 
-+ * -+ * @param req request handle -+ * @param bufv buffer vector -+ * @param flags flags controlling the copy -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags); -+ -+/** -+ * Reply with data vector -+ * -+ * Possible requests: -+ * read, readdir, getxattr, listxattr -+ * -+ * @param req request handle -+ * @param iov the vector containing the data -+ * @param count the size of vector -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count); -+ -+/** -+ * Reply with filesystem statistics -+ * -+ * Possible requests: -+ * statfs -+ * -+ * @param req request handle -+ * @param stbuf filesystem statistics -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf); -+ -+/** -+ * Reply with needed buffer size -+ * -+ * Possible requests: -+ * getxattr, listxattr -+ * -+ * @param req request handle -+ * @param count the buffer size needed in bytes -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_xattr(fuse_req_t req, size_t count); -+ -+/** -+ * Reply with file lock information -+ * -+ * Possible requests: -+ * getlk -+ * -+ * @param req request handle -+ * @param lock the lock information -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_lock(fuse_req_t req, const struct flock *lock); -+ -+/** -+ * Reply with block index -+ * -+ * Possible requests: -+ * bmap -+ * -+ * @param req request handle -+ * @param idx block index within device -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_bmap(fuse_req_t req, uint64_t idx); -+ -+/* ----------------------------------------------------------- * -+ * Filling a buffer in readdir * -+ * 
----------------------------------------------------------- */ -+ -+/** -+ * Add a directory entry to the buffer -+ * -+ * Buffer needs to be large enough to hold the entry. If it's not, -+ * then the entry is not filled in but the size of the entry is still -+ * returned. The caller can check this by comparing the bufsize -+ * parameter with the returned entry size. If the entry size is -+ * larger than the buffer size, the operation failed. -+ * -+ * From the 'stbuf' argument the st_ino field and bits 12-15 of the -+ * st_mode field are used. The other fields are ignored. -+ * -+ * *off* should be any non-zero value that the filesystem can use to -+ * identify the current point in the directory stream. It does not -+ * need to be the actual physical position. A value of zero is -+ * reserved to mean "from the beginning", and should therefore never -+ * be used (the first call to fuse_add_direntry should be passed the -+ * offset of the second directory entry). -+ * -+ * @param req request handle -+ * @param buf the point where the new entry will be added to the buffer -+ * @param bufsize remaining size of the buffer -+ * @param name the name of the entry -+ * @param stbuf the file attributes -+ * @param off the offset of the next entry -+ * @return the space needed for the entry -+ */ -+size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, -+ const char *name, const struct stat *stbuf, -+ off_t off); -+ -+/** -+ * Add a directory entry to the buffer with the attributes -+ * -+ * See documentation of `fuse_add_direntry()` for more details. 
-+ * -+ * @param req request handle -+ * @param buf the point where the new entry will be added to the buffer -+ * @param bufsize remaining size of the buffer -+ * @param name the name of the entry -+ * @param e the directory entry -+ * @param off the offset of the next entry -+ * @return the space needed for the entry -+ */ -+size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, -+ const char *name, -+ const struct fuse_entry_param *e, off_t off); -+ -+/** -+ * Reply to ask for data fetch and output buffer preparation. ioctl -+ * will be retried with the specified input data fetched and output -+ * buffer prepared. -+ * -+ * Possible requests: -+ * ioctl -+ * -+ * @param req request handle -+ * @param in_iov iovec specifying data to fetch from the caller -+ * @param in_count number of entries in in_iov -+ * @param out_iov iovec specifying addresses to write output to -+ * @param out_count number of entries in out_iov -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_ioctl_retry(fuse_req_t req, -+ const struct iovec *in_iov, size_t in_count, -+ const struct iovec *out_iov, size_t out_count); -+ -+/** -+ * Reply to finish ioctl -+ * -+ * Possible requests: -+ * ioctl -+ * -+ * @param req request handle -+ * @param result result to be passed to the caller -+ * @param buf buffer containing output data -+ * @param size length of output data -+ */ -+int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size); -+ -+/** -+ * Reply to finish ioctl with iov buffer -+ * -+ * Possible requests: -+ * ioctl -+ * -+ * @param req request handle -+ * @param result result to be passed to the caller -+ * @param iov the vector containing the data -+ * @param count the size of vector -+ */ -+int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, -+ int count); -+ -+/** -+ * Reply with poll result event mask -+ * -+ * @param req request handle -+ * @param revents poll result event mask -+ 
*/ -+int fuse_reply_poll(fuse_req_t req, unsigned revents); -+ -+/** -+ * Reply with offset -+ * -+ * Possible requests: -+ * lseek -+ * -+ * @param req request handle -+ * @param off offset of next data or hole -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_lseek(fuse_req_t req, off_t off); -+ -+/* ----------------------------------------------------------- * -+ * Notification * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Notify IO readiness event -+ * -+ * For more information, please read comment for poll operation. -+ * -+ * @param ph poll handle to notify IO readiness event for -+ */ -+int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph); -+ -+/** -+ * Notify to invalidate cache for an inode. -+ * -+ * Added in FUSE protocol version 7.12. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing. -+ * -+ * If the filesystem has writeback caching enabled, invalidating an -+ * inode will first trigger a writeback of all dirty pages. The call -+ * will block until all writeback requests have completed and the -+ * inode has been invalidated. It will, however, not wait for -+ * completion of pending writeback requests that have been issued -+ * before. -+ * -+ * If there are no dirty pages, this function will never block. 
-+ * -+ * @param se the session object -+ * @param ino the inode number -+ * @param off the offset in the inode where to start invalidating -+ * or negative to invalidate attributes only -+ * @param len the amount of cache to invalidate or 0 for all -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, -+ off_t off, off_t len); -+ -+/** -+ * Notify to invalidate parent attributes and the dentry matching -+ * parent/name -+ * -+ * To avoid a deadlock this function must not be called in the -+ * execution path of a related filesytem operation or within any code -+ * that could hold a lock that could be needed to execute such an -+ * operation. As of kernel 4.18, a "related operation" is a lookup(), -+ * symlink(), mknod(), mkdir(), unlink(), rename(), link() or create() -+ * request for the parent, and a setattr(), unlink(), rmdir(), -+ * rename(), setxattr(), removexattr(), readdir() or readdirplus() -+ * request for the inode itself. -+ * -+ * When called correctly, this function will never block. -+ * -+ * Added in FUSE protocol version 7.12. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing. -+ * -+ * @param se the session object -+ * @param parent inode number -+ * @param name file name -+ * @param namelen strlen() of file name -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, -+ const char *name, size_t namelen); -+ -+/** -+ * This function behaves like fuse_lowlevel_notify_inval_entry() with -+ * the following additional effect (at least as of Linux kernel 4.8): -+ * -+ * If the provided *child* inode matches the inode that is currently -+ * associated with the cached dentry, and if there are any inotify -+ * watches registered for the dentry, then the watchers are informed -+ * that the dentry has been deleted. 
-+ * -+ * To avoid a deadlock this function must not be called while -+ * executing a related filesytem operation or while holding a lock -+ * that could be needed to execute such an operation (see the -+ * description of fuse_lowlevel_notify_inval_entry() for more -+ * details). -+ * -+ * When called correctly, this function will never block. -+ * -+ * Added in FUSE protocol version 7.18. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing. -+ * -+ * @param se the session object -+ * @param parent inode number -+ * @param child inode number -+ * @param name file name -+ * @param namelen strlen() of file name -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_delete(struct fuse_session *se, -+ fuse_ino_t parent, fuse_ino_t child, -+ const char *name, size_t namelen); -+ -+/** -+ * Store data to the kernel buffers -+ * -+ * Synchronously store data in the kernel buffers belonging to the -+ * given inode. The stored data is marked up-to-date (no read will be -+ * performed against it, unless it's invalidated or evicted from the -+ * cache). -+ * -+ * If the stored data overflows the current file size, then the size -+ * is extended, similarly to a write(2) on the filesystem. -+ * -+ * If this function returns an error, then the store wasn't fully -+ * completed, but it may have been partially completed. -+ * -+ * Added in FUSE protocol version 7.15. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing. 
-+ * -+ * @param se the session object -+ * @param ino the inode number -+ * @param offset the starting offset into the file to store to -+ * @param bufv buffer vector -+ * @param flags flags controlling the copy -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -+ off_t offset, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags); -+/** -+ * Retrieve data from the kernel buffers -+ * -+ * Retrieve data in the kernel buffers belonging to the given inode. -+ * If successful then the retrieve_reply() method will be called with -+ * the returned data. -+ * -+ * Only present pages are returned in the retrieve reply. Retrieving -+ * stops when it finds a non-present page and only data prior to that -+ * is returned. -+ * -+ * If this function returns an error, then the retrieve will not be -+ * completed and no reply will be sent. -+ * -+ * This function doesn't change the dirty state of pages in the kernel -+ * buffer. For dirty pages the write() method will be called -+ * regardless of having been retrieved previously. -+ * -+ * Added in FUSE protocol version 7.15. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing. 
-+ * -+ * @param se the session object -+ * @param ino the inode number -+ * @param size the number of bytes to retrieve -+ * @param offset the starting offset into the file to retrieve from -+ * @param cookie user data to supply to the reply callback -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -+ size_t size, off_t offset, void *cookie); -+ -+ -+/* ----------------------------------------------------------- * -+ * Utility functions * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Get the userdata from the request -+ * -+ * @param req request handle -+ * @return the user data passed to fuse_session_new() -+ */ -+void *fuse_req_userdata(fuse_req_t req); -+ -+/** -+ * Get the context from the request -+ * -+ * The pointer returned by this function will only be valid for the -+ * request's lifetime -+ * -+ * @param req request handle -+ * @return the context structure -+ */ -+const struct fuse_ctx *fuse_req_ctx(fuse_req_t req); -+ -+/** -+ * Get the current supplementary group IDs for the specified request -+ * -+ * Similar to the getgroups(2) system call, except the return value is -+ * always the total number of group IDs, even if it is larger than the -+ * specified size. -+ * -+ * The current fuse kernel module in linux (as of 2.6.30) doesn't pass -+ * the group list to userspace, hence this function needs to parse -+ * "/proc/$TID/task/$TID/status" to get the group IDs. -+ * -+ * This feature may not be supported on all operating systems. In -+ * such a case this function will return -ENOSYS. 
-+ * -+ * @param req request handle -+ * @param size size of given array -+ * @param list array of group IDs to be filled in -+ * @return the total number of supplementary group IDs or -errno on failure -+ */ -+int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]); -+ -+/** -+ * Callback function for an interrupt -+ * -+ * @param req interrupted request -+ * @param data user data -+ */ -+typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data); -+ -+/** -+ * Register/unregister callback for an interrupt -+ * -+ * If an interrupt has already happened, then the callback function is -+ * called from within this function, hence it's not possible for -+ * interrupts to be lost. -+ * -+ * @param req request handle -+ * @param func the callback function or NULL for unregister -+ * @param data user data passed to the callback function -+ */ -+void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, -+ void *data); -+ -+/** -+ * Check if a request has already been interrupted -+ * -+ * @param req request handle -+ * @return 1 if the request has been interrupted, 0 otherwise -+ */ -+int fuse_req_interrupted(fuse_req_t req); -+ -+ -+/* ----------------------------------------------------------- * -+ * Inquiry functions * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Print low-level version information to stdout. -+ */ -+void fuse_lowlevel_version(void); -+ -+/** -+ * Print available low-level options to stdout. This is not an -+ * exhaustive list, but includes only those options that may be of -+ * interest to an end-user of a file system. -+ */ -+void fuse_lowlevel_help(void); -+ -+/** -+ * Print available options for `fuse_parse_cmdline()`. 
-+ */ -+void fuse_cmdline_help(void); -+ -+/* ----------------------------------------------------------- * -+ * Filesystem setup & teardown * -+ * ----------------------------------------------------------- */ -+ -+struct fuse_cmdline_opts { -+ int singlethread; -+ int foreground; -+ int debug; -+ int nodefault_subtype; -+ char *mountpoint; -+ int show_version; -+ int show_help; -+ int clone_fd; -+ unsigned int max_idle_threads; -+}; -+ -+/** -+ * Utility function to parse common options for simple file systems -+ * using the low-level API. A help text that describes the available -+ * options can be printed with `fuse_cmdline_help`. A single -+ * non-option argument is treated as the mountpoint. Multiple -+ * non-option arguments will result in an error. -+ * -+ * If neither -o subtype= or -o fsname= options are given, a new -+ * subtype option will be added and set to the basename of the program -+ * (the fsname will remain unset, and then defaults to "fuse"). -+ * -+ * Known options will be removed from *args*, unknown options will -+ * remain. -+ * -+ * @param args argument vector (input+output) -+ * @param opts output argument for parsed options -+ * @return 0 on success, -1 on failure -+ */ -+int fuse_parse_cmdline(struct fuse_args *args, -+ struct fuse_cmdline_opts *opts); -+ -+/** -+ * Create a low level session. -+ * -+ * Returns a session structure suitable for passing to -+ * fuse_session_mount() and fuse_session_loop(). -+ * -+ * This function accepts most file-system independent mount options -+ * (like context, nodev, ro - see mount(8)), as well as the general -+ * fuse mount options listed in mount.fuse(8) (e.g. -o allow_root and -+ * -o default_permissions, but not ``-o use_ino``). Instead of `-o -+ * debug`, debugging may also enabled with `-d` or `--debug`. -+ * -+ * If not all options are known, an error message is written to stderr -+ * and the function returns NULL. 
-+ * -+ * Option parsing skips argv[0], which is assumed to contain the -+ * program name. To prevent accidentally passing an option in -+ * argv[0], this element must always be present (even if no options -+ * are specified). It may be set to the empty string ('\0') if no -+ * reasonable value can be provided. -+ * -+ * @param args argument vector -+ * @param op the (low-level) filesystem operations -+ * @param op_size sizeof(struct fuse_lowlevel_ops) -+ * @param userdata user data -+ * -+ * @return the fuse session on success, NULL on failure -+ **/ -+struct fuse_session *fuse_session_new(struct fuse_args *args, -+ const struct fuse_lowlevel_ops *op, -+ size_t op_size, void *userdata); -+ -+/** -+ * Mount a FUSE file system. -+ * -+ * @param mountpoint the mount point path -+ * @param se session object -+ * -+ * @return 0 on success, -1 on failure. -+ **/ -+int fuse_session_mount(struct fuse_session *se, const char *mountpoint); -+ -+/** -+ * Enter a single threaded, blocking event loop. -+ * -+ * When the event loop terminates because the connection to the FUSE -+ * kernel module has been closed, this function returns zero. This -+ * happens when the filesystem is unmounted regularly (by the -+ * filesystem owner or root running the umount(8) or fusermount(1) -+ * command), or if connection is explicitly severed by writing ``1`` -+ * to the``abort`` file in ``/sys/fs/fuse/connections/NNN``. The only -+ * way to distinguish between these two conditions is to check if the -+ * filesystem is still mounted after the session loop returns. -+ * -+ * When some error occurs during request processing, the function -+ * returns a negated errno(3) value. -+ * -+ * If the loop has been terminated because of a signal handler -+ * installed by fuse_set_signal_handlers(), this function returns the -+ * (positive) signal value that triggered the exit. 
-+ * -+ * @param se the session -+ * @return 0, -errno, or a signal value -+ */ -+int fuse_session_loop(struct fuse_session *se); -+ -+/** -+ * Enter a multi-threaded event loop. -+ * -+ * For a description of the return value and the conditions when the -+ * event loop exits, refer to the documentation of -+ * fuse_session_loop(). -+ * -+ * @param se the session -+ * @param config session loop configuration -+ * @return see fuse_session_loop() -+ */ -+#if FUSE_USE_VERSION < 32 -+int fuse_session_loop_mt_31(struct fuse_session *se, int clone_fd); -+#define fuse_session_loop_mt(se, clone_fd) fuse_session_loop_mt_31(se, clone_fd) -+#else -+int fuse_session_loop_mt(struct fuse_session *se, struct fuse_loop_config *config); -+#endif -+ -+/** -+ * Flag a session as terminated. -+ * -+ * This function is invoked by the POSIX signal handlers, when -+ * registered using fuse_set_signal_handlers(). It will cause any -+ * running event loops to terminate on the next opportunity. -+ * -+ * @param se the session -+ */ -+void fuse_session_exit(struct fuse_session *se); -+ -+/** -+ * Reset the terminated flag of a session -+ * -+ * @param se the session -+ */ -+void fuse_session_reset(struct fuse_session *se); -+ -+/** -+ * Query the terminated flag of a session -+ * -+ * @param se the session -+ * @return 1 if exited, 0 if not exited -+ */ -+int fuse_session_exited(struct fuse_session *se); -+ -+/** -+ * Ensure that file system is unmounted. -+ * -+ * In regular operation, the file system is typically unmounted by the -+ * user calling umount(8) or fusermount(1), which then terminates the -+ * FUSE session loop. However, the session loop may also terminate as -+ * a result of an explicit call to fuse_session_exit() (e.g. by a -+ * signal handler installed by fuse_set_signal_handler()). 
In this -+ * case the filesystem remains mounted, but any attempt to access it -+ * will block (while the filesystem process is still running) or give -+ * an ESHUTDOWN error (after the filesystem process has terminated). -+ * -+ * If the communication channel with the FUSE kernel module is still -+ * open (i.e., if the session loop was terminated by an explicit call -+ * to fuse_session_exit()), this function will close it and unmount -+ * the filesystem. If the communication channel has been closed by the -+ * kernel, this method will do (almost) nothing. -+ * -+ * NOTE: The above semantics mean that if the connection to the kernel -+ * is terminated via the ``/sys/fs/fuse/connections/NNN/abort`` file, -+ * this method will *not* unmount the filesystem. -+ * -+ * @param se the session -+ */ -+void fuse_session_unmount(struct fuse_session *se); -+ -+/** -+ * Destroy a session -+ * -+ * @param se the session -+ */ -+void fuse_session_destroy(struct fuse_session *se); -+ -+/* ----------------------------------------------------------- * -+ * Custom event loop support * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Return file descriptor for communication with kernel. -+ * -+ * The file selector can be used to integrate FUSE with a custom event -+ * loop. Whenever data is available for reading on the provided fd, -+ * the event loop should call `fuse_session_receive_buf` followed by -+ * `fuse_session_process_buf` to process the request. -+ * -+ * The returned file descriptor is valid until `fuse_session_unmount` -+ * is called. -+ * -+ * @param se the session -+ * @return a file descriptor -+ */ -+int fuse_session_fd(struct fuse_session *se); -+ -+/** -+ * Process a raw request supplied in a generic buffer -+ * -+ * The fuse_buf may contain a memory buffer or a pipe file descriptor. 
-+ * -+ * @param se the session -+ * @param buf the fuse_buf containing the request -+ */ -+void fuse_session_process_buf(struct fuse_session *se, -+ const struct fuse_buf *buf); -+ -+/** -+ * Read a raw request from the kernel into the supplied buffer. -+ * -+ * Depending on file system options, system capabilities, and request -+ * size the request is either read into a memory buffer or spliced -+ * into a temporary pipe. -+ * -+ * @param se the session -+ * @param buf the fuse_buf to store the request in -+ * @return the actual size of the raw request, or -errno on error -+ */ -+int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* FUSE_LOWLEVEL_H_ */ -diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h -new file mode 100644 -index 0000000..2f6663e ---- /dev/null -+++ b/tools/virtiofsd/fuse_misc.h -@@ -0,0 +1,59 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB -+*/ -+ -+#include -+ -+/* -+ Versioned symbols cannot be used in some cases because it -+ - confuse the dynamic linker in uClibc -+ - not supported on MacOSX (in MachO binary format) -+*/ -+#if (!defined(__UCLIBC__) && !defined(__APPLE__)) -+#define FUSE_SYMVER(x) __asm__(x) -+#else -+#define FUSE_SYMVER(x) -+#endif -+ -+#ifndef USE_UCLIBC -+#define fuse_mutex_init(mut) pthread_mutex_init(mut, NULL) -+#else -+/* Is this hack still needed? 
*/ -+static inline void fuse_mutex_init(pthread_mutex_t *mut) -+{ -+ pthread_mutexattr_t attr; -+ pthread_mutexattr_init(&attr); -+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); -+ pthread_mutex_init(mut, &attr); -+ pthread_mutexattr_destroy(&attr); -+} -+#endif -+ -+#ifdef HAVE_STRUCT_STAT_ST_ATIM -+/* Linux */ -+#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec) -+#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec) -+#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec) -+#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atim.tv_nsec = (val) -+#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctim.tv_nsec = (val) -+#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtim.tv_nsec = (val) -+#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC) -+/* FreeBSD */ -+#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec) -+#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec) -+#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec) -+#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atimespec.tv_nsec = (val) -+#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctimespec.tv_nsec = (val) -+#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtimespec.tv_nsec = (val) -+#else -+#define ST_ATIM_NSEC(stbuf) 0 -+#define ST_CTIM_NSEC(stbuf) 0 -+#define ST_MTIM_NSEC(stbuf) 0 -+#define ST_ATIM_NSEC_SET(stbuf, val) do { } while (0) -+#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0) -+#define ST_MTIM_NSEC_SET(stbuf, val) do { } while (0) -+#endif -diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h -new file mode 100644 -index 0000000..d8573e7 ---- /dev/null -+++ b/tools/virtiofsd/fuse_opt.h -@@ -0,0 +1,271 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. 
-+*/ -+ -+#ifndef FUSE_OPT_H_ -+#define FUSE_OPT_H_ -+ -+/** @file -+ * -+ * This file defines the option parsing interface of FUSE -+ */ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/** -+ * Option description -+ * -+ * This structure describes a single option, and action associated -+ * with it, in case it matches. -+ * -+ * More than one such match may occur, in which case the action for -+ * each match is executed. -+ * -+ * There are three possible actions in case of a match: -+ * -+ * i) An integer (int or unsigned) variable determined by 'offset' is -+ * set to 'value' -+ * -+ * ii) The processing function is called, with 'value' as the key -+ * -+ * iii) An integer (any) or string (char *) variable determined by -+ * 'offset' is set to the value of an option parameter -+ * -+ * 'offset' should normally be either set to -+ * -+ * - 'offsetof(struct foo, member)' actions i) and iii) -+ * -+ * - -1 action ii) -+ * -+ * The 'offsetof()' macro is defined in the header. -+ * -+ * The template determines which options match, and also have an -+ * effect on the action. Normally the action is either i) or ii), but -+ * if a format is present in the template, then action iii) is -+ * performed. -+ * -+ * The types of templates are: -+ * -+ * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only -+ * themselves. Invalid values are "--" and anything beginning -+ * with "-o" -+ * -+ * 2) "foo", "foo-bar", etc. These match "-ofoo", "-ofoo-bar" or -+ * the relevant option in a comma separated option list -+ * -+ * 3) "bar=", "--foo=", etc. These are variations of 1) and 2) -+ * which have a parameter -+ * -+ * 4) "bar=%s", "--foo=%lu", etc. Same matching as above but perform -+ * action iii). -+ * -+ * 5) "-x ", etc. Matches either "-xparam" or "-x param" as -+ * two separate arguments -+ * -+ * 6) "-x %s", etc. Combination of 4) and 5) -+ * -+ * If the format is "%s", memory is allocated for the string unlike with -+ * scanf(). 
The previous value (if non-NULL) stored at the this location is -+ * freed. -+ */ -+struct fuse_opt { -+ /** Matching template and optional parameter formatting */ -+ const char *templ; -+ -+ /** -+ * Offset of variable within 'data' parameter of fuse_opt_parse() -+ * or -1 -+ */ -+ unsigned long offset; -+ -+ /** -+ * Value to set the variable to, or to be passed as 'key' to the -+ * processing function. Ignored if template has a format -+ */ -+ int value; -+}; -+ -+/** -+ * Key option. In case of a match, the processing function will be -+ * called with the specified key. -+ */ -+#define FUSE_OPT_KEY(templ, key) { templ, -1U, key } -+ -+/** -+ * Last option. An array of 'struct fuse_opt' must end with a NULL -+ * template value -+ */ -+#define FUSE_OPT_END { NULL, 0, 0 } -+ -+/** -+ * Argument list -+ */ -+struct fuse_args { -+ /** Argument count */ -+ int argc; -+ -+ /** Argument vector. NULL terminated */ -+ char **argv; -+ -+ /** Is 'argv' allocated? */ -+ int allocated; -+}; -+ -+/** -+ * Initializer for 'struct fuse_args' -+ */ -+#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 } -+ -+/** -+ * Key value passed to the processing function if an option did not -+ * match any template -+ */ -+#define FUSE_OPT_KEY_OPT -1 -+ -+/** -+ * Key value passed to the processing function for all non-options -+ * -+ * Non-options are the arguments beginning with a character other than -+ * '-' or all arguments after the special '--' option -+ */ -+#define FUSE_OPT_KEY_NONOPT -2 -+ -+/** -+ * Special key value for options to keep -+ * -+ * Argument is not passed to processing function, but behave as if the -+ * processing function returned 1 -+ */ -+#define FUSE_OPT_KEY_KEEP -3 -+ -+/** -+ * Special key value for options to discard -+ * -+ * Argument is not passed to processing function, but behave as if the -+ * processing function returned zero -+ */ -+#define FUSE_OPT_KEY_DISCARD -4 -+ -+/** -+ * Processing function -+ * -+ * This function is called if -+ * - option did 
not match any 'struct fuse_opt' -+ * - argument is a non-option -+ * - option did match and offset was set to -1 -+ * -+ * The 'arg' parameter will always contain the whole argument or -+ * option including the parameter if exists. A two-argument option -+ * ("-x foo") is always converted to single argument option of the -+ * form "-xfoo" before this function is called. -+ * -+ * Options of the form '-ofoo' are passed to this function without the -+ * '-o' prefix. -+ * -+ * The return value of this function determines whether this argument -+ * is to be inserted into the output argument vector, or discarded. -+ * -+ * @param data is the user data passed to the fuse_opt_parse() function -+ * @param arg is the whole argument or option -+ * @param key determines why the processing function was called -+ * @param outargs the current output argument list -+ * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept -+ */ -+typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, -+ struct fuse_args *outargs); -+ -+/** -+ * Option parsing function -+ * -+ * If 'args' was returned from a previous call to fuse_opt_parse() or -+ * it was constructed from -+ * -+ * A NULL 'args' is equivalent to an empty argument vector -+ * -+ * A NULL 'opts' is equivalent to an 'opts' array containing a single -+ * end marker -+ * -+ * A NULL 'proc' is equivalent to a processing function always -+ * returning '1' -+ * -+ * @param args is the input and output argument list -+ * @param data is the user data -+ * @param opts is the option description array -+ * @param proc is the processing function -+ * @return -1 on error, 0 on success -+ */ -+int fuse_opt_parse(struct fuse_args *args, void *data, -+ const struct fuse_opt opts[], fuse_opt_proc_t proc); -+ -+/** -+ * Add an option to a comma separated option list -+ * -+ * @param opts is a pointer to an option list, may point to a NULL value -+ * @param opt is the option to add -+ * @return -1 on allocation 
error, 0 on success -+ */ -+int fuse_opt_add_opt(char **opts, const char *opt); -+ -+/** -+ * Add an option, escaping commas, to a comma separated option list -+ * -+ * @param opts is a pointer to an option list, may point to a NULL value -+ * @param opt is the option to add -+ * @return -1 on allocation error, 0 on success -+ */ -+int fuse_opt_add_opt_escaped(char **opts, const char *opt); -+ -+/** -+ * Add an argument to a NULL terminated argument vector -+ * -+ * @param args is the structure containing the current argument list -+ * @param arg is the new argument to add -+ * @return -1 on allocation error, 0 on success -+ */ -+int fuse_opt_add_arg(struct fuse_args *args, const char *arg); -+ -+/** -+ * Add an argument at the specified position in a NULL terminated -+ * argument vector -+ * -+ * Adds the argument to the N-th position. This is useful for adding -+ * options at the beginning of the array which must not come after the -+ * special '--' option. -+ * -+ * @param args is the structure containing the current argument list -+ * @param pos is the position at which to add the argument -+ * @param arg is the new argument to add -+ * @return -1 on allocation error, 0 on success -+ */ -+int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg); -+ -+/** -+ * Free the contents of argument list -+ * -+ * The structure itself is not freed -+ * -+ * @param args is the structure containing the argument list -+ */ -+void fuse_opt_free_args(struct fuse_args *args); -+ -+ -+/** -+ * Check if an option matches -+ * -+ * @param opts is the option description array -+ * @param opt is the option to match -+ * @return 1 if a match is found, 0 if not -+ */ -+int fuse_opt_match(const struct fuse_opt opts[], const char *opt); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* FUSE_OPT_H_ */ -diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h -new file mode 100644 -index 0000000..6b77c33 ---- /dev/null -+++ 
b/tools/virtiofsd/passthrough_helpers.h -@@ -0,0 +1,76 @@ -+/* -+ * FUSE: Filesystem in Userspace -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE -+ */ -+ -+/* -+ * Creates files on the underlying file system in response to a FUSE_MKNOD -+ * operation -+ */ -+static int mknod_wrapper(int dirfd, const char *path, const char *link, -+ int mode, dev_t rdev) -+{ -+ int res; -+ -+ if (S_ISREG(mode)) { -+ res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); -+ if (res >= 0) -+ res = close(res); -+ } else if (S_ISDIR(mode)) { -+ res = mkdirat(dirfd, path, mode); -+ } else if (S_ISLNK(mode) && link != NULL) { -+ res = symlinkat(link, dirfd, path); -+ } else if (S_ISFIFO(mode)) { -+ res = mkfifoat(dirfd, path, mode); 
-+#ifdef __FreeBSD__ -+ } else if (S_ISSOCK(mode)) { -+ struct sockaddr_un su; -+ int fd; -+ -+ if (strlen(path) >= sizeof(su.sun_path)) { -+ errno = ENAMETOOLONG; -+ return -1; -+ } -+ fd = socket(AF_UNIX, SOCK_STREAM, 0); -+ if (fd >= 0) { -+ /* -+ * We must bind the socket to the underlying file -+ * system to create the socket file, even though -+ * we'll never listen on this socket. -+ */ -+ su.sun_family = AF_UNIX; -+ strncpy(su.sun_path, path, sizeof(su.sun_path)); -+ res = bindat(dirfd, fd, (struct sockaddr*)&su, -+ sizeof(su)); -+ if (res == 0) -+ close(fd); -+ } else { -+ res = -1; -+ } -+#endif -+ } else { -+ res = mknodat(dirfd, path, mode, rdev); -+ } -+ -+ return res; -+} --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Remove-fuse_req_getgroups.patch b/SOURCES/kvm-virtiofsd-Remove-fuse_req_getgroups.patch deleted file mode 100644 index 27e71f2..0000000 --- a/SOURCES/kvm-virtiofsd-Remove-fuse_req_getgroups.patch +++ /dev/null @@ -1,193 +0,0 @@ -From 7a1860c83ff042f3e796c449e780ee0528107213 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:08 +0000 -Subject: [PATCH 12/18] virtiofsd: Remove fuse_req_getgroups -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-2-dgilbert@redhat.com> -Patchwork-id: 94122 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/7] virtiofsd: Remove fuse_req_getgroups -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: "Dr. David Alan Gilbert" - -Remove fuse_req_getgroups that's unused in virtiofsd; it came in -from libfuse but we don't actually use it. It was called from -fuse_getgroups which we previously removed (but had left it's header -in). - -Coverity had complained about null termination in it, but removing -it is the easiest answer. - -Fixes: Coverity CID: 1413117 (String not null terminated) -Signed-off-by: Dr. 
David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 988717b46b6424907618cb845ace9d69062703af) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/fuse.h | 20 ----------- - tools/virtiofsd/fuse_lowlevel.c | 77 ----------------------------------------- - tools/virtiofsd/fuse_lowlevel.h | 21 ----------- - 3 files changed, 118 deletions(-) - -diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h -index 7a4c713..aba13fe 100644 ---- a/tools/virtiofsd/fuse.h -+++ b/tools/virtiofsd/fuse.h -@@ -1007,26 +1007,6 @@ void fuse_exit(struct fuse *f); - struct fuse_context *fuse_get_context(void); - - /** -- * Get the current supplementary group IDs for the current request -- * -- * Similar to the getgroups(2) system call, except the return value is -- * always the total number of group IDs, even if it is larger than the -- * specified size. -- * -- * The current fuse kernel module in linux (as of 2.6.30) doesn't pass -- * the group list to userspace, hence this function needs to parse -- * "/proc/$TID/task/$TID/status" to get the group IDs. -- * -- * This feature may not be supported on all operating systems. In -- * such a case this function will return -ENOSYS. 
-- * -- * @param size size of given array -- * @param list array of group IDs to be filled in -- * @return the total number of supplementary group IDs or -errno on failure -- */ --int fuse_getgroups(int size, gid_t list[]); -- --/** - * Check if the current request has already been interrupted - * - * @return 1 if the request has been interrupted, 0 otherwise -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index de2e2e0..01c418a 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2667,83 +2667,6 @@ int fuse_lowlevel_is_virtio(struct fuse_session *se) - return !!se->virtio_dev; - } - --#ifdef linux --int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) --{ -- char *buf; -- size_t bufsize = 1024; -- char path[128]; -- int ret; -- int fd; -- unsigned long pid = req->ctx.pid; -- char *s; -- -- sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); -- --retry: -- buf = malloc(bufsize); -- if (buf == NULL) { -- return -ENOMEM; -- } -- -- ret = -EIO; -- fd = open(path, O_RDONLY); -- if (fd == -1) { -- goto out_free; -- } -- -- ret = read(fd, buf, bufsize); -- close(fd); -- if (ret < 0) { -- ret = -EIO; -- goto out_free; -- } -- -- if ((size_t)ret == bufsize) { -- free(buf); -- bufsize *= 4; -- goto retry; -- } -- -- ret = -EIO; -- s = strstr(buf, "\nGroups:"); -- if (s == NULL) { -- goto out_free; -- } -- -- s += 8; -- ret = 0; -- while (1) { -- char *end; -- unsigned long val = strtoul(s, &end, 0); -- if (end == s) { -- break; -- } -- -- s = end; -- if (ret < size) { -- list[ret] = val; -- } -- ret++; -- } -- --out_free: -- free(buf); -- return ret; --} --#else /* linux */ --/* -- * This is currently not implemented on other than Linux... 
-- */ --int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) --{ -- (void)req; -- (void)size; -- (void)list; -- return -ENOSYS; --} --#endif -- - void fuse_session_exit(struct fuse_session *se) - { - se->exited = 1; -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 138041e..8f6d705 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1705,27 +1705,6 @@ void *fuse_req_userdata(fuse_req_t req); - const struct fuse_ctx *fuse_req_ctx(fuse_req_t req); - - /** -- * Get the current supplementary group IDs for the specified request -- * -- * Similar to the getgroups(2) system call, except the return value is -- * always the total number of group IDs, even if it is larger than the -- * specified size. -- * -- * The current fuse kernel module in linux (as of 2.6.30) doesn't pass -- * the group list to userspace, hence this function needs to parse -- * "/proc/$TID/task/$TID/status" to get the group IDs. -- * -- * This feature may not be supported on all operating systems. In -- * such a case this function will return -ENOSYS. -- * -- * @param req request handle -- * @param size size of given array -- * @param list array of group IDs to be filled in -- * @return the total number of supplementary group IDs or -errno on failure -- */ --int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]); -- --/** - * Callback function for an interrupt - * - * @param req interrupted request --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch b/SOURCES/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch deleted file mode 100644 index 7f9c5bb..0000000 --- a/SOURCES/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch +++ /dev/null @@ -1,271 +0,0 @@ -From 80237df2b22eca685037456e65d149fed4654165 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:48 +0100 -Subject: [PATCH 017/116] virtiofsd: Remove unused enum fuse_buf_copy_flags -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-14-dgilbert@redhat.com> -Patchwork-id: 93465 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 013/112] virtiofsd: Remove unused enum fuse_buf_copy_flags -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Xiao Yang - -Signed-off-by: Xiao Yang -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 8c3fe75e0308ba2f01d160ace534b7e386cea808) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 7 +++--- - tools/virtiofsd/fuse_common.h | 46 +--------------------------------------- - tools/virtiofsd/fuse_lowlevel.c | 13 +++++------- - tools/virtiofsd/fuse_lowlevel.h | 35 ++---------------------------- - tools/virtiofsd/passthrough_ll.c | 4 ++-- - 5 files changed, 13 insertions(+), 92 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 5df946c..4d507f3 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -171,7 +171,7 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, - - static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, - const struct fuse_buf *src, size_t src_off, -- size_t len, enum fuse_buf_copy_flags flags) -+ size_t len) - { - int src_is_fd = src->flags & FUSE_BUF_IS_FD; - int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; -@@ -224,8 +224,7 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) - return 1; - } - --ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, -- enum fuse_buf_copy_flags flags) -+ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) - { - size_t copied = 0; - -@@ 
-249,7 +248,7 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, - dst_len = dst->size - dstv->off; - len = min_size(src_len, dst_len); - -- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); -+ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len); - if (res < 0) { - if (!copied) { - return res; -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index bd9bf86..0cb33ac 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -605,48 +605,6 @@ enum fuse_buf_flags { - }; - - /** -- * Buffer copy flags -- */ --enum fuse_buf_copy_flags { -- /** -- * Don't use splice(2) -- * -- * Always fall back to using read and write instead of -- * splice(2) to copy data from one file descriptor to another. -- * -- * If this flag is not set, then only fall back if splice is -- * unavailable. -- */ -- FUSE_BUF_NO_SPLICE = (1 << 1), -- -- /** -- * Force splice -- * -- * Always use splice(2) to copy data from one file descriptor -- * to another. If splice is not available, return -EINVAL. -- */ -- FUSE_BUF_FORCE_SPLICE = (1 << 2), -- -- /** -- * Try to move data with splice. -- * -- * If splice is used, try to move pages from the source to the -- * destination instead of copying. See documentation of -- * SPLICE_F_MOVE in splice(2) man page. -- */ -- FUSE_BUF_SPLICE_MOVE = (1 << 3), -- -- /** -- * Don't block on the pipe when copying data with splice -- * -- * Makes the operations on the pipe non-blocking (if the pipe -- * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) -- * man page. -- */ -- FUSE_BUF_SPLICE_NONBLOCK = (1 << 4), --}; -- --/** - * Single data buffer - * - * Generic data buffer for I/O, extended attributes, etc... 
Data may -@@ -741,11 +699,9 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); - * - * @param dst destination buffer vector - * @param src source buffer vector -- * @param flags flags controlling the copy - * @return actual number of bytes copied or -errno on error - */ --ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, -- enum fuse_buf_copy_flags flags); -+ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src); - - /* - * Signal handling -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index eb0ec49..3da80de 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -490,16 +490,14 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, - - static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int iov_count, -- struct fuse_bufvec *buf, unsigned int flags) -+ struct fuse_bufvec *buf) - { - size_t len = fuse_buf_size(buf); -- (void)flags; - - return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); - } - --int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags) -+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) - { - struct iovec iov[2]; - struct fuse_out_header out; -@@ -511,7 +509,7 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, - out.unique = req->unique; - out.error = 0; - -- res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); -+ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv); - if (res <= 0) { - fuse_free_req(req); - return res; -@@ -1969,8 +1967,7 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - } - - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags) -+ off_t offset, struct fuse_bufvec *bufv) - { - struct fuse_out_header out; - struct fuse_notify_store_out outarg; 
-@@ -1999,7 +1996,7 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - -- res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); -+ res = fuse_send_data_iov(se, NULL, iov, 2, bufv); - if (res > 0) { - res = -res; - } -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 12a84b4..2fa225d 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1363,33 +1363,6 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); - /** - * Reply with data copied/moved from buffer(s) - * -- * Zero copy data transfer ("splicing") will be used under -- * the following circumstances: -- * -- * 1. FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.want, and -- * 2. the kernel supports splicing from the fuse device -- * (FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.capable), and -- * 3. *flags* does not contain FUSE_BUF_NO_SPLICE -- * 4. The amount of data that is provided in file-descriptor backed -- * buffers (i.e., buffers for which bufv[n].flags == FUSE_BUF_FD) -- * is at least twice the page size. -- * -- * In order for SPLICE_F_MOVE to be used, the following additional -- * conditions have to be fulfilled: -- * -- * 1. FUSE_CAP_SPLICE_MOVE is set in fuse_conn_info.want, and -- * 2. the kernel supports it (i.e, FUSE_CAP_SPLICE_MOVE is set in -- fuse_conn_info.capable), and -- * 3. *flags* contains FUSE_BUF_SPLICE_MOVE -- * -- * Note that, if splice is used, the data is actually spliced twice: -- * once into a temporary pipe (to prepend header data), and then again -- * into the kernel. If some of the provided buffers are memory-backed, -- * the data in them is copied in step one and spliced in step two. -- * -- * The FUSE_BUF_SPLICE_FORCE_SPLICE and FUSE_BUF_SPLICE_NONBLOCK flags -- * are silently ignored. 
-- * - * Possible requests: - * read, readdir, getxattr, listxattr - * -@@ -1400,11 +1373,9 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); - * - * @param req request handle - * @param bufv buffer vector -- * @param flags flags controlling the copy - * @return zero for success, -errno for failure to send reply - */ --int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags); -+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv); - - /** - * Reply with data vector -@@ -1705,12 +1676,10 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - * @param ino the inode number - * @param offset the starting offset into the file to store to - * @param bufv buffer vector -- * @param flags flags controlling the copy - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags); -+ off_t offset, struct fuse_bufvec *bufv); - - /* - * Utility functions -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9377718..126a56c 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -931,7 +931,7 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, - buf.buf[0].fd = fi->fh; - buf.buf[0].pos = offset; - -- fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); -+ fuse_reply_data(req, &buf); - } - - static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, -@@ -952,7 +952,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - out_buf.buf[0].size, (unsigned long)off); - } - -- res = fuse_buf_copy(&out_buf, in_buf, 0); -+ res = fuse_buf_copy(&out_buf, in_buf); - if (res < 0) { - fuse_reply_err(req, -res); - } else { --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch 
b/SOURCES/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch deleted file mode 100644 index e1a3cd1..0000000 --- a/SOURCES/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch +++ /dev/null @@ -1,72 +0,0 @@ -From b8d62021f28114f054571b96ec0cd4dad4476923 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:14 +0100 -Subject: [PATCH 103/116] virtiofsd: Reset O_DIRECT flag during file open -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-100-dgilbert@redhat.com> -Patchwork-id: 93553 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 099/112] virtiofsd: Reset O_DIRECT flag during file open -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -If an application wants to do direct IO and opens a file with O_DIRECT -in guest, that does not necessarily mean that we need to bypass page -cache on host as well. So reset this flag on host. - -If somebody needs to bypass page cache on host as well (and it is safe to -do so), we can add a knob in daemon later to control this behavior. - -I check virtio-9p and they do reset O_DIRECT flag. - -Signed-off-by: Vivek Goyal -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 65da4539803373ec4eec97ffc49ee90083e56efd) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index ccbbec1..948cb19 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1721,6 +1721,13 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out; - } - -+ /* -+ * O_DIRECT in guest should not necessarily mean bypassing page -+ * cache on host as well. 
If somebody needs that behavior, it -+ * probably should be a configuration knob in daemon. -+ */ -+ fi->flags &= ~O_DIRECT; -+ - fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, - mode); - err = fd == -1 ? errno : 0; -@@ -1950,6 +1957,13 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - fi->flags &= ~O_APPEND; - } - -+ /* -+ * O_DIRECT in guest should not necessarily mean bypassing page -+ * cache on host as well. If somebody needs that behavior, it -+ * probably should be a configuration knob in daemon. -+ */ -+ fi->flags &= ~O_DIRECT; -+ - sprintf(buf, "%i", lo_fd(req, ino)); - fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); - if (fd == -1) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Send-replies-to-messages.patch b/SOURCES/kvm-virtiofsd-Send-replies-to-messages.patch deleted file mode 100644 index 5453fda..0000000 --- a/SOURCES/kvm-virtiofsd-Send-replies-to-messages.patch +++ /dev/null @@ -1,199 +0,0 @@ -From bb1f691dc410ce11ac9675ced70e78a3ce2511b0 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:03 +0100 -Subject: [PATCH 032/116] virtiofsd: Send replies to messages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-29-dgilbert@redhat.com> -Patchwork-id: 93485 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 028/112] virtiofsd: Send replies to messages -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Route fuse out messages back through the same queue elements -that had the command that triggered the request. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit df57ba919ec3edef9cc208d35685095e6e92713e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 4 ++ - tools/virtiofsd/fuse_virtio.c | 107 ++++++++++++++++++++++++++++++++++++++-- - tools/virtiofsd/fuse_virtio.h | 4 ++ - 3 files changed, 111 insertions(+), 4 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index af09fa2..380d93b 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -171,6 +171,10 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, - } - } - -+ if (fuse_lowlevel_is_virtio(se)) { -+ return virtio_send_msg(se, ch, iov, count); -+ } -+ - abort(); /* virtio should have taken it before here */ - return 0; - } -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 3841b20..05d0e29 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -41,6 +41,9 @@ struct fv_QueueInfo { - /* Our queue index, corresponds to array position */ - int qidx; - int kick_fd; -+ -+ /* The element for the command currently being processed */ -+ VuVirtqElement *qe; - }; - - /* -@@ -121,6 +124,105 @@ static void copy_from_iov(struct fuse_buf *buf, size_t out_num, - } - } - -+/* -+ * Copy from one iov to another, the given number of bytes -+ * The caller must have checked sizes. -+ */ -+static void copy_iov(struct iovec *src_iov, int src_count, -+ struct iovec *dst_iov, int dst_count, size_t to_copy) -+{ -+ size_t dst_offset = 0; -+ /* Outer loop copies 'src' elements */ -+ while (to_copy) { -+ assert(src_count); -+ size_t src_len = src_iov[0].iov_len; -+ size_t src_offset = 0; -+ -+ if (src_len > to_copy) { -+ src_len = to_copy; -+ } -+ /* Inner loop copies contents of one 'src' to maybe multiple dst. 
*/ -+ while (src_len) { -+ assert(dst_count); -+ size_t dst_len = dst_iov[0].iov_len - dst_offset; -+ if (dst_len > src_len) { -+ dst_len = src_len; -+ } -+ -+ memcpy(dst_iov[0].iov_base + dst_offset, -+ src_iov[0].iov_base + src_offset, dst_len); -+ src_len -= dst_len; -+ to_copy -= dst_len; -+ src_offset += dst_len; -+ dst_offset += dst_len; -+ -+ assert(dst_offset <= dst_iov[0].iov_len); -+ if (dst_offset == dst_iov[0].iov_len) { -+ dst_offset = 0; -+ dst_iov++; -+ dst_count--; -+ } -+ } -+ src_iov++; -+ src_count--; -+ } -+} -+ -+/* -+ * Called back by ll whenever it wants to send a reply/message back -+ * The 1st element of the iov starts with the fuse_out_header -+ * 'unique'==0 means it's a notify message. -+ */ -+int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count) -+{ -+ VuVirtqElement *elem; -+ VuVirtq *q; -+ -+ assert(count >= 1); -+ assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); -+ -+ struct fuse_out_header *out = iov[0].iov_base; -+ /* TODO: Endianness! */ -+ -+ size_t tosend_len = iov_size(iov, count); -+ -+ /* unique == 0 is notification, which we don't support */ -+ assert(out->unique); -+ /* For virtio we always have ch */ -+ assert(ch); -+ elem = ch->qi->qe; -+ q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; -+ -+ /* The 'in' part of the elem is to qemu */ -+ unsigned int in_num = elem->in_num; -+ struct iovec *in_sg = elem->in_sg; -+ size_t in_len = iov_size(in_sg, in_num); -+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", -+ __func__, elem->index, in_num, in_len); -+ -+ /* -+ * The elem should have room for a 'fuse_out_header' (out from fuse) -+ * plus the data based on the len in the header. 
-+ */ -+ if (in_len < sizeof(struct fuse_out_header)) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", -+ __func__, elem->index); -+ return -E2BIG; -+ } -+ if (in_len < tosend_len) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", -+ __func__, elem->index, tosend_len); -+ return -E2BIG; -+ } -+ -+ copy_iov(iov, count, in_sg, in_num, tosend_len); -+ vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); -+ vu_queue_notify(&se->virtio_dev->dev, q); -+ -+ return 0; -+} -+ - /* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { -@@ -226,13 +328,10 @@ static void *fv_queue_thread(void *opaque) - - /* TODO! Endianness of header */ - -- /* TODO: Fixup fuse_send_msg */ - /* TODO: Add checks for fuse_session_exited */ - fuse_session_process_buf_int(se, &fbuf, &ch); - -- /* TODO: vu_queue_push(dev, q, elem, qi->write_count); */ -- vu_queue_notify(dev, q); -- -+ qi->qe = NULL; - free(elem); - elem = NULL; - } -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -index 23026d6..135a148 100644 ---- a/tools/virtiofsd/fuse_virtio.h -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -22,4 +22,8 @@ int virtio_session_mount(struct fuse_session *se); - - int virtio_loop(struct fuse_session *se); - -+ -+int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count); -+ - #endif --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Show-submounts.patch b/SOURCES/kvm-virtiofsd-Show-submounts.patch deleted file mode 100644 index d45a030..0000000 --- a/SOURCES/kvm-virtiofsd-Show-submounts.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 717373379510cd6ecf8c6d0e1aae65edfac4551d Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 5 May 2020 16:35:58 +0100 -Subject: [PATCH 7/9] virtiofsd: Show submounts - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200505163600.22956-6-dgilbert@redhat.com> -Patchwork-id: 96273 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 5/7] virtiofsd: Show submounts -Bugzilla: 1817445 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Michael S. Tsirkin - -From: Max Reitz - -Currently, setup_mounts() bind-mounts the shared directory without -MS_REC. This makes all submounts disappear. - -Pass MS_REC so that the guest can see submounts again. - -Fixes: 5baa3b8e95064c2434bd9e2f312edd5e9ae275dc -Signed-off-by: Max Reitz -Message-Id: <20200424133516.73077-1-mreitz@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert - Changed Fixes to point to the commit with the problem rather than - the commit that turned it on -(cherry picked from commit ace0829c0d08f0e5f1451e402e94495bc2166772) - -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/passthrough_ll.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 73d8405..614ba55 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2670,7 +2670,7 @@ static void setup_mounts(const char *source) - int oldroot; - int newroot; - -- if (mount(source, source, NULL, MS_BIND, NULL) < 0) { -+ if (mount(source, source, NULL, MS_BIND | MS_REC, NULL) < 0) { - fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); - exit(1); - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Start-queue-threads.patch b/SOURCES/kvm-virtiofsd-Start-queue-threads.patch deleted file mode 100644 index 8b03cd6..0000000 --- a/SOURCES/kvm-virtiofsd-Start-queue-threads.patch +++ /dev/null @@ -1,165 +0,0 @@ -From 38282d996cde61261211160577b366b83cad8012 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:00 +0100 -Subject: [PATCH 029/116] virtiofsd: Start queue threads -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-26-dgilbert@redhat.com> -Patchwork-id: 93479 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 025/112] virtiofsd: Start queue threads -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Start a thread for each queue when we get notified it's been started. - -Signed-off-by: Dr. David Alan Gilbert -fix by: -Signed-off-by: Jun Piao -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e4c55a3c144493b436e40031e2eed61a84eca47b) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 89 +++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 89 insertions(+) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 4819e56..2a94bb3 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -11,6 +11,7 @@ - * See the file COPYING.LIB - */ - -+#include "qemu/osdep.h" - #include "fuse_virtio.h" - #include "fuse_i.h" - #include "standard-headers/linux/fuse.h" -@@ -30,6 +31,15 @@ - - #include "contrib/libvhost-user/libvhost-user.h" - -+struct fv_QueueInfo { -+ pthread_t thread; -+ struct fv_VuDev *virtio_dev; -+ -+ /* Our queue index, corresponds to array position */ -+ int qidx; -+ int kick_fd; -+}; -+ - /* - * We pass the dev element into libvhost-user - * and then use it to get back to the outer -@@ -38,6 +48,13 @@ - struct fv_VuDev { - VuDev dev; - struct fuse_session *se; -+ -+ /* -+ * The following pair of fields are only accessed in the main -+ * virtio_loop -+ */ -+ size_t nqueues; -+ struct fv_QueueInfo **qi; - }; - - /* From spec */ -@@ -83,6 
+100,75 @@ static void fv_panic(VuDev *dev, const char *err) - exit(EXIT_FAILURE); - } - -+static void *fv_queue_thread(void *opaque) -+{ -+ struct fv_QueueInfo *qi = opaque; -+ fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, -+ qi->qidx, qi->kick_fd); -+ while (1) { -+ /* TODO */ -+ } -+ -+ return NULL; -+} -+ -+/* Callback from libvhost-user on start or stop of a queue */ -+static void fv_queue_set_started(VuDev *dev, int qidx, bool started) -+{ -+ struct fv_VuDev *vud = container_of(dev, struct fv_VuDev, dev); -+ struct fv_QueueInfo *ourqi; -+ -+ fuse_log(FUSE_LOG_INFO, "%s: qidx=%d started=%d\n", __func__, qidx, -+ started); -+ assert(qidx >= 0); -+ -+ /* -+ * Ignore additional request queues for now. passthrough_ll.c must be -+ * audited for thread-safety issues first. It was written with a -+ * well-behaved client in mind and may not protect against all types of -+ * races yet. -+ */ -+ if (qidx > 1) { -+ fuse_log(FUSE_LOG_ERR, -+ "%s: multiple request queues not yet implemented, please only " -+ "configure 1 request queue\n", -+ __func__); -+ exit(EXIT_FAILURE); -+ } -+ -+ if (started) { -+ /* Fire up a thread to watch this queue */ -+ if (qidx >= vud->nqueues) { -+ vud->qi = realloc(vud->qi, (qidx + 1) * sizeof(vud->qi[0])); -+ assert(vud->qi); -+ memset(vud->qi + vud->nqueues, 0, -+ sizeof(vud->qi[0]) * (1 + (qidx - vud->nqueues))); -+ vud->nqueues = qidx + 1; -+ } -+ if (!vud->qi[qidx]) { -+ vud->qi[qidx] = calloc(sizeof(struct fv_QueueInfo), 1); -+ assert(vud->qi[qidx]); -+ vud->qi[qidx]->virtio_dev = vud; -+ vud->qi[qidx]->qidx = qidx; -+ } else { -+ /* Shouldn't have been started */ -+ assert(vud->qi[qidx]->kick_fd == -1); -+ } -+ ourqi = vud->qi[qidx]; -+ ourqi->kick_fd = dev->vq[qidx].kick_fd; -+ if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { -+ fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", -+ __func__, qidx); -+ assert(0); -+ } -+ } else { -+ /* TODO: Kill the thread */ -+ 
assert(qidx < vud->nqueues); -+ ourqi = vud->qi[qidx]; -+ ourqi->kick_fd = -1; -+ } -+} -+ - static bool fv_queue_order(VuDev *dev, int qidx) - { - return false; -@@ -92,6 +178,9 @@ static const VuDevIface fv_iface = { - .get_features = fv_get_features, - .set_features = fv_set_features, - -+ /* Don't need process message, we've not got any at vhost-user level */ -+ .queue_set_started = fv_queue_set_started, -+ - .queue_is_processed_in_order = fv_queue_order, - }; - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Start-reading-commands-from-queue.patch b/SOURCES/kvm-virtiofsd-Start-reading-commands-from-queue.patch deleted file mode 100644 index 2022480..0000000 --- a/SOURCES/kvm-virtiofsd-Start-reading-commands-from-queue.patch +++ /dev/null @@ -1,200 +0,0 @@ -From b4af2eff8ecadb4e2c9520602455f77fac2cb943 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:02 +0100 -Subject: [PATCH 031/116] virtiofsd: Start reading commands from queue -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-28-dgilbert@redhat.com> -Patchwork-id: 93484 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 027/112] virtiofsd: Start reading commands from queue -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Pop queue elements off queues, copy the data from them and -pass that to fuse. - - Note: 'out' in a VuVirtqElement is from QEMU - 'in' in libfuse is into the daemon - - So we read from the out iov's to get a fuse_in_header - -When we get a kick we've got to read all the elements until the queue -is empty. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit b509e1228b3e5eb83c14819045988999fc2dbd1b) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 2 + - tools/virtiofsd/fuse_virtio.c | 99 +++++++++++++++++++++++++++++++++++++++++-- - 2 files changed, 98 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index ec04449..1126723 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -14,6 +14,7 @@ - #include "fuse_lowlevel.h" - - struct fv_VuDev; -+struct fv_QueueInfo; - - struct fuse_req { - struct fuse_session *se; -@@ -75,6 +76,7 @@ struct fuse_chan { - pthread_mutex_t lock; - int ctr; - int fd; -+ struct fv_QueueInfo *qi; - }; - - /** -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 05e7258..3841b20 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -12,6 +12,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/iov.h" - #include "fuse_virtio.h" - #include "fuse_i.h" - #include "standard-headers/linux/fuse.h" -@@ -32,6 +33,7 @@ - - #include "contrib/libvhost-user/libvhost-user.h" - -+struct fv_VuDev; - struct fv_QueueInfo { - pthread_t thread; - struct fv_VuDev *virtio_dev; -@@ -101,10 +103,41 @@ static void fv_panic(VuDev *dev, const char *err) - exit(EXIT_FAILURE); - } - -+/* -+ * Copy from an iovec into a fuse_buf (memory only) -+ * Caller must ensure there is space -+ */ -+static void copy_from_iov(struct fuse_buf *buf, size_t out_num, -+ const struct iovec *out_sg) -+{ -+ void *dest = buf->mem; -+ -+ while (out_num) { -+ size_t onelen = out_sg->iov_len; -+ memcpy(dest, out_sg->iov_base, onelen); -+ dest += onelen; -+ out_sg++; -+ out_num--; -+ } -+} -+ - /* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { - struct fv_QueueInfo *qi = opaque; -+ struct VuDev *dev = &qi->virtio_dev->dev; -+ struct VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ 
struct fuse_session *se = qi->virtio_dev->se; -+ struct fuse_chan ch; -+ struct fuse_buf fbuf; -+ -+ fbuf.mem = NULL; -+ fbuf.flags = 0; -+ -+ fuse_mutex_init(&ch.lock); -+ ch.fd = (int)0xdaff0d111; -+ ch.qi = qi; -+ - fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, - qi->qidx, qi->kick_fd); - while (1) { -@@ -141,11 +174,71 @@ static void *fv_queue_thread(void *opaque) - fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); - break; - } -- if (qi->virtio_dev->se->debug) { -- fprintf(stderr, "%s: Queue %d gave evalue: %zx\n", __func__, -- qi->qidx, (size_t)evalue); -+ /* out is from guest, in is too guest */ -+ unsigned int in_bytes, out_bytes; -+ vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); -+ -+ fuse_log(FUSE_LOG_DEBUG, -+ "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", -+ __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); -+ -+ while (1) { -+ /* -+ * An element contains one request and the space to send our -+ * response They're spread over multiple descriptors in a -+ * scatter/gather set and we can't trust the guest to keep them -+ * still; so copy in/out. -+ */ -+ VuVirtqElement *elem = vu_queue_pop(dev, q, sizeof(VuVirtqElement)); -+ if (!elem) { -+ break; -+ } -+ -+ if (!fbuf.mem) { -+ fbuf.mem = malloc(se->bufsize); -+ assert(fbuf.mem); -+ assert(se->bufsize > sizeof(struct fuse_in_header)); -+ } -+ /* The 'out' part of the elem is from qemu */ -+ unsigned int out_num = elem->out_num; -+ struct iovec *out_sg = elem->out_sg; -+ size_t out_len = iov_size(out_sg, out_num); -+ fuse_log(FUSE_LOG_DEBUG, -+ "%s: elem %d: with %d out desc of length %zd\n", __func__, -+ elem->index, out_num, out_len); -+ -+ /* -+ * The elem should contain a 'fuse_in_header' (in to fuse) -+ * plus the data based on the len in the header. 
-+ */ -+ if (out_len < sizeof(struct fuse_in_header)) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", -+ __func__, elem->index); -+ assert(0); /* TODO */ -+ } -+ if (out_len > se->bufsize) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", -+ __func__, elem->index); -+ assert(0); /* TODO */ -+ } -+ copy_from_iov(&fbuf, out_num, out_sg); -+ fbuf.size = out_len; -+ -+ /* TODO! Endianness of header */ -+ -+ /* TODO: Fixup fuse_send_msg */ -+ /* TODO: Add checks for fuse_session_exited */ -+ fuse_session_process_buf_int(se, &fbuf, &ch); -+ -+ /* TODO: vu_queue_push(dev, q, elem, qi->write_count); */ -+ vu_queue_notify(dev, q); -+ -+ free(elem); -+ elem = NULL; - } - } -+ pthread_mutex_destroy(&ch.lock); -+ free(fbuf.mem); - - return NULL; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Start-wiring-up-vhost-user.patch b/SOURCES/kvm-virtiofsd-Start-wiring-up-vhost-user.patch deleted file mode 100644 index 7b50118..0000000 --- a/SOURCES/kvm-virtiofsd-Start-wiring-up-vhost-user.patch +++ /dev/null @@ -1,247 +0,0 @@ -From 020f593031b0b54e4c35faffea489b700aed6a72 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:57 +0100 -Subject: [PATCH 026/116] virtiofsd: Start wiring up vhost-user -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-23-dgilbert@redhat.com> -Patchwork-id: 93477 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 022/112] virtiofsd: Start wiring up vhost-user -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Listen on our unix socket for the connection from QEMU, when we get it -initialise vhost-user and dive into our own loop variant (currently -dummy). - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit f6f3573c6f271af5ded63ce28589a113f7205c72) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 4 ++ - tools/virtiofsd/fuse_lowlevel.c | 5 +++ - tools/virtiofsd/fuse_lowlevel.h | 7 ++++ - tools/virtiofsd/fuse_virtio.c | 87 +++++++++++++++++++++++++++++++++++++++- - tools/virtiofsd/fuse_virtio.h | 2 + - tools/virtiofsd/passthrough_ll.c | 7 +--- - 6 files changed, 106 insertions(+), 6 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 82d6ac7..ec04449 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -13,6 +13,8 @@ - #include "fuse.h" - #include "fuse_lowlevel.h" - -+struct fv_VuDev; -+ - struct fuse_req { - struct fuse_session *se; - uint64_t unique; -@@ -65,6 +67,8 @@ struct fuse_session { - size_t bufsize; - int error; - char *vu_socket_path; -+ int vu_socketfd; -+ struct fv_VuDev *virtio_dev; - }; - - struct fuse_chan { -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 5df124e..af09fa2 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2242,6 +2242,11 @@ void fuse_session_unmount(struct fuse_session *se) - { - } - -+int fuse_lowlevel_is_virtio(struct fuse_session *se) -+{ -+ return se->vu_socket_path != NULL; -+} -+ - #ifdef linux - int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) - { -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 2fa225d..f6b3470 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1755,6 +1755,13 @@ void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, - */ - int fuse_req_interrupted(fuse_req_t req); - -+/** -+ * Check if the session is connected via virtio -+ * -+ * @param se session object -+ * @return 1 if the session is a virtio session -+ */ -+int fuse_lowlevel_is_virtio(struct fuse_session *se); - - /* - * Inquiry functions -diff --git 
a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index cbef6ff..2ae3c76 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -19,18 +19,78 @@ - - #include - #include -+#include - #include - #include - #include - #include - #include - -+#include "contrib/libvhost-user/libvhost-user.h" -+ -+/* -+ * We pass the dev element into libvhost-user -+ * and then use it to get back to the outer -+ * container for other data. -+ */ -+struct fv_VuDev { -+ VuDev dev; -+ struct fuse_session *se; -+}; -+ - /* From spec */ - struct virtio_fs_config { - char tag[36]; - uint32_t num_queues; - }; - -+/* -+ * Callback from libvhost-user if there's a new fd we're supposed to listen -+ * to, typically a queue kick? -+ */ -+static void fv_set_watch(VuDev *dev, int fd, int condition, vu_watch_cb cb, -+ void *data) -+{ -+ fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd); -+} -+ -+/* -+ * Callback from libvhost-user if we're no longer supposed to listen on an fd -+ */ -+static void fv_remove_watch(VuDev *dev, int fd) -+{ -+ fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd); -+} -+ -+/* Callback from libvhost-user to panic */ -+static void fv_panic(VuDev *dev, const char *err) -+{ -+ fuse_log(FUSE_LOG_ERR, "%s: libvhost-user: %s\n", __func__, err); -+ /* TODO: Allow reconnects?? 
*/ -+ exit(EXIT_FAILURE); -+} -+ -+static bool fv_queue_order(VuDev *dev, int qidx) -+{ -+ return false; -+} -+ -+static const VuDevIface fv_iface = { -+ /* TODO: Add other callbacks */ -+ .queue_is_processed_in_order = fv_queue_order, -+}; -+ -+int virtio_loop(struct fuse_session *se) -+{ -+ fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__); -+ -+ while (1) { -+ /* TODO: Add stuffing */ -+ } -+ -+ fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); -+} -+ - int virtio_session_mount(struct fuse_session *se) - { - struct sockaddr_un un; -@@ -75,5 +135,30 @@ int virtio_session_mount(struct fuse_session *se) - return -1; - } - -- return -1; -+ fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n", -+ __func__); -+ int data_sock = accept(listen_sock, NULL, NULL); -+ if (data_sock == -1) { -+ fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n"); -+ close(listen_sock); -+ return -1; -+ } -+ close(listen_sock); -+ fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n", -+ __func__); -+ -+ /* TODO: Some cleanup/deallocation! 
*/ -+ se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1); -+ if (!se->virtio_dev) { -+ fuse_log(FUSE_LOG_ERR, "%s: virtio_dev calloc failed\n", __func__); -+ close(data_sock); -+ return -1; -+ } -+ -+ se->vu_socketfd = data_sock; -+ se->virtio_dev->se = se; -+ vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch, -+ fv_remove_watch, &fv_iface); -+ -+ return 0; - } -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -index 8f2edb6..23026d6 100644 ---- a/tools/virtiofsd/fuse_virtio.h -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -20,4 +20,6 @@ struct fuse_session; - - int virtio_session_mount(struct fuse_session *se); - -+int virtio_loop(struct fuse_session *se); -+ - #endif -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index fc9b264..037c5d7 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -36,6 +36,7 @@ - */ - - #include "qemu/osdep.h" -+#include "fuse_virtio.h" - #include "fuse_lowlevel.h" - #include - #include -@@ -1395,11 +1396,7 @@ int main(int argc, char *argv[]) - fuse_daemonize(opts.foreground); - - /* Block until ctrl+c or fusermount -u */ -- if (opts.singlethread) { -- ret = fuse_session_loop(se); -- } else { -- ret = fuse_session_loop_mt(se, opts.clone_fd); -- } -+ ret = virtio_loop(se); - - fuse_session_unmount(se); - err_out3: --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Support-remote-posix-locks.patch b/SOURCES/kvm-virtiofsd-Support-remote-posix-locks.patch deleted file mode 100644 index e60364a..0000000 --- a/SOURCES/kvm-virtiofsd-Support-remote-posix-locks.patch +++ /dev/null @@ -1,355 +0,0 @@ -From 8e46d0862c4c204f92c08ce2ae961921f270efb5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:03 +0100 -Subject: [PATCH 092/116] virtiofsd: Support remote posix locks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-89-dgilbert@redhat.com> -Patchwork-id: 93537 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 088/112] virtiofsd: Support remote posix locks -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -Doing posix locks with-in guest kernel are not sufficient if a file/dir -is being shared by multiple guests. So we need the notion of daemon doing -the locks which are visible to rest of the guests. - -Given posix locks are per process, one can not call posix lock API on host, -otherwise bunch of basic posix locks properties are broken. For example, -If two processes (A and B) in guest open the file and take locks on different -sections of file, if one of the processes closes the fd, it will close -fd on virtiofsd and all posix locks on file will go away. This means if -process A closes the fd, then locks of process B will go away too. - -Similar other problems exist too. - -This patch set tries to emulate posix locks while using open file -description locks provided on Linux. - -Daemon provides two options (-o posix_lock, -o no_posix_lock) to enable -or disable posix locking in daemon. By default it is enabled. - -There are few issues though. - -- GETLK() returns pid of process holding lock. As we are emulating locks - using OFD, and these locks are not per process and don't return pid - of process, so GETLK() in guest does not reuturn process pid. - -- As of now only F_SETLK is supported and not F_SETLKW. We can't block - the thread in virtiofsd for arbitrary long duration as there is only - one thread serving the queue. That means unlock request will not make - it to daemon and F_SETLKW will block infinitely and bring virtio-fs - to a halt. This is a solvable problem though and will require significant - changes in virtiofsd and kernel. Left as a TODO item for now. 
- -Signed-off-by: Vivek Goyal -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 0e81414c54161296212f6bc8a1c70526c4a9755a) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 3 + - tools/virtiofsd/passthrough_ll.c | 189 +++++++++++++++++++++++++++++++++++++++ - 2 files changed, 192 insertions(+) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5672024..33749bf 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -156,6 +156,9 @@ void fuse_cmdline_help(void) - " allowed (default: 10)\n" - " -o norace disable racy fallback\n" - " default: false\n" -+ " -o posix_lock|no_posix_lock\n" -+ " enable/disable remote posix lock\n" -+ " default: posix_lock\n" - " -o readdirplus|no_readdirplus\n" - " enable/disable readirplus\n" - " default: readdirplus except with " -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 05b5f89..9414935 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -67,6 +67,12 @@ - #include "passthrough_helpers.h" - #include "seccomp.h" - -+/* Keep track of inode posix locks for each owner. 
*/ -+struct lo_inode_plock { -+ uint64_t lock_owner; -+ int fd; /* fd for OFD locks */ -+}; -+ - struct lo_map_elem { - union { - struct lo_inode *inode; -@@ -95,6 +101,8 @@ struct lo_inode { - struct lo_key key; - uint64_t refcount; /* protected by lo->mutex */ - fuse_ino_t fuse_ino; -+ pthread_mutex_t plock_mutex; -+ GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ - }; - - struct lo_cred { -@@ -114,6 +122,7 @@ struct lo_data { - int norace; - int writeback; - int flock; -+ int posix_lock; - int xattr; - char *source; - double timeout; -@@ -137,6 +146,8 @@ static const struct fuse_opt lo_opts[] = { - { "source=%s", offsetof(struct lo_data, source), 0 }, - { "flock", offsetof(struct lo_data, flock), 1 }, - { "no_flock", offsetof(struct lo_data, flock), 0 }, -+ { "posix_lock", offsetof(struct lo_data, posix_lock), 1 }, -+ { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 }, - { "xattr", offsetof(struct lo_data, xattr), 1 }, - { "no_xattr", offsetof(struct lo_data, xattr), 0 }, - { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, -@@ -485,6 +496,17 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } -+ -+ if (conn->capable & FUSE_CAP_POSIX_LOCKS) { -+ if (lo->posix_lock) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n"); -+ conn->want |= FUSE_CAP_POSIX_LOCKS; -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n"); -+ conn->want &= ~FUSE_CAP_POSIX_LOCKS; -+ } -+ } -+ - if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || - lo->readdirplus_clear) { - fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); -@@ -772,6 +794,19 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - return p; - } - -+/* value_destroy_func for posix_locks GHashTable */ -+static void posix_locks_value_destroy(gpointer data) -+{ -+ struct lo_inode_plock *plock = data; -+ -+ 
/* -+ * We had used open() for locks and had only one fd. So -+ * closing this fd should release all OFD locks. -+ */ -+ close(plock->fd); -+ free(plock); -+} -+ - static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - struct fuse_entry_param *e) - { -@@ -825,6 +860,9 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - newfd = -1; - inode->key.ino = e->attr.st_ino; - inode->key.dev = e->attr.st_dev; -+ pthread_mutex_init(&inode->plock_mutex, NULL); -+ inode->posix_locks = g_hash_table_new_full( -+ g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); - - pthread_mutex_lock(&lo->mutex); - inode->fuse_ino = lo_add_inode_mapping(req, inode); -@@ -1160,6 +1198,11 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - if (!inode->refcount) { - lo_map_remove(&lo->ino_map, inode->fuse_ino); - g_hash_table_remove(lo->inodes, &inode->key); -+ if (g_hash_table_size(inode->posix_locks)) { -+ fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n"); -+ } -+ g_hash_table_destroy(inode->posix_locks); -+ pthread_mutex_destroy(&inode->plock_mutex); - pthread_mutex_unlock(&lo->mutex); - close(inode->fd); - free(inode); -@@ -1516,6 +1559,136 @@ out: - } - } - -+/* Should be called with inode->plock_mutex held */ -+static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, -+ struct lo_inode *inode, -+ uint64_t lock_owner, -+ pid_t pid, int *err) -+{ -+ struct lo_inode_plock *plock; -+ char procname[64]; -+ int fd; -+ -+ plock = -+ g_hash_table_lookup(inode->posix_locks, GUINT_TO_POINTER(lock_owner)); -+ -+ if (plock) { -+ return plock; -+ } -+ -+ plock = malloc(sizeof(struct lo_inode_plock)); -+ if (!plock) { -+ *err = ENOMEM; -+ return NULL; -+ } -+ -+ /* Open another instance of file which can be used for ofd locks. */ -+ sprintf(procname, "%i", inode->fd); -+ -+ /* TODO: What if file is not writable? 
*/ -+ fd = openat(lo->proc_self_fd, procname, O_RDWR); -+ if (fd == -1) { -+ *err = errno; -+ free(plock); -+ return NULL; -+ } -+ -+ plock->lock_owner = lock_owner; -+ plock->fd = fd; -+ g_hash_table_insert(inode->posix_locks, GUINT_TO_POINTER(plock->lock_owner), -+ plock); -+ return plock; -+} -+ -+static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct flock *lock) -+{ -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode; -+ struct lo_inode_plock *plock; -+ int ret, saverr = 0; -+ -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_getlk(ino=%" PRIu64 ", flags=%d)" -+ " owner=0x%lx, l_type=%d l_start=0x%lx" -+ " l_len=0x%lx\n", -+ ino, fi->flags, fi->lock_owner, lock->l_type, lock->l_start, -+ lock->l_len); -+ -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ pthread_mutex_lock(&inode->plock_mutex); -+ plock = -+ lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); -+ if (!plock) { -+ pthread_mutex_unlock(&inode->plock_mutex); -+ fuse_reply_err(req, ret); -+ return; -+ } -+ -+ ret = fcntl(plock->fd, F_OFD_GETLK, lock); -+ if (ret == -1) { -+ saverr = errno; -+ } -+ pthread_mutex_unlock(&inode->plock_mutex); -+ -+ if (saverr) { -+ fuse_reply_err(req, saverr); -+ } else { -+ fuse_reply_lock(req, lock); -+ } -+} -+ -+static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct flock *lock, int sleep) -+{ -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode; -+ struct lo_inode_plock *plock; -+ int ret, saverr = 0; -+ -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_setlk(ino=%" PRIu64 ", flags=%d)" -+ " cmd=%d pid=%d owner=0x%lx sleep=%d l_whence=%d" -+ " l_start=0x%lx l_len=0x%lx\n", -+ ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep, -+ lock->l_whence, lock->l_start, lock->l_len); -+ -+ if (sleep) { -+ fuse_reply_err(req, EOPNOTSUPP); -+ return; -+ } -+ -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, 
EBADF); -+ return; -+ } -+ -+ pthread_mutex_lock(&inode->plock_mutex); -+ plock = -+ lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); -+ -+ if (!plock) { -+ pthread_mutex_unlock(&inode->plock_mutex); -+ fuse_reply_err(req, ret); -+ return; -+ } -+ -+ /* TODO: Is it alright to modify flock? */ -+ lock->l_pid = 0; -+ ret = fcntl(plock->fd, F_OFD_SETLK, lock); -+ if (ret == -1) { -+ saverr = errno; -+ } -+ pthread_mutex_unlock(&inode->plock_mutex); -+ fuse_reply_err(req, saverr); -+} -+ - static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - struct fuse_file_info *fi) - { -@@ -1617,6 +1790,19 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { - int res; - (void)ino; -+ struct lo_inode *inode; -+ -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ /* An fd is going away. Cleanup associated posix locks */ -+ pthread_mutex_lock(&inode->plock_mutex); -+ g_hash_table_remove(inode->posix_locks, GUINT_TO_POINTER(fi->lock_owner)); -+ pthread_mutex_unlock(&inode->plock_mutex); -+ - res = close(dup(lo_fi_fd(req, fi))); - fuse_reply_err(req, res == -1 ? 
errno : 0); - } -@@ -2080,6 +2266,8 @@ static struct fuse_lowlevel_ops lo_oper = { - .releasedir = lo_releasedir, - .fsyncdir = lo_fsyncdir, - .create = lo_create, -+ .getlk = lo_getlk, -+ .setlk = lo_setlk, - .open = lo_open, - .release = lo_release, - .flush = lo_flush, -@@ -2434,6 +2622,7 @@ int main(int argc, char *argv[]) - struct lo_data lo = { - .debug = 0, - .writeback = 0, -+ .posix_lock = 1, - .proc_self_fd = -1, - }; - struct lo_map_elem *root_elem; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Trim-down-imported-files.patch b/SOURCES/kvm-virtiofsd-Trim-down-imported-files.patch deleted file mode 100644 index f3f1e85..0000000 --- a/SOURCES/kvm-virtiofsd-Trim-down-imported-files.patch +++ /dev/null @@ -1,1582 +0,0 @@ -From 9d3788b1c2fa5cb4f14e292232a05c6a5217802d Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:44 +0100 -Subject: [PATCH 013/116] virtiofsd: Trim down imported files -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-10-dgilbert@redhat.com> -Patchwork-id: 93463 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 009/112] virtiofsd: Trim down imported files -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -There's a lot of the original fuse code we don't need; trim them down. - -Signed-off-by: Dr. David Alan Gilbert -with additional trimming by: -Signed-off-by: Misono Tomohiro -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Xiao Yang -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit a3e23f325439a290c504d6bbc48c2e742149ecab) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 71 +--- - tools/virtiofsd/fuse.h | 46 --- - tools/virtiofsd/fuse_common.h | 32 -- - tools/virtiofsd/fuse_i.h | 41 --- - tools/virtiofsd/fuse_log.h | 8 - - tools/virtiofsd/fuse_lowlevel.c | 675 +--------------------------------- - tools/virtiofsd/fuse_lowlevel.h | 28 -- - tools/virtiofsd/fuse_opt.h | 8 - - tools/virtiofsd/helper.c | 143 ------- - tools/virtiofsd/passthrough_helpers.h | 26 -- - tools/virtiofsd/passthrough_ll.c | 1 - - 11 files changed, 8 insertions(+), 1071 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 5ab9b87..aefb7db 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -157,73 +157,6 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, - return copied; - } - --#ifdef HAVE_SPLICE --static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len, enum fuse_buf_copy_flags flags) --{ -- int splice_flags = 0; -- off_t *srcpos = NULL; -- off_t *dstpos = NULL; -- off_t srcpos_val; -- off_t dstpos_val; -- ssize_t res; -- size_t copied = 0; -- -- if (flags & FUSE_BUF_SPLICE_MOVE) -- splice_flags |= SPLICE_F_MOVE; -- if (flags & FUSE_BUF_SPLICE_NONBLOCK) -- splice_flags |= SPLICE_F_NONBLOCK; -- -- if (src->flags & FUSE_BUF_FD_SEEK) { -- srcpos_val = src->pos + src_off; -- srcpos = &srcpos_val; -- } -- if (dst->flags & FUSE_BUF_FD_SEEK) { -- dstpos_val = dst->pos + dst_off; -- dstpos = &dstpos_val; -- } -- -- while (len) { -- res = splice(src->fd, srcpos, dst->fd, dstpos, len, -- splice_flags); -- if (res == -1) { -- if (copied) -- break; -- -- if (errno != EINVAL || (flags & FUSE_BUF_FORCE_SPLICE)) -- return -errno; -- -- /* Maybe splice is not supported for this combination */ -- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, -- len); 
-- } -- if (res == 0) -- break; -- -- copied += res; -- if (!(src->flags & FUSE_BUF_FD_RETRY) && -- !(dst->flags & FUSE_BUF_FD_RETRY)) { -- break; -- } -- -- len -= res; -- } -- -- return copied; --} --#else --static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len, enum fuse_buf_copy_flags flags) --{ -- (void) flags; -- -- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); --} --#endif -- -- - static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, - const struct fuse_buf *src, size_t src_off, - size_t len, enum fuse_buf_copy_flags flags) -@@ -247,10 +180,8 @@ static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, - return fuse_buf_write(dst, dst_off, src, src_off, len); - } else if (!dst_is_fd) { - return fuse_buf_read(dst, dst_off, src, src_off, len); -- } else if (flags & FUSE_BUF_NO_SPLICE) { -- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); - } else { -- return fuse_buf_splice(dst, dst_off, src, src_off, len, flags); -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); - } - } - -diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h -index 883f6e5..3202fba 100644 ---- a/tools/virtiofsd/fuse.h -+++ b/tools/virtiofsd/fuse.h -@@ -25,10 +25,6 @@ - #include - #include - --#ifdef __cplusplus --extern "C" { --#endif -- - /* ----------------------------------------------------------- * - * Basic FUSE API * - * ----------------------------------------------------------- */ -@@ -979,44 +975,6 @@ int fuse_loop(struct fuse *f); - void fuse_exit(struct fuse *f); - - /** -- * FUSE event loop with multiple threads -- * -- * Requests from the kernel are processed, and the appropriate -- * operations are called. Request are processed in parallel by -- * distributing them between multiple threads. 
-- * -- * For a description of the return value and the conditions when the -- * event loop exits, refer to the documentation of -- * fuse_session_loop(). -- * -- * Note: using fuse_loop() instead of fuse_loop_mt() means you are running in -- * single-threaded mode, and that you will not have to worry about reentrancy, -- * though you will have to worry about recursive lookups. In single-threaded -- * mode, FUSE will wait for one callback to return before calling another. -- * -- * Enabling multiple threads, by using fuse_loop_mt(), will cause FUSE to make -- * multiple simultaneous calls into the various callback functions given by your -- * fuse_operations record. -- * -- * If you are using multiple threads, you can enjoy all the parallel execution -- * and interactive response benefits of threads, and you get to enjoy all the -- * benefits of race conditions and locking bugs, too. Ensure that any code used -- * in the callback function of fuse_operations is also thread-safe. -- * -- * @param f the FUSE handle -- * @param config loop configuration -- * @return see fuse_session_loop() -- * -- * See also: fuse_loop() -- */ --#if FUSE_USE_VERSION < 32 --int fuse_loop_mt_31(struct fuse *f, int clone_fd); --#define fuse_loop_mt(f, clone_fd) fuse_loop_mt_31(f, clone_fd) --#else --int fuse_loop_mt(struct fuse *f, struct fuse_loop_config *config); --#endif -- --/** - * Get the current context - * - * The context is only valid for the duration of a filesystem -@@ -1268,8 +1226,4 @@ struct fuse_session *fuse_get_session(struct fuse *f); - */ - int fuse_open_channel(const char *mountpoint, const char *options); - --#ifdef __cplusplus --} --#endif -- - #endif /* FUSE_H_ */ -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index 2d686b2..bf8f8cc 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -28,10 +28,6 @@ - #define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) - #define FUSE_VERSION 
FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) - --#ifdef __cplusplus --extern "C" { --#endif -- - /** - * Information about an open file. - * -@@ -100,30 +96,6 @@ struct fuse_file_info { - uint32_t poll_events; - }; - --/** -- * Configuration parameters passed to fuse_session_loop_mt() and -- * fuse_loop_mt(). -- */ --struct fuse_loop_config { -- /** -- * whether to use separate device fds for each thread -- * (may increase performance) -- */ -- int clone_fd; -- -- /** -- * The maximum number of available worker threads before they -- * start to get deleted when they become idle. If not -- * specified, the default is 10. -- * -- * Adjusting this has performance implications; a very small number -- * of threads in the pool will cause a lot of thread creation and -- * deletion overhead and performance may suffer. When set to 0, a new -- * thread will be created to service every operation. -- */ -- unsigned int max_idle_threads; --}; -- - /************************************************************************** - * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * - **************************************************************************/ -@@ -802,10 +774,6 @@ void fuse_remove_signal_handlers(struct fuse_session *se); - # error only API version 30 or greater is supported - #endif - --#ifdef __cplusplus --} --#endif -- - - /* - * This interface uses 64 bit off_t. 
-diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index d38b630..b39522e 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -9,8 +9,6 @@ - #include "fuse.h" - #include "fuse_lowlevel.h" - --struct mount_opts; -- - struct fuse_req { - struct fuse_session *se; - uint64_t unique; -@@ -45,7 +43,6 @@ struct fuse_session { - char *mountpoint; - volatile int exited; - int fd; -- struct mount_opts *mo; - int debug; - int deny_others; - struct fuse_lowlevel_ops op; -@@ -58,7 +55,6 @@ struct fuse_session { - struct fuse_req interrupts; - pthread_mutex_t lock; - int got_destroy; -- pthread_key_t pipe_key; - int broken_splice_nonblock; - uint64_t notify_ctr; - struct fuse_notify_req notify_list; -@@ -87,53 +83,16 @@ struct fuse_module { - int ctr; - }; - --/* ----------------------------------------------------------- * -- * Channel interface (when using -o clone_fd) * -- * ----------------------------------------------------------- */ -- --/** -- * Obtain counted reference to the channel -- * -- * @param ch the channel -- * @return the channel -- */ --struct fuse_chan *fuse_chan_get(struct fuse_chan *ch); -- --/** -- * Drop counted reference to a channel -- * -- * @param ch the channel -- */ --void fuse_chan_put(struct fuse_chan *ch); -- --struct mount_opts *parse_mount_opts(struct fuse_args *args); --void destroy_mount_opts(struct mount_opts *mo); --void fuse_mount_version(void); --unsigned get_max_read(struct mount_opts *o); --void fuse_kern_unmount(const char *mountpoint, int fd); --int fuse_kern_mount(const char *mountpoint, struct mount_opts *mo); -- - int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, - int count); - void fuse_free_req(fuse_req_t req); - --void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg); -- --int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg); -- --int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, -- 
struct fuse_chan *ch); - void fuse_session_process_buf_int(struct fuse_session *se, - const struct fuse_buf *buf, struct fuse_chan *ch); - --struct fuse *fuse_new_31(struct fuse_args *args, const struct fuse_operations *op, -- size_t op_size, void *private_data); --int fuse_loop_mt_32(struct fuse *f, struct fuse_loop_config *config); --int fuse_session_loop_mt_32(struct fuse_session *se, struct fuse_loop_config *config); - - #define FUSE_MAX_MAX_PAGES 256 - #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 - - /* room needed in buffer to accommodate header */ - #define FUSE_BUFFER_HEADER_SIZE 0x1000 -- -diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h -index 5e112e0..0af700d 100644 ---- a/tools/virtiofsd/fuse_log.h -+++ b/tools/virtiofsd/fuse_log.h -@@ -16,10 +16,6 @@ - - #include - --#ifdef __cplusplus --extern "C" { --#endif -- - /** - * Log severity level - * -@@ -75,8 +71,4 @@ void fuse_set_log_func(fuse_log_func_t func); - */ - void fuse_log(enum fuse_log_level level, const char *fmt, ...); - --#ifdef __cplusplus --} --#endif -- - #endif /* FUSE_LOG_H_ */ -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index f2d7038..e6fa247 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -16,7 +16,6 @@ - #include "fuse_kernel.h" - #include "fuse_opt.h" - #include "fuse_misc.h" --#include "mount_util.h" - - #include - #include -@@ -28,12 +27,6 @@ - #include - #include - --#ifndef F_LINUX_SPECIFIC_BASE --#define F_LINUX_SPECIFIC_BASE 1024 --#endif --#ifndef F_SETPIPE_SZ --#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) --#endif - - - #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) -@@ -137,7 +130,6 @@ void fuse_free_req(fuse_req_t req) - req->u.ni.data = NULL; - list_del_req(req); - ctr = --req->ctr; -- fuse_chan_put(req->ch); - req->ch = NULL; - pthread_mutex_unlock(&se->lock); - if (!ctr) -@@ -184,19 +176,7 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan 
*ch, - } - } - -- ssize_t res = writev(ch ? ch->fd : se->fd, -- iov, count); -- int err = errno; -- -- if (res == -1) { -- assert(se != NULL); -- -- /* ENOENT means the operation was interrupted */ -- if (!fuse_session_exited(se) && err != ENOENT) -- perror("fuse: writing device"); -- return -err; -- } -- -+ abort(); /* virtio should have taken it before here */ - return 0; - } - -@@ -480,10 +460,6 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, - struct fuse_bufvec *buf, - size_t len) - { -- struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); -- void *mbuf; -- int res; -- - /* Optimize common case */ - if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && - !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { -@@ -496,350 +472,10 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, - return fuse_send_msg(se, ch, iov, iov_count); - } - -- res = posix_memalign(&mbuf, pagesize, len); -- if (res != 0) -- return res; -- -- mem_buf.buf[0].mem = mbuf; -- res = fuse_buf_copy(&mem_buf, buf, 0); -- if (res < 0) { -- free(mbuf); -- return -res; -- } -- len = res; -- -- iov[iov_count].iov_base = mbuf; -- iov[iov_count].iov_len = len; -- iov_count++; -- res = fuse_send_msg(se, ch, iov, iov_count); -- free(mbuf); -- -- return res; --} -- --struct fuse_ll_pipe { -- size_t size; -- int can_grow; -- int pipe[2]; --}; -- --static void fuse_ll_pipe_free(struct fuse_ll_pipe *llp) --{ -- close(llp->pipe[0]); -- close(llp->pipe[1]); -- free(llp); --} -- --#ifdef HAVE_SPLICE --#if !defined(HAVE_PIPE2) || !defined(O_CLOEXEC) --static int fuse_pipe(int fds[2]) --{ -- int rv = pipe(fds); -- -- if (rv == -1) -- return rv; -- -- if (fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1 || -- fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1 || -- fcntl(fds[0], F_SETFD, FD_CLOEXEC) == -1 || -- fcntl(fds[1], F_SETFD, FD_CLOEXEC) == -1) { -- close(fds[0]); -- close(fds[1]); -- rv = -1; -- } -- return rv; --} --#else --static int fuse_pipe(int fds[2]) --{ -- return pipe2(fds, O_CLOEXEC | 
O_NONBLOCK); --} --#endif -- --static struct fuse_ll_pipe *fuse_ll_get_pipe(struct fuse_session *se) --{ -- struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); -- if (llp == NULL) { -- int res; -- -- llp = malloc(sizeof(struct fuse_ll_pipe)); -- if (llp == NULL) -- return NULL; -- -- res = fuse_pipe(llp->pipe); -- if (res == -1) { -- free(llp); -- return NULL; -- } -- -- /* -- *the default size is 16 pages on linux -- */ -- llp->size = pagesize * 16; -- llp->can_grow = 1; -- -- pthread_setspecific(se->pipe_key, llp); -- } -- -- return llp; --} --#endif -- --static void fuse_ll_clear_pipe(struct fuse_session *se) --{ -- struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); -- if (llp) { -- pthread_setspecific(se->pipe_key, NULL); -- fuse_ll_pipe_free(llp); -- } --} -- --#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) --static int read_back(int fd, char *buf, size_t len) --{ -- int res; -- -- res = read(fd, buf, len); -- if (res == -1) { -- fuse_log(FUSE_LOG_ERR, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); -- return -EIO; -- } -- if (res != len) { -- fuse_log(FUSE_LOG_ERR, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); -- return -EIO; -- } -+ abort(); /* Will have taken vhost path */ - return 0; - } - --static int grow_pipe_to_max(int pipefd) --{ -- int max; -- int res; -- int maxfd; -- char buf[32]; -- -- maxfd = open("/proc/sys/fs/pipe-max-size", O_RDONLY); -- if (maxfd < 0) -- return -errno; -- -- res = read(maxfd, buf, sizeof(buf) - 1); -- if (res < 0) { -- int saved_errno; -- -- saved_errno = errno; -- close(maxfd); -- return -saved_errno; -- } -- close(maxfd); -- buf[res] = '\0'; -- -- max = atoi(buf); -- res = fcntl(pipefd, F_SETPIPE_SZ, max); -- if (res < 0) -- return -errno; -- return max; --} -- --static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -- struct iovec *iov, int iov_count, -- struct fuse_bufvec *buf, unsigned int flags) --{ -- int res; 
-- size_t len = fuse_buf_size(buf); -- struct fuse_out_header *out = iov[0].iov_base; -- struct fuse_ll_pipe *llp; -- int splice_flags; -- size_t pipesize; -- size_t total_fd_size; -- size_t idx; -- size_t headerlen; -- struct fuse_bufvec pipe_buf = FUSE_BUFVEC_INIT(len); -- -- if (se->broken_splice_nonblock) -- goto fallback; -- -- if (flags & FUSE_BUF_NO_SPLICE) -- goto fallback; -- -- total_fd_size = 0; -- for (idx = buf->idx; idx < buf->count; idx++) { -- if (buf->buf[idx].flags & FUSE_BUF_IS_FD) { -- total_fd_size = buf->buf[idx].size; -- if (idx == buf->idx) -- total_fd_size -= buf->off; -- } -- } -- if (total_fd_size < 2 * pagesize) -- goto fallback; -- -- if (se->conn.proto_minor < 14 || -- !(se->conn.want & FUSE_CAP_SPLICE_WRITE)) -- goto fallback; -- -- llp = fuse_ll_get_pipe(se); -- if (llp == NULL) -- goto fallback; -- -- -- headerlen = iov_length(iov, iov_count); -- -- out->len = headerlen + len; -- -- /* -- * Heuristic for the required pipe size, does not work if the -- * source contains less than page size fragments -- */ -- pipesize = pagesize * (iov_count + buf->count + 1) + out->len; -- -- if (llp->size < pipesize) { -- if (llp->can_grow) { -- res = fcntl(llp->pipe[0], F_SETPIPE_SZ, pipesize); -- if (res == -1) { -- res = grow_pipe_to_max(llp->pipe[0]); -- if (res > 0) -- llp->size = res; -- llp->can_grow = 0; -- goto fallback; -- } -- llp->size = res; -- } -- if (llp->size < pipesize) -- goto fallback; -- } -- -- -- res = vmsplice(llp->pipe[1], iov, iov_count, SPLICE_F_NONBLOCK); -- if (res == -1) -- goto fallback; -- -- if (res != headerlen) { -- res = -EIO; -- fuse_log(FUSE_LOG_ERR, "fuse: short vmsplice to pipe: %u/%zu\n", res, -- headerlen); -- goto clear_pipe; -- } -- -- pipe_buf.buf[0].flags = FUSE_BUF_IS_FD; -- pipe_buf.buf[0].fd = llp->pipe[1]; -- -- res = fuse_buf_copy(&pipe_buf, buf, -- FUSE_BUF_FORCE_SPLICE | FUSE_BUF_SPLICE_NONBLOCK); -- if (res < 0) { -- if (res == -EAGAIN || res == -EINVAL) { -- /* -- * Should only get EAGAIN on 
kernels with -- * broken SPLICE_F_NONBLOCK support (<= -- * 2.6.35) where this error or a short read is -- * returned even if the pipe itself is not -- * full -- * -- * EINVAL might mean that splice can't handle -- * this combination of input and output. -- */ -- if (res == -EAGAIN) -- se->broken_splice_nonblock = 1; -- -- pthread_setspecific(se->pipe_key, NULL); -- fuse_ll_pipe_free(llp); -- goto fallback; -- } -- res = -res; -- goto clear_pipe; -- } -- -- if (res != 0 && res < len) { -- struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); -- void *mbuf; -- size_t now_len = res; -- /* -- * For regular files a short count is either -- * 1) due to EOF, or -- * 2) because of broken SPLICE_F_NONBLOCK (see above) -- * -- * For other inputs it's possible that we overflowed -- * the pipe because of small buffer fragments. -- */ -- -- res = posix_memalign(&mbuf, pagesize, len); -- if (res != 0) -- goto clear_pipe; -- -- mem_buf.buf[0].mem = mbuf; -- mem_buf.off = now_len; -- res = fuse_buf_copy(&mem_buf, buf, 0); -- if (res > 0) { -- char *tmpbuf; -- size_t extra_len = res; -- /* -- * Trickiest case: got more data. Need to get -- * back the data from the pipe and then fall -- * back to regular write. 
-- */ -- tmpbuf = malloc(headerlen); -- if (tmpbuf == NULL) { -- free(mbuf); -- res = ENOMEM; -- goto clear_pipe; -- } -- res = read_back(llp->pipe[0], tmpbuf, headerlen); -- free(tmpbuf); -- if (res != 0) { -- free(mbuf); -- goto clear_pipe; -- } -- res = read_back(llp->pipe[0], mbuf, now_len); -- if (res != 0) { -- free(mbuf); -- goto clear_pipe; -- } -- len = now_len + extra_len; -- iov[iov_count].iov_base = mbuf; -- iov[iov_count].iov_len = len; -- iov_count++; -- res = fuse_send_msg(se, ch, iov, iov_count); -- free(mbuf); -- return res; -- } -- free(mbuf); -- res = now_len; -- } -- len = res; -- out->len = headerlen + len; -- -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, -- " unique: %llu, success, outsize: %i (splice)\n", -- (unsigned long long) out->unique, out->len); -- } -- -- splice_flags = 0; -- if ((flags & FUSE_BUF_SPLICE_MOVE) && -- (se->conn.want & FUSE_CAP_SPLICE_MOVE)) -- splice_flags |= SPLICE_F_MOVE; -- -- res = splice(llp->pipe[0], NULL, ch ? ch->fd : se->fd, -- NULL, out->len, splice_flags); -- if (res == -1) { -- res = -errno; -- perror("fuse: splice from pipe"); -- goto clear_pipe; -- } -- if (res != out->len) { -- res = -EIO; -- fuse_log(FUSE_LOG_ERR, "fuse: short splice from pipe: %u/%u\n", -- res, out->len); -- goto clear_pipe; -- } -- return 0; -- --clear_pipe: -- fuse_ll_clear_pipe(se); -- return res; -- --fallback: -- return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); --} --#else - static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int iov_count, - struct fuse_bufvec *buf, unsigned int flags) -@@ -849,7 +485,6 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - - return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); - } --#endif - - int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, - enum fuse_buf_copy_flags flags) -@@ -1408,16 +1043,11 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void 
*inarg, - if (bufv.buf[0].size < arg->size) { - fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); - fuse_reply_err(req, EIO); -- goto out; -+ return; - } - bufv.buf[0].size = arg->size; - - se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); -- --out: -- /* Need to reset the pipe if ->write_buf() didn't consume all data */ -- if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) -- fuse_ll_clear_pipe(se); - } - - static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -@@ -2038,17 +1668,6 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - return; - } - -- unsigned max_read_mo = get_max_read(se->mo); -- if (se->conn.max_read != max_read_mo) { -- fuse_log(FUSE_LOG_ERR, "fuse: error: init() and fuse_session_new() " -- "requested different maximum read size (%u vs %u)\n", -- se->conn.max_read, max_read_mo); -- fuse_reply_err(req, EPROTO); -- se->error = -EPROTO; -- fuse_session_exit(se); -- return; -- } -- - if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { - se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; - } -@@ -2364,8 +1983,6 @@ static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, - } - out: - free(rreq); -- if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) -- fuse_ll_clear_pipe(se); - } - - int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -@@ -2496,7 +2113,6 @@ static struct { - [FUSE_RENAME2] = { do_rename2, "RENAME2" }, - [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, - [FUSE_LSEEK] = { do_lseek, "LSEEK" }, -- [CUSE_INIT] = { cuse_lowlevel_init, "CUSE_INIT" }, - }; - - #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) -@@ -2509,21 +2125,6 @@ static const char *opname(enum fuse_opcode opcode) - return fuse_ll_ops[opcode].name; - } - --static int fuse_ll_copy_from_pipe(struct fuse_bufvec *dst, -- struct fuse_bufvec *src) --{ -- ssize_t res = fuse_buf_copy(dst, src, 0); -- if 
(res < 0) { -- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", strerror(-res)); -- return res; -- } -- if ((size_t)res < fuse_buf_size(dst)) { -- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); -- return -1; -- } -- return 0; --} -- - void fuse_session_process_buf(struct fuse_session *se, - const struct fuse_buf *buf) - { -@@ -2533,36 +2134,12 @@ void fuse_session_process_buf(struct fuse_session *se, - void fuse_session_process_buf_int(struct fuse_session *se, - const struct fuse_buf *buf, struct fuse_chan *ch) - { -- const size_t write_header_size = sizeof(struct fuse_in_header) + -- sizeof(struct fuse_write_in); -- struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 }; -- struct fuse_bufvec tmpbuf = FUSE_BUFVEC_INIT(write_header_size); - struct fuse_in_header *in; - const void *inarg; - struct fuse_req *req; -- void *mbuf = NULL; - int err; -- int res; -- -- if (buf->flags & FUSE_BUF_IS_FD) { -- if (buf->size < tmpbuf.buf[0].size) -- tmpbuf.buf[0].size = buf->size; - -- mbuf = malloc(tmpbuf.buf[0].size); -- if (mbuf == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate header\n"); -- goto clear_pipe; -- } -- tmpbuf.buf[0].mem = mbuf; -- -- res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); -- if (res < 0) -- goto clear_pipe; -- -- in = mbuf; -- } else { -- in = buf->mem; -- } -+ in = buf->mem; - - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, -@@ -2584,14 +2161,14 @@ void fuse_session_process_buf_int(struct fuse_session *se, - }; - - fuse_send_msg(se, ch, &iov, 1); -- goto clear_pipe; -+ return; - } - - req->unique = in->unique; - req->ctx.uid = in->uid; - req->ctx.gid = in->gid; - req->ctx.pid = in->pid; -- req->ch = ch ? 
fuse_chan_get(ch) : NULL; -+ req->ch = ch; - - err = EIO; - if (!se->got_init) { -@@ -2627,28 +2204,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, - fuse_reply_err(intr, EAGAIN); - } - -- if ((buf->flags & FUSE_BUF_IS_FD) && write_header_size < buf->size && -- (in->opcode != FUSE_WRITE || !se->op.write_buf) && -- in->opcode != FUSE_NOTIFY_REPLY) { -- void *newmbuf; -- -- err = ENOMEM; -- newmbuf = realloc(mbuf, buf->size); -- if (newmbuf == NULL) -- goto reply_err; -- mbuf = newmbuf; -- -- tmpbuf = FUSE_BUFVEC_INIT(buf->size - write_header_size); -- tmpbuf.buf[0].mem = (char *)mbuf + write_header_size; -- -- res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); -- err = -res; -- if (res < 0) -- goto reply_err; -- -- in = mbuf; -- } -- - inarg = (void *) &in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) - do_write_buf(req, in->nodeid, inarg, buf); -@@ -2657,16 +2212,10 @@ void fuse_session_process_buf_int(struct fuse_session *se, - else - fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); - --out_free: -- free(mbuf); - return; - - reply_err: - fuse_reply_err(req, err); --clear_pipe: -- if (buf->flags & FUSE_BUF_IS_FD) -- fuse_ll_clear_pipe(se); -- goto out_free; - } - - #define LL_OPTION(n,o,v) \ -@@ -2684,7 +2233,6 @@ void fuse_lowlevel_version(void) - { - printf("using FUSE kernel interface version %i.%i\n", - FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); -- fuse_mount_version(); - } - - void fuse_lowlevel_help(void) -@@ -2692,204 +2240,29 @@ void fuse_lowlevel_help(void) - /* These are not all options, but the ones that are - potentially of interest to an end-user */ - printf( --" -o allow_other allow access by all users\n" - " -o allow_root allow access by root\n" --" -o auto_unmount auto unmount on process termination\n"); -+); - } - - void fuse_session_destroy(struct fuse_session *se) - { -- struct fuse_ll_pipe *llp; -- - if (se->got_init && !se->got_destroy) { - if (se->op.destroy) - se->op.destroy(se->userdata); - } -- llp = 
pthread_getspecific(se->pipe_key); -- if (llp != NULL) -- fuse_ll_pipe_free(llp); -- pthread_key_delete(se->pipe_key); - pthread_mutex_destroy(&se->lock); - free(se->cuse_data); - if (se->fd != -1) - close(se->fd); -- destroy_mount_opts(se->mo); - free(se); - } - - --static void fuse_ll_pipe_destructor(void *data) --{ -- struct fuse_ll_pipe *llp = data; -- fuse_ll_pipe_free(llp); --} -- --int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf) --{ -- return fuse_session_receive_buf_int(se, buf, NULL); --} -- --int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, -- struct fuse_chan *ch) --{ -- int err; -- ssize_t res; --#ifdef HAVE_SPLICE -- size_t bufsize = se->bufsize; -- struct fuse_ll_pipe *llp; -- struct fuse_buf tmpbuf; -- -- if (se->conn.proto_minor < 14 || !(se->conn.want & FUSE_CAP_SPLICE_READ)) -- goto fallback; -- -- llp = fuse_ll_get_pipe(se); -- if (llp == NULL) -- goto fallback; -- -- if (llp->size < bufsize) { -- if (llp->can_grow) { -- res = fcntl(llp->pipe[0], F_SETPIPE_SZ, bufsize); -- if (res == -1) { -- llp->can_grow = 0; -- res = grow_pipe_to_max(llp->pipe[0]); -- if (res > 0) -- llp->size = res; -- goto fallback; -- } -- llp->size = res; -- } -- if (llp->size < bufsize) -- goto fallback; -- } -- -- res = splice(ch ? 
ch->fd : se->fd, -- NULL, llp->pipe[1], NULL, bufsize, 0); -- err = errno; -- -- if (fuse_session_exited(se)) -- return 0; -- -- if (res == -1) { -- if (err == ENODEV) { -- /* Filesystem was unmounted, or connection was aborted -- via /sys/fs/fuse/connections */ -- fuse_session_exit(se); -- return 0; -- } -- if (err != EINTR && err != EAGAIN) -- perror("fuse: splice from device"); -- return -err; -- } -- -- if (res < sizeof(struct fuse_in_header)) { -- fuse_log(FUSE_LOG_ERR, "short splice from fuse device\n"); -- return -EIO; -- } -- -- tmpbuf = (struct fuse_buf) { -- .size = res, -- .flags = FUSE_BUF_IS_FD, -- .fd = llp->pipe[0], -- }; -- -- /* -- * Don't bother with zero copy for small requests. -- * fuse_loop_mt() needs to check for FORGET so this more than -- * just an optimization. -- */ -- if (res < sizeof(struct fuse_in_header) + -- sizeof(struct fuse_write_in) + pagesize) { -- struct fuse_bufvec src = { .buf[0] = tmpbuf, .count = 1 }; -- struct fuse_bufvec dst = { .count = 1 }; -- -- if (!buf->mem) { -- buf->mem = malloc(se->bufsize); -- if (!buf->mem) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: failed to allocate read buffer\n"); -- return -ENOMEM; -- } -- } -- buf->size = se->bufsize; -- buf->flags = 0; -- dst.buf[0] = *buf; -- -- res = fuse_buf_copy(&dst, &src, 0); -- if (res < 0) { -- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", -- strerror(-res)); -- fuse_ll_clear_pipe(se); -- return res; -- } -- if (res < tmpbuf.size) { -- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); -- fuse_ll_clear_pipe(se); -- return -EIO; -- } -- assert(res == tmpbuf.size); -- -- } else { -- /* Don't overwrite buf->mem, as that would cause a leak */ -- buf->fd = tmpbuf.fd; -- buf->flags = tmpbuf.flags; -- } -- buf->size = tmpbuf.size; -- -- return res; -- --fallback: --#endif -- if (!buf->mem) { -- buf->mem = malloc(se->bufsize); -- if (!buf->mem) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: failed to allocate read buffer\n"); -- return -ENOMEM; -- } -- } -- --restart: 
-- res = read(ch ? ch->fd : se->fd, buf->mem, se->bufsize); -- err = errno; -- -- if (fuse_session_exited(se)) -- return 0; -- if (res == -1) { -- /* ENOENT means the operation was interrupted, it's safe -- to restart */ -- if (err == ENOENT) -- goto restart; -- -- if (err == ENODEV) { -- /* Filesystem was unmounted, or connection was aborted -- via /sys/fs/fuse/connections */ -- fuse_session_exit(se); -- return 0; -- } -- /* Errors occurring during normal operation: EINTR (read -- interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem -- umounted) */ -- if (err != EINTR && err != EAGAIN) -- perror("fuse: reading device"); -- return -err; -- } -- if ((size_t) res < sizeof(struct fuse_in_header)) { -- fuse_log(FUSE_LOG_ERR, "short read on fuse device\n"); -- return -EIO; -- } -- -- buf->size = res; -- -- return res; --} -- - struct fuse_session *fuse_session_new(struct fuse_args *args, - const struct fuse_lowlevel_ops *op, - size_t op_size, void *userdata) - { -- int err; - struct fuse_session *se; -- struct mount_opts *mo; - - if (sizeof(struct fuse_lowlevel_ops) < op_size) { - fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); -@@ -2913,20 +2286,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - /* Parse options */ - if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) - goto out2; -- if(se->deny_others) { -- /* Allowing access only by root is done by instructing -- * kernel to allow access by everyone, and then restricting -- * access to root and mountpoint owner in libfuse. -- */ -- // We may be adding the option a second time, but -- // that doesn't hurt. 
-- if(fuse_opt_add_arg(args, "-oallow_other") == -1) -- goto out2; -- } -- mo = parse_mount_opts(args); -- if (mo == NULL) -- goto out3; -- - if(args->argc == 1 && - args->argv[0][0] == '-') { - fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " -@@ -2940,9 +2299,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - goto out4; - } - -- if (se->debug) -- fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION); -- - se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + - FUSE_BUFFER_HEADER_SIZE; - -@@ -2952,26 +2308,14 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - se->notify_ctr = 1; - fuse_mutex_init(&se->lock); - -- err = pthread_key_create(&se->pipe_key, fuse_ll_pipe_destructor); -- if (err) { -- fuse_log(FUSE_LOG_ERR, "fuse: failed to create thread specific key: %s\n", -- strerror(err)); -- goto out5; -- } -- - memcpy(&se->op, op, op_size); - se->owner = getuid(); - se->userdata = userdata; - -- se->mo = mo; - return se; - --out5: -- pthread_mutex_destroy(&se->lock); - out4: - fuse_opt_free_args(args); --out3: -- free(mo); - out2: - free(se); - out1: -@@ -3035,11 +2379,6 @@ int fuse_session_fd(struct fuse_session *se) - - void fuse_session_unmount(struct fuse_session *se) - { -- if (se->mountpoint != NULL) { -- fuse_kern_unmount(se->mountpoint, se->fd); -- free(se->mountpoint); -- se->mountpoint = NULL; -- } - } - - #ifdef linux -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 18c6363..6b1adfc 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -31,10 +31,6 @@ - #include - #include - --#ifdef __cplusplus --extern "C" { --#endif -- - /* ----------------------------------------------------------- * - * Miscellaneous definitions * - * ----------------------------------------------------------- */ -@@ -1863,14 +1859,12 @@ void fuse_cmdline_help(void); - * ----------------------------------------------------------- */ - - 
struct fuse_cmdline_opts { -- int singlethread; - int foreground; - int debug; - int nodefault_subtype; - char *mountpoint; - int show_version; - int show_help; -- int clone_fd; - unsigned int max_idle_threads; - }; - -@@ -1962,24 +1956,6 @@ int fuse_session_mount(struct fuse_session *se, const char *mountpoint); - int fuse_session_loop(struct fuse_session *se); - - /** -- * Enter a multi-threaded event loop. -- * -- * For a description of the return value and the conditions when the -- * event loop exits, refer to the documentation of -- * fuse_session_loop(). -- * -- * @param se the session -- * @param config session loop configuration -- * @return see fuse_session_loop() -- */ --#if FUSE_USE_VERSION < 32 --int fuse_session_loop_mt_31(struct fuse_session *se, int clone_fd); --#define fuse_session_loop_mt(se, clone_fd) fuse_session_loop_mt_31(se, clone_fd) --#else --int fuse_session_loop_mt(struct fuse_session *se, struct fuse_loop_config *config); --#endif -- --/** - * Flag a session as terminated. 
- * - * This function is invoked by the POSIX signal handlers, when -@@ -2082,8 +2058,4 @@ void fuse_session_process_buf(struct fuse_session *se, - */ - int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); - --#ifdef __cplusplus --} --#endif -- - #endif /* FUSE_LOWLEVEL_H_ */ -diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h -index d8573e7..6910255 100644 ---- a/tools/virtiofsd/fuse_opt.h -+++ b/tools/virtiofsd/fuse_opt.h -@@ -14,10 +14,6 @@ - * This file defines the option parsing interface of FUSE - */ - --#ifdef __cplusplus --extern "C" { --#endif -- - /** - * Option description - * -@@ -264,8 +260,4 @@ void fuse_opt_free_args(struct fuse_args *args); - */ - int fuse_opt_match(const struct fuse_opt opts[], const char *opt); - --#ifdef __cplusplus --} --#endif -- - #endif /* FUSE_OPT_H_ */ -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 64ff7ad..5a2e64c 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -41,14 +41,10 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), - FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("-f", foreground), -- FUSE_HELPER_OPT("-s", singlethread), - FUSE_HELPER_OPT("fsname=", nodefault_subtype), - FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), --#ifndef __FreeBSD__ - FUSE_HELPER_OPT("subtype=", nodefault_subtype), - FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), --#endif -- FUSE_HELPER_OPT("clone_fd", clone_fd), - FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), - FUSE_OPT_END - }; -@@ -132,9 +128,6 @@ void fuse_cmdline_help(void) - " -V --version print version\n" - " -d -o debug enable debug output (implies -f)\n" - " -f foreground operation\n" -- " -s disable multi-threaded operation\n" -- " -o clone_fd use separate fuse device fd for each thread\n" -- " (may improve performance)\n" - " -o max_idle_threads the maximum number of idle worker threads\n" - " allowed (default: 10)\n"); - 
} -@@ -171,34 +164,6 @@ static int fuse_helper_opt_proc(void *data, const char *arg, int key, - } - } - --/* Under FreeBSD, there is no subtype option so this -- function actually sets the fsname */ --static int add_default_subtype(const char *progname, struct fuse_args *args) --{ -- int res; -- char *subtype_opt; -- -- const char *basename = strrchr(progname, '/'); -- if (basename == NULL) -- basename = progname; -- else if (basename[1] != '\0') -- basename++; -- -- subtype_opt = (char *) malloc(strlen(basename) + 64); -- if (subtype_opt == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -- return -1; -- } --#ifdef __FreeBSD__ -- sprintf(subtype_opt, "-ofsname=%s", basename); --#else -- sprintf(subtype_opt, "-osubtype=%s", basename); --#endif -- res = fuse_opt_add_arg(args, subtype_opt); -- free(subtype_opt); -- return res; --} -- - int fuse_parse_cmdline(struct fuse_args *args, - struct fuse_cmdline_opts *opts) - { -@@ -210,14 +175,6 @@ int fuse_parse_cmdline(struct fuse_args *args, - fuse_helper_opt_proc) == -1) - return -1; - -- /* *Linux*: if neither -o subtype nor -o fsname are specified, -- set subtype to program's basename. -- *FreeBSD*: if fsname is not specified, set to program's -- basename. 
*/ -- if (!opts->nodefault_subtype) -- if (add_default_subtype(args->argv[0], args) == -1) -- return -1; -- - return 0; - } - -@@ -276,88 +233,6 @@ int fuse_daemonize(int foreground) - return 0; - } - --int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, -- size_t op_size, void *user_data) --{ -- struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -- struct fuse *fuse; -- struct fuse_cmdline_opts opts; -- int res; -- -- if (fuse_parse_cmdline(&args, &opts) != 0) -- return 1; -- -- if (opts.show_version) { -- printf("FUSE library version %s\n", PACKAGE_VERSION); -- fuse_lowlevel_version(); -- res = 0; -- goto out1; -- } -- -- if (opts.show_help) { -- if(args.argv[0][0] != '\0') -- printf("usage: %s [options] \n\n", -- args.argv[0]); -- printf("FUSE options:\n"); -- fuse_cmdline_help(); -- fuse_lib_help(&args); -- res = 0; -- goto out1; -- } -- -- if (!opts.show_help && -- !opts.mountpoint) { -- fuse_log(FUSE_LOG_ERR, "error: no mountpoint specified\n"); -- res = 2; -- goto out1; -- } -- -- -- fuse = fuse_new_31(&args, op, op_size, user_data); -- if (fuse == NULL) { -- res = 3; -- goto out1; -- } -- -- if (fuse_mount(fuse,opts.mountpoint) != 0) { -- res = 4; -- goto out2; -- } -- -- if (fuse_daemonize(opts.foreground) != 0) { -- res = 5; -- goto out3; -- } -- -- struct fuse_session *se = fuse_get_session(fuse); -- if (fuse_set_signal_handlers(se) != 0) { -- res = 6; -- goto out3; -- } -- -- if (opts.singlethread) -- res = fuse_loop(fuse); -- else { -- struct fuse_loop_config loop_config; -- loop_config.clone_fd = opts.clone_fd; -- loop_config.max_idle_threads = opts.max_idle_threads; -- res = fuse_loop_mt_32(fuse, &loop_config); -- } -- if (res) -- res = 7; -- -- fuse_remove_signal_handlers(se); --out3: -- fuse_unmount(fuse); --out2: -- fuse_destroy(fuse); --out1: -- free(opts.mountpoint); -- fuse_opt_free_args(&args); -- return res; --} -- -- - void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, - struct fuse_conn_info *conn) - 
{ -@@ -420,21 +295,3 @@ struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) - } - return opts; - } -- --int fuse_open_channel(const char *mountpoint, const char* options) --{ -- struct mount_opts *opts = NULL; -- int fd = -1; -- const char *argv[] = { "", "-o", options }; -- int argc = sizeof(argv) / sizeof(argv[0]); -- struct fuse_args args = FUSE_ARGS_INIT(argc, (char**) argv); -- -- opts = parse_mount_opts(&args); -- if (opts == NULL) -- return -1; -- -- fd = fuse_kern_mount(mountpoint, opts); -- destroy_mount_opts(opts); -- -- return fd; --} -diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h -index 6b77c33..7c5f561 100644 ---- a/tools/virtiofsd/passthrough_helpers.h -+++ b/tools/virtiofsd/passthrough_helpers.h -@@ -42,32 +42,6 @@ static int mknod_wrapper(int dirfd, const char *path, const char *link, - res = symlinkat(link, dirfd, path); - } else if (S_ISFIFO(mode)) { - res = mkfifoat(dirfd, path, mode); --#ifdef __FreeBSD__ -- } else if (S_ISSOCK(mode)) { -- struct sockaddr_un su; -- int fd; -- -- if (strlen(path) >= sizeof(su.sun_path)) { -- errno = ENAMETOOLONG; -- return -1; -- } -- fd = socket(AF_UNIX, SOCK_STREAM, 0); -- if (fd >= 0) { -- /* -- * We must bind the socket to the underlying file -- * system to create the socket file, even though -- * we'll never listen on this socket. 
-- */ -- su.sun_family = AF_UNIX; -- strncpy(su.sun_path, path, sizeof(su.sun_path)); -- res = bindat(dirfd, fd, (struct sockaddr*)&su, -- sizeof(su)); -- if (res == 0) -- close(fd); -- } else { -- res = -1; -- } --#endif - } else { - res = mknodat(dirfd, path, mode, rdev); - } -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e1a6056..e5f7115 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1240,7 +1240,6 @@ int main(int argc, char *argv[]) - ret = 0; - goto err_out1; - } else if (opts.show_version) { -- printf("FUSE library version %s\n", fuse_pkgversion()); - fuse_lowlevel_version(); - ret = 0; - goto err_out1; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Trim-out-compatibility-code.patch b/SOURCES/kvm-virtiofsd-Trim-out-compatibility-code.patch deleted file mode 100644 index 411af77..0000000 --- a/SOURCES/kvm-virtiofsd-Trim-out-compatibility-code.patch +++ /dev/null @@ -1,545 +0,0 @@ -From ff16b837e402de773581f77ca188f8806c0b500f Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:51 +0100 -Subject: [PATCH 020/116] virtiofsd: Trim out compatibility code -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-17-dgilbert@redhat.com> -Patchwork-id: 93468 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 016/112] virtiofsd: Trim out compatibility code -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -virtiofsd only supports major=7, minor>=31; trim out a lot of -old compatibility code. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 72c42e2d65510e073cf78fdc924d121c77fa0080) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 330 +++++++++++++++------------------------- - 1 file changed, 119 insertions(+), 211 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 07fb8a6..514d79c 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -387,16 +387,7 @@ static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) - int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) - { - struct fuse_entry_out arg; -- size_t size = req->se->conn.proto_minor < 9 ? FUSE_COMPAT_ENTRY_OUT_SIZE : -- sizeof(arg); -- -- /* -- * before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant -- * negative entry -- */ -- if (!e->ino && req->se->conn.proto_minor < 4) { -- return fuse_reply_err(req, ENOENT); -- } -+ size_t size = sizeof(arg); - - memset(&arg, 0, sizeof(arg)); - fill_entry(&arg, e); -@@ -407,9 +398,7 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, - const struct fuse_file_info *f) - { - char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; -- size_t entrysize = req->se->conn.proto_minor < 9 ? -- FUSE_COMPAT_ENTRY_OUT_SIZE : -- sizeof(struct fuse_entry_out); -+ size_t entrysize = sizeof(struct fuse_entry_out); - struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; - struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); - -@@ -423,8 +412,7 @@ int fuse_reply_attr(fuse_req_t req, const struct stat *attr, - double attr_timeout) - { - struct fuse_attr_out arg; -- size_t size = -- req->se->conn.proto_minor < 9 ? 
FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); -+ size_t size = sizeof(arg); - - memset(&arg, 0, sizeof(arg)); - arg.attr_valid = calc_timeout_sec(attr_timeout); -@@ -519,8 +507,7 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) - int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) - { - struct fuse_statfs_out arg; -- size_t size = -- req->se->conn.proto_minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(arg); -+ size_t size = sizeof(arg); - - memset(&arg, 0, sizeof(arg)); - convert_statfs(stbuf, &arg.st); -@@ -604,45 +591,31 @@ int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, - iov[count].iov_len = sizeof(arg); - count++; - -- if (req->se->conn.proto_minor < 16) { -- if (in_count) { -- iov[count].iov_base = (void *)in_iov; -- iov[count].iov_len = sizeof(in_iov[0]) * in_count; -- count++; -- } -+ /* Can't handle non-compat 64bit ioctls on 32bit */ -+ if (sizeof(void *) == 4 && req->ioctl_64bit) { -+ res = fuse_reply_err(req, EINVAL); -+ goto out; -+ } - -- if (out_count) { -- iov[count].iov_base = (void *)out_iov; -- iov[count].iov_len = sizeof(out_iov[0]) * out_count; -- count++; -+ if (in_count) { -+ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -+ if (!in_fiov) { -+ goto enomem; - } -- } else { -- /* Can't handle non-compat 64bit ioctls on 32bit */ -- if (sizeof(void *) == 4 && req->ioctl_64bit) { -- res = fuse_reply_err(req, EINVAL); -- goto out; -- } -- -- if (in_count) { -- in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -- if (!in_fiov) { -- goto enomem; -- } - -- iov[count].iov_base = (void *)in_fiov; -- iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -- count++; -+ iov[count].iov_base = (void *)in_fiov; -+ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -+ count++; -+ } -+ if (out_count) { -+ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -+ if (!out_fiov) { -+ goto enomem; - } -- if (out_count) { -- out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -- if (!out_fiov) { -- goto enomem; -- } 
- -- iov[count].iov_base = (void *)out_fiov; -- iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -- count++; -- } -+ iov[count].iov_base = (void *)out_fiov; -+ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -+ count++; - } - - res = send_reply_iov(req, 0, iov, count); -@@ -784,14 +757,12 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - struct fuse_file_info *fip = NULL; - struct fuse_file_info fi; - -- if (req->se->conn.proto_minor >= 9) { -- struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; -+ struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; - -- if (arg->getattr_flags & FUSE_GETATTR_FH) { -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fip = &fi; -- } -+ if (arg->getattr_flags & FUSE_GETATTR_FH) { -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fip = &fi; - } - - if (req->se->op.getattr) { -@@ -856,11 +827,7 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; - char *name = PARAM(arg); - -- if (req->se->conn.proto_minor >= 12) { -- req->ctx.umask = arg->umask; -- } else { -- name = (char *)inarg + FUSE_COMPAT_MKNOD_IN_SIZE; -- } -+ req->ctx.umask = arg->umask; - - if (req->se->op.mknod) { - req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); -@@ -873,9 +840,7 @@ static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { - struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; - -- if (req->se->conn.proto_minor >= 12) { -- req->ctx.umask = arg->umask; -- } -+ req->ctx.umask = arg->umask; - - if (req->se->op.mkdir) { - req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -@@ -967,11 +932,7 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - -- if (req->se->conn.proto_minor >= 12) { -- req->ctx.umask = arg->umask; -- } else { -- name = (char *)inarg + sizeof(struct fuse_open_in); -- 
} -+ req->ctx.umask = arg->umask; - - req->se->op.create(req, nodeid, name, arg->mode, &fi); - } else { -@@ -1003,10 +964,8 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; -- if (req->se->conn.proto_minor >= 9) { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- } -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; - req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); - } else { - fuse_reply_err(req, ENOSYS); -@@ -1023,13 +982,9 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - fi.fh = arg->fh; - fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; - -- if (req->se->conn.proto_minor < 9) { -- param = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; -- } else { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- param = PARAM(arg); -- } -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ param = PARAM(arg); - - if (req->se->op.write) { - req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); -@@ -1053,21 +1008,14 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, - fi.fh = arg->fh; - fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; - -- if (se->conn.proto_minor < 9) { -- bufv.buf[0].mem = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; -- bufv.buf[0].size -= -- sizeof(struct fuse_in_header) + FUSE_COMPAT_WRITE_IN_SIZE; -- assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); -- } else { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -- bufv.buf[0].mem = PARAM(arg); -- } -- -- bufv.buf[0].size -= -- sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -+ bufv.buf[0].mem = PARAM(arg); - } -+ -+ bufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); - if 
(bufv.buf[0].size < arg->size) { - fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); - fuse_reply_err(req, EIO); -@@ -1086,9 +1034,7 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.flush = 1; -- if (req->se->conn.proto_minor >= 7) { -- fi.lock_owner = arg->lock_owner; -- } -+ fi.lock_owner = arg->lock_owner; - - if (req->se->op.flush) { - req->se->op.flush(req, nodeid, &fi); -@@ -1105,10 +1051,8 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - fi.fh = arg->fh; -- if (req->se->conn.proto_minor >= 8) { -- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -- fi.lock_owner = arg->lock_owner; -- } -+ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -+ fi.lock_owner = arg->lock_owner; - if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { - fi.flock_release = 1; - fi.lock_owner = arg->lock_owner; -@@ -1477,8 +1421,7 @@ static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -- if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && -- !(flags & FUSE_IOCTL_32BIT)) { -+ if (sizeof(void *) == 4 && !(flags & FUSE_IOCTL_32BIT)) { - req->ioctl_64bit = 1; - } - -@@ -1603,7 +1546,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - outarg.major = FUSE_KERNEL_VERSION; - outarg.minor = FUSE_KERNEL_MINOR_VERSION; - -- if (arg->major < 7) { -+ if (arg->major < 7 || (arg->major == 7 && arg->minor < 31)) { - fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", - arg->major, arg->minor); - fuse_reply_err(req, EPROTO); -@@ -1616,81 +1559,71 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - return; - } - -- if (arg->minor >= 6) { -- if (arg->max_readahead < se->conn.max_readahead) { -- se->conn.max_readahead = 
arg->max_readahead; -- } -- if (arg->flags & FUSE_ASYNC_READ) { -- se->conn.capable |= FUSE_CAP_ASYNC_READ; -- } -- if (arg->flags & FUSE_POSIX_LOCKS) { -- se->conn.capable |= FUSE_CAP_POSIX_LOCKS; -- } -- if (arg->flags & FUSE_ATOMIC_O_TRUNC) { -- se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -- } -- if (arg->flags & FUSE_EXPORT_SUPPORT) { -- se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -- } -- if (arg->flags & FUSE_DONT_MASK) { -- se->conn.capable |= FUSE_CAP_DONT_MASK; -- } -- if (arg->flags & FUSE_FLOCK_LOCKS) { -- se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -- } -- if (arg->flags & FUSE_AUTO_INVAL_DATA) { -- se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -- } -- if (arg->flags & FUSE_DO_READDIRPLUS) { -- se->conn.capable |= FUSE_CAP_READDIRPLUS; -- } -- if (arg->flags & FUSE_READDIRPLUS_AUTO) { -- se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -- } -- if (arg->flags & FUSE_ASYNC_DIO) { -- se->conn.capable |= FUSE_CAP_ASYNC_DIO; -- } -- if (arg->flags & FUSE_WRITEBACK_CACHE) { -- se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -- } -- if (arg->flags & FUSE_NO_OPEN_SUPPORT) { -- se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -- } -- if (arg->flags & FUSE_PARALLEL_DIROPS) { -- se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -- } -- if (arg->flags & FUSE_POSIX_ACL) { -- se->conn.capable |= FUSE_CAP_POSIX_ACL; -- } -- if (arg->flags & FUSE_HANDLE_KILLPRIV) { -- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -- } -- if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { -- se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -- } -- if (!(arg->flags & FUSE_MAX_PAGES)) { -- size_t max_bufsize = -- FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + -- FUSE_BUFFER_HEADER_SIZE; -- if (bufsize > max_bufsize) { -- bufsize = max_bufsize; -- } -+ if (arg->max_readahead < se->conn.max_readahead) { -+ se->conn.max_readahead = arg->max_readahead; -+ } -+ if (arg->flags & FUSE_ASYNC_READ) { -+ se->conn.capable |= FUSE_CAP_ASYNC_READ; -+ } -+ if (arg->flags & FUSE_POSIX_LOCKS) { -+ se->conn.capable |= 
FUSE_CAP_POSIX_LOCKS; -+ } -+ if (arg->flags & FUSE_ATOMIC_O_TRUNC) { -+ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -+ } -+ if (arg->flags & FUSE_EXPORT_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -+ } -+ if (arg->flags & FUSE_DONT_MASK) { -+ se->conn.capable |= FUSE_CAP_DONT_MASK; -+ } -+ if (arg->flags & FUSE_FLOCK_LOCKS) { -+ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -+ } -+ if (arg->flags & FUSE_AUTO_INVAL_DATA) { -+ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -+ } -+ if (arg->flags & FUSE_DO_READDIRPLUS) { -+ se->conn.capable |= FUSE_CAP_READDIRPLUS; -+ } -+ if (arg->flags & FUSE_READDIRPLUS_AUTO) { -+ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -+ } -+ if (arg->flags & FUSE_ASYNC_DIO) { -+ se->conn.capable |= FUSE_CAP_ASYNC_DIO; -+ } -+ if (arg->flags & FUSE_WRITEBACK_CACHE) { -+ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -+ } -+ if (arg->flags & FUSE_NO_OPEN_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -+ } -+ if (arg->flags & FUSE_PARALLEL_DIROPS) { -+ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -+ } -+ if (arg->flags & FUSE_POSIX_ACL) { -+ se->conn.capable |= FUSE_CAP_POSIX_ACL; -+ } -+ if (arg->flags & FUSE_HANDLE_KILLPRIV) { -+ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -+ } -+ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -+ } -+ if (!(arg->flags & FUSE_MAX_PAGES)) { -+ size_t max_bufsize = FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + -+ FUSE_BUFFER_HEADER_SIZE; -+ if (bufsize > max_bufsize) { -+ bufsize = max_bufsize; - } -- } else { -- se->conn.max_readahead = 0; - } -- -- if (se->conn.proto_minor >= 14) { - #ifdef HAVE_SPLICE - #ifdef HAVE_VMSPLICE -- se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; -+ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; - #endif -- se->conn.capable |= FUSE_CAP_SPLICE_READ; -+ se->conn.capable |= FUSE_CAP_SPLICE_READ; - #endif -- } -- if (se->conn.proto_minor >= 18) { -- se->conn.capable |= 
FUSE_CAP_IOCTL_DIR; -- } -+ se->conn.capable |= FUSE_CAP_IOCTL_DIR; - - /* - * Default settings for modern filesystems. -@@ -1797,24 +1730,20 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - outarg.max_readahead = se->conn.max_readahead; - outarg.max_write = se->conn.max_write; -- if (se->conn.proto_minor >= 13) { -- if (se->conn.max_background >= (1 << 16)) { -- se->conn.max_background = (1 << 16) - 1; -- } -- if (se->conn.congestion_threshold > se->conn.max_background) { -- se->conn.congestion_threshold = se->conn.max_background; -- } -- if (!se->conn.congestion_threshold) { -- se->conn.congestion_threshold = se->conn.max_background * 3 / 4; -- } -- -- outarg.max_background = se->conn.max_background; -- outarg.congestion_threshold = se->conn.congestion_threshold; -+ if (se->conn.max_background >= (1 << 16)) { -+ se->conn.max_background = (1 << 16) - 1; -+ } -+ if (se->conn.congestion_threshold > se->conn.max_background) { -+ se->conn.congestion_threshold = se->conn.max_background; - } -- if (se->conn.proto_minor >= 23) { -- outarg.time_gran = se->conn.time_gran; -+ if (!se->conn.congestion_threshold) { -+ se->conn.congestion_threshold = se->conn.max_background * 3 / 4; - } - -+ outarg.max_background = se->conn.max_background; -+ outarg.congestion_threshold = se->conn.congestion_threshold; -+ outarg.time_gran = se->conn.time_gran; -+ - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, - outarg.minor); -@@ -1828,11 +1757,6 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - outarg.congestion_threshold); - fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); - } -- if (arg->minor < 5) { -- outargsize = FUSE_COMPAT_INIT_OUT_SIZE; -- } else if (arg->minor < 23) { -- outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; -- } - - send_reply_ok(req, &outarg, outargsize); - } -@@ -1896,10 +1820,6 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, - return 
-EINVAL; - } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { -- return -ENOSYS; -- } -- - outarg.ino = ino; - outarg.off = off; - outarg.len = len; -@@ -1920,10 +1840,6 @@ int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, - return -EINVAL; - } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { -- return -ENOSYS; -- } -- - outarg.parent = parent; - outarg.namelen = namelen; - outarg.padding = 0; -@@ -1947,10 +1863,6 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - return -EINVAL; - } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) { -- return -ENOSYS; -- } -- - outarg.parent = parent; - outarg.child = child; - outarg.namelen = namelen; -@@ -1977,10 +1889,6 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - return -EINVAL; - } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { -- return -ENOSYS; -- } -- - out.unique = 0; - out.error = FUSE_NOTIFY_STORE; - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-Whitelist-fchmod.patch b/SOURCES/kvm-virtiofsd-Whitelist-fchmod.patch deleted file mode 100644 index a4f95d9..0000000 --- a/SOURCES/kvm-virtiofsd-Whitelist-fchmod.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 181ed1777c3dd50b1ff9907b0a4199e845af1270 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Fri, 18 Jun 2021 16:21:17 -0400 -Subject: [PATCH 1/4] virtiofsd: Whitelist fchmod -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Max Reitz -Message-id: <20210618162117.97775-2-mreitz@redhat.com> -Patchwork-id: 101719 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH 1/1] virtiofsd: Whitelist fchmod -Bugzilla: 1967914 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Vivek Goyal -RH-Acked-by: Connor Kuehl - -lo_setattr() invokes fchmod() in a rarely used code path, so it should -be whitelisted or virtiofsd will crash with EBADSYS. 
- -Said code path can be triggered for example as follows: - -On the host, in the shared directory, create a file with the sticky bit -set and a security.capability xattr: -(1) # touch foo -(2) # chmod u+s foo -(3) # setcap '' foo - -Then in the guest let some process truncate that file after it has -dropped all of its capabilities (at least CAP_FSETID): - -int main(int argc, char *argv[]) -{ - capng_setpid(getpid()); - capng_clear(CAPNG_SELECT_BOTH); - capng_updatev(CAPNG_ADD, CAPNG_PERMITTED | CAPNG_EFFECTIVE, 0); - capng_apply(CAPNG_SELECT_BOTH); - - ftruncate(open(argv[1], O_RDWR), 0); -} - -This will cause the guest kernel to drop the sticky bit (i.e. perform a -mode change) as part of the truncate (where FATTR_FH is set), and that -will cause virtiofsd to invoke fchmod() instead of fchmodat(). - -(A similar configuration exists further below with futimens() vs. -utimensat(), but the former is not a syscall but just a wrapper for the -latter, so no further whitelisting is required.) - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1842667 -Reported-by: Qian Cai -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Message-Id: <20200608093111.14942-1-mreitz@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Vivek Goyal -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 63659fe74e76f5c5285466f0c5cfbdca65b3688e) -Signed-off-by: Max Reitz -Signed-off-by: Danilo C. L. 
de Paula ---- - tools/virtiofsd/seccomp.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c -index bd9e7b083c..3b1522acdd 100644 ---- a/tools/virtiofsd/seccomp.c -+++ b/tools/virtiofsd/seccomp.c -@@ -42,6 +42,7 @@ static const int syscall_whitelist[] = { - SCMP_SYS(exit_group), - SCMP_SYS(fallocate), - SCMP_SYS(fchdir), -+ SCMP_SYS(fchmod), - SCMP_SYS(fchmodat), - SCMP_SYS(fchownat), - SCMP_SYS(fcntl), --- -2.27.0 - diff --git a/SOURCES/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch b/SOURCES/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch deleted file mode 100644 index a0882d5..0000000 --- a/SOURCES/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch +++ /dev/null @@ -1,93 +0,0 @@ -From e4c8fd1060fb69a093064851ebf66dd82533ec0e Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:17 +0100 -Subject: [PATCH 106/116] virtiofsd: add definition of fuse_buf_writev() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-103-dgilbert@redhat.com> -Patchwork-id: 93557 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 102/112] virtiofsd: add definition of fuse_buf_writev() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: piaojun - -Define fuse_buf_writev() which use pwritev and writev to improve io -bandwidth. Especially, the src bufs with 0 size should be skipped as -their mems are not *block_size* aligned which will cause writev failed -in direct io mode. - -Signed-off-by: Jun Piao -Suggested-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 9ceaaa15cf21073c2b23058c374f61c30cd39c31) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 38 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 38 insertions(+) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 42a608f..37befeb 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -14,6 +14,7 @@ - #include "fuse_lowlevel.h" - #include - #include -+#include - #include - #include - -@@ -33,6 +34,43 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv) - return size; - } - -+__attribute__((unused)) -+static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, -+ struct fuse_bufvec *in_buf) -+{ -+ ssize_t res, i, j; -+ size_t iovcnt = in_buf->count; -+ struct iovec *iov; -+ int fd = out_buf->fd; -+ -+ iov = calloc(iovcnt, sizeof(struct iovec)); -+ if (!iov) { -+ return -ENOMEM; -+ } -+ -+ for (i = 0, j = 0; i < iovcnt; i++) { -+ /* Skip the buf with 0 size */ -+ if (in_buf->buf[i].size) { -+ iov[j].iov_base = in_buf->buf[i].mem; -+ iov[j].iov_len = in_buf->buf[i].size; -+ j++; -+ } -+ } -+ -+ if (out_buf->flags & FUSE_BUF_FD_SEEK) { -+ res = pwritev(fd, iov, iovcnt, out_buf->pos); -+ } else { -+ res = writev(fd, iov, iovcnt); -+ } -+ -+ if (res == -1) { -+ res = -errno; -+ } -+ -+ free(iov); -+ return res; -+} -+ - static size_t min_size(size_t s1, size_t s2) - { - return s1 < s2 ? s1 : s2; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch b/SOURCES/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch deleted file mode 100644 index 451f12b..0000000 --- a/SOURCES/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch +++ /dev/null @@ -1,170 +0,0 @@ -From f91a9bdc171142174110e9ff1716b611f6fb0039 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:07 +0100 -Subject: [PATCH 036/116] virtiofsd: add --fd=FDNUM fd passing option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-33-dgilbert@redhat.com> -Patchwork-id: 93487 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 032/112] virtiofsd: add --fd=FDNUM fd passing option -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Although --socket-path=PATH is useful for manual invocations, management -tools typically create the UNIX domain socket themselves and pass it to -the vhost-user device backend. This way QEMU can be launched -immediately with a valid socket. No waiting for the vhost-user device -backend is required when fd passing is used. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit cee8e35d4386e34bf79c3ca2aab7f7b1bb48cf8d) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 1 + - tools/virtiofsd/fuse_lowlevel.c | 16 ++++++++++++---- - tools/virtiofsd/fuse_virtio.c | 31 +++++++++++++++++++++++++------ - 3 files changed, 38 insertions(+), 10 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 1126723..45995f3 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -68,6 +68,7 @@ struct fuse_session { - size_t bufsize; - int error; - char *vu_socket_path; -+ int vu_listen_fd; - int vu_socketfd; - struct fv_VuDev *virtio_dev; - }; -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 4f4684d..95f4db8 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2130,6 +2130,7 @@ static const struct fuse_opt fuse_ll_opts[] = { - LL_OPTION("--debug", debug, 1), - LL_OPTION("allow_root", deny_others, 
1), - LL_OPTION("--socket-path=%s", vu_socket_path, 0), -+ LL_OPTION("--fd=%d", vu_listen_fd, 0), - FUSE_OPT_END - }; - -@@ -2147,7 +2148,8 @@ void fuse_lowlevel_help(void) - */ - printf( - " -o allow_root allow access by root\n" -- " --socket-path=PATH path for the vhost-user socket\n"); -+ " --socket-path=PATH path for the vhost-user socket\n" -+ " --fd=FDNUM fd number of vhost-user socket\n"); - } - - void fuse_session_destroy(struct fuse_session *se) -@@ -2191,6 +2193,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - goto out1; - } - se->fd = -1; -+ se->vu_listen_fd = -1; - se->conn.max_write = UINT_MAX; - se->conn.max_readahead = UINT_MAX; - -@@ -2212,8 +2215,13 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - goto out4; - } - -- if (!se->vu_socket_path) { -- fprintf(stderr, "fuse: missing -o vhost_user_socket option\n"); -+ if (!se->vu_socket_path && se->vu_listen_fd < 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: missing --socket-path or --fd option\n"); -+ goto out4; -+ } -+ if (se->vu_socket_path && se->vu_listen_fd >= 0) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: --socket-path and --fd cannot be given together\n"); - goto out4; - } - -@@ -2253,7 +2261,7 @@ void fuse_session_unmount(struct fuse_session *se) - - int fuse_lowlevel_is_virtio(struct fuse_session *se) - { -- return se->vu_socket_path != NULL; -+ return !!se->virtio_dev; - } - - #ifdef linux -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 7e2711b..635f877 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -638,18 +638,21 @@ int virtio_loop(struct fuse_session *se) - return 0; - } - --int virtio_session_mount(struct fuse_session *se) -+static int fv_create_listen_socket(struct fuse_session *se) - { - struct sockaddr_un un; - mode_t old_umask; - -+ /* Nothing to do if fd is already initialized */ -+ if (se->vu_listen_fd >= 0) { -+ return 0; -+ } -+ - if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) { 
- fuse_log(FUSE_LOG_ERR, "Socket path too long\n"); - return -1; - } - -- se->fd = -1; -- - /* - * Create the Unix socket to communicate with qemu - * based on QEMU's vhost-user-bridge -@@ -682,15 +685,31 @@ int virtio_session_mount(struct fuse_session *se) - return -1; - } - -+ se->vu_listen_fd = listen_sock; -+ return 0; -+} -+ -+int virtio_session_mount(struct fuse_session *se) -+{ -+ int ret; -+ -+ ret = fv_create_listen_socket(se); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ se->fd = -1; -+ - fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n", - __func__); -- int data_sock = accept(listen_sock, NULL, NULL); -+ int data_sock = accept(se->vu_listen_fd, NULL, NULL); - if (data_sock == -1) { - fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n"); -- close(listen_sock); -+ close(se->vu_listen_fd); - return -1; - } -- close(listen_sock); -+ close(se->vu_listen_fd); -+ se->vu_listen_fd = -1; - fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n", - __func__); - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch b/SOURCES/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch deleted file mode 100644 index b874dc9..0000000 --- a/SOURCES/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 1b0edd3d0a2ee5c097bcf3501c1dfa937f02e473 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:21 +0100 -Subject: [PATCH 050/116] virtiofsd: add fuse_mbuf_iter API -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-47-dgilbert@redhat.com> -Patchwork-id: 93502 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 046/112] virtiofsd: add fuse_mbuf_iter API -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Introduce an API for consuming bytes from a buffer with size checks. 
-All FUSE operations will be converted to use this safe API instead of -void *inarg. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit dad157e880416ab3a0e45beaa0e81977516568bc) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 28 +++++++++++++++++++++++++ - tools/virtiofsd/fuse_common.h | 49 ++++++++++++++++++++++++++++++++++++++++++- - 2 files changed, 76 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 772efa9..42a608f 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -267,3 +267,31 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) - - return copied; - } -+ -+void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len) -+{ -+ void *ptr; -+ -+ if (len > iter->size - iter->pos) { -+ return NULL; -+ } -+ -+ ptr = iter->mem + iter->pos; -+ iter->pos += len; -+ return ptr; -+} -+ -+const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter) -+{ -+ const char *str = iter->mem + iter->pos; -+ size_t remaining = iter->size - iter->pos; -+ size_t i; -+ -+ for (i = 0; i < remaining; i++) { -+ if (str[i] == '\0') { -+ iter->pos += i + 1; -+ return str; -+ } -+ } -+ return NULL; -+} -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index 0cb33ac..f8f6433 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -703,10 +703,57 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); - */ - ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src); - -+/** -+ * Memory buffer iterator -+ * -+ */ -+struct fuse_mbuf_iter { -+ /** -+ * Data pointer -+ */ -+ void *mem; -+ -+ /** -+ * Total length, in bytes -+ */ -+ size_t size; -+ -+ /** -+ * Offset from start of buffer -+ */ -+ size_t pos; -+}; -+ -+/* Initialize memory buffer iterator from a fuse_buf */ -+#define FUSE_MBUF_ITER_INIT(fbuf) \ -+ ((struct fuse_mbuf_iter){ \ 
-+ .mem = fbuf->mem, \ -+ .size = fbuf->size, \ -+ .pos = 0, \ -+ }) -+ -+/** -+ * Consume bytes from a memory buffer iterator -+ * -+ * @param iter memory buffer iterator -+ * @param len number of bytes to consume -+ * @return pointer to start of consumed bytes or -+ * NULL if advancing beyond end of buffer -+ */ -+void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len); -+ -+/** -+ * Consume a NUL-terminated string from a memory buffer iterator -+ * -+ * @param iter memory buffer iterator -+ * @return pointer to the string or -+ * NULL if advancing beyond end of buffer or there is no NUL-terminator -+ */ -+const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter); -+ - /* - * Signal handling - */ -- - /** - * Exit session on HUP, TERM and INT signals and ignore PIPE signal - * --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch b/SOURCES/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch deleted file mode 100644 index bdef115..0000000 --- a/SOURCES/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 7a3c94e10b087c06635ef72aadb1550184dd5c58 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:58 +0100 -Subject: [PATCH 087/116] virtiofsd: add helper for lo_data cleanup -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-84-dgilbert@redhat.com> -Patchwork-id: 93538 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 083/112] virtiofsd: add helper for lo_data cleanup -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -This offers an helper function for lo_data's cleanup. - -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 18a69cbbb6a4caa7c2040c6db4a33b044a32be7e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 37 +++++++++++++++++++++---------------- - 1 file changed, 21 insertions(+), 16 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 056ebe8..e8dc5c7 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2407,6 +2407,26 @@ static gboolean lo_key_equal(gconstpointer a, gconstpointer b) - return la->ino == lb->ino && la->dev == lb->dev; - } - -+static void fuse_lo_data_cleanup(struct lo_data *lo) -+{ -+ if (lo->inodes) { -+ g_hash_table_destroy(lo->inodes); -+ } -+ lo_map_destroy(&lo->fd_map); -+ lo_map_destroy(&lo->dirp_map); -+ lo_map_destroy(&lo->ino_map); -+ -+ if (lo->proc_self_fd >= 0) { -+ close(lo->proc_self_fd); -+ } -+ -+ if (lo->root.fd >= 0) { -+ close(lo->root.fd); -+ } -+ -+ free(lo->source); -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2554,22 +2574,7 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -- if (lo.inodes) { -- g_hash_table_destroy(lo.inodes); -- } -- lo_map_destroy(&lo.fd_map); -- lo_map_destroy(&lo.dirp_map); -- lo_map_destroy(&lo.ino_map); -- -- if (lo.proc_self_fd >= 0) { -- close(lo.proc_self_fd); -- } -- -- if (lo.root.fd >= 0) { -- close(lo.root.fd); -- } -- -- free(lo.source); -+ fuse_lo_data_cleanup(&lo); - - return ret ? 1 : 0; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch b/SOURCES/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch deleted file mode 100644 index 5e81663..0000000 --- a/SOURCES/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch +++ /dev/null @@ -1,46 +0,0 @@ -From c55995c25f60168e3cb6b5bae1bf9a47813383d0 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:55 +0100 -Subject: [PATCH 024/116] virtiofsd: add -o source=PATH to help output -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-21-dgilbert@redhat.com> -Patchwork-id: 93474 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 020/112] virtiofsd: add -o source=PATH to help output -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -The -o source=PATH option will be used by most command-line invocations. -Let's document it! - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 4ff075f72be2f489c8998ae492ec5cdbbbd73e07) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 26ac870..fc9b264 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1319,6 +1319,7 @@ int main(int argc, char *argv[]) - if (opts.show_help) { - printf("usage: %s [options]\n\n", argv[0]); - fuse_cmdline_help(); -+ printf(" -o source=PATH shared directory tree\n"); - fuse_lowlevel_help(); - ret = 0; - goto err_out1; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-add-print-capabilities-option.patch b/SOURCES/kvm-virtiofsd-add-print-capabilities-option.patch deleted file mode 100644 index b57e408..0000000 --- a/SOURCES/kvm-virtiofsd-add-print-capabilities-option.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 23d81ee7564084f29e32fedaed5196ae1a5a3240 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:10 +0100 -Subject: [PATCH 039/116] virtiofsd: add --print-capabilities option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-36-dgilbert@redhat.com> -Patchwork-id: 93486 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 035/112] virtiofsd: add --print-capabilities option -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Add the --print-capabilities option as per vhost-user.rst "Backend -programs conventions". Currently there are no advertised features. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 45018fbb0a73ce66fd3dd87ecd2872b45658add4) -Signed-off-by: Miroslav Rezanina ---- - docs/interop/vhost-user.json | 4 +++- - tools/virtiofsd/fuse_lowlevel.h | 1 + - tools/virtiofsd/helper.c | 2 ++ - tools/virtiofsd/passthrough_ll.c | 12 ++++++++++++ - 4 files changed, 18 insertions(+), 1 deletion(-) - -diff --git a/docs/interop/vhost-user.json b/docs/interop/vhost-user.json -index da6aaf5..d4ea1f7 100644 ---- a/docs/interop/vhost-user.json -+++ b/docs/interop/vhost-user.json -@@ -31,6 +31,7 @@ - # @rproc-serial: virtio remoteproc serial link - # @scsi: virtio scsi - # @vsock: virtio vsock transport -+# @fs: virtio fs (since 4.2) - # - # Since: 4.0 - ## -@@ -50,7 +51,8 @@ - 'rpmsg', - 'rproc-serial', - 'scsi', -- 'vsock' -+ 'vsock', -+ 'fs' - ] - } - -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index f6b3470..0d61df8 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1794,6 +1794,7 @@ struct fuse_cmdline_opts { - int nodefault_subtype; - int show_version; - int show_help; -+ int print_capabilities; - unsigned int max_idle_threads; - }; - -diff --git 
a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index a3645fc..b8ec5ac 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -40,6 +40,7 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_HELPER_OPT("--help", show_help), - FUSE_HELPER_OPT("-V", show_version), - FUSE_HELPER_OPT("--version", show_version), -+ FUSE_HELPER_OPT("--print-capabilities", print_capabilities), - FUSE_HELPER_OPT("-d", debug), - FUSE_HELPER_OPT("debug", debug), - FUSE_HELPER_OPT("-d", foreground), -@@ -135,6 +136,7 @@ void fuse_cmdline_help(void) - { - printf(" -h --help print help\n" - " -V --version print version\n" -+ " --print-capabilities print vhost-user.json\n" - " -d -o debug enable debug output (implies -f)\n" - " -f foreground operation\n" - " --daemonize run in background\n" -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 037c5d7..cd27c09 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1298,6 +1298,14 @@ static struct fuse_lowlevel_ops lo_oper = { - .lseek = lo_lseek, - }; - -+/* Print vhost-user.json backend program capabilities */ -+static void print_capabilities(void) -+{ -+ printf("{\n"); -+ printf(" \"type\": \"fs\"\n"); -+ printf("}\n"); -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -1328,6 +1336,10 @@ int main(int argc, char *argv[]) - fuse_lowlevel_version(); - ret = 0; - goto err_out1; -+ } else if (opts.print_capabilities) { -+ print_capabilities(); -+ ret = 0; -+ goto err_out1; - } - - if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-add-rlimit-nofile-NUM-option.patch b/SOURCES/kvm-virtiofsd-add-rlimit-nofile-NUM-option.patch deleted file mode 100644 index a6a9cc9..0000000 --- a/SOURCES/kvm-virtiofsd-add-rlimit-nofile-NUM-option.patch +++ /dev/null @@ -1,164 +0,0 @@ -From 555ec3463b3dbfd6e08eac7840419d176f113e46 Mon Sep 17 00:00:00 2001 
-From: "Dr. David Alan Gilbert" -Date: Tue, 5 May 2020 16:35:55 +0100 -Subject: [PATCH 4/9] virtiofsd: add --rlimit-nofile=NUM option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200505163600.22956-3-dgilbert@redhat.com> -Patchwork-id: 96270 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/7] virtiofsd: add --rlimit-nofile=NUM option -Bugzilla: 1817445 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Max Reitz -RH-Acked-by: Michael S. Tsirkin - -From: Stefan Hajnoczi - -Make it possible to specify the RLIMIT_NOFILE on the command-line. -Users running multiple virtiofsd processes should allocate a certain -number to each process so that the system-wide limit can never be -exhausted. - -When this option is set to 0 the rlimit is left at its current value. -This is useful when a management tool wants to configure the rlimit -itself. - -The default behavior remains unchanged: try to set the limit to -1,000,000 file descriptors if the current rlimit is lower. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Dr. David Alan Gilbert -Message-Id: <20200501140644.220940-2-stefanha@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 6dbb716877728ce4eb51619885ef6ef4ada9565f) -Signed-off-by: Danilo C. L. 
de Paula ---- - tools/virtiofsd/fuse_lowlevel.h | 1 + - tools/virtiofsd/helper.c | 23 +++++++++++++++++++++++ - tools/virtiofsd/passthrough_ll.c | 22 ++++++++-------------- - 3 files changed, 32 insertions(+), 14 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 8f6d705..562fd52 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1777,6 +1777,7 @@ struct fuse_cmdline_opts { - int syslog; - int log_level; - unsigned int max_idle_threads; -+ unsigned long rlimit_nofile; - }; - - /** -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 0801cf7..9b3eddc 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -23,6 +23,8 @@ - #include - #include - #include -+#include -+#include - #include - - #define FUSE_HELPER_OPT(t, p) \ -@@ -53,6 +55,7 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_HELPER_OPT("subtype=", nodefault_subtype), - FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), -+ FUSE_HELPER_OPT("--rlimit-nofile=%lu", rlimit_nofile), - FUSE_HELPER_OPT("--syslog", syslog), - FUSE_HELPER_OPT_VALUE("log_level=debug", log_level, FUSE_LOG_DEBUG), - FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO), -@@ -171,6 +174,9 @@ void fuse_cmdline_help(void) - " default: no_writeback\n" - " -o xattr|no_xattr enable/disable xattr\n" - " default: no_xattr\n" -+ " --rlimit-nofile= set maximum number of file descriptors\n" -+ " (0 leaves rlimit unchanged)\n" -+ " default: 1,000,000 if the current rlimit is lower\n" - ); - } - -@@ -191,11 +197,28 @@ static int fuse_helper_opt_proc(void *data, const char *arg, int key, - } - } - -+static unsigned long get_default_rlimit_nofile(void) -+{ -+ rlim_t max_fds = 1000000; /* our default RLIMIT_NOFILE target */ -+ struct rlimit rlim; -+ -+ if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { -+ fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): 
%m\n"); -+ exit(1); -+ } -+ -+ if (rlim.rlim_cur >= max_fds) { -+ return 0; /* we have more fds available than required! */ -+ } -+ return max_fds; -+} -+ - int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) - { - memset(opts, 0, sizeof(struct fuse_cmdline_opts)); - - opts->max_idle_threads = 10; -+ opts->rlimit_nofile = get_default_rlimit_nofile(); - opts->foreground = 1; - - if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 50ff672..184ad0f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2711,24 +2711,18 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, - setup_seccomp(enable_syslog); - } - --/* Raise the maximum number of open file descriptors */ --static void setup_nofile_rlimit(void) -+/* Set the maximum number of open file descriptors */ -+static void setup_nofile_rlimit(unsigned long rlimit_nofile) - { -- const rlim_t max_fds = 1000000; -- struct rlimit rlim; -- -- if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { -- fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n"); -- exit(1); -- } -+ struct rlimit rlim = { -+ .rlim_cur = rlimit_nofile, -+ .rlim_max = rlimit_nofile, -+ }; - -- if (rlim.rlim_cur >= max_fds) { -+ if (rlimit_nofile == 0) { - return; /* nothing to do */ - } - -- rlim.rlim_cur = max_fds; -- rlim.rlim_max = max_fds; -- - if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { - /* Ignore SELinux denials */ - if (errno == EPERM) { -@@ -2981,7 +2975,7 @@ int main(int argc, char *argv[]) - - fuse_daemonize(opts.foreground); - -- setup_nofile_rlimit(); -+ setup_nofile_rlimit(opts.rlimit_nofile); - - /* Must be before sandbox since it wants /proc */ - setup_capng(); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-add-seccomp-whitelist.patch b/SOURCES/kvm-virtiofsd-add-seccomp-whitelist.patch deleted file mode 100644 index b34108e..0000000 --- 
a/SOURCES/kvm-virtiofsd-add-seccomp-whitelist.patch +++ /dev/null @@ -1,285 +0,0 @@ -From 58c4e9473b364fb62aac797b0d69fd8ddb02c8c7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:30 +0100 -Subject: [PATCH 059/116] virtiofsd: add seccomp whitelist -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-56-dgilbert@redhat.com> -Patchwork-id: 93511 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 055/112] virtiofsd: add seccomp whitelist -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Only allow system calls that are needed by virtiofsd. All other system -calls cause SIGSYS to be directed at the thread and the process will -coredump. - -Restricting system calls reduces the kernel attack surface and limits -what the process can do when compromised. - -Signed-off-by: Stefan Hajnoczi -with additional entries by: -Signed-off-by: Ganesh Maharaj Mahalingam -Signed-off-by: Masayoshi Mizuma -Signed-off-by: Misono Tomohiro -Signed-off-by: piaojun -Signed-off-by: Vivek Goyal -Signed-off-by: Eric Ren -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 4f8bde99c175ffd86b5125098a4707d43f5e80c6) - -Signed-off-by: Miroslav Rezanina ---- - Makefile | 5 +- - tools/virtiofsd/Makefile.objs | 5 +- - tools/virtiofsd/passthrough_ll.c | 2 + - tools/virtiofsd/seccomp.c | 151 +++++++++++++++++++++++++++++++++++++++ - tools/virtiofsd/seccomp.h | 14 ++++ - 5 files changed, 174 insertions(+), 3 deletions(-) - create mode 100644 tools/virtiofsd/seccomp.c - create mode 100644 tools/virtiofsd/seccomp.h - -diff --git a/Makefile b/Makefile -index 0e9755d..6879a06 100644 ---- a/Makefile -+++ b/Makefile -@@ -330,7 +330,7 @@ endif - endif - endif - --ifdef CONFIG_LINUX -+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) - HELPERS-y += virtiofsd$(EXESUF) - vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json - endif -@@ -681,7 +681,8 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad" - rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) - $(call LINK, $^) - --ifdef CONFIG_LINUX # relies on Linux-specific syscalls -+# relies on Linux-specific syscalls -+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) - virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) - $(call LINK, $^) - endif -diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs -index 45a8075..076f667 100644 ---- a/tools/virtiofsd/Makefile.objs -+++ b/tools/virtiofsd/Makefile.objs -@@ -5,5 +5,8 @@ virtiofsd-obj-y = buffer.o \ - fuse_signals.o \ - fuse_virtio.o \ - helper.o \ -- passthrough_ll.o -+ passthrough_ll.o \ -+ seccomp.o - -+seccomp.o-cflags := $(SECCOMP_CFLAGS) -+seccomp.o-libs := $(SECCOMP_LIBS) -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 0947d14..bd8925b 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -59,6 +59,7 @@ - #include - - #include "passthrough_helpers.h" -+#include "seccomp.h" - - struct lo_map_elem { - union { -@@ -2091,6 +2092,7 @@ static void setup_sandbox(struct lo_data *lo, struct 
fuse_session *se) - { - setup_namespaces(lo, se); - setup_mounts(lo->source); -+ setup_seccomp(); - } - - int main(int argc, char *argv[]) -diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c -new file mode 100644 -index 0000000..691fb63 ---- /dev/null -+++ b/tools/virtiofsd/seccomp.c -@@ -0,0 +1,151 @@ -+/* -+ * Seccomp sandboxing for virtiofsd -+ * -+ * Copyright (C) 2019 Red Hat, Inc. -+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#include "qemu/osdep.h" -+#include "seccomp.h" -+#include "fuse_i.h" -+#include "fuse_log.h" -+#include -+#include -+#include -+#include -+ -+/* Bodge for libseccomp 2.4.2 which broke ppoll */ -+#if !defined(__SNR_ppoll) && defined(__SNR_brk) -+#ifdef __NR_ppoll -+#define __SNR_ppoll __NR_ppoll -+#else -+#define __SNR_ppoll __PNR_ppoll -+#endif -+#endif -+ -+static const int syscall_whitelist[] = { -+ /* TODO ireg sem*() syscalls */ -+ SCMP_SYS(brk), -+ SCMP_SYS(capget), /* For CAP_FSETID */ -+ SCMP_SYS(capset), -+ SCMP_SYS(clock_gettime), -+ SCMP_SYS(clone), -+#ifdef __NR_clone3 -+ SCMP_SYS(clone3), -+#endif -+ SCMP_SYS(close), -+ SCMP_SYS(copy_file_range), -+ SCMP_SYS(dup), -+ SCMP_SYS(eventfd2), -+ SCMP_SYS(exit), -+ SCMP_SYS(exit_group), -+ SCMP_SYS(fallocate), -+ SCMP_SYS(fchmodat), -+ SCMP_SYS(fchownat), -+ SCMP_SYS(fcntl), -+ SCMP_SYS(fdatasync), -+ SCMP_SYS(fgetxattr), -+ SCMP_SYS(flistxattr), -+ SCMP_SYS(flock), -+ SCMP_SYS(fremovexattr), -+ SCMP_SYS(fsetxattr), -+ SCMP_SYS(fstat), -+ SCMP_SYS(fstatfs), -+ SCMP_SYS(fsync), -+ SCMP_SYS(ftruncate), -+ SCMP_SYS(futex), -+ SCMP_SYS(getdents), -+ SCMP_SYS(getdents64), -+ SCMP_SYS(getegid), -+ SCMP_SYS(geteuid), -+ SCMP_SYS(getpid), -+ SCMP_SYS(gettid), -+ SCMP_SYS(gettimeofday), -+ SCMP_SYS(linkat), -+ SCMP_SYS(lseek), -+ SCMP_SYS(madvise), -+ SCMP_SYS(mkdirat), -+ SCMP_SYS(mknodat), -+ SCMP_SYS(mmap), -+ SCMP_SYS(mprotect), -+ SCMP_SYS(mremap), -+ SCMP_SYS(munmap), -+ SCMP_SYS(newfstatat), -+ SCMP_SYS(open), -+ SCMP_SYS(openat), -+ SCMP_SYS(ppoll), 
-+ SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */ -+ SCMP_SYS(preadv), -+ SCMP_SYS(pread64), -+ SCMP_SYS(pwritev), -+ SCMP_SYS(pwrite64), -+ SCMP_SYS(read), -+ SCMP_SYS(readlinkat), -+ SCMP_SYS(recvmsg), -+ SCMP_SYS(renameat), -+ SCMP_SYS(renameat2), -+ SCMP_SYS(rt_sigaction), -+ SCMP_SYS(rt_sigprocmask), -+ SCMP_SYS(rt_sigreturn), -+ SCMP_SYS(sendmsg), -+ SCMP_SYS(setresgid), -+ SCMP_SYS(setresuid), -+#ifdef __NR_setresgid32 -+ SCMP_SYS(setresgid32), -+#endif -+#ifdef __NR_setresuid32 -+ SCMP_SYS(setresuid32), -+#endif -+ SCMP_SYS(set_robust_list), -+ SCMP_SYS(symlinkat), -+ SCMP_SYS(time), /* Rarely needed, except on static builds */ -+ SCMP_SYS(tgkill), -+ SCMP_SYS(unlinkat), -+ SCMP_SYS(utimensat), -+ SCMP_SYS(write), -+ SCMP_SYS(writev), -+}; -+ -+void setup_seccomp(void) -+{ -+ scmp_filter_ctx ctx; -+ size_t i; -+ -+#ifdef SCMP_ACT_KILL_PROCESS -+ ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); -+ /* Handle a newer libseccomp but an older kernel */ -+ if (!ctx && errno == EOPNOTSUPP) { -+ ctx = seccomp_init(SCMP_ACT_TRAP); -+ } -+#else -+ ctx = seccomp_init(SCMP_ACT_TRAP); -+#endif -+ if (!ctx) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n"); -+ exit(1); -+ } -+ -+ for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) { -+ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, -+ syscall_whitelist[i], 0) != 0) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d", -+ syscall_whitelist[i]); -+ exit(1); -+ } -+ } -+ -+ /* libvhost-user calls this for post-copy migration, we don't need it */ -+ if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS), -+ SCMP_SYS(userfaultfd), 0) != 0) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n"); -+ exit(1); -+ } -+ -+ if (seccomp_load(ctx) < 0) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n"); -+ exit(1); -+ } -+ -+ seccomp_release(ctx); -+} -diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h -new file mode 100644 -index 0000000..86bce72 ---- /dev/null -+++ b/tools/virtiofsd/seccomp.h 
-@@ -0,0 +1,14 @@ -+/* -+ * Seccomp sandboxing for virtiofsd -+ * -+ * Copyright (C) 2019 Red Hat, Inc. -+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#ifndef VIRTIOFSD_SECCOMP_H -+#define VIRTIOFSD_SECCOMP_H -+ -+void setup_seccomp(void); -+ -+#endif /* VIRTIOFSD_SECCOMP_H */ --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-add-some-options-to-the-help-message.patch b/SOURCES/kvm-virtiofsd-add-some-options-to-the-help-message.patch deleted file mode 100644 index ac6dc54..0000000 --- a/SOURCES/kvm-virtiofsd-add-some-options-to-the-help-message.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 6d62abb99b6b918f05f099b01a99f4326a69d650 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:26 +0100 -Subject: [PATCH 115/116] virtiofsd: add some options to the help message -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-112-dgilbert@redhat.com> -Patchwork-id: 93565 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 111/112] virtiofsd: add some options to the help message -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Masayoshi Mizuma - -Add following options to the help message: -- cache -- flock|no_flock -- norace -- posix_lock|no_posix_lock -- readdirplus|no_readdirplus -- timeout -- writeback|no_writeback -- xattr|no_xattr - -Signed-off-by: Masayoshi Mizuma - -dgilbert: Split cache, norace, posix_lock, readdirplus off - into our own earlier patches that added the options - -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 1d59b1b210d7c3b0bdf4b10ebe0bb1fccfcb8b95) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index f98d8f2..0801cf7 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -148,6 +148,8 @@ void fuse_cmdline_help(void) - " -o cache= cache mode. could be one of \"auto, " - "always, none\"\n" - " default: auto\n" -+ " -o flock|no_flock enable/disable flock\n" -+ " default: no_flock\n" - " -o log_level= log level, default to \"info\"\n" - " level could be one of \"debug, " - "info, warn, err\"\n" -@@ -163,7 +165,13 @@ void fuse_cmdline_help(void) - " enable/disable readirplus\n" - " default: readdirplus except with " - "cache=none\n" -- ); -+ " -o timeout= I/O timeout (second)\n" -+ " default: depends on cache= option.\n" -+ " -o writeback|no_writeback enable/disable writeback cache\n" -+ " default: no_writeback\n" -+ " -o xattr|no_xattr enable/disable xattr\n" -+ " default: no_xattr\n" -+ ); - } - - static int fuse_helper_opt_proc(void *data, const char *arg, int key, --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-add-syslog-command-line-option.patch b/SOURCES/kvm-virtiofsd-add-syslog-command-line-option.patch deleted file mode 100644 index 5b55342..0000000 --- a/SOURCES/kvm-virtiofsd-add-syslog-command-line-option.patch +++ /dev/null @@ -1,239 +0,0 @@ -From 6f5cf644bebc189bdb16f1caf3d7c47835d7c287 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:36 +0100 -Subject: [PATCH 065/116] virtiofsd: add --syslog command-line option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-62-dgilbert@redhat.com> -Patchwork-id: 93509 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 061/112] virtiofsd: add --syslog command-line option -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Sometimes collecting output from stderr is inconvenient or does not fit -within the overall logging architecture. Add syslog(3) support for -cases where stderr cannot be used. - -Signed-off-by: Stefan Hajnoczi -dgilbert: Reworked as a logging function -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit f185621d41f03a23b55795b89e6584253fa23505) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.h | 1 + - tools/virtiofsd/helper.c | 2 ++ - tools/virtiofsd/passthrough_ll.c | 50 +++++++++++++++++++++++++++++++++++++--- - tools/virtiofsd/seccomp.c | 32 +++++++++++++++++-------- - tools/virtiofsd/seccomp.h | 4 +++- - 5 files changed, 76 insertions(+), 13 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 0d61df8..f2750bc 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1795,6 +1795,7 @@ struct fuse_cmdline_opts { - int show_version; - int show_help; - int print_capabilities; -+ int syslog; - unsigned int max_idle_threads; - }; - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5531425..9692ef9 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -54,6 +54,7 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_HELPER_OPT("subtype=", nodefault_subtype), - FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), -+ FUSE_HELPER_OPT("--syslog", syslog), - FUSE_OPT_END - }; - -@@ -138,6 +139,7 @@ void fuse_cmdline_help(void) - " -V --version print 
version\n" - " --print-capabilities print vhost-user.json\n" - " -d -o debug enable debug output (implies -f)\n" -+ " --syslog log to syslog (default stderr)\n" - " -f foreground operation\n" - " --daemonize run in background\n" - " -o max_idle_threads the maximum number of idle worker " -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index c281d81..0372aca 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -58,6 +58,7 @@ - #include - #include - #include -+#include - #include - - #include "passthrough_helpers.h" -@@ -138,6 +139,7 @@ static const struct fuse_opt lo_opts[] = { - { "norace", offsetof(struct lo_data, norace), 1 }, - FUSE_OPT_END - }; -+static bool use_syslog = false; - - static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); - -@@ -2262,11 +2264,12 @@ static void setup_mounts(const char *source) - * Lock down this process to prevent access to other processes or files outside - * source directory. This reduces the impact of arbitrary code execution bugs. 
- */ --static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) -+static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, -+ bool enable_syslog) - { - setup_namespaces(lo, se); - setup_mounts(lo->source); -- setup_seccomp(); -+ setup_seccomp(enable_syslog); - } - - /* Raise the maximum number of open file descriptors */ -@@ -2298,6 +2301,42 @@ static void setup_nofile_rlimit(void) - } - } - -+static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) -+{ -+ if (use_syslog) { -+ int priority = LOG_ERR; -+ switch (level) { -+ case FUSE_LOG_EMERG: -+ priority = LOG_EMERG; -+ break; -+ case FUSE_LOG_ALERT: -+ priority = LOG_ALERT; -+ break; -+ case FUSE_LOG_CRIT: -+ priority = LOG_CRIT; -+ break; -+ case FUSE_LOG_ERR: -+ priority = LOG_ERR; -+ break; -+ case FUSE_LOG_WARNING: -+ priority = LOG_WARNING; -+ break; -+ case FUSE_LOG_NOTICE: -+ priority = LOG_NOTICE; -+ break; -+ case FUSE_LOG_INFO: -+ priority = LOG_INFO; -+ break; -+ case FUSE_LOG_DEBUG: -+ priority = LOG_DEBUG; -+ break; -+ } -+ vsyslog(priority, fmt, ap); -+ } else { -+ vfprintf(stderr, fmt, ap); -+ } -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2336,6 +2375,11 @@ int main(int argc, char *argv[]) - if (fuse_parse_cmdline(&args, &opts) != 0) { - return 1; - } -+ fuse_set_log_func(log_func); -+ use_syslog = opts.syslog; -+ if (use_syslog) { -+ openlog("virtiofsd", LOG_PID, LOG_DAEMON); -+ } - if (opts.show_help) { - printf("usage: %s [options]\n\n", argv[0]); - fuse_cmdline_help(); -@@ -2424,7 +2468,7 @@ int main(int argc, char *argv[]) - /* Must be before sandbox since it wants /proc */ - setup_capng(); - -- setup_sandbox(&lo, se); -+ setup_sandbox(&lo, se, opts.syslog); - - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); -diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c -index 691fb63..2d9d4a7 100644 ---- a/tools/virtiofsd/seccomp.c -+++ 
b/tools/virtiofsd/seccomp.c -@@ -107,11 +107,28 @@ static const int syscall_whitelist[] = { - SCMP_SYS(writev), - }; - --void setup_seccomp(void) -+/* Syscalls used when --syslog is enabled */ -+static const int syscall_whitelist_syslog[] = { -+ SCMP_SYS(sendto), -+}; -+ -+static void add_whitelist(scmp_filter_ctx ctx, const int syscalls[], size_t len) - { -- scmp_filter_ctx ctx; - size_t i; - -+ for (i = 0; i < len; i++) { -+ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscalls[i], 0) != 0) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d failed\n", -+ syscalls[i]); -+ exit(1); -+ } -+ } -+} -+ -+void setup_seccomp(bool enable_syslog) -+{ -+ scmp_filter_ctx ctx; -+ - #ifdef SCMP_ACT_KILL_PROCESS - ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); - /* Handle a newer libseccomp but an older kernel */ -@@ -126,13 +143,10 @@ void setup_seccomp(void) - exit(1); - } - -- for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) { -- if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, -- syscall_whitelist[i], 0) != 0) { -- fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d", -- syscall_whitelist[i]); -- exit(1); -- } -+ add_whitelist(ctx, syscall_whitelist, G_N_ELEMENTS(syscall_whitelist)); -+ if (enable_syslog) { -+ add_whitelist(ctx, syscall_whitelist_syslog, -+ G_N_ELEMENTS(syscall_whitelist_syslog)); - } - - /* libvhost-user calls this for post-copy migration, we don't need it */ -diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h -index 86bce72..d47c8ea 100644 ---- a/tools/virtiofsd/seccomp.h -+++ b/tools/virtiofsd/seccomp.h -@@ -9,6 +9,8 @@ - #ifndef VIRTIOFSD_SECCOMP_H - #define VIRTIOFSD_SECCOMP_H - --void setup_seccomp(void); -+#include -+ -+void setup_seccomp(bool enable_syslog); - - #endif /* VIRTIOFSD_SECCOMP_H */ --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch b/SOURCES/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch deleted file mode 100644 index 0241a9d..0000000 --- 
a/SOURCES/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 3dbfb932288eb5a55dfdc0eebca7e4c7f0cf6f33 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:22 +0100 -Subject: [PATCH 111/116] virtiofsd: add --thread-pool-size=NUM option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-108-dgilbert@redhat.com> -Patchwork-id: 93561 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 107/112] virtiofsd: add --thread-pool-size=NUM option -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Add an option to control the size of the thread pool. Requests are now -processed in parallel by default. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 951b3120dbc971f08681e1d860360e4a1e638902) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 1 + - tools/virtiofsd/fuse_lowlevel.c | 7 ++++++- - tools/virtiofsd/fuse_virtio.c | 5 +++-- - 3 files changed, 10 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 1447d86..4e47e58 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -72,6 +72,7 @@ struct fuse_session { - int vu_listen_fd; - int vu_socketfd; - struct fv_VuDev *virtio_dev; -+ int thread_pool_size; - }; - - struct fuse_chan { -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 79a4031..de2e2e0 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -28,6 +28,7 @@ - #include - #include - -+#define THREAD_POOL_SIZE 64 - - #define OFFSET_MAX 0x7fffffffffffffffLL - -@@ -2519,6 +2520,7 @@ static const struct fuse_opt fuse_ll_opts[] = { 
- LL_OPTION("allow_root", deny_others, 1), - LL_OPTION("--socket-path=%s", vu_socket_path, 0), - LL_OPTION("--fd=%d", vu_listen_fd, 0), -+ LL_OPTION("--thread-pool-size=%d", thread_pool_size, 0), - FUSE_OPT_END - }; - -@@ -2537,7 +2539,9 @@ void fuse_lowlevel_help(void) - printf( - " -o allow_root allow access by root\n" - " --socket-path=PATH path for the vhost-user socket\n" -- " --fd=FDNUM fd number of vhost-user socket\n"); -+ " --fd=FDNUM fd number of vhost-user socket\n" -+ " --thread-pool-size=NUM thread pool size limit (default %d)\n", -+ THREAD_POOL_SIZE); - } - - void fuse_session_destroy(struct fuse_session *se) -@@ -2591,6 +2595,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - } - se->fd = -1; - se->vu_listen_fd = -1; -+ se->thread_pool_size = THREAD_POOL_SIZE; - se->conn.max_write = UINT_MAX; - se->conn.max_readahead = UINT_MAX; - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 0dcf2ef..9f65823 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -572,10 +572,11 @@ static void *fv_queue_thread(void *opaque) - struct fv_QueueInfo *qi = opaque; - struct VuDev *dev = &qi->virtio_dev->dev; - struct VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ struct fuse_session *se = qi->virtio_dev->se; - GThreadPool *pool; - -- pool = g_thread_pool_new(fv_queue_worker, qi, 1 /* TODO max_threads */, -- TRUE, NULL); -+ pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, TRUE, -+ NULL); - if (!pool) { - fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); - return NULL; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-add-vhost-user.json-file.patch b/SOURCES/kvm-virtiofsd-add-vhost-user.json-file.patch deleted file mode 100644 index a24b24f..0000000 --- a/SOURCES/kvm-virtiofsd-add-vhost-user.json-file.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 77eb3258e76a1ac240503572d4f41d45cb832ba2 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:09 +0100 -Subject: [PATCH 038/116] virtiofsd: add vhost-user.json file -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-35-dgilbert@redhat.com> -Patchwork-id: 93490 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 034/112] virtiofsd: add vhost-user.json file -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Install a vhost-user.json file describing virtiofsd. This allows -libvirt and other management tools to enumerate vhost-user backend -programs. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 315616ed50ba15a5d7236ade8a402a93898202de) -Signed-off-by: Miroslav Rezanina ---- - .gitignore | 1 + - Makefile | 1 + - tools/virtiofsd/50-qemu-virtiofsd.json.in | 5 +++++ - 3 files changed, 7 insertions(+) - create mode 100644 tools/virtiofsd/50-qemu-virtiofsd.json.in - -diff --git a/.gitignore b/.gitignore -index aefad32..d7a4f99 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -6,6 +6,7 @@ - /config-target.* - /config.status - /config-temp -+/tools/virtiofsd/50-qemu-virtiofsd.json - /elf2dmp - /trace-events-all - /trace/generated-events.h -diff --git a/Makefile b/Makefile -index 1526775..0e9755d 100644 ---- a/Makefile -+++ b/Makefile -@@ -332,6 +332,7 @@ endif - - ifdef CONFIG_LINUX - HELPERS-y += virtiofsd$(EXESUF) -+vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json - endif - - # Sphinx does not allow building manuals into the same directory as -diff --git a/tools/virtiofsd/50-qemu-virtiofsd.json.in b/tools/virtiofsd/50-qemu-virtiofsd.json.in -new file mode 100644 -index 0000000..9bcd86f ---- /dev/null -+++ b/tools/virtiofsd/50-qemu-virtiofsd.json.in -@@ -0,0 +1,5 @@ -+{ -+ "description": "QEMU virtiofsd vhost-user-fs", -+ 
"type": "fs", -+ "binary": "@libexecdir@/virtiofsd" -+} --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-cap-ng-helpers.patch b/SOURCES/kvm-virtiofsd-cap-ng-helpers.patch deleted file mode 100644 index 305745d..0000000 --- a/SOURCES/kvm-virtiofsd-cap-ng-helpers.patch +++ /dev/null @@ -1,175 +0,0 @@ -From f62613d8058bcb60b26727d980a37537103b0033 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:32 +0100 -Subject: [PATCH 061/116] virtiofsd: cap-ng helpers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-58-dgilbert@redhat.com> -Patchwork-id: 93512 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 057/112] virtiofsd: cap-ng helpers -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -libcap-ng reads /proc during capng_get_caps_process, and virtiofsd's -sandboxing doesn't have /proc mounted; thus we have to do the -caps read before we sandbox it and save/restore the state. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 2405f3c0d19eb4d516a88aa4e5c54e5f9c6bbea3) -Signed-off-by: Miroslav Rezanina ---- - Makefile | 4 +-- - tools/virtiofsd/passthrough_ll.c | 72 ++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 74 insertions(+), 2 deletions(-) - -diff --git a/Makefile b/Makefile -index 6879a06..ff05c30 100644 ---- a/Makefile -+++ b/Makefile -@@ -330,7 +330,7 @@ endif - endif - endif - --ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) -+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) - HELPERS-y += virtiofsd$(EXESUF) - vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json - endif -@@ -682,7 +682,7 @@ rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) - $(call LINK, $^) - - # relies on Linux-specific syscalls --ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) -+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) - virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) - $(call LINK, $^) - endif -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index bd8925b..97e7c75 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -39,6 +39,7 @@ - #include "fuse_virtio.h" - #include "fuse_lowlevel.h" - #include -+#include - #include - #include - #include -@@ -139,6 +140,13 @@ static const struct fuse_opt lo_opts[] = { - - static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); - -+static struct { -+ pthread_mutex_t mutex; -+ void *saved; -+} cap; -+/* That we loaded cap-ng in the current thread from the saved */ -+static __thread bool cap_loaded = 0; -+ - static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); - - static int is_dot_or_dotdot(const char *name) -@@ -162,6 +170,37 @@ static struct lo_data *lo_data(fuse_req_t req) - return (struct lo_data *)fuse_req_userdata(req); - } - -+/* -+ * Load capng's state from our saved state if the current thread -+ * hadn't previously been loaded. 
-+ * returns 0 on success -+ */ -+static int load_capng(void) -+{ -+ if (!cap_loaded) { -+ pthread_mutex_lock(&cap.mutex); -+ capng_restore_state(&cap.saved); -+ /* -+ * restore_state free's the saved copy -+ * so make another. -+ */ -+ cap.saved = capng_save_state(); -+ if (!cap.saved) { -+ fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); -+ return -EINVAL; -+ } -+ pthread_mutex_unlock(&cap.mutex); -+ -+ /* -+ * We want to use the loaded state for our pid, -+ * not the original -+ */ -+ capng_setpid(syscall(SYS_gettid)); -+ cap_loaded = true; -+ } -+ return 0; -+} -+ - static void lo_map_init(struct lo_map *map) - { - map->elems = NULL; -@@ -2024,6 +2063,35 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) - } - - /* -+ * Capture the capability state, we'll need to restore this for individual -+ * threads later; see load_capng. -+ */ -+static void setup_capng(void) -+{ -+ /* Note this accesses /proc so has to happen before the sandbox */ -+ if (capng_get_caps_process()) { -+ fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n"); -+ exit(1); -+ } -+ pthread_mutex_init(&cap.mutex, NULL); -+ pthread_mutex_lock(&cap.mutex); -+ cap.saved = capng_save_state(); -+ if (!cap.saved) { -+ fuse_log(FUSE_LOG_ERR, "capng_save_state\n"); -+ exit(1); -+ } -+ pthread_mutex_unlock(&cap.mutex); -+} -+ -+static void cleanup_capng(void) -+{ -+ free(cap.saved); -+ cap.saved = NULL; -+ pthread_mutex_destroy(&cap.mutex); -+} -+ -+ -+/* - * Make the source directory our root so symlinks cannot escape and no other - * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. 
- */ -@@ -2216,12 +2284,16 @@ int main(int argc, char *argv[]) - - fuse_daemonize(opts.foreground); - -+ /* Must be before sandbox since it wants /proc */ -+ setup_capng(); -+ - setup_sandbox(&lo, se); - - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); - - fuse_session_unmount(se); -+ cleanup_capng(); - err_out3: - fuse_remove_signal_handlers(se); - err_out2: --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch b/SOURCES/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch deleted file mode 100644 index caa4560..0000000 --- a/SOURCES/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch +++ /dev/null @@ -1,1111 +0,0 @@ -From d6a0067e6c08523a8f605f775be980eaf0a23690 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:23 +0100 -Subject: [PATCH 052/116] virtiofsd: check input buffer size in fuse_lowlevel.c - ops -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-49-dgilbert@redhat.com> -Patchwork-id: 93503 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 048/112] virtiofsd: check input buffer size in fuse_lowlevel.c ops -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Each FUSE operation involves parsing the input buffer. Currently the -code assumes the input buffer is large enough for the expected -arguments. This patch uses fuse_mbuf_iter to check the size. - -Most operations are simple to convert. Some are more complicated due to -variable-length inputs or different sizes depending on the protocol -version. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 70995754416eb4491c31607fe380a83cfd25a087) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 581 +++++++++++++++++++++++++++++++--------- - 1 file changed, 456 insertions(+), 125 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 611e8b0..02e1d83 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -27,7 +28,6 @@ - #include - - --#define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) - #define OFFSET_MAX 0x7fffffffffffffffLL - - struct fuse_pollhandle { -@@ -706,9 +706,14 @@ int fuse_reply_lseek(fuse_req_t req, off_t off) - return send_reply_ok(req, &arg, sizeof(arg)); - } - --static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ if (!name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.lookup) { - req->se->op.lookup(req, nodeid, name); -@@ -717,9 +722,16 @@ static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_forget(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_forget_in *arg = (struct fuse_forget_in *)inarg; -+ struct fuse_forget_in *arg; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.forget) { - req->se->op.forget(req, nodeid, arg->nlookup); -@@ -729,20 +741,48 @@ static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - - static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, -- const void 
*inarg) -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_batch_forget_in *arg = (void *)inarg; -- struct fuse_forget_one *param = (void *)PARAM(arg); -- unsigned int i; -+ struct fuse_batch_forget_in *arg; -+ struct fuse_forget_data *forgets; -+ size_t scount; - - (void)nodeid; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_none(req); -+ return; -+ } -+ -+ /* -+ * Prevent integer overflow. The compiler emits the following warning -+ * unless we use the scount local variable: -+ * -+ * error: comparison is always false due to limited range of data type -+ * [-Werror=type-limits] -+ * -+ * This may be true on 64-bit hosts but we need this check for 32-bit -+ * hosts. -+ */ -+ scount = arg->count; -+ if (scount > SIZE_MAX / sizeof(forgets[0])) { -+ fuse_reply_none(req); -+ return; -+ } -+ -+ forgets = fuse_mbuf_iter_advance(iter, arg->count * sizeof(forgets[0])); -+ if (!forgets) { -+ fuse_reply_none(req); -+ return; -+ } -+ - if (req->se->op.forget_multi) { -- req->se->op.forget_multi(req, arg->count, -- (struct fuse_forget_data *)param); -+ req->se->op.forget_multi(req, arg->count, forgets); - } else if (req->se->op.forget) { -+ unsigned int i; -+ - for (i = 0; i < arg->count; i++) { -- struct fuse_forget_one *forget = ¶m[i]; - struct fuse_req *dummy_req; - - dummy_req = fuse_ll_alloc_req(req->se); -@@ -754,7 +794,7 @@ static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, - dummy_req->ctx = req->ctx; - dummy_req->ch = NULL; - -- req->se->op.forget(dummy_req, forget->nodeid, forget->nlookup); -+ req->se->op.forget(dummy_req, forgets[i].ino, forgets[i].nlookup); - } - fuse_reply_none(req); - } else { -@@ -762,12 +802,19 @@ static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, - } - } - --static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { - struct fuse_file_info *fip = NULL; - struct 
fuse_file_info fi; - -- struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; -+ struct fuse_getattr_in *arg; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (arg->getattr_flags & FUSE_GETATTR_FH) { - memset(&fi, 0, sizeof(fi)); -@@ -782,14 +829,21 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_setattr_in *arg = (struct fuse_setattr_in *)inarg; -- - if (req->se->op.setattr) { -+ struct fuse_setattr_in *arg; - struct fuse_file_info *fi = NULL; - struct fuse_file_info fi_store; - struct stat stbuf; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&stbuf, 0, sizeof(stbuf)); - convert_attr(arg, &stbuf); - if (arg->valid & FATTR_FH) { -@@ -810,9 +864,16 @@ static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_access(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_access_in *arg = (struct fuse_access_in *)inarg; -+ struct fuse_access_in *arg; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.access) { - req->se->op.access(req, nodeid, arg->mask); -@@ -821,9 +882,10 @@ static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- (void)inarg; -+ (void)iter; - - if (req->se->op.readlink) { - req->se->op.readlink(req, nodeid); 
-@@ -832,10 +894,18 @@ static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; -- char *name = PARAM(arg); -+ struct fuse_mknod_in *arg; -+ const char *name; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - req->ctx.umask = arg->umask; - -@@ -846,22 +916,37 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; -+ struct fuse_mkdir_in *arg; -+ const char *name; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - req->ctx.umask = arg->umask; - - if (req->se->op.mkdir) { -- req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -+ req->se->op.mkdir(req, nodeid, name, arg->mode); - } else { - fuse_reply_err(req, ENOSYS); - } - } - --static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.unlink) { - req->se->op.unlink(req, nodeid, name); -@@ -870,9 +955,15 @@ static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void 
*inarg) -+static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.rmdir) { - req->se->op.rmdir(req, nodeid, name); -@@ -881,10 +972,16 @@ static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -- char *linkname = ((char *)inarg) + strlen((char *)inarg) + 1; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ const char *linkname = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!name || !linkname) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.symlink) { - req->se->op.symlink(req, linkname, nodeid, name); -@@ -893,11 +990,20 @@ static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_rename(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_rename_in *arg = (struct fuse_rename_in *)inarg; -- char *oldname = PARAM(arg); -- char *newname = oldname + strlen(oldname) + 1; -+ struct fuse_rename_in *arg; -+ const char *oldname; -+ const char *newname; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ oldname = fuse_mbuf_iter_advance_str(iter); -+ newname = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !oldname || !newname) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.rename) { - req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0); -@@ -906,11 +1012,20 @@ static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, 
const void *inarg) -+static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_rename2_in *arg = (struct fuse_rename2_in *)inarg; -- char *oldname = PARAM(arg); -- char *newname = oldname + strlen(oldname) + 1; -+ struct fuse_rename2_in *arg; -+ const char *oldname; -+ const char *newname; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ oldname = fuse_mbuf_iter_advance_str(iter); -+ newname = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !oldname || !newname) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.rename) { - req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -@@ -920,24 +1035,38 @@ static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_link(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_link_in *arg = (struct fuse_link_in *)inarg; -+ struct fuse_link_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.link) { -- req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); -+ req->se->op.link(req, arg->oldnodeid, nodeid, name); - } else { - fuse_reply_err(req, ENOSYS); - } - } - --static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_create(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_create_in *arg = (struct fuse_create_in *)inarg; -- - if (req->se->op.create) { -+ struct fuse_create_in *arg; - struct fuse_file_info fi; -- char *name = PARAM(arg); -+ const char *name; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - memset(&fi, 0, sizeof(fi)); - 
fi.flags = arg->flags; -@@ -950,11 +1079,18 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_open(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_open_in *arg = (struct fuse_open_in *)inarg; -+ struct fuse_open_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - -@@ -965,13 +1101,15 @@ static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_read(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -- - if (req->se->op.read) { -+ struct fuse_read_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.lock_owner = arg->lock_owner; -@@ -982,11 +1120,24 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_write(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -+ struct fuse_write_in *arg; - struct fuse_file_info fi; -- char *param; -+ const char *param; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ param = fuse_mbuf_iter_advance(iter, arg->size); -+ if (!param) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; -@@ -994,7 +1145,6 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - - 
fi.lock_owner = arg->lock_owner; - fi.flags = arg->flags; -- param = PARAM(arg); - - if (req->se->op.write) { - req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); -@@ -1052,11 +1202,18 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, - se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); - } - --static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_flush(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_flush_in *arg = (struct fuse_flush_in *)inarg; -+ struct fuse_flush_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.flush = 1; -@@ -1069,19 +1226,26 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_release(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_release_in *arg = (struct fuse_release_in *)inarg; -+ struct fuse_release_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - fi.fh = arg->fh; - fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 
1 : 0; - fi.lock_owner = arg->lock_owner; -+ - if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { - fi.flock_release = 1; -- fi.lock_owner = arg->lock_owner; - } - - if (req->se->op.release) { -@@ -1091,11 +1255,19 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; -+ struct fuse_fsync_in *arg; - struct fuse_file_info fi; -- int datasync = arg->fsync_flags & 1; -+ int datasync; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ datasync = arg->fsync_flags & 1; - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; -@@ -1111,11 +1283,18 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_open_in *arg = (struct fuse_open_in *)inarg; -+ struct fuse_open_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - -@@ -1126,11 +1305,18 @@ static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -+ struct fuse_read_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - 
fi.fh = arg->fh; - -@@ -1141,11 +1327,18 @@ static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -+ struct fuse_read_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -@@ -1156,11 +1349,18 @@ static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_release_in *arg = (struct fuse_release_in *)inarg; -+ struct fuse_release_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - fi.fh = arg->fh; -@@ -1172,11 +1372,19 @@ static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; -+ struct fuse_fsync_in *arg; - struct fuse_file_info fi; -- int datasync = arg->fsync_flags & 1; -+ int datasync; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ datasync = arg->fsync_flags & 1; - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; -@@ -1188,10 +1396,11 @@ static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) 
- } - } - --static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { - (void)nodeid; -- (void)inarg; -+ (void)iter; - - if (req->se->op.statfs) { - req->se->op.statfs(req, nodeid); -@@ -1204,11 +1413,25 @@ static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *)inarg; -- char *name = PARAM(arg); -- char *value = name + strlen(name) + 1; -+ struct fuse_setxattr_in *arg; -+ const char *name; -+ const char *value; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ value = fuse_mbuf_iter_advance(iter, arg->size); -+ if (!value) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.setxattr) { - req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags); -@@ -1217,20 +1440,36 @@ static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; -+ struct fuse_getxattr_in *arg; -+ const char *name; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.getxattr) { -- req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); -+ req->se->op.getxattr(req, nodeid, name, arg->size); - } else { - fuse_reply_err(req, ENOSYS); - } - } - --static void 
do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; -+ struct fuse_getxattr_in *arg; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.listxattr) { - req->se->op.listxattr(req, nodeid, arg->size); -@@ -1239,9 +1478,15 @@ static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.removexattr) { - req->se->op.removexattr(req, nodeid, name); -@@ -1265,12 +1510,19 @@ static void convert_fuse_file_lock(struct fuse_file_lock *fl, - flock->l_pid = fl->pid; - } - --static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; -+ struct fuse_lk_in *arg; - struct fuse_file_info fi; - struct flock flock; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.lock_owner = arg->owner; -@@ -1284,12 +1536,18 @@ static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - - static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg, int sleep) -+ struct fuse_mbuf_iter *iter, int sleep) - { -- struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; -+ struct fuse_lk_in *arg; - struct fuse_file_info fi; - struct flock 
flock; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.lock_owner = arg->owner; -@@ -1327,14 +1585,16 @@ static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, - } - } - --static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- do_setlk_common(req, nodeid, inarg, 0); -+ do_setlk_common(req, nodeid, iter, 0); - } - --static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- do_setlk_common(req, nodeid, inarg, 1); -+ do_setlk_common(req, nodeid, iter, 1); - } - - static int find_interrupted(struct fuse_session *se, struct fuse_req *req) -@@ -1379,12 +1639,20 @@ static int find_interrupted(struct fuse_session *se, struct fuse_req *req) - return 0; - } - --static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *)inarg; -+ struct fuse_interrupt_in *arg; - struct fuse_session *se = req->se; - - (void)nodeid; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", - (unsigned long long)arg->unique); -@@ -1425,9 +1693,15 @@ static struct fuse_req *check_interrupt(struct fuse_session *se, - } - } - --static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_bmap_in *arg = (struct fuse_bmap_in *)inarg; -+ struct fuse_bmap_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ -+ if 
(!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.bmap) { - req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); -@@ -1436,18 +1710,34 @@ static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *)inarg; -- unsigned int flags = arg->flags; -- void *in_buf = arg->in_size ? PARAM(arg) : NULL; -+ struct fuse_ioctl_in *arg; -+ unsigned int flags; -+ void *in_buf = NULL; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ flags = arg->flags; - if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { - fuse_reply_err(req, ENOTTY); - return; - } - -+ if (arg->in_size) { -+ in_buf = fuse_mbuf_iter_advance(iter, arg->in_size); -+ if (!in_buf) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -@@ -1468,11 +1758,18 @@ void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) - free(ph); - } - --static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_poll(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_poll_in *arg = (struct fuse_poll_in *)inarg; -+ struct fuse_poll_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.poll_events = arg->events; -@@ -1496,11 +1793,18 @@ static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, -+ 
struct fuse_mbuf_iter *iter) - { -- struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *)inarg; -+ struct fuse_fallocate_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -@@ -1513,12 +1817,17 @@ static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - - static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, -- const void *inarg) -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_copy_file_range_in *arg = -- (struct fuse_copy_file_range_in *)inarg; -+ struct fuse_copy_file_range_in *arg; - struct fuse_file_info fi_in, fi_out; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi_in, 0, sizeof(fi_in)); - fi_in.fh = arg->fh_in; - -@@ -1535,11 +1844,17 @@ static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, - } - } - --static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_lseek_in *arg = (struct fuse_lseek_in *)inarg; -+ struct fuse_lseek_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -@@ -1550,15 +1865,33 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_init(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_init_in *arg = (struct fuse_init_in *)inarg; -+ size_t compat_size = offsetof(struct fuse_init_in, max_readahead); -+ struct fuse_init_in *arg; - struct fuse_init_out outarg; - struct fuse_session *se = req->se; - 
size_t bufsize = se->bufsize; - size_t outargsize = sizeof(outarg); - - (void)nodeid; -+ -+ /* First consume the old fields... */ -+ arg = fuse_mbuf_iter_advance(iter, compat_size); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ /* ...and now consume the new fields. */ -+ if (arg->major == 7 && arg->minor >= 6) { -+ if (!fuse_mbuf_iter_advance(iter, sizeof(*arg) - compat_size)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ } -+ - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); - if (arg->major == 7 && arg->minor >= 6) { -@@ -1791,12 +2124,13 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - send_reply_ok(req, &outarg, outargsize); - } - --static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { - struct fuse_session *se = req->se; - - (void)nodeid; -- (void)inarg; -+ (void)iter; - - se->got_destroy = 1; - if (se->op.destroy) { -@@ -1976,7 +2310,7 @@ int fuse_req_interrupted(fuse_req_t req) - } - - static struct { -- void (*func)(fuse_req_t, fuse_ino_t, const void *); -+ void (*func)(fuse_req_t, fuse_ino_t, struct fuse_mbuf_iter *); - const char *name; - } fuse_ll_ops[] = { - [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, -@@ -2060,7 +2394,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, - const struct fuse_buf *buf = bufv->buf; - struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf); - struct fuse_in_header *in; -- const void *inarg; - struct fuse_req *req; - int err; - -@@ -2138,13 +2471,11 @@ void fuse_session_process_buf_int(struct fuse_session *se, - } - } - -- inarg = (void *)&in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) { - do_write_buf(req, in->nodeid, &iter, bufv); - } else { -- fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); -+ fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter); - } -- - return; - - reply_err: --- 
-1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch b/SOURCES/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch deleted file mode 100644 index b6de0a9..0000000 --- a/SOURCES/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 99ff67682ef7c5659bdc9836008541861ae313d5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:56 +0100 -Subject: [PATCH 085/116] virtiofsd: cleanup allocated resource in se -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-82-dgilbert@redhat.com> -Patchwork-id: 93533 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 081/112] virtiofsd: cleanup allocated resource in se -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -This cleans up unfreed resources in se on quiting, including -se->virtio_dev, se->vu_socket_path, se->vu_socketfd. - -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 61cfc44982e566c33b9d5df17858e4d5ae373873) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 7 +++++++ - tools/virtiofsd/fuse_virtio.c | 7 +++++++ - tools/virtiofsd/fuse_virtio.h | 2 +- - 3 files changed, 15 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 65f91da..440508a 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2532,6 +2532,13 @@ void fuse_session_destroy(struct fuse_session *se) - if (se->fd != -1) { - close(se->fd); - } -+ -+ if (se->vu_socket_path) { -+ virtio_session_close(se); -+ free(se->vu_socket_path); -+ se->vu_socket_path = NULL; -+ } -+ - free(se); - } - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 7a8774a..e7bd772 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -833,3 +833,10 @@ int virtio_session_mount(struct fuse_session *se) - - return 0; - } -+ -+void virtio_session_close(struct fuse_session *se) -+{ -+ close(se->vu_socketfd); -+ free(se->virtio_dev); -+ se->virtio_dev = NULL; -+} -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -index cc676b9..1116840 100644 ---- a/tools/virtiofsd/fuse_virtio.h -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -19,7 +19,7 @@ - struct fuse_session; - - int virtio_session_mount(struct fuse_session *se); -- -+void virtio_session_close(struct fuse_session *se); - int virtio_loop(struct fuse_session *se); - - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch b/SOURCES/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch deleted file mode 100644 index d01b000..0000000 --- a/SOURCES/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch +++ /dev/null @@ -1,99 +0,0 @@ -From e00543b0384fba61a9c7274c73e11a25e7ab2946 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:13 +0100 -Subject: [PATCH 102/116] virtiofsd: convert more fprintf and perror to use - fuse log infra -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-99-dgilbert@redhat.com> -Patchwork-id: 93552 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 098/112] virtiofsd: convert more fprintf and perror to use fuse log infra -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Eryu Guan - -Signed-off-by: Eryu Guan -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Misono Tomohiro -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit fc1aed0bf96259d0b46b1cfea7497b7762c4ee3d) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_signals.c | 7 +++++-- - tools/virtiofsd/helper.c | 9 ++++++--- - 2 files changed, 11 insertions(+), 5 deletions(-) - -diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c -index dc7c8ac..f18625b 100644 ---- a/tools/virtiofsd/fuse_signals.c -+++ b/tools/virtiofsd/fuse_signals.c -@@ -12,6 +12,7 @@ - #include "fuse_i.h" - #include "fuse_lowlevel.h" - -+#include - #include - #include - #include -@@ -47,13 +48,15 @@ static int set_one_signal_handler(int sig, void (*handler)(int), int remove) - sa.sa_flags = 0; - - if (sigaction(sig, NULL, &old_sa) == -1) { -- perror("fuse: cannot get old signal handler"); -+ fuse_log(FUSE_LOG_ERR, "fuse: cannot get old signal handler: %s\n", -+ strerror(errno)); - return -1; - } - - if (old_sa.sa_handler == (remove ? 
handler : SIG_DFL) && - sigaction(sig, &sa, NULL) == -1) { -- perror("fuse: cannot set signal handler"); -+ fuse_log(FUSE_LOG_ERR, "fuse: cannot set signal handler: %s\n", -+ strerror(errno)); - return -1; - } - return 0; -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 33749bf..f98d8f2 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -208,7 +208,8 @@ int fuse_daemonize(int foreground) - char completed; - - if (pipe(waiter)) { -- perror("fuse_daemonize: pipe"); -+ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: pipe: %s\n", -+ strerror(errno)); - return -1; - } - -@@ -218,7 +219,8 @@ int fuse_daemonize(int foreground) - */ - switch (fork()) { - case -1: -- perror("fuse_daemonize: fork"); -+ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: fork: %s\n", -+ strerror(errno)); - return -1; - case 0: - break; -@@ -228,7 +230,8 @@ int fuse_daemonize(int foreground) - } - - if (setsid() == -1) { -- perror("fuse_daemonize: setsid"); -+ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: setsid: %s\n", -+ strerror(errno)); - return -1; - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch b/SOURCES/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch deleted file mode 100644 index 8c1022a..0000000 --- a/SOURCES/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 8e6473e906dfc7d2a62abaf1ec80ff461e4d201d Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:12 +0100 -Subject: [PATCH 101/116] virtiofsd: do not always set FUSE_FLOCK_LOCKS -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-98-dgilbert@redhat.com> -Patchwork-id: 93551 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 097/112] virtiofsd: do not always set FUSE_FLOCK_LOCKS -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Peng Tao - -Right now we always enable it regardless of given commandlines. -Fix it by setting the flag relying on the lo->flock bit. - -Signed-off-by: Peng Tao -Reviewed-by: Misono Tomohiro -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e468d4af5f5192ab33283464a9f6933044ce47f7) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index ab16135..ccbbec1 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -546,9 +546,14 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); - conn->want |= FUSE_CAP_WRITEBACK_CACHE; - } -- if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -- conn->want |= FUSE_CAP_FLOCK_LOCKS; -+ if (conn->capable & FUSE_CAP_FLOCK_LOCKS) { -+ if (lo->flock) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -+ conn->want |= FUSE_CAP_FLOCK_LOCKS; -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling flock locks\n"); -+ conn->want &= ~FUSE_CAP_FLOCK_LOCKS; -+ } - } - - if (conn->capable & FUSE_CAP_POSIX_LOCKS) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-do_read-missing-NULL-check.patch b/SOURCES/kvm-virtiofsd-do_read-missing-NULL-check.patch deleted file mode 100644 index 4f8e5ef..0000000 --- a/SOURCES/kvm-virtiofsd-do_read-missing-NULL-check.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 
901c005299b0316bbca7bc190de56f6c7a2a9880 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:11 +0000 -Subject: [PATCH 15/18] virtiofsd: do_read missing NULL check -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-5-dgilbert@redhat.com> -Patchwork-id: 94127 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/7] virtiofsd: do_read missing NULL check -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: "Dr. David Alan Gilbert" - -Missing a NULL check if the argument fetch fails. - -Fixes: Coverity CID 1413119 -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 99ce9a7e60fd12b213b985343ff8fcc172de59fd) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/fuse_lowlevel.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 01c418a..704c036 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1116,6 +1116,10 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, - struct fuse_file_info fi; - - arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-drop-all-capabilities-in-the-wait-parent-p.patch b/SOURCES/kvm-virtiofsd-drop-all-capabilities-in-the-wait-parent-p.patch deleted file mode 100644 index 569096d..0000000 --- a/SOURCES/kvm-virtiofsd-drop-all-capabilities-in-the-wait-parent-p.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 78152453940967f9ece9fe3ffc5017c669d6ec28 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Tue, 5 May 2020 16:36:00 +0100 -Subject: [PATCH 9/9] virtiofsd: drop all capabilities in the wait parent - process -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200505163600.22956-8-dgilbert@redhat.com> -Patchwork-id: 96274 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 7/7] virtiofsd: drop all capabilities in the wait parent process -Bugzilla: 1817445 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Michael S. Tsirkin - -From: Stefan Hajnoczi - -All this process does is wait for its child. No capabilities are -needed. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 66502bbca37ca7a3bfa57e82cfc03b89a7a11eae) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/passthrough_ll.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 6358874..f41a6b0 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2535,6 +2535,17 @@ static void print_capabilities(void) - } - - /* -+ * Drop all Linux capabilities because the wait parent process only needs to -+ * sit in waitpid(2) and terminate. -+ */ -+static void setup_wait_parent_capabilities(void) -+{ -+ capng_setpid(syscall(SYS_gettid)); -+ capng_clear(CAPNG_SELECT_BOTH); -+ capng_apply(CAPNG_SELECT_BOTH); -+} -+ -+/* - * Move to a new mount, net, and pid namespaces to isolate this process. 
- */ - static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) -@@ -2567,6 +2578,8 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) - pid_t waited; - int wstatus; - -+ setup_wait_parent_capabilities(); -+ - /* The parent waits for the child */ - do { - waited = waitpid(child, &wstatus, 0); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch b/SOURCES/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch deleted file mode 100644 index 3279a5e..0000000 --- a/SOURCES/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch +++ /dev/null @@ -1,47 +0,0 @@ -From bc127914b29f2e4163bc7ca786e04ed955d96016 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:00 +0100 -Subject: [PATCH 089/116] virtiofsd: enable PARALLEL_DIROPS during INIT -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-86-dgilbert@redhat.com> -Patchwork-id: 93539 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 085/112] virtiofsd: enable PARALLEL_DIROPS during INIT -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -lookup is a RO operations, PARALLEL_DIROPS can be enabled. - -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit b7ed733a3841c4d489d3bd6ca7ed23c84db119c2) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index aac282f..70568d2 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2062,6 +2062,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, - if (se->conn.want & FUSE_CAP_ASYNC_READ) { - outarg.flags |= FUSE_ASYNC_READ; - } -+ if (se->conn.want & FUSE_CAP_PARALLEL_DIROPS) { -+ outarg.flags |= FUSE_PARALLEL_DIROPS; -+ } - if (se->conn.want & FUSE_CAP_POSIX_LOCKS) { - outarg.flags |= FUSE_POSIX_LOCKS; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch b/SOURCES/kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch deleted file mode 100644 index b0f678f..0000000 --- a/SOURCES/kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch +++ /dev/null @@ -1,167 +0,0 @@ -From c02ebc7e43f55b9423a065a7c53ba72bdb821c98 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 Feb 2021 23:14:54 -0500 -Subject: [PATCH 1/3] virtiofsd: extract lo_do_open() from lo_open() - -RH-Author: Jon Maloy -Message-id: <20210209231456.1555472-2-jmaloy@redhat.com> -Patchwork-id: 101024 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/3] virtiofsd: extract lo_do_open() from lo_open() -Bugzilla: 1919111 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Greg Kurz -RH-Acked-by: Dr. David Alan Gilbert - -From: Stefan Hajnoczi - -Both lo_open() and lo_create() have similar code to open a file. Extract -a common lo_do_open() function from lo_open() that will be used by -lo_create() in a later commit. - -Since lo_do_open() does not otherwise need fuse_req_t req, convert -lo_add_fd_mapping() to use struct lo_data *lo instead. 
- -Signed-off-by: Stefan Hajnoczi -Message-Id: <20210204150208.367837-2-stefanha@redhat.com> -Reviewed-by: Greg Kurz -Signed-off-by: Dr. David Alan Gilbert - -(cherry-picked from commit 8afaaee976965b7fb90ec225a51d60f35c5f173c) - -Conflict: update_open_flags() takes fewer arguments in this version - than in upstream. Instead of applying commit e12a0edafeb - ("virtiofsd: Add -o allow_direct_io|no_allow_direct_io - options") we keep the old signature, since this seems to - be an unrelated change. - -Signed-off-by: Jon Maloy -Signed-off-by: Jon Maloy ---- - tools/virtiofsd/passthrough_ll.c | 73 ++++++++++++++++++++------------ - 1 file changed, 46 insertions(+), 27 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index f41a6b07c8..518ba11c47 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -439,17 +439,17 @@ static void lo_map_remove(struct lo_map *map, size_t key) - } - - /* Assumes lo->mutex is held */ --static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) -+static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd) - { - struct lo_map_elem *elem; - -- elem = lo_map_alloc_elem(&lo_data(req)->fd_map); -+ elem = lo_map_alloc_elem(&lo->fd_map); - if (!elem) { - return -1; - } - - elem->fd = fd; -- return elem - lo_data(req)->fd_map.elems; -+ return elem - lo->fd_map.elems; - } - - /* Assumes lo->mutex is held */ -@@ -1712,6 +1712,38 @@ static void update_open_flags(int writeback, struct fuse_file_info *fi) - fi->flags &= ~O_DIRECT; - } - -+static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, -+ struct fuse_file_info *fi) -+{ -+ char buf[64]; -+ ssize_t fh; -+ int fd; -+ -+ update_open_flags(lo->writeback, fi); -+ -+ sprintf(buf, "%i", inode->fd); -+ fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); -+ if (fd == -1) { -+ return errno; -+ } -+ -+ pthread_mutex_lock(&lo->mutex); -+ fh = lo_add_fd_mapping(lo, fd); -+ pthread_mutex_unlock(&lo->mutex); -+ 
if (fh == -1) { -+ close(fd); -+ return ENOMEM; -+ } -+ -+ fi->fh = fh; -+ if (lo->cache == CACHE_NONE) { -+ fi->direct_io = 1; -+ } else if (lo->cache == CACHE_ALWAYS) { -+ fi->keep_cache = 1; -+ } -+ return 0; -+} -+ - static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - mode_t mode, struct fuse_file_info *fi) - { -@@ -1752,7 +1784,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - ssize_t fh; - - pthread_mutex_lock(&lo->mutex); -- fh = lo_add_fd_mapping(req, fd); -+ fh = lo_add_fd_mapping(lo, fd); - pthread_mutex_unlock(&lo->mutex); - if (fh == -1) { - close(fd); -@@ -1943,38 +1975,25 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - - static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { -- int fd; -- ssize_t fh; -- char buf[64]; - struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode = lo_inode(req, ino); -+ int err; - - fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, - fi->flags); - -- update_open_flags(lo->writeback, fi); -- -- sprintf(buf, "%i", lo_fd(req, ino)); -- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); -- if (fd == -1) { -- return (void)fuse_reply_err(req, errno); -- } -- -- pthread_mutex_lock(&lo->mutex); -- fh = lo_add_fd_mapping(req, fd); -- pthread_mutex_unlock(&lo->mutex); -- if (fh == -1) { -- close(fd); -- fuse_reply_err(req, ENOMEM); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); - return; - } - -- fi->fh = fh; -- if (lo->cache == CACHE_NONE) { -- fi->direct_io = 1; -- } else if (lo->cache == CACHE_ALWAYS) { -- fi->keep_cache = 1; -+ err = lo_do_open(lo, inode, fi); -+ lo_inode_put(lo, &inode); -+ if (err) { -+ fuse_reply_err(req, err); -+ } else { -+ fuse_reply_open(req, fi); - } -- fuse_reply_open(req, fi); - } - - static void lo_release(fuse_req_t req, fuse_ino_t ino, --- -2.18.2 - diff --git a/SOURCES/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch 
b/SOURCES/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch deleted file mode 100644 index 96f91a1..0000000 --- a/SOURCES/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 983b383bc4a92a9f7ecff0332cadefed2f58f502 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:50 +0100 -Subject: [PATCH 079/116] virtiofsd: extract root inode init into setup_root() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-76-dgilbert@redhat.com> -Patchwork-id: 93527 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 075/112] virtiofsd: extract root inode init into setup_root() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Inititialize the root inode in a single place. - -Signed-off-by: Miklos Szeredi -Signed-off-by: Stefan Hajnoczi -dgilbert: -with fix suggested by Misono Tomohiro -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 3ca8a2b1c83eb185c232a4e87abbb65495263756) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 35 +++++++++++++++++++++++++---------- - 1 file changed, 25 insertions(+), 10 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 33bfb4d..9e7191e 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2351,6 +2351,30 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) - } - } - -+static void setup_root(struct lo_data *lo, struct lo_inode *root) -+{ -+ int fd, res; -+ struct stat stat; -+ -+ fd = open("/", O_PATH); -+ if (fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", lo->source); -+ exit(1); -+ } -+ -+ res = fstatat(fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source); -+ exit(1); -+ } -+ -+ root->is_symlink = false; -+ root->fd = fd; -+ root->ino = stat.st_ino; -+ root->dev = stat.st_dev; -+ root->refcount = 2; -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2426,8 +2450,6 @@ int main(int argc, char *argv[]) - if (lo.debug) { - current_log_level = FUSE_LOG_DEBUG; - } -- lo.root.refcount = 2; -- - if (lo.source) { - struct stat stat; - int res; -@@ -2446,7 +2468,6 @@ int main(int argc, char *argv[]) - } else { - lo.source = "/"; - } -- lo.root.is_symlink = false; - if (!lo.timeout_set) { - switch (lo.cache) { - case CACHE_NEVER: -@@ -2466,13 +2487,6 @@ int main(int argc, char *argv[]) - exit(1); - } - -- lo.root.fd = open(lo.source, O_PATH); -- -- if (lo.root.fd == -1) { -- fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); -- exit(1); -- } -- - se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); - if (se == NULL) { - goto err_out1; -@@ -2495,6 +2509,7 @@ int main(int argc, char *argv[]) - - setup_sandbox(&lo, 
se, opts.syslog); - -+ setup_root(&lo, &lo.root); - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch b/SOURCES/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch deleted file mode 100644 index 4860bec..0000000 --- a/SOURCES/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch +++ /dev/null @@ -1,85 +0,0 @@ -From b3cd18ab58e331d3610cf00f857d6a945f11a030 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:49 +0100 -Subject: [PATCH 078/116] virtiofsd: fail when parent inode isn't known in - lo_do_lookup() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-75-dgilbert@redhat.com> -Patchwork-id: 93529 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 074/112] virtiofsd: fail when parent inode isn't known in lo_do_lookup() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -The Linux file handle APIs (struct export_operations) can access inodes -that are not attached to parents because path name traversal is not -performed. Refuse if there is no parent in lo_do_lookup(). - -Also clean up lo_do_lookup() while we're here. - -Signed-off-by: Miklos Szeredi -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 9de4fab5995d115f8ebfb41d8d94a866d80a1708) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index de12e75..33bfb4d 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -777,6 +777,15 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - struct lo_data *lo = lo_data(req); - struct lo_inode *inode, *dir = lo_inode(req, parent); - -+ /* -+ * name_to_handle_at() and open_by_handle_at() can reach here with fuse -+ * mount point in guest, but we don't have its inode info in the -+ * ino_map. -+ */ -+ if (!dir) { -+ return ENOENT; -+ } -+ - memset(e, 0, sizeof(*e)); - e->attr_timeout = lo->timeout; - e->entry_timeout = lo->timeout; -@@ -786,7 +795,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - name = "."; - } - -- newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); -+ newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW); - if (newfd == -1) { - goto out_err; - } -@@ -796,7 +805,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out_err; - } - -- inode = lo_find(lo_data(req), &e->attr); -+ inode = lo_find(lo, &e->attr); - if (inode) { - close(newfd); - newfd = -1; -@@ -812,6 +821,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - inode->is_symlink = S_ISLNK(e->attr.st_mode); - inode->refcount = 1; - inode->fd = newfd; -+ newfd = -1; - inode->ino = e->attr.st_ino; - inode->dev = e->attr.st_dev; - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-fix-error-handling-in-main.patch b/SOURCES/kvm-virtiofsd-fix-error-handling-in-main.patch deleted file mode 100644 index a831992..0000000 --- a/SOURCES/kvm-virtiofsd-fix-error-handling-in-main.patch +++ /dev/null @@ -1,63 +0,0 @@ 
-From 0ea1c7375d6509367399c706eb9d1e8cf79a5830 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:55 +0100 -Subject: [PATCH 084/116] virtiofsd: fix error handling in main() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-81-dgilbert@redhat.com> -Patchwork-id: 93534 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 080/112] virtiofsd: fix error handling in main() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -Neither fuse_parse_cmdline() nor fuse_opt_parse() goes to the right place -to do cleanup. - -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit c6de804670f2255ce776263124c37f3370dc5ac1) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9ed77a1..af050c6 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2443,13 +2443,14 @@ int main(int argc, char *argv[]) - lo_map_init(&lo.fd_map); - - if (fuse_parse_cmdline(&args, &opts) != 0) { -- return 1; -+ goto err_out1; - } - fuse_set_log_func(log_func); - use_syslog = opts.syslog; - if (use_syslog) { - openlog("virtiofsd", LOG_PID, LOG_DAEMON); - } -+ - if (opts.show_help) { - printf("usage: %s [options]\n\n", argv[0]); - fuse_cmdline_help(); -@@ -2468,7 +2469,7 @@ int main(int argc, char *argv[]) - } - - if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { -- return 1; -+ goto err_out1; - } - - /* --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch b/SOURCES/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch deleted file mode 100644 index 
420a8a6..0000000 --- a/SOURCES/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 9c291ca8624318613ede6e4174d08cf45aae8384 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:01 +0100 -Subject: [PATCH 090/116] virtiofsd: fix incorrect error handling in - lo_do_lookup -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-87-dgilbert@redhat.com> -Patchwork-id: 93543 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 086/112] virtiofsd: fix incorrect error handling in lo_do_lookup -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Eric Ren - -Signed-off-by: Eric Ren -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit fc3f0041b43b6c64aa97b3558a6abe1a10028354) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e8dc5c7..05b5f89 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -814,7 +814,6 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - close(newfd); - newfd = -1; - } else { -- saverr = ENOMEM; - inode = calloc(1, sizeof(struct lo_inode)); - if (!inode) { - goto out_err; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-fix-libfuse-information-leaks.patch b/SOURCES/kvm-virtiofsd-fix-libfuse-information-leaks.patch deleted file mode 100644 index 90debb0..0000000 --- a/SOURCES/kvm-virtiofsd-fix-libfuse-information-leaks.patch +++ /dev/null @@ -1,322 +0,0 @@ -From e0d64e481e5a9fab5ff90d2a8f84afcd3311d13b Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:35 +0100 -Subject: [PATCH 064/116] virtiofsd: fix libfuse information leaks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-61-dgilbert@redhat.com> -Patchwork-id: 93515 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 060/112] virtiofsd: fix libfuse information leaks -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Some FUSE message replies contain padding fields that are not -initialized by libfuse. This is fine in traditional FUSE applications -because the kernel is trusted. virtiofsd does not trust the guest and -must not expose uninitialized memory. - -Use C struct initializers to automatically zero out memory. Not all of -these code changes are strictly necessary but they will prevent future -information leaks if the structs are extended. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 3db2876a0153ac7103c077c53090e020faffb3ea) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 150 ++++++++++++++++++++-------------------- - 1 file changed, 76 insertions(+), 74 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 2d6dc5a..6ceb33d 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -44,21 +44,23 @@ static __attribute__((constructor)) void fuse_ll_init_pagesize(void) - - static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) - { -- attr->ino = stbuf->st_ino; -- attr->mode = stbuf->st_mode; -- attr->nlink = stbuf->st_nlink; -- attr->uid = stbuf->st_uid; -- attr->gid = stbuf->st_gid; -- attr->rdev = stbuf->st_rdev; -- attr->size = stbuf->st_size; -- attr->blksize = stbuf->st_blksize; -- attr->blocks = stbuf->st_blocks; -- attr->atime = stbuf->st_atime; -- attr->mtime = stbuf->st_mtime; -- attr->ctime = stbuf->st_ctime; -- attr->atimensec = ST_ATIM_NSEC(stbuf); -- attr->mtimensec = ST_MTIM_NSEC(stbuf); -- attr->ctimensec = ST_CTIM_NSEC(stbuf); -+ *attr = (struct fuse_attr){ -+ .ino = stbuf->st_ino, -+ .mode = stbuf->st_mode, -+ .nlink = stbuf->st_nlink, -+ .uid = stbuf->st_uid, -+ .gid = stbuf->st_gid, -+ .rdev = stbuf->st_rdev, -+ .size = stbuf->st_size, -+ .blksize = stbuf->st_blksize, -+ .blocks = stbuf->st_blocks, -+ .atime = stbuf->st_atime, -+ .mtime = stbuf->st_mtime, -+ .ctime = stbuf->st_ctime, -+ .atimensec = ST_ATIM_NSEC(stbuf), -+ .mtimensec = ST_MTIM_NSEC(stbuf), -+ .ctimensec = ST_CTIM_NSEC(stbuf), -+ }; - } - - static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) -@@ -183,16 +185,16 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, - int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, - int count) - { -- struct fuse_out_header out; -+ struct fuse_out_header out = { -+ .unique = 
req->unique, -+ .error = error, -+ }; - - if (error <= -1000 || error > 0) { - fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); - error = -ERANGE; - } - -- out.unique = req->unique; -- out.error = error; -- - iov[0].iov_base = &out; - iov[0].iov_len = sizeof(struct fuse_out_header); - -@@ -277,14 +279,16 @@ size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, - static void convert_statfs(const struct statvfs *stbuf, - struct fuse_kstatfs *kstatfs) - { -- kstatfs->bsize = stbuf->f_bsize; -- kstatfs->frsize = stbuf->f_frsize; -- kstatfs->blocks = stbuf->f_blocks; -- kstatfs->bfree = stbuf->f_bfree; -- kstatfs->bavail = stbuf->f_bavail; -- kstatfs->files = stbuf->f_files; -- kstatfs->ffree = stbuf->f_ffree; -- kstatfs->namelen = stbuf->f_namemax; -+ *kstatfs = (struct fuse_kstatfs){ -+ .bsize = stbuf->f_bsize, -+ .frsize = stbuf->f_frsize, -+ .blocks = stbuf->f_blocks, -+ .bfree = stbuf->f_bfree, -+ .bavail = stbuf->f_bavail, -+ .files = stbuf->f_files, -+ .ffree = stbuf->f_ffree, -+ .namelen = stbuf->f_namemax, -+ }; - } - - static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) -@@ -328,12 +332,14 @@ static unsigned int calc_timeout_nsec(double t) - static void fill_entry(struct fuse_entry_out *arg, - const struct fuse_entry_param *e) - { -- arg->nodeid = e->ino; -- arg->generation = e->generation; -- arg->entry_valid = calc_timeout_sec(e->entry_timeout); -- arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); -- arg->attr_valid = calc_timeout_sec(e->attr_timeout); -- arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); -+ *arg = (struct fuse_entry_out){ -+ .nodeid = e->ino, -+ .generation = e->generation, -+ .entry_valid = calc_timeout_sec(e->entry_timeout), -+ .entry_valid_nsec = calc_timeout_nsec(e->entry_timeout), -+ .attr_valid = calc_timeout_sec(e->attr_timeout), -+ .attr_valid_nsec = calc_timeout_nsec(e->attr_timeout), -+ }; - convert_stat(&e->attr, &arg->attr); - } - -@@ -362,10 +368,12 @@ size_t 
fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, - fill_entry(&dp->entry_out, e); - - struct fuse_dirent *dirent = &dp->dirent; -- dirent->ino = e->attr.st_ino; -- dirent->off = off; -- dirent->namelen = namelen; -- dirent->type = (e->attr.st_mode & S_IFMT) >> 12; -+ *dirent = (struct fuse_dirent){ -+ .ino = e->attr.st_ino, -+ .off = off, -+ .namelen = namelen, -+ .type = (e->attr.st_mode & S_IFMT) >> 12, -+ }; - memcpy(dirent->name, name, namelen); - memset(dirent->name + namelen, 0, entlen_padded - entlen); - -@@ -496,15 +504,14 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) - { - struct iovec iov[2]; -- struct fuse_out_header out; -+ struct fuse_out_header out = { -+ .unique = req->unique, -+ }; - int res; - - iov[0].iov_base = &out; - iov[0].iov_len = sizeof(struct fuse_out_header); - -- out.unique = req->unique; -- out.error = 0; -- - res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv); - if (res <= 0) { - fuse_free_req(req); -@@ -2145,14 +2152,14 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, - static int send_notify_iov(struct fuse_session *se, int notify_code, - struct iovec *iov, int count) - { -- struct fuse_out_header out; -+ struct fuse_out_header out = { -+ .error = notify_code, -+ }; - - if (!se->got_init) { - return -ENOTCONN; - } - -- out.unique = 0; -- out.error = notify_code; - iov[0].iov_base = &out; - iov[0].iov_len = sizeof(struct fuse_out_header); - -@@ -2162,11 +2169,11 @@ static int send_notify_iov(struct fuse_session *se, int notify_code, - int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) - { - if (ph != NULL) { -- struct fuse_notify_poll_wakeup_out outarg; -+ struct fuse_notify_poll_wakeup_out outarg = { -+ .kh = ph->kh, -+ }; - struct iovec iov[2]; - -- outarg.kh = ph->kh; -- - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - -@@ -2179,17 +2186,17 @@ int 
fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) - int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, - off_t off, off_t len) - { -- struct fuse_notify_inval_inode_out outarg; -+ struct fuse_notify_inval_inode_out outarg = { -+ .ino = ino, -+ .off = off, -+ .len = len, -+ }; - struct iovec iov[2]; - - if (!se) { - return -EINVAL; - } - -- outarg.ino = ino; -- outarg.off = off; -- outarg.len = len; -- - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - -@@ -2199,17 +2206,16 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, - int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, - const char *name, size_t namelen) - { -- struct fuse_notify_inval_entry_out outarg; -+ struct fuse_notify_inval_entry_out outarg = { -+ .parent = parent, -+ .namelen = namelen, -+ }; - struct iovec iov[3]; - - if (!se) { - return -EINVAL; - } - -- outarg.parent = parent; -- outarg.namelen = namelen; -- outarg.padding = 0; -- - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - iov[2].iov_base = (void *)name; -@@ -2222,18 +2228,17 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - fuse_ino_t child, const char *name, - size_t namelen) - { -- struct fuse_notify_delete_out outarg; -+ struct fuse_notify_delete_out outarg = { -+ .parent = parent, -+ .child = child, -+ .namelen = namelen, -+ }; - struct iovec iov[3]; - - if (!se) { - return -EINVAL; - } - -- outarg.parent = parent; -- outarg.child = child; -- outarg.namelen = namelen; -- outarg.padding = 0; -- - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - iov[2].iov_base = (void *)name; -@@ -2245,24 +2250,21 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - off_t offset, struct fuse_bufvec *bufv) - { -- struct fuse_out_header out; -- struct fuse_notify_store_out outarg; -+ struct 
fuse_out_header out = { -+ .error = FUSE_NOTIFY_STORE, -+ }; -+ struct fuse_notify_store_out outarg = { -+ .nodeid = ino, -+ .offset = offset, -+ .size = fuse_buf_size(bufv), -+ }; - struct iovec iov[3]; -- size_t size = fuse_buf_size(bufv); - int res; - - if (!se) { - return -EINVAL; - } - -- out.unique = 0; -- out.error = FUSE_NOTIFY_STORE; -- -- outarg.nodeid = ino; -- outarg.offset = offset; -- outarg.size = size; -- outarg.padding = 0; -- - iov[0].iov_base = &out; - iov[0].iov_len = sizeof(out); - iov[1].iov_base = &outarg; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch b/SOURCES/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch deleted file mode 100644 index 6243037..0000000 --- a/SOURCES/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 9a44d78f5019280b006bb5b3de7164336289d639 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:21 +0100 -Subject: [PATCH 110/116] virtiofsd: fix lo_destroy() resource leaks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-107-dgilbert@redhat.com> -Patchwork-id: 93560 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 106/112] virtiofsd: fix lo_destroy() resource leaks -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Now that lo_destroy() is serialized we can call unref_inode() so that -all inode resources are freed. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 28f7a3b026f231bfe8de5fed6a18a8d27b1dfcee) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 41 ++++++++++++++++++++-------------------- - 1 file changed, 20 insertions(+), 21 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 79b8b71..eb001b9 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1371,26 +1371,6 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - } - } - --static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) --{ -- struct lo_inode *inode = value; -- struct lo_data *lo = user_data; -- -- inode->nlookup = 0; -- lo_map_remove(&lo->ino_map, inode->fuse_ino); -- close(inode->fd); -- lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */ -- -- return TRUE; --} -- --static void unref_all_inodes(struct lo_data *lo) --{ -- pthread_mutex_lock(&lo->mutex); -- g_hash_table_foreach_remove(lo->inodes, unref_all_inodes_cb, lo); -- pthread_mutex_unlock(&lo->mutex); --} -- - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { - struct lo_data *lo = lo_data(req); -@@ -2477,7 +2457,26 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, - static void lo_destroy(void *userdata) - { - struct lo_data *lo = (struct lo_data *)userdata; -- unref_all_inodes(lo); -+ -+ /* -+ * Normally lo->mutex must be taken when traversing lo->inodes but -+ * lo_destroy() is a serialized request so no races are possible here. -+ * -+ * In addition, we cannot acquire lo->mutex since unref_inode() takes it -+ * too and this would result in a recursive lock. 
-+ */ -+ while (true) { -+ GHashTableIter iter; -+ gpointer key, value; -+ -+ g_hash_table_iter_init(&iter, lo->inodes); -+ if (!g_hash_table_iter_next(&iter, &key, &value)) { -+ break; -+ } -+ -+ struct lo_inode *inode = value; -+ unref_inode_lolocked(lo, inode, inode->nlookup); -+ } - } - - static struct fuse_lowlevel_ops lo_oper = { --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch b/SOURCES/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch deleted file mode 100644 index 4d7d6dc..0000000 --- a/SOURCES/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 9e0f5b64f30c2f841f297e25c2f3a6d82c8a16b8 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:57 +0100 -Subject: [PATCH 086/116] virtiofsd: fix memory leak on lo.source -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-83-dgilbert@redhat.com> -Patchwork-id: 93536 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 082/112] virtiofsd: fix memory leak on lo.source -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -valgrind reported that lo.source is leaked on quiting, but it was defined -as (const char*) as it may point to a const string "/". - -Signed-off-by: Liu Bo -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit eb68a33b5fc5dde87bd9b99b94e7c33a5d8ea82e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index af050c6..056ebe8 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -115,7 +115,7 @@ struct lo_data { - int writeback; - int flock; - int xattr; -- const char *source; -+ char *source; - double timeout; - int cache; - int timeout_set; -@@ -2497,9 +2497,8 @@ int main(int argc, char *argv[]) - fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); - exit(1); - } -- - } else { -- lo.source = "/"; -+ lo.source = strdup("/"); - } - if (!lo.timeout_set) { - switch (lo.cache) { -@@ -2570,5 +2569,7 @@ err_out1: - close(lo.root.fd); - } - -+ free(lo.source); -+ - return ret ? 1 : 0; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch b/SOURCES/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch deleted file mode 100644 index b17d93c..0000000 --- a/SOURCES/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 3b6461ee08654b2cbb6d4e0cc15c02f89a6610d5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:09 +0000 -Subject: [PATCH 13/18] virtiofsd: fv_create_listen_socket error path socket - leak -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-3-dgilbert@redhat.com> -Patchwork-id: 94124 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/7] virtiofsd: fv_create_listen_socket error path socket leak -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: "Dr. 
David Alan Gilbert" - -If we fail when bringing up the socket we can leak the listen_fd; -in practice the daemon will exit so it's not really a problem. - -Fixes: Coverity CID 1413121 -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 6fa249027f97e3080f3d9c0fab3f94f8f80828fe) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/fuse_virtio.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 80a6e92..dd1c605 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -916,6 +916,7 @@ static int fv_create_listen_socket(struct fuse_session *se) - old_umask = umask(0077); - if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) { - fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n"); -+ close(listen_sock); - umask(old_umask); - return -1; - } -@@ -923,6 +924,7 @@ static int fv_create_listen_socket(struct fuse_session *se) - - if (listen(listen_sock, 1) == -1) { - fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n"); -+ close(listen_sock); - return -1; - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-get-set-features-callbacks.patch b/SOURCES/kvm-virtiofsd-get-set-features-callbacks.patch deleted file mode 100644 index fcb5ca2..0000000 --- a/SOURCES/kvm-virtiofsd-get-set-features-callbacks.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 59bfe3ad924d00dc9c7a4363fcd3db36ea247988 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:59 +0100 -Subject: [PATCH 028/116] virtiofsd: get/set features callbacks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-25-dgilbert@redhat.com> -Patchwork-id: 93478 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 024/112] virtiofsd: get/set features callbacks -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Add the get/set features callbacks. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit f2cef5fb9ae20136ca18d16328787b69b3abfa18) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 1928a20..4819e56 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -46,6 +46,17 @@ struct virtio_fs_config { - uint32_t num_queues; - }; - -+/* Callback from libvhost-user */ -+static uint64_t fv_get_features(VuDev *dev) -+{ -+ return 1ULL << VIRTIO_F_VERSION_1; -+} -+ -+/* Callback from libvhost-user */ -+static void fv_set_features(VuDev *dev, uint64_t features) -+{ -+} -+ - /* - * Callback from libvhost-user if there's a new fd we're supposed to listen - * to, typically a queue kick? 
-@@ -78,7 +89,9 @@ static bool fv_queue_order(VuDev *dev, int qidx) - } - - static const VuDevIface fv_iface = { -- /* TODO: Add other callbacks */ -+ .get_features = fv_get_features, -+ .set_features = fv_set_features, -+ - .queue_is_processed_in_order = fv_queue_order, - }; - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch b/SOURCES/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch deleted file mode 100644 index 68d20e7..0000000 --- a/SOURCES/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch +++ /dev/null @@ -1,589 +0,0 @@ -From da6ee5c24397d2ca93dfaf275fdd9dafc922da15 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:11 +0100 -Subject: [PATCH 100/116] virtiofsd: introduce inode refcount to prevent - use-after-free -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-97-dgilbert@redhat.com> -Patchwork-id: 93550 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 096/112] virtiofsd: introduce inode refcount to prevent use-after-free -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -If thread A is using an inode it must not be deleted by thread B when -processing a FUSE_FORGET request. - -The FUSE protocol itself already has a counter called nlookup that is -used in FUSE_FORGET messages. We cannot trust this counter since the -untrusted client can manipulate it via FUSE_FORGET messages. - -Introduce a new refcount to keep inodes alive for the required lifespan. -lo_inode_put() must be called to release a reference. FUSE's nlookup -counter holds exactly one reference so that the inode stays alive as -long as the client still wants to remember it. 
- -Note that the lo_inode->is_symlink field is moved to avoid creating a -hole in the struct due to struct field alignment. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Misono Tomohiro -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit c241aa9457d88c6a0d027f48fadfed131646bce3) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 169 +++++++++++++++++++++++++++++++++------ - 1 file changed, 146 insertions(+), 23 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e3a6d6b..ab16135 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -97,7 +97,13 @@ struct lo_key { - - struct lo_inode { - int fd; -- bool is_symlink; -+ -+ /* -+ * Atomic reference count for this object. The nlookup field holds a -+ * reference and release it when nlookup reaches 0. -+ */ -+ gint refcount; -+ - struct lo_key key; - - /* -@@ -116,6 +122,8 @@ struct lo_inode { - fuse_ino_t fuse_ino; - pthread_mutex_t plock_mutex; - GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ -+ -+ bool is_symlink; - }; - - struct lo_cred { -@@ -471,6 +479,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) - return elem - lo_data(req)->ino_map.elems; - } - -+static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) -+{ -+ struct lo_inode *inode = *inodep; -+ -+ if (!inode) { -+ return; -+ } -+ -+ *inodep = NULL; -+ -+ if (g_atomic_int_dec_and_test(&inode->refcount)) { -+ close(inode->fd); -+ free(inode); -+ } -+} -+ -+/* Caller must release refcount using lo_inode_put() */ - static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - { - struct lo_data *lo = lo_data(req); -@@ -478,6 +503,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - - pthread_mutex_lock(&lo->mutex); - elem = lo_map_get(&lo->ino_map, ino); -+ if (elem) { -+ g_atomic_int_inc(&elem->inode->refcount); -+ } 
- pthread_mutex_unlock(&lo->mutex); - - if (!elem) { -@@ -487,10 +515,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - return elem->inode; - } - -+/* -+ * TODO Remove this helper and force callers to hold an inode refcount until -+ * they are done with the fd. This will be done in a later patch to make -+ * review easier. -+ */ - static int lo_fd(fuse_req_t req, fuse_ino_t ino) - { - struct lo_inode *inode = lo_inode(req, ino); -- return inode ? inode->fd : -1; -+ int fd; -+ -+ if (!inode) { -+ return -1; -+ } -+ -+ fd = inode->fd; -+ lo_inode_put(lo_data(req), &inode); -+ return fd; - } - - static void lo_init(void *userdata, struct fuse_conn_info *conn) -@@ -545,6 +586,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, - fuse_reply_attr(req, &buf, lo->timeout); - } - -+/* -+ * Increments parent->nlookup and caller must release refcount using -+ * lo_inode_put(&parent). -+ */ - static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, - char path[PATH_MAX], struct lo_inode **parent) - { -@@ -582,6 +627,7 @@ retry: - p = &lo->root; - pthread_mutex_lock(&lo->mutex); - p->nlookup++; -+ g_atomic_int_inc(&p->refcount); - pthread_mutex_unlock(&lo->mutex); - } else { - *last = '\0'; -@@ -625,6 +671,7 @@ retry: - - fail_unref: - unref_inode_lolocked(lo, p, 1); -+ lo_inode_put(lo, &p); - fail: - if (retries) { - retries--; -@@ -663,6 +710,7 @@ fallback: - if (res != -1) { - res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); - unref_inode_lolocked(lo, parent, 1); -+ lo_inode_put(lo, &parent); - } - - return res; -@@ -780,11 +828,13 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - goto out_err; - } - } -+ lo_inode_put(lo, &inode); - - return lo_getattr(req, ino, fi); - - out_err: - saverr = errno; -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - } - -@@ -801,6 +851,7 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - if (p) { - assert(p->nlookup > 0); 
- p->nlookup++; -+ g_atomic_int_inc(&p->refcount); - } - pthread_mutex_unlock(&lo->mutex); - -@@ -820,6 +871,10 @@ static void posix_locks_value_destroy(gpointer data) - free(plock); - } - -+/* -+ * Increments nlookup and caller must release refcount using -+ * lo_inode_put(&parent). -+ */ - static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - struct fuse_entry_param *e) - { -@@ -827,7 +882,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - int res; - int saverr; - struct lo_data *lo = lo_data(req); -- struct lo_inode *inode, *dir = lo_inode(req, parent); -+ struct lo_inode *inode = NULL; -+ struct lo_inode *dir = lo_inode(req, parent); - - /* - * name_to_handle_at() and open_by_handle_at() can reach here with fuse -@@ -868,6 +924,13 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - inode->is_symlink = S_ISLNK(e->attr.st_mode); -+ -+ /* -+ * One for the caller and one for nlookup (released in -+ * unref_inode_lolocked()) -+ */ -+ g_atomic_int_set(&inode->refcount, 2); -+ - inode->nlookup = 1; - inode->fd = newfd; - newfd = -1; -@@ -883,6 +946,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - pthread_mutex_unlock(&lo->mutex); - } - e->ino = inode->fuse_ino; -+ lo_inode_put(lo, &inode); -+ lo_inode_put(lo, &dir); - - fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, - name, (unsigned long long)e->ino); -@@ -894,6 +959,8 @@ out_err: - if (newfd != -1) { - close(newfd); - } -+ lo_inode_put(lo, &inode); -+ lo_inode_put(lo, &dir); - return saverr; - } - -@@ -991,6 +1058,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - { - int res; - int saverr; -+ struct lo_data *lo = lo_data(req); - struct lo_inode *dir; - struct fuse_entry_param e; - struct lo_cred old = {}; -@@ -1032,9 +1100,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - name, (unsigned long long)e.ino); - - 
fuse_reply_entry(req, &e); -+ lo_inode_put(lo, &dir); - return; - - out: -+ lo_inode_put(lo, &dir); - fuse_reply_err(req, saverr); - } - -@@ -1085,6 +1155,7 @@ fallback: - if (res != -1) { - res = linkat(parent->fd, path, dfd, name, 0); - unref_inode_lolocked(lo, parent, 1); -+ lo_inode_put(lo, &parent); - } - - return res; -@@ -1095,6 +1166,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - { - int res; - struct lo_data *lo = lo_data(req); -+ struct lo_inode *parent_inode; - struct lo_inode *inode; - struct fuse_entry_param e; - int saverr; -@@ -1104,17 +1176,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - return; - } - -+ parent_inode = lo_inode(req, parent); - inode = lo_inode(req, ino); -- if (!inode) { -- fuse_reply_err(req, EBADF); -- return; -+ if (!parent_inode || !inode) { -+ errno = EBADF; -+ goto out_err; - } - - memset(&e, 0, sizeof(struct fuse_entry_param)); - e.attr_timeout = lo->timeout; - e.entry_timeout = lo->timeout; - -- res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name); -+ res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name); - if (res == -1) { - goto out_err; - } -@@ -1133,13 +1206,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - name, (unsigned long long)e.ino); - - fuse_reply_entry(req, &e); -+ lo_inode_put(lo, &parent_inode); -+ lo_inode_put(lo, &inode); - return; - - out_err: - saverr = errno; -+ lo_inode_put(lo, &parent_inode); -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - } - -+/* Increments nlookup and caller must release refcount using lo_inode_put() */ - static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, - const char *name) - { -@@ -1176,6 +1254,7 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) - - fuse_reply_err(req, res == -1 ? 
errno : 0); - unref_inode_lolocked(lo, inode, 1); -+ lo_inode_put(lo, &inode); - } - - static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, -@@ -1183,8 +1262,10 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - unsigned int flags) - { - int res; -- struct lo_inode *oldinode; -- struct lo_inode *newinode; -+ struct lo_inode *parent_inode; -+ struct lo_inode *newparent_inode; -+ struct lo_inode *oldinode = NULL; -+ struct lo_inode *newinode = NULL; - struct lo_data *lo = lo_data(req); - - if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { -@@ -1192,6 +1273,13 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - return; - } - -+ parent_inode = lo_inode(req, parent); -+ newparent_inode = lo_inode(req, newparent); -+ if (!parent_inode || !newparent_inode) { -+ fuse_reply_err(req, EBADF); -+ goto out; -+ } -+ - oldinode = lookup_name(req, parent, name); - newinode = lookup_name(req, newparent, newname); - -@@ -1204,8 +1292,8 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - #ifndef SYS_renameat2 - fuse_reply_err(req, EINVAL); - #else -- res = syscall(SYS_renameat2, lo_fd(req, parent), name, -- lo_fd(req, newparent), newname, flags); -+ res = syscall(SYS_renameat2, parent_inode->fd, name, -+ newparent_inode->fd, newname, flags); - if (res == -1 && errno == ENOSYS) { - fuse_reply_err(req, EINVAL); - } else { -@@ -1215,12 +1303,16 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out; - } - -- res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); -+ res = renameat(parent_inode->fd, name, newparent_inode->fd, newname); - - fuse_reply_err(req, res == -1 ? 
errno : 0); - out: - unref_inode_lolocked(lo, oldinode, 1); - unref_inode_lolocked(lo, newinode, 1); -+ lo_inode_put(lo, &oldinode); -+ lo_inode_put(lo, &newinode); -+ lo_inode_put(lo, &parent_inode); -+ lo_inode_put(lo, &newparent_inode); - } - - static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) -@@ -1244,6 +1336,7 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - - fuse_reply_err(req, res == -1 ? errno : 0); - unref_inode_lolocked(lo, inode, 1); -+ lo_inode_put(lo, &inode); - } - - static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -@@ -1265,8 +1358,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - g_hash_table_destroy(inode->posix_locks); - pthread_mutex_destroy(&inode->plock_mutex); - pthread_mutex_unlock(&lo->mutex); -- close(inode->fd); -- free(inode); -+ -+ /* Drop our refcount from lo_do_lookup() */ -+ lo_inode_put(lo, &inode); - } else { - pthread_mutex_unlock(&lo->mutex); - } -@@ -1280,6 +1374,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) - inode->nlookup = 0; - lo_map_remove(&lo->ino_map, inode->fuse_ino); - close(inode->fd); -+ lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */ - - return TRUE; - } -@@ -1306,6 +1401,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - (unsigned long long)nlookup); - - unref_inode_lolocked(lo, inode, nlookup); -+ lo_inode_put(lo, &inode); - } - - static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) -@@ -1537,6 +1633,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - err = 0; - error: - lo_dirp_put(&d); -+ lo_inode_put(lo, &dinode); - - /* - * If there's an error, we can only signal it if we haven't stored -@@ -1595,6 +1692,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - { - int fd; - struct lo_data *lo = lo_data(req); -+ struct lo_inode 
*parent_inode; - struct fuse_entry_param e; - int err; - struct lo_cred old = {}; -@@ -1607,12 +1705,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - return; - } - -+ parent_inode = lo_inode(req, parent); -+ if (!parent_inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - err = lo_change_cred(req, &old); - if (err) { - goto out; - } - -- fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, -+ fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, - mode); - err = fd == -1 ? errno : 0; - lo_restore_cred(&old); -@@ -1625,8 +1729,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - pthread_mutex_unlock(&lo->mutex); - if (fh == -1) { - close(fd); -- fuse_reply_err(req, ENOMEM); -- return; -+ err = ENOMEM; -+ goto out; - } - - fi->fh = fh; -@@ -1639,6 +1743,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - out: -+ lo_inode_put(lo, &parent_inode); -+ - if (err) { - fuse_reply_err(req, err); - } else { -@@ -1712,16 +1818,18 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - plock = - lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); - if (!plock) { -- pthread_mutex_unlock(&inode->plock_mutex); -- fuse_reply_err(req, ret); -- return; -+ saverr = ret; -+ goto out; - } - - ret = fcntl(plock->fd, F_OFD_GETLK, lock); - if (ret == -1) { - saverr = errno; - } -+ -+out: - pthread_mutex_unlock(&inode->plock_mutex); -+ lo_inode_put(lo, &inode); - - if (saverr) { - fuse_reply_err(req, saverr); -@@ -1761,9 +1869,8 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); - - if (!plock) { -- pthread_mutex_unlock(&inode->plock_mutex); -- fuse_reply_err(req, ret); -- return; -+ saverr = ret; -+ goto out; - } - - /* TODO: Is it alright to modify flock? 
*/ -@@ -1772,7 +1879,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - if (ret == -1) { - saverr = errno; - } -+ -+out: - pthread_mutex_unlock(&inode->plock_mutex); -+ lo_inode_put(lo, &inode); -+ - fuse_reply_err(req, saverr); - } - -@@ -1898,6 +2009,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - pthread_mutex_unlock(&inode->plock_mutex); - - res = close(dup(lo_fi_fd(req, fi))); -+ lo_inode_put(lo_data(req), &inode); - fuse_reply_err(req, res == -1 ? errno : 0); - } - -@@ -2115,11 +2227,14 @@ out_free: - if (fd >= 0) { - close(fd); - } -+ -+ lo_inode_put(lo, &inode); - return; - - out_err: - saverr = errno; - out: -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - goto out_free; - } -@@ -2190,11 +2305,14 @@ out_free: - if (fd >= 0) { - close(fd); - } -+ -+ lo_inode_put(lo, &inode); - return; - - out_err: - saverr = errno; - out: -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - goto out_free; - } -@@ -2243,6 +2361,8 @@ out: - if (fd >= 0) { - close(fd); - } -+ -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - } - -@@ -2289,6 +2409,8 @@ out: - if (fd >= 0) { - close(fd); - } -+ -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - } - -@@ -2671,6 +2793,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) - root->key.ino = stat.st_ino; - root->key.dev = stat.st_dev; - root->nlookup = 2; -+ g_atomic_int_set(&root->refcount, 2); - } - - static guint lo_key_hash(gconstpointer key) --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-jail-lo-proc_self_fd.patch b/SOURCES/kvm-virtiofsd-jail-lo-proc_self_fd.patch deleted file mode 100644 index df69242..0000000 --- a/SOURCES/kvm-virtiofsd-jail-lo-proc_self_fd.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 852a0a22d674b0594aecf0912a0885d197f34978 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Tue, 5 May 2020 16:35:57 +0100 -Subject: [PATCH 6/9] virtiofsd: jail lo->proc_self_fd - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200505163600.22956-5-dgilbert@redhat.com> -Patchwork-id: 96275 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 4/7] virtiofsd: jail lo->proc_self_fd -Bugzilla: 1817445 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Michael S. Tsirkin - -From: Miklos Szeredi - -While it's not possible to escape the proc filesystem through -lo->proc_self_fd, it is possible to escape to the root of the proc -filesystem itself through "../..". - -Use a temporary mount for opening lo->proc_self_fd, that has it's root at -/proc/self/fd/, preventing access to the ancestor directories. - -Signed-off-by: Miklos Szeredi -Message-Id: <20200429124733.22488-1-mszeredi@redhat.com> -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 397ae982f4df46e7d4b2625c431062c9146f3b83) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/passthrough_ll.c | 27 +++++++++++++++++++++++++-- - 1 file changed, 25 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 184ad0f..73d8405 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2540,6 +2540,8 @@ static void print_capabilities(void) - static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) - { - pid_t child; -+ char template[] = "virtiofsd-XXXXXX"; -+ char *tmpdir; - - /* - * Create a new pid namespace for *child* processes. 
We'll have to -@@ -2601,12 +2603,33 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) - exit(1); - } - -+ tmpdir = mkdtemp(template); -+ if (!tmpdir) { -+ fuse_log(FUSE_LOG_ERR, "tmpdir(%s): %m\n", template); -+ exit(1); -+ } -+ -+ if (mount("/proc/self/fd", tmpdir, NULL, MS_BIND, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(/proc/self/fd, %s, MS_BIND): %m\n", -+ tmpdir); -+ exit(1); -+ } -+ - /* Now we can get our /proc/self/fd directory file descriptor */ -- lo->proc_self_fd = open("/proc/self/fd", O_PATH); -+ lo->proc_self_fd = open(tmpdir, O_PATH); - if (lo->proc_self_fd == -1) { -- fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); -+ fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", tmpdir); - exit(1); - } -+ -+ if (umount2(tmpdir, MNT_DETACH) < 0) { -+ fuse_log(FUSE_LOG_ERR, "umount2(%s, MNT_DETACH): %m\n", tmpdir); -+ exit(1); -+ } -+ -+ if (rmdir(tmpdir) < 0) { -+ fuse_log(FUSE_LOG_ERR, "rmdir(%s): %m\n", tmpdir); -+ } - } - - /* --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-load_capng-missing-unlock.patch b/SOURCES/kvm-virtiofsd-load_capng-missing-unlock.patch deleted file mode 100644 index bc04f6b..0000000 --- a/SOURCES/kvm-virtiofsd-load_capng-missing-unlock.patch +++ /dev/null @@ -1,46 +0,0 @@ -From ece7649025fbdbde48ff0b954e8ec2e42c4a8b3d Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:10 +0000 -Subject: [PATCH 14/18] virtiofsd: load_capng missing unlock -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-4-dgilbert@redhat.com> -Patchwork-id: 94126 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/7] virtiofsd: load_capng missing unlock -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: "Dr. David Alan Gilbert" - -Missing unlock in error path. - -Fixes: Covertiy CID 1413123 -Signed-off-by: Dr. 
David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 686391112fd42c615bcc4233472887a66a9b5a4a) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/passthrough_ll.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e6f2399..c635fc8 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -232,6 +232,7 @@ static int load_capng(void) - */ - cap.saved = capng_save_state(); - if (!cap.saved) { -+ pthread_mutex_unlock(&cap.mutex); - fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); - return -EINVAL; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-make-f-foreground-the-default.patch b/SOURCES/kvm-virtiofsd-make-f-foreground-the-default.patch deleted file mode 100644 index d6cb0e3..0000000 --- a/SOURCES/kvm-virtiofsd-make-f-foreground-the-default.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 7f2e1f79a3addb242c3018c7a80e2e57589119f0 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:08 +0100 -Subject: [PATCH 037/116] virtiofsd: make -f (foreground) the default -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-34-dgilbert@redhat.com> -Patchwork-id: 93489 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 033/112] virtiofsd: make -f (foreground) the default -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -According to vhost-user.rst "Backend program conventions", backend -programs should run in the foregound by default. Follow the -conventions so libvirt and other management tools can control virtiofsd -in a standard way. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 0bbd31753714ac2899efda0f0de31e353e965789) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 676032e..a3645fc 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -29,6 +29,11 @@ - { \ - t, offsetof(struct fuse_cmdline_opts, p), 1 \ - } -+#define FUSE_HELPER_OPT_VALUE(t, p, v) \ -+ { \ -+ t, offsetof(struct fuse_cmdline_opts, p), v \ -+ } -+ - - static const struct fuse_opt fuse_helper_opts[] = { - FUSE_HELPER_OPT("-h", show_help), -@@ -42,6 +47,7 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), - FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("-f", foreground), -+ FUSE_HELPER_OPT_VALUE("--daemonize", foreground, 0), - FUSE_HELPER_OPT("fsname=", nodefault_subtype), - FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("subtype=", nodefault_subtype), -@@ -131,6 +137,7 @@ void fuse_cmdline_help(void) - " -V --version print version\n" - " -d -o debug enable debug output (implies -f)\n" - " -f foreground operation\n" -+ " --daemonize run in background\n" - " -o max_idle_threads the maximum number of idle worker " - "threads\n" - " allowed (default: 10)\n"); -@@ -158,6 +165,7 @@ int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) - memset(opts, 0, sizeof(struct fuse_cmdline_opts)); - - opts->max_idle_threads = 10; -+ opts->foreground = 1; - - if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == - -1) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-make-lo_release-atomic.patch b/SOURCES/kvm-virtiofsd-make-lo_release-atomic.patch deleted file mode 100644 index 6d88549..0000000 --- a/SOURCES/kvm-virtiofsd-make-lo_release-atomic.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 4ebabb66f4132186152edf8e1907fce436bf5c69 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:06 +0100 -Subject: [PATCH 095/116] virtiofsd: make lo_release() atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-92-dgilbert@redhat.com> -Patchwork-id: 93545 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 091/112] virtiofsd: make lo_release() atomic -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Hold the lock across both lo_map_get() and lo_map_remove() to prevent -races between two FUSE_RELEASE requests. In this case I don't see a -serious bug but it's safer to do things atomically. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit baed65c060c0e524530bc243eec427fb408bd477) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 12 ++++++++---- - 1 file changed, 8 insertions(+), 4 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9414935..690edbc 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1772,14 +1772,18 @@ static void lo_release(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) - { - struct lo_data *lo = lo_data(req); -- int fd; -+ struct lo_map_elem *elem; -+ int fd = -1; - - (void)ino; - -- fd = lo_fi_fd(req, fi); -- - pthread_mutex_lock(&lo->mutex); -- lo_map_remove(&lo->fd_map, fi->fh); -+ elem = lo_map_get(&lo->fd_map, fi->fh); -+ if (elem) { -+ fd = elem->fd; -+ elem = NULL; -+ lo_map_remove(&lo->fd_map, fi->fh); -+ } - pthread_mutex_unlock(&lo->mutex); - - close(fd); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-move-to-a-new-pid-namespace.patch b/SOURCES/kvm-virtiofsd-move-to-a-new-pid-namespace.patch deleted file mode 100644 index 9a33d1b..0000000 --- 
a/SOURCES/kvm-virtiofsd-move-to-a-new-pid-namespace.patch +++ /dev/null @@ -1,223 +0,0 @@ -From a7a87a751a9893830d031a957a751b7622b71fb2 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:29 +0100 -Subject: [PATCH 058/116] virtiofsd: move to a new pid namespace -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-55-dgilbert@redhat.com> -Patchwork-id: 93510 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 054/112] virtiofsd: move to a new pid namespace -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -virtiofsd needs access to /proc/self/fd. Let's move to a new pid -namespace so that a compromised process cannot see another other -processes running on the system. - -One wrinkle in this approach: unshare(CLONE_NEWPID) affects *child* -processes and not the current process. Therefore we need to fork the -pid 1 process that will actually run virtiofsd and leave a parent in -waitpid(2). This is not the same thing as daemonization and parent -processes should not notice a difference. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 8e1d4ef231d8327be219f7aea7aa15d181375bbc) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 134 +++++++++++++++++++++++++-------------- - 1 file changed, 86 insertions(+), 48 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 27ab328..0947d14 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -51,7 +51,10 @@ - #include - #include - #include -+#include - #include -+#include -+#include - #include - #include - -@@ -1945,24 +1948,95 @@ static void print_capabilities(void) - } - - /* -- * Called after our UNIX domain sockets have been created, now we can move to -- * an empty network namespace to prevent TCP/IP and other network activity in -- * case this process is compromised. -+ * Move to a new mount, net, and pid namespaces to isolate this process. - */ --static void setup_net_namespace(void) -+static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) - { -- if (unshare(CLONE_NEWNET) != 0) { -- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n"); -+ pid_t child; -+ -+ /* -+ * Create a new pid namespace for *child* processes. We'll have to -+ * fork in order to enter the new pid namespace. A new mount namespace -+ * is also needed so that we can remount /proc for the new pid -+ * namespace. -+ * -+ * Our UNIX domain sockets have been created. Now we can move to -+ * an empty network namespace to prevent TCP/IP and other network -+ * activity in case this process is compromised. 
-+ */ -+ if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) { -+ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n"); -+ exit(1); -+ } -+ -+ child = fork(); -+ if (child < 0) { -+ fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n"); -+ exit(1); -+ } -+ if (child > 0) { -+ pid_t waited; -+ int wstatus; -+ -+ /* The parent waits for the child */ -+ do { -+ waited = waitpid(child, &wstatus, 0); -+ } while (waited < 0 && errno == EINTR && !se->exited); -+ -+ /* We were terminated by a signal, see fuse_signals.c */ -+ if (se->exited) { -+ exit(0); -+ } -+ -+ if (WIFEXITED(wstatus)) { -+ exit(WEXITSTATUS(wstatus)); -+ } -+ -+ exit(1); -+ } -+ -+ /* Send us SIGTERM when the parent thread terminates, see prctl(2) */ -+ prctl(PR_SET_PDEATHSIG, SIGTERM); -+ -+ /* -+ * If the mounts have shared propagation then we want to opt out so our -+ * mount changes don't affect the parent mount namespace. -+ */ -+ if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n"); -+ exit(1); -+ } -+ -+ /* The child must remount /proc to use the new pid namespace */ -+ if (mount("proc", "/proc", "proc", -+ MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n"); -+ exit(1); -+ } -+ -+ /* Now we can get our /proc/self/fd directory file descriptor */ -+ lo->proc_self_fd = open("/proc/self/fd", O_PATH); -+ if (lo->proc_self_fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); - exit(1); - } - } - --/* This magic is based on lxc's lxc_pivot_root() */ --static void setup_pivot_root(const char *source) -+/* -+ * Make the source directory our root so symlinks cannot escape and no other -+ * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. 
-+ */ -+static void setup_mounts(const char *source) - { - int oldroot; - int newroot; - -+ if (mount(source, source, NULL, MS_BIND, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); -+ exit(1); -+ } -+ -+ /* This magic is based on lxc's lxc_pivot_root() */ - oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); - if (oldroot < 0) { - fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); -@@ -2009,47 +2083,14 @@ static void setup_pivot_root(const char *source) - close(oldroot); - } - --static void setup_proc_self_fd(struct lo_data *lo) --{ -- lo->proc_self_fd = open("/proc/self/fd", O_PATH); -- if (lo->proc_self_fd == -1) { -- fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); -- exit(1); -- } --} -- --/* -- * Make the source directory our root so symlinks cannot escape and no other -- * files are accessible. -- */ --static void setup_mount_namespace(const char *source) --{ -- if (unshare(CLONE_NEWNS) != 0) { -- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n"); -- exit(1); -- } -- -- if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { -- fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n"); -- exit(1); -- } -- -- if (mount(source, source, NULL, MS_BIND, NULL) < 0) { -- fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); -- exit(1); -- } -- -- setup_pivot_root(source); --} -- - /* - * Lock down this process to prevent access to other processes or files outside - * source directory. This reduces the impact of arbitrary code execution bugs. 
- */ --static void setup_sandbox(struct lo_data *lo) -+static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) - { -- setup_net_namespace(); -- setup_mount_namespace(lo->source); -+ setup_namespaces(lo, se); -+ setup_mounts(lo->source); - } - - int main(int argc, char *argv[]) -@@ -2173,10 +2214,7 @@ int main(int argc, char *argv[]) - - fuse_daemonize(opts.foreground); - -- /* Must be after daemonize to get the right /proc/self/fd */ -- setup_proc_self_fd(&lo); -- -- setup_sandbox(&lo); -+ setup_sandbox(&lo, se); - - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-move-to-an-empty-network-namespace.patch b/SOURCES/kvm-virtiofsd-move-to-an-empty-network-namespace.patch deleted file mode 100644 index 69a7c20..0000000 --- a/SOURCES/kvm-virtiofsd-move-to-an-empty-network-namespace.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 19a16f26bdeb6302159736e182a18b06160a3f42 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:28 +0100 -Subject: [PATCH 057/116] virtiofsd: move to an empty network namespace -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-54-dgilbert@redhat.com> -Patchwork-id: 93508 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 053/112] virtiofsd: move to an empty network namespace -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -If the process is compromised there should be no network access. Use an -empty network namespace to sandbox networking. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit d74830d12ae233186ff74ddf64c552d26bb39e50) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 0570453..27ab328 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1944,6 +1944,19 @@ static void print_capabilities(void) - printf("}\n"); - } - -+/* -+ * Called after our UNIX domain sockets have been created, now we can move to -+ * an empty network namespace to prevent TCP/IP and other network activity in -+ * case this process is compromised. -+ */ -+static void setup_net_namespace(void) -+{ -+ if (unshare(CLONE_NEWNET) != 0) { -+ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n"); -+ exit(1); -+ } -+} -+ - /* This magic is based on lxc's lxc_pivot_root() */ - static void setup_pivot_root(const char *source) - { -@@ -2035,6 +2048,7 @@ static void setup_mount_namespace(const char *source) - */ - static void setup_sandbox(struct lo_data *lo) - { -+ setup_net_namespace(); - setup_mount_namespace(lo->source); - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-only-retain-file-system-capabilities.patch b/SOURCES/kvm-virtiofsd-only-retain-file-system-capabilities.patch deleted file mode 100644 index 15c8cd8..0000000 --- a/SOURCES/kvm-virtiofsd-only-retain-file-system-capabilities.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 8727e4904e7a6588e39f231d837f4527f265e47e Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 5 May 2020 16:35:59 +0100 -Subject: [PATCH 8/9] virtiofsd: only retain file system capabilities - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200505163600.22956-7-dgilbert@redhat.com> -Patchwork-id: 96272 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 6/7] virtiofsd: only retain file system capabilities -Bugzilla: 1817445 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Michael S. Tsirkin - -From: Stefan Hajnoczi - -virtiofsd runs as root but only needs a subset of root's Linux -capabilities(7). As a file server its purpose is to create and access -files on behalf of a client. It needs to be able to access files with -arbitrary uid/gid owners. It also needs to be create device nodes. - -Introduce a Linux capabilities(7) whitelist and drop all capabilities -that we don't need, making the virtiofsd process less powerful than a -regular uid root process. - - # cat /proc/PID/status - ... - Before After - CapInh: 0000000000000000 0000000000000000 - CapPrm: 0000003fffffffff 00000000880000df - CapEff: 0000003fffffffff 00000000880000df - CapBnd: 0000003fffffffff 0000000000000000 - CapAmb: 0000000000000000 0000000000000000 - -Note that file capabilities cannot be used to achieve the same effect on -the virtiofsd executable because mount is used during sandbox setup. -Therefore we drop capabilities programmatically at the right point -during startup. - -This patch only affects the sandboxed child process. The parent process -that sits in waitpid(2) still has full root capabilities and will be -addressed in the next patch. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20200416164907.244868-2-stefanha@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit a59feb483b8fae24d043569ccfcc97ea23d54a02) -Signed-off-by: Danilo C. L. 
de Paula ---- - tools/virtiofsd/passthrough_ll.c | 38 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 38 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 614ba55..6358874 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2723,6 +2723,43 @@ static void setup_mounts(const char *source) - } - - /* -+ * Only keep whitelisted capabilities that are needed for file system operation -+ */ -+static void setup_capabilities(void) -+{ -+ pthread_mutex_lock(&cap.mutex); -+ capng_restore_state(&cap.saved); -+ -+ /* -+ * Whitelist file system-related capabilities that are needed for a file -+ * server to act like root. Drop everything else like networking and -+ * sysadmin capabilities. -+ * -+ * Exclusions: -+ * 1. CAP_LINUX_IMMUTABLE is not included because it's only used via ioctl -+ * and we don't support that. -+ * 2. CAP_MAC_OVERRIDE is not included because it only seems to be -+ * used by the Smack LSM. Omit it until there is demand for it. -+ */ -+ capng_setpid(syscall(SYS_gettid)); -+ capng_clear(CAPNG_SELECT_BOTH); -+ capng_updatev(CAPNG_ADD, CAPNG_PERMITTED | CAPNG_EFFECTIVE, -+ CAP_CHOWN, -+ CAP_DAC_OVERRIDE, -+ CAP_DAC_READ_SEARCH, -+ CAP_FOWNER, -+ CAP_FSETID, -+ CAP_SETGID, -+ CAP_SETUID, -+ CAP_MKNOD, -+ CAP_SETFCAP); -+ capng_apply(CAPNG_SELECT_BOTH); -+ -+ cap.saved = capng_save_state(); -+ pthread_mutex_unlock(&cap.mutex); -+} -+ -+/* - * Lock down this process to prevent access to other processes or files outside - * source directory. This reduces the impact of arbitrary code execution bugs. 
- */ -@@ -2732,6 +2769,7 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, - setup_namespaces(lo, se); - setup_mounts(lo->source); - setup_seccomp(enable_syslog); -+ setup_capabilities(); - } - - /* Set the maximum number of open file descriptors */ --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch b/SOURCES/kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch deleted file mode 100644 index f21d793..0000000 --- a/SOURCES/kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch +++ /dev/null @@ -1,124 +0,0 @@ -From f2c0b07088966c396ddcee54f4bed97cdb01192f Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 Feb 2021 23:14:55 -0500 -Subject: [PATCH 2/3] virtiofsd: optionally return inode pointer from - lo_do_lookup() - -RH-Author: Jon Maloy -Message-id: <20210209231456.1555472-3-jmaloy@redhat.com> -Patchwork-id: 101022 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 2/3] virtiofsd: optionally return inode pointer from lo_do_lookup() -Bugzilla: 1919111 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Greg Kurz -RH-Acked-by: Dr. David Alan Gilbert - -From: Stefan Hajnoczi - -lo_do_lookup() finds an existing inode or allocates a new one. It -increments nlookup so that the inode stays alive until the client -releases it. - -Existing callers don't need the struct lo_inode so the function doesn't -return it. Extend the function to optionally return the inode. The next -commit will need it. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Greg Kurz -Message-Id: <20210204150208.367837-3-stefanha@redhat.com> -Signed-off-by: Dr. 
David Alan Gilbert - -(cherry-picked from commit 22d2ece71e533310da31f2857ebc4a00d91968b3) -Signed-off-by: Jon Maloy -Signed-off-by: Jon Maloy ---- - tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++-------- - 1 file changed, 21 insertions(+), 8 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 518ba11c47..e5bd3d73e4 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -878,11 +878,13 @@ static void posix_locks_value_destroy(gpointer data) - } - - /* -- * Increments nlookup and caller must release refcount using -- * lo_inode_put(&parent). -+ * Increments nlookup on the inode on success. unref_inode_lolocked() must be -+ * called eventually to decrement nlookup again. If inodep is non-NULL, the -+ * inode pointer is stored and the caller must call lo_inode_put(). - */ - static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, -- struct fuse_entry_param *e) -+ struct fuse_entry_param *e, -+ struct lo_inode **inodep) - { - int newfd; - int res; -@@ -891,6 +893,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - struct lo_inode *inode = NULL; - struct lo_inode *dir = lo_inode(req, parent); - -+ if (inodep) { -+ *inodep = NULL; -+ } -+ - /* - * name_to_handle_at() and open_by_handle_at() can reach here with fuse - * mount point in guest, but we don't have its inode info in the -@@ -953,7 +959,14 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - pthread_mutex_unlock(&lo->mutex); - } - e->ino = inode->fuse_ino; -- lo_inode_put(lo, &inode); -+ -+ /* Transfer ownership of inode pointer to caller or drop it */ -+ if (inodep) { -+ *inodep = inode; -+ } else { -+ lo_inode_put(lo, &inode); -+ } -+ - lo_inode_put(lo, &dir); - - fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, -@@ -988,7 +1001,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char 
*name) - return; - } - -- err = lo_do_lookup(req, parent, name, &e); -+ err = lo_do_lookup(req, parent, name, &e, NULL); - if (err) { - fuse_reply_err(req, err); - } else { -@@ -1098,7 +1111,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - goto out; - } - -- saverr = lo_do_lookup(req, parent, name, &e); -+ saverr = lo_do_lookup(req, parent, name, &e, NULL); - if (saverr) { - goto out; - } -@@ -1599,7 +1612,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - - if (plus) { - if (!is_dot_or_dotdot(name)) { -- err = lo_do_lookup(req, ino, name, &e); -+ err = lo_do_lookup(req, ino, name, &e, NULL); - if (err) { - goto error; - } -@@ -1793,7 +1806,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - fi->fh = fh; -- err = lo_do_lookup(req, parent, name, &e); -+ err = lo_do_lookup(req, parent, name, &e, NULL); - } - if (lo->cache == CACHE_NONE) { - fi->direct_io = 1; --- -2.18.2 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch deleted file mode 100644 index e3d5773..0000000 --- a/SOURCES/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch +++ /dev/null @@ -1,54 +0,0 @@ -From fe031dbbf5e287f64de9fcc9aec361e8ab492109 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:24 +0100 -Subject: [PATCH 113/116] virtiofsd/passthrough_ll: Pass errno to - fuse_reply_err() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-110-dgilbert@redhat.com> -Patchwork-id: 93559 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 109/112] virtiofsd/passthrough_ll: Pass errno to fuse_reply_err() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Xiao Yang - -lo_copy_file_range() passes -errno to fuse_reply_err() and then fuse_reply_err() -changes it to errno again, so that subsequent fuse_send_reply_iov_nofree() catches -the wrong errno.(i.e. reports "fuse: bad error value: ..."). - -Make fuse_send_reply_iov_nofree() accept the correct -errno by passing errno -directly in lo_copy_file_range(). - -Signed-off-by: Xiao Yang -Reviewed-by: Eryu Guan - -dgilbert: Sent upstream and now Merged as aa1185e153f774f1df65 -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit a931b6861e59c78d861017e9c6a9c161ff49a163) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index fc15d61..e6f2399 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2441,7 +2441,7 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, - - res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); - if (res < 0) { -- fuse_reply_err(req, -errno); -+ fuse_reply_err(req, errno); - } else { - fuse_reply_write(req, res); - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch deleted file mode 100644 index ddacdbe..0000000 --- a/SOURCES/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 83b03fc4a3ecf6086394363488bbebc8d55428c0 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:16 +0100 -Subject: [PATCH 105/116] virtiofsd: passthrough_ll: Use cache_readdir for - directory open -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-102-dgilbert@redhat.com> -Patchwork-id: 93555 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 101/112] virtiofsd: passthrough_ll: Use cache_readdir for directory open -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Misono Tomohiro - -Since keep_cache(FOPEN_KEEP_CACHE) has no effect for directory as -described in fuse_common.h, use cache_readdir(FOPNE_CACHE_DIR) for -diretory open when cache=always mode. - -Signed-off-by: Misono Tomohiro -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 9b610b09b49b1aada256097b338d49da805da6ae) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 4c61ac5..79b8b71 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1523,7 +1523,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, - - fi->fh = fh; - if (lo->cache == CACHE_ALWAYS) { -- fi->keep_cache = 1; -+ fi->cache_readdir = 1; - } - fuse_reply_open(req, fi); - return; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch deleted file mode 100644 index 0506574..0000000 --- a/SOURCES/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch +++ /dev/null @@ -1,238 +0,0 @@ -From 474d0adafed4d73720d6413b2903d6c4b529e5e6 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:15 +0100 -Subject: [PATCH 044/116] virtiofsd: passthrough_ll: add dirp_map to hide - lo_dirp pointers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-41-dgilbert@redhat.com> -Patchwork-id: 93495 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 040/112] virtiofsd: passthrough_ll: add dirp_map to hide lo_dirp pointers -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Do not expose lo_dirp pointers to clients. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit b39bce121bfad8757eec0ee41f14607b883935d3) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 103 +++++++++++++++++++++++++++++---------- - 1 file changed, 76 insertions(+), 27 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index a3ebf74..5f5a72f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -56,27 +56,10 @@ - - #include "passthrough_helpers.h" - --/* -- * We are re-using pointers to our `struct lo_inode` -- * elements as inodes. This means that we must be able to -- * store uintptr_t values in a fuse_ino_t variable. The following -- * incantation checks this condition at compile time. -- */ --#if defined(__GNUC__) && \ -- (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ -- !defined __cplusplus --_Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), -- "fuse_ino_t too small to hold uintptr_t values!"); --#else --struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { -- unsigned _uintptr_to_must_hold_fuse_ino_t -- : ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 
1 : -1); --}; --#endif -- - struct lo_map_elem { - union { - struct lo_inode *inode; -+ struct lo_dirp *dirp; - ssize_t freelist; - }; - bool in_use; -@@ -123,6 +106,7 @@ struct lo_data { - int timeout_set; - struct lo_inode root; /* protected by lo->mutex */ - struct lo_map ino_map; /* protected by lo->mutex */ -+ struct lo_map dirp_map; /* protected by lo->mutex */ - }; - - static const struct fuse_opt lo_opts[] = { -@@ -253,6 +237,20 @@ static void lo_map_remove(struct lo_map *map, size_t key) - } - - /* Assumes lo->mutex is held */ -+static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) -+{ -+ struct lo_map_elem *elem; -+ -+ elem = lo_map_alloc_elem(&lo_data(req)->dirp_map); -+ if (!elem) { -+ return -1; -+ } -+ -+ elem->dirp = dirp; -+ return elem - lo_data(req)->dirp_map.elems; -+} -+ -+/* Assumes lo->mutex is held */ - static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) - { - struct lo_map_elem *elem; -@@ -861,9 +859,19 @@ struct lo_dirp { - off_t offset; - }; - --static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) -+static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) - { -- return (struct lo_dirp *)(uintptr_t)fi->fh; -+ struct lo_data *lo = lo_data(req); -+ struct lo_map_elem *elem; -+ -+ pthread_mutex_lock(&lo->mutex); -+ elem = lo_map_get(&lo->dirp_map, fi->fh); -+ pthread_mutex_unlock(&lo->mutex); -+ if (!elem) { -+ return NULL; -+ } -+ -+ return elem->dirp; - } - - static void lo_opendir(fuse_req_t req, fuse_ino_t ino, -@@ -873,6 +881,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, - struct lo_data *lo = lo_data(req); - struct lo_dirp *d; - int fd; -+ ssize_t fh; - - d = calloc(1, sizeof(struct lo_dirp)); - if (d == NULL) { -@@ -892,7 +901,14 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, - d->offset = 0; - d->entry = NULL; - -- fi->fh = (uintptr_t)d; -+ pthread_mutex_lock(&lo->mutex); -+ fh = lo_add_dirp_mapping(req, d); -+ pthread_mutex_unlock(&lo->mutex); -+ 
if (fh == -1) { -+ goto out_err; -+ } -+ -+ fi->fh = fh; - if (lo->cache == CACHE_ALWAYS) { - fi->keep_cache = 1; - } -@@ -903,6 +919,9 @@ out_errno: - error = errno; - out_err: - if (d) { -+ if (d->dp) { -+ closedir(d->dp); -+ } - if (fd != -1) { - close(fd); - } -@@ -920,17 +939,21 @@ static int is_dot_or_dotdot(const char *name) - static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, struct fuse_file_info *fi, int plus) - { -- struct lo_dirp *d = lo_dirp(fi); -- char *buf; -+ struct lo_dirp *d; -+ char *buf = NULL; - char *p; - size_t rem = size; -- int err; -+ int err = ENOMEM; - - (void)ino; - -+ d = lo_dirp(req, fi); -+ if (!d) { -+ goto error; -+ } -+ - buf = calloc(1, size); - if (!buf) { -- err = ENOMEM; - goto error; - } - p = buf; -@@ -1028,8 +1051,21 @@ static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, - static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) - { -- struct lo_dirp *d = lo_dirp(fi); -+ struct lo_data *lo = lo_data(req); -+ struct lo_dirp *d; -+ - (void)ino; -+ -+ d = lo_dirp(req, fi); -+ if (!d) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ pthread_mutex_lock(&lo->mutex); -+ lo_map_remove(&lo->dirp_map, fi->fh); -+ pthread_mutex_unlock(&lo->mutex); -+ - closedir(d->dp); - free(d); - fuse_reply_err(req, 0); -@@ -1081,8 +1117,18 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - struct fuse_file_info *fi) - { - int res; -- int fd = dirfd(lo_dirp(fi)->dp); -+ struct lo_dirp *d; -+ int fd; -+ - (void)ino; -+ -+ d = lo_dirp(req, fi); -+ if (!d) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ fd = dirfd(d->dp); - if (datasync) { - res = fdatasync(fd); - } else { -@@ -1614,6 +1660,8 @@ int main(int argc, char *argv[]) - root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); - root_elem->inode = &lo.root; - -+ lo_map_init(&lo.dirp_map); -+ - if (fuse_parse_cmdline(&args, &opts) != 0) { - return 1; - } -@@ -1710,6 
+1758,7 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -+ lo_map_destroy(&lo.dirp_map); - lo_map_destroy(&lo.ino_map); - - if (lo.root.fd >= 0) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch deleted file mode 100644 index b8de3d8..0000000 --- a/SOURCES/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch +++ /dev/null @@ -1,303 +0,0 @@ -From 03effbc021064bb77d231ae5ca02d1a579c71ee1 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:17 +0100 -Subject: [PATCH 046/116] virtiofsd: passthrough_ll: add fallback for racy ops -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-43-dgilbert@redhat.com> -Patchwork-id: 93496 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 042/112] virtiofsd: passthrough_ll: add fallback for racy ops -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -We have two operations that cannot be done race-free on a symlink in -certain cases: utimes and link. - -Add racy fallback for these if the race-free method doesn't work. We do -our best to avoid races even in this case: - - - get absolute path by reading /proc/self/fd/NN symlink - - - lookup parent directory: after this we are safe against renames in - ancestors - - - lookup name in parent directory, and verify that we got to the original - inode, if not retry the whole thing - -Both utimes(2) and link(2) hold i_lock on the inode across the operation, -so a racing rename/delete by this fuse instance is not possible, only from -other entities changing the filesystem. - -If the "norace" option is given, then disable the racy fallbacks. - -Signed-off-by: Miklos Szeredi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 5fe319a7b19c9c328e6e061bffcf1ff6cc8b89ce) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 5 +- - tools/virtiofsd/passthrough_ll.c | 157 +++++++++++++++++++++++++++++++++++---- - 2 files changed, 145 insertions(+), 17 deletions(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index b8ec5ac..5531425 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -142,7 +142,10 @@ void fuse_cmdline_help(void) - " --daemonize run in background\n" - " -o max_idle_threads the maximum number of idle worker " - "threads\n" -- " allowed (default: 10)\n"); -+ " allowed (default: 10)\n" -+ " -o norace disable racy fallback\n" -+ " default: false\n" -+ ); - } - - static int fuse_helper_opt_proc(void *data, const char *arg, int key, -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9815bfa..ac380ef 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -98,6 +98,7 @@ enum { - struct lo_data { - pthread_mutex_t mutex; - int debug; -+ int norace; - int writeback; - int flock; - int xattr; -@@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = { - { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, - { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, - { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, -- -+ { "norace", offsetof(struct lo_data, norace), 1 }, - FUSE_OPT_END - }; - -+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); -+ -+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); -+ -+ - static struct lo_data *lo_data(fuse_req_t req) - { - return (struct lo_data *)fuse_req_userdata(req); -@@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, - fuse_reply_attr(req, &buf, lo->timeout); - } - --static int utimensat_empty_nofollow(struct lo_inode *inode, -- const struct timespec 
*tv) -+static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, -+ char path[PATH_MAX], struct lo_inode **parent) - { -- int res; - char procname[64]; -+ char *last; -+ struct stat stat; -+ struct lo_inode *p; -+ int retries = 2; -+ int res; -+ -+retry: -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ res = readlink(procname, path, PATH_MAX); -+ if (res < 0) { -+ fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__); -+ goto fail_noretry; -+ } -+ -+ if (res >= PATH_MAX) { -+ fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__); -+ goto fail_noretry; -+ } -+ path[res] = '\0'; -+ -+ last = strrchr(path, '/'); -+ if (last == NULL) { -+ /* Shouldn't happen */ -+ fuse_log( -+ FUSE_LOG_WARNING, -+ "%s: INTERNAL ERROR: bad path read from proc\n", __func__); -+ goto fail_noretry; -+ } -+ if (last == path) { -+ p = &lo->root; -+ pthread_mutex_lock(&lo->mutex); -+ p->refcount++; -+ pthread_mutex_unlock(&lo->mutex); -+ } else { -+ *last = '\0'; -+ res = fstatat(AT_FDCWD, last == path ? 
"/" : path, &stat, 0); -+ if (res == -1) { -+ if (!retries) { -+ fuse_log(FUSE_LOG_WARNING, -+ "%s: failed to stat parent: %m\n", __func__); -+ } -+ goto fail; -+ } -+ p = lo_find(lo, &stat); -+ if (p == NULL) { -+ if (!retries) { -+ fuse_log(FUSE_LOG_WARNING, -+ "%s: failed to find parent\n", __func__); -+ } -+ goto fail; -+ } -+ } -+ last++; -+ res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ if (!retries) { -+ fuse_log(FUSE_LOG_WARNING, -+ "%s: failed to stat last\n", __func__); -+ } -+ goto fail_unref; -+ } -+ if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) { -+ if (!retries) { -+ fuse_log(FUSE_LOG_WARNING, -+ "%s: failed to match last\n", __func__); -+ } -+ goto fail_unref; -+ } -+ *parent = p; -+ memmove(path, last, strlen(last) + 1); -+ -+ return 0; -+ -+fail_unref: -+ unref_inode(lo, p, 1); -+fail: -+ if (retries) { -+ retries--; -+ goto retry; -+ } -+fail_noretry: -+ errno = EIO; -+ return -1; -+} -+ -+static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, -+ const struct timespec *tv) -+{ -+ int res; -+ struct lo_inode *parent; -+ char path[PATH_MAX]; - - if (inode->is_symlink) { -- res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); - if (res == -1 && errno == EINVAL) { - /* Sorry, no race free way to set times on symlink. 
*/ -- errno = EPERM; -+ if (lo->norace) { -+ errno = EPERM; -+ } else { -+ goto fallback; -+ } - } - return res; - } -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(path, "/proc/self/fd/%i", inode->fd); - -- return utimensat(AT_FDCWD, procname, tv, 0); -+ return utimensat(AT_FDCWD, path, tv, 0); -+ -+fallback: -+ res = lo_parent_and_name(lo, inode, path, &parent); -+ if (res != -1) { -+ res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); -+ unref_inode(lo, parent, 1); -+ } -+ -+ return res; - } - - static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) -@@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - { - int saverr; - char procname[64]; -+ struct lo_data *lo = lo_data(req); - struct lo_inode *inode; - int ifd; - int res; -@@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - if (fi) { - res = futimens(fd, tv); - } else { -- res = utimensat_empty_nofollow(inode, tv); -+ res = utimensat_empty(lo, inode, tv); - } - if (res == -1) { - goto out_err; -@@ -709,24 +820,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, - lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); - } - --static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, -- const char *name) -+static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, -+ int dfd, const char *name) - { - int res; -- char procname[64]; -+ struct lo_inode *parent; -+ char path[PATH_MAX]; - - if (inode->is_symlink) { - res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); - if (res == -1 && (errno == ENOENT || errno == EINVAL)) { - /* Sorry, no race free way to hard-link a symlink. 
*/ -- errno = EPERM; -+ if (lo->norace) { -+ errno = EPERM; -+ } else { -+ goto fallback; -+ } - } - return res; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(path, "/proc/self/fd/%i", inode->fd); -+ -+ return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW); -+ -+fallback: -+ res = lo_parent_and_name(lo, inode, path, &parent); -+ if (res != -1) { -+ res = linkat(parent->fd, path, dfd, name, 0); -+ unref_inode(lo, parent, 1); -+ } - -- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); -+ return res; - } - - static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, -@@ -748,7 +873,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - e.attr_timeout = lo->timeout; - e.entry_timeout = lo->timeout; - -- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); -+ res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name); - if (res == -1) { - goto out_err; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch deleted file mode 100644 index 24b2a6e..0000000 --- a/SOURCES/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch +++ /dev/null @@ -1,328 +0,0 @@ -From 35337e604e9149d6d8fcf74b8b82ac33a8611ebb Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:16 +0100 -Subject: [PATCH 045/116] virtiofsd: passthrough_ll: add fd_map to hide file - descriptors -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-42-dgilbert@redhat.com> -Patchwork-id: 93494 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 041/112] virtiofsd: passthrough_ll: add fd_map to hide file descriptors -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Do not expose file descriptor numbers to clients. This prevents the -abuse of internal file descriptors (like stdin/stdout). - -Signed-off-by: Stefan Hajnoczi -Fix from: -Signed-off-by: Xiao Yang -dgilbert: - Added lseek -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 73b4d19dfc4248a74c1f3e511cfa934681d9c602) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 116 +++++++++++++++++++++++++++++++-------- - 1 file changed, 94 insertions(+), 22 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 5f5a72f..9815bfa 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -60,6 +60,7 @@ struct lo_map_elem { - union { - struct lo_inode *inode; - struct lo_dirp *dirp; -+ int fd; - ssize_t freelist; - }; - bool in_use; -@@ -107,6 +108,7 @@ struct lo_data { - struct lo_inode root; /* protected by lo->mutex */ - struct lo_map ino_map; /* protected by lo->mutex */ - struct lo_map dirp_map; /* protected by lo->mutex */ -+ struct lo_map fd_map; /* protected by lo->mutex */ - }; - - static const struct fuse_opt lo_opts[] = { -@@ -237,6 +239,20 @@ static void lo_map_remove(struct lo_map *map, size_t key) - } - - /* Assumes lo->mutex is held */ -+static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) -+{ -+ struct lo_map_elem *elem; -+ -+ elem = lo_map_alloc_elem(&lo_data(req)->fd_map); -+ if (!elem) { -+ return -1; -+ } -+ -+ elem->fd = fd; -+ return elem - lo_data(req)->fd_map.elems; -+} -+ -+/* Assumes lo->mutex is held */ - static ssize_t 
lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) - { - struct lo_map_elem *elem; -@@ -350,6 +366,22 @@ static int utimensat_empty_nofollow(struct lo_inode *inode, - return utimensat(AT_FDCWD, procname, tv, 0); - } - -+static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) -+{ -+ struct lo_data *lo = lo_data(req); -+ struct lo_map_elem *elem; -+ -+ pthread_mutex_lock(&lo->mutex); -+ elem = lo_map_get(&lo->fd_map, fi->fh); -+ pthread_mutex_unlock(&lo->mutex); -+ -+ if (!elem) { -+ return -1; -+ } -+ -+ return elem->fd; -+} -+ - static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - int valid, struct fuse_file_info *fi) - { -@@ -358,6 +390,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - struct lo_inode *inode; - int ifd; - int res; -+ int fd; - - inode = lo_inode(req, ino); - if (!inode) { -@@ -367,9 +400,14 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - - ifd = inode->fd; - -+ /* If fi->fh is invalid we'll report EBADF later */ -+ if (fi) { -+ fd = lo_fi_fd(req, fi); -+ } -+ - if (valid & FUSE_SET_ATTR_MODE) { - if (fi) { -- res = fchmod(fi->fh, attr->st_mode); -+ res = fchmod(fd, attr->st_mode); - } else { - sprintf(procname, "/proc/self/fd/%i", ifd); - res = chmod(procname, attr->st_mode); -@@ -389,7 +427,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - } - if (valid & FUSE_SET_ATTR_SIZE) { - if (fi) { -- res = ftruncate(fi->fh, attr->st_size); -+ res = ftruncate(fd, attr->st_size); - } else { - sprintf(procname, "/proc/self/fd/%i", ifd); - res = truncate(procname, attr->st_size); -@@ -419,7 +457,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - } - - if (fi) { -- res = futimens(fi->fh, tv); -+ res = futimens(fd, tv); - } else { - res = utimensat_empty_nofollow(inode, tv); - } -@@ -1096,7 +1134,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - lo_restore_cred(&old); - - 
if (!err) { -- fi->fh = fd; -+ ssize_t fh; -+ -+ pthread_mutex_lock(&lo->mutex); -+ fh = lo_add_fd_mapping(req, fd); -+ pthread_mutex_unlock(&lo->mutex); -+ if (fh == -1) { -+ close(fd); -+ fuse_reply_err(req, ENOMEM); -+ return; -+ } -+ -+ fi->fh = fh; - err = lo_do_lookup(req, parent, name, &e); - } - if (lo->cache == CACHE_NEVER) { -@@ -1140,6 +1189,7 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { - int fd; -+ ssize_t fh; - char buf[64]; - struct lo_data *lo = lo_data(req); - -@@ -1175,7 +1225,16 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - return (void)fuse_reply_err(req, errno); - } - -- fi->fh = fd; -+ pthread_mutex_lock(&lo->mutex); -+ fh = lo_add_fd_mapping(req, fd); -+ pthread_mutex_unlock(&lo->mutex); -+ if (fh == -1) { -+ close(fd); -+ fuse_reply_err(req, ENOMEM); -+ return; -+ } -+ -+ fi->fh = fh; - if (lo->cache == CACHE_NEVER) { - fi->direct_io = 1; - } else if (lo->cache == CACHE_ALWAYS) { -@@ -1187,9 +1246,18 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - static void lo_release(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) - { -+ struct lo_data *lo = lo_data(req); -+ int fd; -+ - (void)ino; - -- close(fi->fh); -+ fd = lo_fi_fd(req, fi); -+ -+ pthread_mutex_lock(&lo->mutex); -+ lo_map_remove(&lo->fd_map, fi->fh); -+ pthread_mutex_unlock(&lo->mutex); -+ -+ close(fd); - fuse_reply_err(req, 0); - } - -@@ -1197,7 +1265,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { - int res; - (void)ino; -- res = close(dup(fi->fh)); -+ res = close(dup(lo_fi_fd(req, fi))); - fuse_reply_err(req, res == -1 ? 
errno : 0); - } - -@@ -1224,7 +1292,7 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - return (void)fuse_reply_err(req, errno); - } - } else { -- fd = fi->fh; -+ fd = lo_fi_fd(req, fi); - } - - if (datasync) { -@@ -1251,7 +1319,7 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, - } - - buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -- buf.buf[0].fd = fi->fh; -+ buf.buf[0].fd = lo_fi_fd(req, fi); - buf.buf[0].pos = offset; - - fuse_reply_data(req, &buf); -@@ -1266,7 +1334,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); - - out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -- out_buf.buf[0].fd = fi->fh; -+ out_buf.buf[0].fd = lo_fi_fd(req, fi); - out_buf.buf[0].pos = off; - - if (lo_debug(req)) { -@@ -1303,7 +1371,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, - (void)ino; - - #ifdef CONFIG_FALLOCATE -- err = fallocate(fi->fh, mode, offset, length); -+ err = fallocate(lo_fi_fd(req, fi), mode, offset, length); - if (err < 0) { - err = errno; - } -@@ -1314,7 +1382,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, - return; - } - -- err = posix_fallocate(fi->fh, offset, length); -+ err = posix_fallocate(lo_fi_fd(req, fi), offset, length); - #endif - - fuse_reply_err(req, err); -@@ -1326,7 +1394,7 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - int res; - (void)ino; - -- res = flock(fi->fh, op); -+ res = flock(lo_fi_fd(req, fi), op); - - fuse_reply_err(req, res == -1 ? 
errno : 0); - } -@@ -1551,17 +1619,19 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, - off_t off_out, struct fuse_file_info *fi_out, - size_t len, int flags) - { -+ int in_fd, out_fd; - ssize_t res; - -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, -- "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " -- "off=%lu, ino=%" PRIu64 "/fd=%lu, " -- "off=%lu, size=%zd, flags=0x%x)\n", -- ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, len, -- flags); -+ in_fd = lo_fi_fd(req, fi_in); -+ out_fd = lo_fi_fd(req, fi_out); -+ -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, " -+ "off=%lu, ino=%" PRIu64 "/fd=%d, " -+ "off=%lu, size=%zd, flags=0x%x)\n", -+ ino_in, in_fd, off_in, ino_out, out_fd, off_out, len, flags); - -- res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, flags); -+ res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); - if (res < 0) { - fuse_reply_err(req, -errno); - } else { -@@ -1576,7 +1646,7 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, - off_t res; - - (void)ino; -- res = lseek(fi->fh, off, whence); -+ res = lseek(lo_fi_fd(req, fi), off, whence); - if (res != -1) { - fuse_reply_lseek(req, res); - } else { -@@ -1661,6 +1731,7 @@ int main(int argc, char *argv[]) - root_elem->inode = &lo.root; - - lo_map_init(&lo.dirp_map); -+ lo_map_init(&lo.fd_map); - - if (fuse_parse_cmdline(&args, &opts) != 0) { - return 1; -@@ -1758,6 +1829,7 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -+ lo_map_destroy(&lo.fd_map); - lo_map_destroy(&lo.dirp_map); - lo_map_destroy(&lo.ino_map); - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch deleted file mode 100644 index ba8b730..0000000 --- a/SOURCES/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch +++ /dev/null @@ -1,395 +0,0 @@ -From 
d81396cc3d9815730903b0755c9d2e67d6954d54 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:14 +0100 -Subject: [PATCH 043/116] virtiofsd: passthrough_ll: add ino_map to hide - lo_inode pointers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-40-dgilbert@redhat.com> -Patchwork-id: 93493 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 039/112] virtiofsd: passthrough_ll: add ino_map to hide lo_inode pointers -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Do not expose lo_inode pointers to clients. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 92fb57b83cdbfc4bf53c0c46a3d0bcbc36e64126) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 144 +++++++++++++++++++++++++++++++-------- - 1 file changed, 114 insertions(+), 30 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e83a976..a3ebf74 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -57,8 +57,8 @@ - #include "passthrough_helpers.h" - - /* -- * We are re-using pointers to our `struct lo_inode` and `struct -- * lo_dirp` elements as inodes. This means that we must be able to -+ * We are re-using pointers to our `struct lo_inode` -+ * elements as inodes. This means that we must be able to - * store uintptr_t values in a fuse_ino_t variable. The following - * incantation checks this condition at compile time. - */ -@@ -76,7 +76,7 @@ struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { - - struct lo_map_elem { - union { -- /* Element values will go here... 
*/ -+ struct lo_inode *inode; - ssize_t freelist; - }; - bool in_use; -@@ -97,6 +97,7 @@ struct lo_inode { - ino_t ino; - dev_t dev; - uint64_t refcount; /* protected by lo->mutex */ -+ fuse_ino_t fuse_ino; - }; - - struct lo_cred { -@@ -121,6 +122,7 @@ struct lo_data { - int cache; - int timeout_set; - struct lo_inode root; /* protected by lo->mutex */ -+ struct lo_map ino_map; /* protected by lo->mutex */ - }; - - static const struct fuse_opt lo_opts[] = { -@@ -145,14 +147,14 @@ static struct lo_data *lo_data(fuse_req_t req) - return (struct lo_data *)fuse_req_userdata(req); - } - --__attribute__((unused)) static void lo_map_init(struct lo_map *map) -+static void lo_map_init(struct lo_map *map) - { - map->elems = NULL; - map->nelems = 0; - map->freelist = -1; - } - --__attribute__((unused)) static void lo_map_destroy(struct lo_map *map) -+static void lo_map_destroy(struct lo_map *map) - { - free(map->elems); - } -@@ -183,8 +185,7 @@ static int lo_map_grow(struct lo_map *map, size_t new_nelems) - return 1; - } - --__attribute__((unused)) static struct lo_map_elem * --lo_map_alloc_elem(struct lo_map *map) -+static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map) - { - struct lo_map_elem *elem; - -@@ -200,8 +201,7 @@ lo_map_alloc_elem(struct lo_map *map) - return elem; - } - --__attribute__((unused)) static struct lo_map_elem * --lo_map_reserve(struct lo_map *map, size_t key) -+static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key) - { - ssize_t *prev; - -@@ -222,8 +222,7 @@ lo_map_reserve(struct lo_map *map, size_t key) - return NULL; - } - --__attribute__((unused)) static struct lo_map_elem * --lo_map_get(struct lo_map *map, size_t key) -+static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key) - { - if (key >= map->nelems) { - return NULL; -@@ -234,8 +233,7 @@ lo_map_get(struct lo_map *map, size_t key) - return &map->elems[key]; - } - --__attribute__((unused)) static void lo_map_remove(struct lo_map *map, -- size_t key) 
-+static void lo_map_remove(struct lo_map *map, size_t key) - { - struct lo_map_elem *elem; - -@@ -254,18 +252,40 @@ __attribute__((unused)) static void lo_map_remove(struct lo_map *map, - map->freelist = key; - } - -+/* Assumes lo->mutex is held */ -+static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) -+{ -+ struct lo_map_elem *elem; -+ -+ elem = lo_map_alloc_elem(&lo_data(req)->ino_map); -+ if (!elem) { -+ return -1; -+ } -+ -+ elem->inode = inode; -+ return elem - lo_data(req)->ino_map.elems; -+} -+ - static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - { -- if (ino == FUSE_ROOT_ID) { -- return &lo_data(req)->root; -- } else { -- return (struct lo_inode *)(uintptr_t)ino; -+ struct lo_data *lo = lo_data(req); -+ struct lo_map_elem *elem; -+ -+ pthread_mutex_lock(&lo->mutex); -+ elem = lo_map_get(&lo->ino_map, ino); -+ pthread_mutex_unlock(&lo->mutex); -+ -+ if (!elem) { -+ return NULL; - } -+ -+ return elem->inode; - } - - static int lo_fd(fuse_req_t req, fuse_ino_t ino) - { -- return lo_inode(req, ino)->fd; -+ struct lo_inode *inode = lo_inode(req, ino); -+ return inode ? 
inode->fd : -1; - } - - static bool lo_debug(fuse_req_t req) -@@ -337,10 +357,18 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - { - int saverr; - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- int ifd = inode->fd; -+ struct lo_inode *inode; -+ int ifd; - int res; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ ifd = inode->fd; -+ - if (valid & FUSE_SET_ATTR_MODE) { - if (fi) { - res = fchmod(fi->fh, attr->st_mode); -@@ -470,6 +498,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - inode->dev = e->attr.st_dev; - - pthread_mutex_lock(&lo->mutex); -+ inode->fuse_ino = lo_add_inode_mapping(req, inode); - prev = &lo->root; - next = prev->next; - next->prev = inode; -@@ -478,7 +507,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - prev->next = inode; - pthread_mutex_unlock(&lo->mutex); - } -- e->ino = (uintptr_t)inode; -+ e->ino = inode->fuse_ino; - - if (lo_debug(req)) { - fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -@@ -582,10 +611,16 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - { - int res; - int saverr; -- struct lo_inode *dir = lo_inode(req, parent); -+ struct lo_inode *dir; - struct fuse_entry_param e; - struct lo_cred old = {}; - -+ dir = lo_inode(req, parent); -+ if (!dir) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOMEM; - - saverr = lo_change_cred(req, &old); -@@ -663,10 +698,16 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - { - int res; - struct lo_data *lo = lo_data(req); -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - struct fuse_entry_param e; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - memset(&e, 0, sizeof(struct fuse_entry_param)); - e.attr_timeout = lo->timeout; - e.entry_timeout = lo->timeout; -@@ -684,7 
+725,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - pthread_mutex_lock(&lo->mutex); - inode->refcount++; - pthread_mutex_unlock(&lo->mutex); -- e.ino = (uintptr_t)inode; -+ e.ino = inode->fuse_ino; - - if (lo_debug(req)) { - fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -@@ -750,10 +791,10 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) - next->prev = prev; - prev->next = next; - -+ lo_map_remove(&lo->ino_map, inode->fuse_ino); - pthread_mutex_unlock(&lo->mutex); - close(inode->fd); - free(inode); -- - } else { - pthread_mutex_unlock(&lo->mutex); - } -@@ -762,7 +803,12 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { - struct lo_data *lo = lo_data(req); -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; -+ -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ return; -+ } - - if (lo_debug(req)) { - fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -@@ -1244,10 +1290,16 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - { - char *value = NULL; - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - ssize_t ret; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOSYS; - if (!lo_data(req)->xattr) { - goto out; -@@ -1306,10 +1358,16 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - { - char *value = NULL; - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - ssize_t ret; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOSYS; - if (!lo_data(req)->xattr) { - goto out; -@@ -1367,10 +1425,16 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - const char 
*value, size_t size, int flags) - { - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - ssize_t ret; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOSYS; - if (!lo_data(req)->xattr) { - goto out; -@@ -1400,10 +1464,16 @@ out: - static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - { - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - ssize_t ret; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOSYS; - if (!lo_data(req)->xattr) { - goto out; -@@ -1522,6 +1592,7 @@ int main(int argc, char *argv[]) - struct fuse_session *se; - struct fuse_cmdline_opts opts; - struct lo_data lo = { .debug = 0, .writeback = 0 }; -+ struct lo_map_elem *root_elem; - int ret = -1; - - /* Don't mask creation mode, kernel already did that */ -@@ -1530,8 +1601,19 @@ int main(int argc, char *argv[]) - pthread_mutex_init(&lo.mutex, NULL); - lo.root.next = lo.root.prev = &lo.root; - lo.root.fd = -1; -+ lo.root.fuse_ino = FUSE_ROOT_ID; - lo.cache = CACHE_NORMAL; - -+ /* -+ * Set up the ino map like this: -+ * [0] Reserved (will not be used) -+ * [1] Root inode -+ */ -+ lo_map_init(&lo.ino_map); -+ lo_map_reserve(&lo.ino_map, 0)->in_use = false; -+ root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); -+ root_elem->inode = &lo.root; -+ - if (fuse_parse_cmdline(&args, &opts) != 0) { - return 1; - } -@@ -1628,6 +1710,8 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -+ lo_map_destroy(&lo.ino_map); -+ - if (lo.root.fd >= 0) { - close(lo.root.fd); - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch deleted file mode 100644 index 4751f95..0000000 --- 
a/SOURCES/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch +++ /dev/null @@ -1,182 +0,0 @@ -From d56651e227bae83ee0cceb12bd91e3e9f6045ab3 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:13 +0100 -Subject: [PATCH 042/116] virtiofsd: passthrough_ll: add lo_map for ino/fh - indirection -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-39-dgilbert@redhat.com> -Patchwork-id: 93492 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 038/112] virtiofsd: passthrough_ll: add lo_map for ino/fh indirection -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -A layer of indirection is needed because passthrough_ll cannot expose -pointers or file descriptor numbers to untrusted clients. Malicious -clients could send invalid pointers or file descriptors in order to -crash or exploit the file system daemon. - -lo_map provides an integer key->value mapping. This will be used for -ino and fh fields in the patches that follow. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 25c135727b08dca90f00094e522a69170b13dfac) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 124 +++++++++++++++++++++++++++++++++++++++ - 1 file changed, 124 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 5e06179..e83a976 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -74,6 +74,21 @@ struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { - }; - #endif - -+struct lo_map_elem { -+ union { -+ /* Element values will go here... 
*/ -+ ssize_t freelist; -+ }; -+ bool in_use; -+}; -+ -+/* Maps FUSE fh or ino values to internal objects */ -+struct lo_map { -+ struct lo_map_elem *elems; -+ size_t nelems; -+ ssize_t freelist; -+}; -+ - struct lo_inode { - struct lo_inode *next; /* protected by lo->mutex */ - struct lo_inode *prev; /* protected by lo->mutex */ -@@ -130,6 +145,115 @@ static struct lo_data *lo_data(fuse_req_t req) - return (struct lo_data *)fuse_req_userdata(req); - } - -+__attribute__((unused)) static void lo_map_init(struct lo_map *map) -+{ -+ map->elems = NULL; -+ map->nelems = 0; -+ map->freelist = -1; -+} -+ -+__attribute__((unused)) static void lo_map_destroy(struct lo_map *map) -+{ -+ free(map->elems); -+} -+ -+static int lo_map_grow(struct lo_map *map, size_t new_nelems) -+{ -+ struct lo_map_elem *new_elems; -+ size_t i; -+ -+ if (new_nelems <= map->nelems) { -+ return 1; -+ } -+ -+ new_elems = realloc(map->elems, sizeof(map->elems[0]) * new_nelems); -+ if (!new_elems) { -+ return 0; -+ } -+ -+ for (i = map->nelems; i < new_nelems; i++) { -+ new_elems[i].freelist = i + 1; -+ new_elems[i].in_use = false; -+ } -+ new_elems[new_nelems - 1].freelist = -1; -+ -+ map->elems = new_elems; -+ map->freelist = map->nelems; -+ map->nelems = new_nelems; -+ return 1; -+} -+ -+__attribute__((unused)) static struct lo_map_elem * -+lo_map_alloc_elem(struct lo_map *map) -+{ -+ struct lo_map_elem *elem; -+ -+ if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) { -+ return NULL; -+ } -+ -+ elem = &map->elems[map->freelist]; -+ map->freelist = elem->freelist; -+ -+ elem->in_use = true; -+ -+ return elem; -+} -+ -+__attribute__((unused)) static struct lo_map_elem * -+lo_map_reserve(struct lo_map *map, size_t key) -+{ -+ ssize_t *prev; -+ -+ if (!lo_map_grow(map, key + 1)) { -+ return NULL; -+ } -+ -+ for (prev = &map->freelist; *prev != -1; -+ prev = &map->elems[*prev].freelist) { -+ if (*prev == key) { -+ struct lo_map_elem *elem = &map->elems[key]; -+ -+ *prev = elem->freelist; 
-+ elem->in_use = true; -+ return elem; -+ } -+ } -+ return NULL; -+} -+ -+__attribute__((unused)) static struct lo_map_elem * -+lo_map_get(struct lo_map *map, size_t key) -+{ -+ if (key >= map->nelems) { -+ return NULL; -+ } -+ if (!map->elems[key].in_use) { -+ return NULL; -+ } -+ return &map->elems[key]; -+} -+ -+__attribute__((unused)) static void lo_map_remove(struct lo_map *map, -+ size_t key) -+{ -+ struct lo_map_elem *elem; -+ -+ if (key >= map->nelems) { -+ return; -+ } -+ -+ elem = &map->elems[key]; -+ if (!elem->in_use) { -+ return; -+ } -+ -+ elem->in_use = false; -+ -+ elem->freelist = map->freelist; -+ map->freelist = key; -+} -+ - static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - { - if (ino == FUSE_ROOT_ID) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch deleted file mode 100644 index a3f7970..0000000 --- a/SOURCES/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 86b4f2865f2ebd7e6b3d85beb66a9390eb46eb96 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:45 +0100 -Subject: [PATCH 074/116] virtiofsd: passthrough_ll: add renameat2 support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-71-dgilbert@redhat.com> -Patchwork-id: 93531 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 070/112] virtiofsd: passthrough_ll: add renameat2 support -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Signed-off-by: Miklos Szeredi -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit f0ab7d6f78a7d3c1c19fd81a91c9b1199f56c4f6) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 98114a3..18d69ab 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1099,7 +1099,17 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - if (flags) { -+#ifndef SYS_renameat2 - fuse_reply_err(req, EINVAL); -+#else -+ res = syscall(SYS_renameat2, lo_fd(req, parent), name, -+ lo_fd(req, newparent), newname, flags); -+ if (res == -1 && errno == ENOSYS) { -+ fuse_reply_err(req, EINVAL); -+ } else { -+ fuse_reply_err(req, res == -1 ? errno : 0); -+ } -+#endif - return; - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch deleted file mode 100644 index dc87ef2..0000000 --- a/SOURCES/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 079199c53f483f0051f994b195ebb595aec76a39 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:51 +0100 -Subject: [PATCH 080/116] virtiofsd: passthrough_ll: clean up cache related - options -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-77-dgilbert@redhat.com> -Patchwork-id: 93530 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 076/112] virtiofsd: passthrough_ll: clean up cache related options -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - - - Rename "cache=never" to "cache=none" to match 9p's similar option. 
- - - Rename CACHE_NORMAL constant to CACHE_AUTO to match the "cache=auto" - option. - -Signed-off-by: Miklos Szeredi -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 230e777b5e250759ee0480fcc0e9ccfa2b082fba) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 5 ++++- - tools/virtiofsd/passthrough_ll.c | 20 ++++++++++---------- - 2 files changed, 14 insertions(+), 11 deletions(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 14f5d70..5672024 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -145,6 +145,9 @@ void fuse_cmdline_help(void) - " --syslog log to syslog (default stderr)\n" - " -f foreground operation\n" - " --daemonize run in background\n" -+ " -o cache= cache mode. could be one of \"auto, " -+ "always, none\"\n" -+ " default: auto\n" - " -o log_level= log level, default to \"info\"\n" - " level could be one of \"debug, " - "info, warn, err\"\n" -@@ -156,7 +159,7 @@ void fuse_cmdline_help(void) - " -o readdirplus|no_readdirplus\n" - " enable/disable readirplus\n" - " default: readdirplus except with " -- "cache=never\n" -+ "cache=none\n" - ); - } - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9e7191e..b40f287 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -101,8 +101,8 @@ struct lo_cred { - }; - - enum { -- CACHE_NEVER, -- CACHE_NORMAL, -+ CACHE_NONE, -+ CACHE_AUTO, - CACHE_ALWAYS, - }; - -@@ -138,8 +138,8 @@ static const struct fuse_opt lo_opts[] = { - { "no_xattr", offsetof(struct lo_data, xattr), 0 }, - { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, - { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, -- { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, -- { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, -+ { "cache=none", offsetof(struct lo_data, cache), 
CACHE_NONE }, -+ { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO }, - { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, - { "norace", offsetof(struct lo_data, norace), 1 }, - { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, -@@ -482,7 +482,7 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } -- if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) || -+ if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || - lo->readdirplus_clear) { - fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); - conn->want &= ~FUSE_CAP_READDIRPLUS; -@@ -1493,7 +1493,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - fi->fh = fh; - err = lo_do_lookup(req, parent, name, &e); - } -- if (lo->cache == CACHE_NEVER) { -+ if (lo->cache == CACHE_NONE) { - fi->direct_io = 1; - } else if (lo->cache == CACHE_ALWAYS) { - fi->keep_cache = 1; -@@ -1578,7 +1578,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - } - - fi->fh = fh; -- if (lo->cache == CACHE_NEVER) { -+ if (lo->cache == CACHE_NONE) { - fi->direct_io = 1; - } else if (lo->cache == CACHE_ALWAYS) { - fi->keep_cache = 1; -@@ -2395,7 +2395,7 @@ int main(int argc, char *argv[]) - lo.root.next = lo.root.prev = &lo.root; - lo.root.fd = -1; - lo.root.fuse_ino = FUSE_ROOT_ID; -- lo.cache = CACHE_NORMAL; -+ lo.cache = CACHE_AUTO; - - /* - * Set up the ino map like this: -@@ -2470,11 +2470,11 @@ int main(int argc, char *argv[]) - } - if (!lo.timeout_set) { - switch (lo.cache) { -- case CACHE_NEVER: -+ case CACHE_NONE: - lo.timeout = 0.0; - break; - -- case CACHE_NORMAL: -+ case CACHE_AUTO: - lo.timeout = 1.0; - break; - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch deleted file mode 
100644 index c55eead..0000000 --- a/SOURCES/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch +++ /dev/null @@ -1,154 +0,0 @@ -From f93ea308351cbe2630d7ecf637c3b69894d84a11 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:13 +0000 -Subject: [PATCH 17/18] virtiofsd: passthrough_ll: cleanup getxattr/listxattr -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-7-dgilbert@redhat.com> -Patchwork-id: 94125 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/7] virtiofsd: passthrough_ll: cleanup getxattr/listxattr -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: Misono Tomohiro - -This is a cleanup patch to simplify the following xattr fix and -there is no functional changes. - -- Move memory allocation to head of the function -- Unify fgetxattr/flistxattr call for both size == 0 and - size != 0 case -- Remove redundant lo_inode_put call in error path - (Note: second call is ignored now since @inode is already NULL) - -Signed-off-by: Misono Tomohiro -Message-Id: <20200227055927.24566-2-misono.tomohiro@jp.fujitsu.com> -Acked-by: Vivek Goyal -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 16e15a73089102c3d8846792d514e769300fcc3c) -Signed-off-by: Danilo C. L. 
de Paula ---- - tools/virtiofsd/passthrough_ll.c | 54 ++++++++++++++++------------------------ - 1 file changed, 22 insertions(+), 32 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index c635fc8..50c7273 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2199,34 +2199,30 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -+ if (size) { -+ value = malloc(size); -+ if (!value) { -+ goto out_err; -+ } -+ } -+ - sprintf(procname, "%i", inode->fd); - fd = openat(lo->proc_self_fd, procname, O_RDONLY); - if (fd < 0) { - goto out_err; - } - -+ ret = fgetxattr(fd, name, value, size); -+ if (ret == -1) { -+ goto out_err; -+ } - if (size) { -- value = malloc(size); -- if (!value) { -- goto out_err; -- } -- -- ret = fgetxattr(fd, name, value, size); -- if (ret == -1) { -- goto out_err; -- } - saverr = 0; - if (ret == 0) { - goto out; - } -- - fuse_reply_buf(req, value, ret); - } else { -- ret = fgetxattr(fd, name, NULL, 0); -- if (ret == -1) { -- goto out_err; -- } -- - fuse_reply_xattr(req, ret); - } - out_free: -@@ -2242,7 +2238,6 @@ out_free: - out_err: - saverr = errno; - out: -- lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - goto out_free; - } -@@ -2277,34 +2272,30 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - goto out; - } - -+ if (size) { -+ value = malloc(size); -+ if (!value) { -+ goto out_err; -+ } -+ } -+ - sprintf(procname, "%i", inode->fd); - fd = openat(lo->proc_self_fd, procname, O_RDONLY); - if (fd < 0) { - goto out_err; - } - -+ ret = flistxattr(fd, value, size); -+ if (ret == -1) { -+ goto out_err; -+ } - if (size) { -- value = malloc(size); -- if (!value) { -- goto out_err; -- } -- -- ret = flistxattr(fd, value, size); -- if (ret == -1) { -- goto out_err; -- } - saverr = 0; - if (ret == 0) { - goto out; - } -- - fuse_reply_buf(req, value, ret); - } else { -- ret = flistxattr(fd, NULL, 
0); -- if (ret == -1) { -- goto out_err; -- } -- - fuse_reply_xattr(req, ret); - } - out_free: -@@ -2320,7 +2311,6 @@ out_free: - out_err: - saverr = errno; - out: -- lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - goto out_free; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch deleted file mode 100644 index 98d00fc..0000000 --- a/SOURCES/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 0f1d456fad4ba6a696eff8976b9fe8a0f251e1b5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:47 +0100 -Subject: [PATCH 076/116] virtiofsd: passthrough_ll: control readdirplus -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-73-dgilbert@redhat.com> -Patchwork-id: 93524 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 072/112] virtiofsd: passthrough_ll: control readdirplus -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Signed-off-by: Miklos Szeredi -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 59aef494be2d8d91055ff3f3a8eb13d9f32873d8) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 4 ++++ - tools/virtiofsd/passthrough_ll.c | 7 ++++++- - 2 files changed, 10 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 6d50a46..14f5d70 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -153,6 +153,10 @@ void fuse_cmdline_help(void) - " allowed (default: 10)\n" - " -o norace disable racy fallback\n" - " default: false\n" -+ " -o readdirplus|no_readdirplus\n" -+ " enable/disable readirplus\n" -+ " default: readdirplus except with " -+ "cache=never\n" - ); - } - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 6480c51..8b1784f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -117,6 +117,8 @@ struct lo_data { - double timeout; - int cache; - int timeout_set; -+ int readdirplus_set; -+ int readdirplus_clear; - struct lo_inode root; /* protected by lo->mutex */ - struct lo_map ino_map; /* protected by lo->mutex */ - struct lo_map dirp_map; /* protected by lo->mutex */ -@@ -140,6 +142,8 @@ static const struct fuse_opt lo_opts[] = { - { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, - { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, - { "norace", offsetof(struct lo_data, norace), 1 }, -+ { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, -+ { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, - FUSE_OPT_END - }; - static bool use_syslog = false; -@@ -478,7 +482,8 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } -- if (lo->cache == CACHE_NEVER) { -+ if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) || -+ lo->readdirplus_clear) { - fuse_log(FUSE_LOG_DEBUG, 
"lo_init: disabling readdirplus\n"); - conn->want &= ~FUSE_CAP_READDIRPLUS; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch deleted file mode 100644 index 4b02779..0000000 --- a/SOURCES/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch +++ /dev/null @@ -1,198 +0,0 @@ -From af14ef1dba9356e566c9c7531b8fd23361c2b16d Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:12 +0100 -Subject: [PATCH 041/116] virtiofsd: passthrough_ll: create new files in - caller's context -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-38-dgilbert@redhat.com> -Patchwork-id: 93488 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 037/112] virtiofsd: passthrough_ll: create new files in caller's context -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -We need to create files in the caller's context. Otherwise after -creating a file, the caller might not be able to do file operations on -that file. - -Changed effective uid/gid to caller's uid/gid, create file and then -switch back to uid/gid 0. - -Use syscall(setresuid, ...) otherwise glibc does some magic to change EUID -in all threads, which is not what we want. - -Signed-off-by: Vivek Goyal -Signed-off-by: Miklos Szeredi -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 929cfb7a9a1b101cdfc9ac19807ecab4c81a13e4) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 96 +++++++++++++++++++++++++++++++++++++--- - 1 file changed, 91 insertions(+), 5 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index cd27c09..5e06179 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -50,6 +50,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -83,6 +84,11 @@ struct lo_inode { - uint64_t refcount; /* protected by lo->mutex */ - }; - -+struct lo_cred { -+ uid_t euid; -+ gid_t egid; -+}; -+ - enum { - CACHE_NEVER, - CACHE_NORMAL, -@@ -383,6 +389,69 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - } - } - -+/* -+ * On some archs, setres*id is limited to 2^16 but they -+ * provide setres*id32 variants that allow 2^32. -+ * Others just let setres*id do 2^32 anyway. -+ */ -+#ifdef SYS_setresgid32 -+#define OURSYS_setresgid SYS_setresgid32 -+#else -+#define OURSYS_setresgid SYS_setresgid -+#endif -+ -+#ifdef SYS_setresuid32 -+#define OURSYS_setresuid SYS_setresuid32 -+#else -+#define OURSYS_setresuid SYS_setresuid -+#endif -+ -+/* -+ * Change to uid/gid of caller so that file is created with -+ * ownership of caller. -+ * TODO: What about selinux context? 
-+ */ -+static int lo_change_cred(fuse_req_t req, struct lo_cred *old) -+{ -+ int res; -+ -+ old->euid = geteuid(); -+ old->egid = getegid(); -+ -+ res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1); -+ if (res == -1) { -+ return errno; -+ } -+ -+ res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1); -+ if (res == -1) { -+ int errno_save = errno; -+ -+ syscall(OURSYS_setresgid, -1, old->egid, -1); -+ return errno_save; -+ } -+ -+ return 0; -+} -+ -+/* Regain Privileges */ -+static void lo_restore_cred(struct lo_cred *old) -+{ -+ int res; -+ -+ res = syscall(OURSYS_setresuid, -1, old->euid, -1); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid); -+ exit(1); -+ } -+ -+ res = syscall(OURSYS_setresgid, -1, old->egid, -1); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid); -+ exit(1); -+ } -+} -+ - static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - const char *name, mode_t mode, dev_t rdev, - const char *link) -@@ -391,12 +460,21 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - int saverr; - struct lo_inode *dir = lo_inode(req, parent); - struct fuse_entry_param e; -+ struct lo_cred old = {}; - - saverr = ENOMEM; - -+ saverr = lo_change_cred(req, &old); -+ if (saverr) { -+ goto out; -+ } -+ - res = mknod_wrapper(dir->fd, name, link, mode, rdev); - - saverr = errno; -+ -+ lo_restore_cred(&old); -+ - if (res == -1) { - goto out; - } -@@ -794,26 +872,34 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - struct lo_data *lo = lo_data(req); - struct fuse_entry_param e; - int err; -+ struct lo_cred old = {}; - - if (lo_debug(req)) { - fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", - parent, name); - } - -+ err = lo_change_cred(req, &old); -+ if (err) { -+ goto out; -+ } -+ - fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, - mode); -- if (fd == -1) { -- return (void)fuse_reply_err(req, 
errno); -- } -+ err = fd == -1 ? errno : 0; -+ lo_restore_cred(&old); - -- fi->fh = fd; -+ if (!err) { -+ fi->fh = fd; -+ err = lo_do_lookup(req, parent, name, &e); -+ } - if (lo->cache == CACHE_NEVER) { - fi->direct_io = 1; - } else if (lo->cache == CACHE_ALWAYS) { - fi->keep_cache = 1; - } - -- err = lo_do_lookup(req, parent, name, &e); -+out: - if (err) { - fuse_reply_err(req, err); - } else { --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch deleted file mode 100644 index 4a531a3..0000000 --- a/SOURCES/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch +++ /dev/null @@ -1,50 +0,0 @@ -From bbf92338e5e5eed796d511d2bd3c3686b7d1e5fd Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:46 +0100 -Subject: [PATCH 075/116] virtiofsd: passthrough_ll: disable readdirplus on - cache=never -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-72-dgilbert@redhat.com> -Patchwork-id: 93525 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 071/112] virtiofsd: passthrough_ll: disable readdirplus on cache=never -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -...because the attributes sent in the READDIRPLUS reply would be discarded -anyway. - -Signed-off-by: Miklos Szeredi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit ddcbabcb0ea177be3ec3500726b699c7c26ffd93) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 18d69ab..6480c51 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -478,6 +478,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } -+ if (lo->cache == CACHE_NEVER) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); -+ conn->want &= ~FUSE_CAP_READDIRPLUS; -+ } - } - - static void lo_getattr(fuse_req_t req, fuse_ino_t ino, --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch deleted file mode 100644 index 00e11b4..0000000 --- a/SOURCES/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch +++ /dev/null @@ -1,143 +0,0 @@ -From 5e33269d5fbc4ba4614bab4a6b9e0ef759bebcb7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:10 +0100 -Subject: [PATCH 099/116] virtiofsd: passthrough_ll: fix refcounting on - remove/rename -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-96-dgilbert@redhat.com> -Patchwork-id: 93549 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 095/112] virtiofsd: passthrough_ll: fix refcounting on remove/rename -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Signed-off-by: Miklos Szeredi -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 9257e514d861afa759c36704e1904d43ca3fec88) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 50 +++++++++++++++++++++++++++++++++++++++- - 1 file changed, 49 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index c819b5f..e3a6d6b 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1140,17 +1140,42 @@ out_err: - fuse_reply_err(req, saverr); - } - -+static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, -+ const char *name) -+{ -+ int res; -+ struct stat attr; -+ -+ res = fstatat(lo_fd(req, parent), name, &attr, -+ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ return NULL; -+ } -+ -+ return lo_find(lo_data(req), &attr); -+} -+ - static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) - { - int res; -+ struct lo_inode *inode; -+ struct lo_data *lo = lo_data(req); -+ - if (!is_safe_path_component(name)) { - fuse_reply_err(req, EINVAL); - return; - } - -+ inode = lookup_name(req, parent, name); -+ if (!inode) { -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ - res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); - - fuse_reply_err(req, res == -1 ? 
errno : 0); -+ unref_inode_lolocked(lo, inode, 1); - } - - static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, -@@ -1158,12 +1183,23 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - unsigned int flags) - { - int res; -+ struct lo_inode *oldinode; -+ struct lo_inode *newinode; -+ struct lo_data *lo = lo_data(req); - - if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { - fuse_reply_err(req, EINVAL); - return; - } - -+ oldinode = lookup_name(req, parent, name); -+ newinode = lookup_name(req, newparent, newname); -+ -+ if (!oldinode) { -+ fuse_reply_err(req, EIO); -+ goto out; -+ } -+ - if (flags) { - #ifndef SYS_renameat2 - fuse_reply_err(req, EINVAL); -@@ -1176,26 +1212,38 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - fuse_reply_err(req, res == -1 ? errno : 0); - } - #endif -- return; -+ goto out; - } - - res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); - - fuse_reply_err(req, res == -1 ? errno : 0); -+out: -+ unref_inode_lolocked(lo, oldinode, 1); -+ unref_inode_lolocked(lo, newinode, 1); - } - - static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - { - int res; -+ struct lo_inode *inode; -+ struct lo_data *lo = lo_data(req); - - if (!is_safe_path_component(name)) { - fuse_reply_err(req, EINVAL); - return; - } - -+ inode = lookup_name(req, parent, name); -+ if (!inode) { -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ - res = unlinkat(lo_fd(req, parent), name, 0); - - fuse_reply_err(req, res == -1 ? 
errno : 0); -+ unref_inode_lolocked(lo, inode, 1); - } - - static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-use-hashtable.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-use-hashtable.patch deleted file mode 100644 index b0be1f9..0000000 --- a/SOURCES/kvm-virtiofsd-passthrough_ll-use-hashtable.patch +++ /dev/null @@ -1,211 +0,0 @@ -From 44f4434b1305f6ff47b4f63fafcf39bcea9e4ceb Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:52 +0100 -Subject: [PATCH 081/116] virtiofsd: passthrough_ll: use hashtable -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-78-dgilbert@redhat.com> -Patchwork-id: 93528 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 077/112] virtiofsd: passthrough_ll: use hashtable -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Improve performance of inode lookup by using a hash table. - -Signed-off-by: Miklos Szeredi -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit bfc50a6e06b10b2f9dbaf6c1a89dd523322e016f) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 81 ++++++++++++++++++++++------------------ - 1 file changed, 45 insertions(+), 36 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index b40f287..b176a31 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -84,13 +84,15 @@ struct lo_map { - ssize_t freelist; - }; - -+struct lo_key { -+ ino_t ino; -+ dev_t dev; -+}; -+ - struct lo_inode { -- struct lo_inode *next; /* protected by lo->mutex */ -- struct lo_inode *prev; /* protected by lo->mutex */ - int fd; - bool is_symlink; -- ino_t ino; -- dev_t dev; -+ struct lo_key key; - uint64_t refcount; /* protected by lo->mutex */ - fuse_ino_t fuse_ino; - }; -@@ -119,7 +121,8 @@ struct lo_data { - int timeout_set; - int readdirplus_set; - int readdirplus_clear; -- struct lo_inode root; /* protected by lo->mutex */ -+ struct lo_inode root; -+ GHashTable *inodes; /* protected by lo->mutex */ - struct lo_map ino_map; /* protected by lo->mutex */ - struct lo_map dirp_map; /* protected by lo->mutex */ - struct lo_map fd_map; /* protected by lo->mutex */ -@@ -573,7 +576,7 @@ retry: - } - goto fail_unref; - } -- if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) { -+ if (stat.st_dev != inode->key.dev || stat.st_ino != inode->key.ino) { - if (!retries) { - fuse_log(FUSE_LOG_WARNING, - "%s: failed to match last\n", __func__); -@@ -753,19 +756,20 @@ out_err: - static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - { - struct lo_inode *p; -- struct lo_inode *ret = NULL; -+ struct lo_key key = { -+ .ino = st->st_ino, -+ .dev = st->st_dev, -+ }; - - pthread_mutex_lock(&lo->mutex); -- for (p = lo->root.next; p != &lo->root; p = p->next) { -- if (p->ino == st->st_ino && p->dev == st->st_dev) { -- assert(p->refcount > 0); -- ret = p; -- ret->refcount++; -- break; -- 
} -+ p = g_hash_table_lookup(lo->inodes, &key); -+ if (p) { -+ assert(p->refcount > 0); -+ p->refcount++; - } - pthread_mutex_unlock(&lo->mutex); -- return ret; -+ -+ return p; - } - - static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, -@@ -810,8 +814,6 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - close(newfd); - newfd = -1; - } else { -- struct lo_inode *prev, *next; -- - saverr = ENOMEM; - inode = calloc(1, sizeof(struct lo_inode)); - if (!inode) { -@@ -822,17 +824,12 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - inode->refcount = 1; - inode->fd = newfd; - newfd = -1; -- inode->ino = e->attr.st_ino; -- inode->dev = e->attr.st_dev; -+ inode->key.ino = e->attr.st_ino; -+ inode->key.dev = e->attr.st_dev; - - pthread_mutex_lock(&lo->mutex); - inode->fuse_ino = lo_add_inode_mapping(req, inode); -- prev = &lo->root; -- next = prev->next; -- next->prev = inode; -- inode->next = next; -- inode->prev = prev; -- prev->next = inode; -+ g_hash_table_insert(lo->inodes, &inode->key, inode); - pthread_mutex_unlock(&lo->mutex); - } - e->ino = inode->fuse_ino; -@@ -1162,14 +1159,8 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - assert(inode->refcount >= n); - inode->refcount -= n; - if (!inode->refcount) { -- struct lo_inode *prev, *next; -- -- prev = inode->prev; -- next = inode->next; -- next->prev = prev; -- prev->next = next; -- - lo_map_remove(&lo->ino_map, inode->fuse_ino); -+ g_hash_table_remove(lo->inodes, &inode->key); - pthread_mutex_unlock(&lo->mutex); - close(inode->fd); - free(inode); -@@ -1369,7 +1360,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - - /* Hide root's parent directory */ - if (dinode == &lo->root && strcmp(name, "..") == 0) { -- e.attr.st_ino = lo->root.ino; -+ e.attr.st_ino = lo->root.key.ino; - e.attr.st_mode = DT_DIR << 12; - } - -@@ -2370,11 +2361,26 @@ static void setup_root(struct 
lo_data *lo, struct lo_inode *root) - - root->is_symlink = false; - root->fd = fd; -- root->ino = stat.st_ino; -- root->dev = stat.st_dev; -+ root->key.ino = stat.st_ino; -+ root->key.dev = stat.st_dev; - root->refcount = 2; - } - -+static guint lo_key_hash(gconstpointer key) -+{ -+ const struct lo_key *lkey = key; -+ -+ return (guint)lkey->ino + (guint)lkey->dev; -+} -+ -+static gboolean lo_key_equal(gconstpointer a, gconstpointer b) -+{ -+ const struct lo_key *la = a; -+ const struct lo_key *lb = b; -+ -+ return la->ino == lb->ino && la->dev == lb->dev; -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2392,7 +2398,7 @@ int main(int argc, char *argv[]) - umask(0); - - pthread_mutex_init(&lo.mutex, NULL); -- lo.root.next = lo.root.prev = &lo.root; -+ lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); - lo.root.fd = -1; - lo.root.fuse_ino = FUSE_ROOT_ID; - lo.cache = CACHE_AUTO; -@@ -2522,6 +2528,9 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -+ if (lo.inodes) { -+ g_hash_table_destroy(lo.inodes); -+ } - lo_map_destroy(&lo.fd_map); - lo_map_destroy(&lo.dirp_map); - lo_map_destroy(&lo.ino_map); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch b/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch deleted file mode 100644 index 68eb03e..0000000 --- a/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch +++ /dev/null @@ -1,54 +0,0 @@ -From feb005dfeb15dd5ac5156c994f323ab4c573b1fc Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:24 +0100 -Subject: [PATCH 053/116] virtiofsd: prevent ".." escape in lo_do_lookup() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-50-dgilbert@redhat.com> -Patchwork-id: 93500 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 049/112] virtiofsd: prevent ".." 
escape in lo_do_lookup() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 854684bc0b3d63eb90b3abdfe471c2e4271ef176) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e375406..79d5966 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -624,12 +624,17 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - int res; - int saverr; - struct lo_data *lo = lo_data(req); -- struct lo_inode *inode; -+ struct lo_inode *inode, *dir = lo_inode(req, parent); - - memset(e, 0, sizeof(*e)); - e->attr_timeout = lo->timeout; - e->entry_timeout = lo->timeout; - -+ /* Do not allow escaping root directory */ -+ if (dir == &lo->root && strcmp(name, "..") == 0) { -+ name = "."; -+ } -+ - newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); - if (newfd == -1) { - goto out_err; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch b/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch deleted file mode 100644 index 5f97cbf..0000000 --- a/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 97e232e75bbc0032f4a309d248f383384612eafe Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:25 +0100 -Subject: [PATCH 054/116] virtiofsd: prevent ".." escape in lo_do_readdir() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-51-dgilbert@redhat.com> -Patchwork-id: 93507 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 050/112] virtiofsd: prevent ".." escape in lo_do_readdir() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Construct a fake dirent for the root directory's ".." entry. This hides -the parent directory from the FUSE client. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 752272da2b68a2312f0e11fc5303015a6c3ee1ac) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 36 ++++++++++++++++++++++-------------- - 1 file changed, 22 insertions(+), 14 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 79d5966..e3d65c3 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1149,19 +1149,25 @@ out_err: - static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, struct fuse_file_info *fi, int plus) - { -+ struct lo_data *lo = lo_data(req); - struct lo_dirp *d; -+ struct lo_inode *dinode; - char *buf = NULL; - char *p; - size_t rem = size; -- int err = ENOMEM; -+ int err = EBADF; - -- (void)ino; -+ dinode = lo_inode(req, ino); -+ if (!dinode) { -+ goto error; -+ } - - d = lo_dirp(req, fi); - if (!d) { - goto error; - } - -+ err = ENOMEM; - buf = calloc(1, size); - if (!buf) { - goto error; -@@ -1192,15 +1198,21 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - } - nextoff = d->entry->d_off; - name = d->entry->d_name; -+ - fuse_ino_t entry_ino = 0; -+ struct fuse_entry_param e = (struct fuse_entry_param){ -+ .attr.st_ino = d->entry->d_ino, -+ .attr.st_mode = d->entry->d_type << 12, -+ }; -+ -+ /* Hide root's parent directory */ -+ if (dinode == &lo->root && strcmp(name, "..") == 0) { -+ e.attr.st_ino = 
lo->root.ino; -+ e.attr.st_mode = DT_DIR << 12; -+ } -+ - if (plus) { -- struct fuse_entry_param e; -- if (is_dot_or_dotdot(name)) { -- e = (struct fuse_entry_param){ -- .attr.st_ino = d->entry->d_ino, -- .attr.st_mode = d->entry->d_type << 12, -- }; -- } else { -+ if (!is_dot_or_dotdot(name)) { - err = lo_do_lookup(req, ino, name, &e); - if (err) { - goto error; -@@ -1210,11 +1222,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - - entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); - } else { -- struct stat st = { -- .st_ino = d->entry->d_ino, -- .st_mode = d->entry->d_type << 12, -- }; -- entsize = fuse_add_direntry(req, p, rem, name, &st, nextoff); -+ entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff); - } - if (entsize > rem) { - if (entry_ino != 0) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch b/SOURCES/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch deleted file mode 100644 index be7c120..0000000 --- a/SOURCES/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 249c02ae54739dc5894ee1b2905bbe8f1e79e909 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:20 +0100 -Subject: [PATCH 109/116] virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-106-dgilbert@redhat.com> -Patchwork-id: 93562 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 105/112] virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -When running with multiple threads it can be tricky to handle -FUSE_INIT/FUSE_DESTROY in parallel with other request types or in -parallel with themselves. 
Serialize FUSE_INIT and FUSE_DESTROY so that -malicious clients cannot trigger race conditions. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit cdc497c6925be745bc895355bd4674a17a4b2a8b) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 1 + - tools/virtiofsd/fuse_lowlevel.c | 18 ++++++++++++++++++ - 2 files changed, 19 insertions(+) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index a20854f..1447d86 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -61,6 +61,7 @@ struct fuse_session { - struct fuse_req list; - struct fuse_req interrupts; - pthread_mutex_t lock; -+ pthread_rwlock_t init_rwlock; - int got_destroy; - int broken_splice_nonblock; - uint64_t notify_ctr; -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index dab6a31..79a4031 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2428,6 +2428,19 @@ void fuse_session_process_buf_int(struct fuse_session *se, - req->ctx.pid = in->pid; - req->ch = ch; - -+ /* -+ * INIT and DESTROY requests are serialized, all other request types -+ * run in parallel. This prevents races between FUSE_INIT and ordinary -+ * requests, FUSE_INIT and FUSE_INIT, FUSE_INIT and FUSE_DESTROY, and -+ * FUSE_DESTROY and FUSE_DESTROY. 
-+ */ -+ if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT || -+ in->opcode == FUSE_DESTROY) { -+ pthread_rwlock_wrlock(&se->init_rwlock); -+ } else { -+ pthread_rwlock_rdlock(&se->init_rwlock); -+ } -+ - err = EIO; - if (!se->got_init) { - enum fuse_opcode expected; -@@ -2485,10 +2498,13 @@ void fuse_session_process_buf_int(struct fuse_session *se, - } else { - fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter); - } -+ -+ pthread_rwlock_unlock(&se->init_rwlock); - return; - - reply_err: - fuse_reply_err(req, err); -+ pthread_rwlock_unlock(&se->init_rwlock); - } - - #define LL_OPTION(n, o, v) \ -@@ -2531,6 +2547,7 @@ void fuse_session_destroy(struct fuse_session *se) - se->op.destroy(se->userdata); - } - } -+ pthread_rwlock_destroy(&se->init_rwlock); - pthread_mutex_destroy(&se->lock); - free(se->cuse_data); - if (se->fd != -1) { -@@ -2610,6 +2627,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - list_init_req(&se->list); - list_init_req(&se->interrupts); - fuse_mutex_init(&se->lock); -+ pthread_rwlock_init(&se->init_rwlock, NULL); - - memcpy(&se->op, op, op_size); - se->owner = getuid(); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch b/SOURCES/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch deleted file mode 100644 index 8eabede..0000000 --- a/SOURCES/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch +++ /dev/null @@ -1,149 +0,0 @@ -From 69c6a829f8136a8c95ccdf480f2fd0173d64b6ec Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:05 +0100 -Subject: [PATCH 094/116] virtiofsd: prevent fv_queue_thread() vs virtio_loop() - races -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-91-dgilbert@redhat.com> -Patchwork-id: 93544 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 090/112] virtiofsd: prevent fv_queue_thread() vs virtio_loop() races -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -We call into libvhost-user from the virtqueue handler thread and the -vhost-user message processing thread without a lock. There is nothing -protecting the virtqueue handler thread if the vhost-user message -processing thread changes the virtqueue or memory table while it is -running. - -This patch introduces a read-write lock. Virtqueue handler threads are -readers. The vhost-user message processing thread is a writer. This -will allow concurrency for multiqueue in the future while protecting -against fv_queue_thread() vs virtio_loop() races. - -Note that the critical sections could be made smaller but it would be -more invasive and require libvhost-user changes. Let's start simple and -improve performance later, if necessary. Another option would be an -RCU-style approach with lighter-weight primitives. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e7b337326d594b71b07cd6dbb332c49c122c80a4) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 34 +++++++++++++++++++++++++++++++++- - 1 file changed, 33 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index fb8d6d1..f6242f9 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -59,6 +59,18 @@ struct fv_VuDev { - struct fuse_session *se; - - /* -+ * Either handle virtqueues or vhost-user protocol messages. Don't do -+ * both at the same time since that could lead to race conditions if -+ * virtqueues or memory tables change while another thread is accessing -+ * them. 
-+ * -+ * The assumptions are: -+ * 1. fv_queue_thread() reads/writes to virtqueues and only reads VuDev. -+ * 2. virtio_loop() reads/writes virtqueues and VuDev. -+ */ -+ pthread_rwlock_t vu_dispatch_rwlock; -+ -+ /* - * The following pair of fields are only accessed in the main - * virtio_loop - */ -@@ -415,6 +427,8 @@ static void *fv_queue_thread(void *opaque) - qi->qidx, qi->kick_fd); - while (1) { - struct pollfd pf[2]; -+ int ret; -+ - pf[0].fd = qi->kick_fd; - pf[0].events = POLLIN; - pf[0].revents = 0; -@@ -461,6 +475,9 @@ static void *fv_queue_thread(void *opaque) - fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); - break; - } -+ /* Mutual exclusion with virtio_loop() */ -+ ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ assert(ret == 0); /* there is no possible error case */ - /* out is from guest, in is too guest */ - unsigned int in_bytes, out_bytes; - vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); -@@ -469,6 +486,7 @@ static void *fv_queue_thread(void *opaque) - "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", - __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); - -+ - while (1) { - bool allocated_bufv = false; - struct fuse_bufvec bufv; -@@ -597,6 +615,8 @@ static void *fv_queue_thread(void *opaque) - free(elem); - elem = NULL; - } -+ -+ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); - } - out: - pthread_mutex_destroy(&ch.lock); -@@ -711,6 +731,8 @@ int virtio_loop(struct fuse_session *se) - - while (!fuse_session_exited(se)) { - struct pollfd pf[1]; -+ bool ok; -+ int ret; - pf[0].fd = se->vu_socketfd; - pf[0].events = POLLIN; - pf[0].revents = 0; -@@ -735,7 +757,15 @@ int virtio_loop(struct fuse_session *se) - } - assert(pf[0].revents & POLLIN); - fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__); -- if (!vu_dispatch(&se->virtio_dev->dev)) { -+ /* Mutual exclusion with fv_queue_thread() */ -+ ret = pthread_rwlock_wrlock(&se->virtio_dev->vu_dispatch_rwlock); -+ 
assert(ret == 0); /* there is no possible error case */ -+ -+ ok = vu_dispatch(&se->virtio_dev->dev); -+ -+ pthread_rwlock_unlock(&se->virtio_dev->vu_dispatch_rwlock); -+ -+ if (!ok) { - fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__); - break; - } -@@ -877,6 +907,7 @@ int virtio_session_mount(struct fuse_session *se) - - se->vu_socketfd = data_sock; - se->virtio_dev->se = se; -+ pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL); - vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch, - fv_remove_watch, &fv_iface); - -@@ -892,6 +923,7 @@ void virtio_session_close(struct fuse_session *se) - } - - free(se->virtio_dev->qi); -+ pthread_rwlock_destroy(&se->virtio_dev->vu_dispatch_rwlock); - free(se->virtio_dev); - se->virtio_dev = NULL; - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch b/SOURCES/kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch deleted file mode 100644 index 5956dce..0000000 --- a/SOURCES/kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch +++ /dev/null @@ -1,314 +0,0 @@ -From cc9a776fba8ec62c862db55753107f19459dafa8 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 Feb 2021 23:14:56 -0500 -Subject: [PATCH 3/3] virtiofsd: prevent opening of special files - (CVE-2020-35517) - -RH-Author: Jon Maloy -Message-id: <20210209231456.1555472-4-jmaloy@redhat.com> -Patchwork-id: 101023 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 3/3] virtiofsd: prevent opening of special files (CVE-2020-35517) -Bugzilla: 1919111 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Greg Kurz -RH-Acked-by: Dr. David Alan Gilbert - -From: Stefan Hajnoczi - -A well-behaved FUSE client does not attempt to open special files with -FUSE_OPEN because they are handled on the client side (e.g. device nodes -are handled by client-side device drivers). - -The check to prevent virtiofsd from opening special files is missing in -a few cases, most notably FUSE_OPEN. 
A malicious client can cause -virtiofsd to open a device node, potentially allowing the guest to -escape. This can be exploited by a modified guest device driver. It is -not exploitable from guest userspace since the guest kernel will handle -special files inside the guest instead of sending FUSE requests. - -This patch fixes this issue by introducing the lo_inode_open() function -to check the file type before opening it. This is a short-term solution -because it does not prevent a compromised virtiofsd process from opening -device nodes on the host. - -Restructure lo_create() to try O_CREAT | O_EXCL first. Note that O_CREAT -| O_EXCL does not follow symlinks, so O_NOFOLLOW masking is not -necessary here. If the file exists and the user did not specify O_EXCL, -open it via lo_do_open(). - -Reported-by: Alex Xu -Fixes: CVE-2020-35517 -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Vivek Goyal -Reviewed-by: Greg Kurz -Signed-off-by: Stefan Hajnoczi -Message-Id: <20210204150208.367837-4-stefanha@redhat.com> -Signed-off-by: Dr. David Alan Gilbert - -(cherry picked from commit a3fdbbc7f271bff7d53d0501b29d910ece0b3789) -Signed-off-by: Jon Maloy -Signed-off-by: Jon Maloy ---- - tools/virtiofsd/passthrough_ll.c | 144 ++++++++++++++++++++----------- - 1 file changed, 92 insertions(+), 52 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e5bd3d73e4..cb0992f2db 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -535,6 +535,38 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino) - return fd; - } - -+/* -+ * Open a file descriptor for an inode. Returns -EBADF if the inode is not a -+ * regular file or a directory. -+ * -+ * Use this helper function instead of raw openat(2) to prevent security issues -+ * when a malicious client opens special files such as block device nodes. -+ * Symlink inodes are also rejected since symlinks must already have been -+ * traversed on the client side. 
-+ */ -+static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode, -+ int open_flags) -+{ -+ g_autofree char *fd_str = g_strdup_printf("%d", inode->fd); -+ int fd; -+ -+ if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) { -+ return -EBADF; -+ } -+ -+ /* -+ * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier -+ * that the inode is not a special file but if an external process races -+ * with us then symlinks are traversed here. It is not possible to escape -+ * the shared directory since it is mounted as "/" though. -+ */ -+ fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW); -+ if (fd < 0) { -+ return -errno; -+ } -+ return fd; -+} -+ - static void lo_init(void *userdata, struct fuse_conn_info *conn) - { - struct lo_data *lo = (struct lo_data *)userdata; -@@ -788,9 +820,9 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - if (fi) { - truncfd = fd; - } else { -- sprintf(procname, "%i", ifd); -- truncfd = openat(lo->proc_self_fd, procname, O_RDWR); -+ truncfd = lo_inode_open(lo, inode, O_RDWR); - if (truncfd < 0) { -+ errno = -truncfd; - goto out_err; - } - } -@@ -894,7 +926,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - struct lo_inode *dir = lo_inode(req, parent); - - if (inodep) { -- *inodep = NULL; -+ *inodep = NULL; /* in case there is an error */ - } - - /* -@@ -1725,19 +1757,26 @@ static void update_open_flags(int writeback, struct fuse_file_info *fi) - fi->flags &= ~O_DIRECT; - } - -+/* -+ * Open a regular file, set up an fd mapping, and fill out the struct -+ * fuse_file_info for it. If existing_fd is not negative, use that fd instead -+ * opening a new one. Takes ownership of existing_fd. -+ * -+ * Returns 0 on success or a positive errno. 
-+ */ - static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, -- struct fuse_file_info *fi) -+ int existing_fd, struct fuse_file_info *fi) - { -- char buf[64]; - ssize_t fh; -- int fd; -+ int fd = existing_fd; - - update_open_flags(lo->writeback, fi); - -- sprintf(buf, "%i", inode->fd); -- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); -- if (fd == -1) { -- return errno; -+ if (fd < 0) { -+ fd = lo_inode_open(lo, inode, fi->flags); -+ if (fd < 0) { -+ return -fd; -+ } - } - - pthread_mutex_lock(&lo->mutex); -@@ -1760,9 +1799,10 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, - static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - mode_t mode, struct fuse_file_info *fi) - { -- int fd; -+ int fd = -1; - struct lo_data *lo = lo_data(req); - struct lo_inode *parent_inode; -+ struct lo_inode *inode = NULL; - struct fuse_entry_param e; - int err; - struct lo_cred old = {}; -@@ -1788,36 +1828,38 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - - update_open_flags(lo->writeback, fi); - -- fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, -- mode); -+ /* Try to create a new file but don't open existing files */ -+ fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode); - err = fd == -1 ? 
errno : 0; -- lo_restore_cred(&old); - -- if (!err) { -- ssize_t fh; -+ lo_restore_cred(&old); - -- pthread_mutex_lock(&lo->mutex); -- fh = lo_add_fd_mapping(lo, fd); -- pthread_mutex_unlock(&lo->mutex); -- if (fh == -1) { -- close(fd); -- err = ENOMEM; -- goto out; -- } -+ /* Ignore the error if file exists and O_EXCL was not given */ -+ if (err && (err != EEXIST || (fi->flags & O_EXCL))) { -+ goto out; -+ } - -- fi->fh = fh; -- err = lo_do_lookup(req, parent, name, &e, NULL); -+ err = lo_do_lookup(req, parent, name, &e, &inode); -+ if (err) { -+ goto out; - } -- if (lo->cache == CACHE_NONE) { -- fi->direct_io = 1; -- } else if (lo->cache == CACHE_ALWAYS) { -- fi->keep_cache = 1; -+ -+ err = lo_do_open(lo, inode, fd, fi); -+ fd = -1; /* lo_do_open() takes ownership of fd */ -+ if (err) { -+ /* Undo lo_do_lookup() nlookup ref */ -+ unref_inode_lolocked(lo, inode, 1); - } - - out: -+ lo_inode_put(lo, &inode); - lo_inode_put(lo, &parent_inode); - - if (err) { -+ if (fd >= 0) { -+ close(fd); -+ } -+ - fuse_reply_err(req, err); - } else { - fuse_reply_create(req, &e, fi); -@@ -1831,7 +1873,6 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, - pid_t pid, int *err) - { - struct lo_inode_plock *plock; -- char procname[64]; - int fd; - - plock = -@@ -1848,12 +1889,10 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, - } - - /* Open another instance of file which can be used for ofd locks. */ -- sprintf(procname, "%i", inode->fd); -- - /* TODO: What if file is not writable? 
*/ -- fd = openat(lo->proc_self_fd, procname, O_RDWR); -- if (fd == -1) { -- *err = errno; -+ fd = lo_inode_open(lo, inode, O_RDWR); -+ if (fd < 0) { -+ *err = -fd; - free(plock); - return NULL; - } -@@ -2000,7 +2039,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - return; - } - -- err = lo_do_open(lo, inode, fi); -+ err = lo_do_open(lo, inode, -1, fi); - lo_inode_put(lo, &inode); - if (err) { - fuse_reply_err(req, err); -@@ -2056,39 +2095,40 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - struct fuse_file_info *fi) - { -+ struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_data *lo = lo_data(req); - int res; - int fd; -- char *buf; - - fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, - (void *)fi); - -- if (!fi) { -- struct lo_data *lo = lo_data(req); -- -- res = asprintf(&buf, "%i", lo_fd(req, ino)); -- if (res == -1) { -- return (void)fuse_reply_err(req, errno); -- } -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } - -- fd = openat(lo->proc_self_fd, buf, O_RDWR); -- free(buf); -- if (fd == -1) { -- return (void)fuse_reply_err(req, errno); -+ if (!fi) { -+ fd = lo_inode_open(lo, inode, O_RDWR); -+ if (fd < 0) { -+ res = -fd; -+ goto out; - } - } else { - fd = lo_fi_fd(req, fi); - } - - if (datasync) { -- res = fdatasync(fd); -+ res = fdatasync(fd) == -1 ? errno : 0; - } else { -- res = fsync(fd); -+ res = fsync(fd) == -1 ? errno : 0; - } - if (!fi) { - close(fd); - } -- fuse_reply_err(req, res == -1 ? 
errno : 0); -+out: -+ lo_inode_put(lo, &inode); -+ fuse_reply_err(req, res); - } - - static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, --- -2.18.2 - diff --git a/SOURCES/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch b/SOURCES/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch deleted file mode 100644 index acafa41..0000000 --- a/SOURCES/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 2e58ff6978f8433fc8672d2e357c6f0f5f36d24f Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:07 +0100 -Subject: [PATCH 096/116] virtiofsd: prevent races with lo_dirp_put() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-93-dgilbert@redhat.com> -Patchwork-id: 93546 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 092/112] virtiofsd: prevent races with lo_dirp_put() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Introduce lo_dirp_put() so that FUSE_RELEASEDIR does not cause -use-after-free races with other threads that are accessing lo_dirp. - -Also make lo_releasedir() atomic to prevent FUSE_RELEASEDIR racing with -itself. This prevents double-frees. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit acefdde73b403576a241ebd8dbe8431ddc0d9442) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 41 ++++++++++++++++++++++++++++++++++------ - 1 file changed, 35 insertions(+), 6 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 690edbc..2d703b5 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1284,11 +1284,28 @@ static void lo_readlink(fuse_req_t req, fuse_ino_t ino) - } - - struct lo_dirp { -+ gint refcount; - DIR *dp; - struct dirent *entry; - off_t offset; - }; - -+static void lo_dirp_put(struct lo_dirp **dp) -+{ -+ struct lo_dirp *d = *dp; -+ -+ if (!d) { -+ return; -+ } -+ *dp = NULL; -+ -+ if (g_atomic_int_dec_and_test(&d->refcount)) { -+ closedir(d->dp); -+ free(d); -+ } -+} -+ -+/* Call lo_dirp_put() on the return value when no longer needed */ - static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) - { - struct lo_data *lo = lo_data(req); -@@ -1296,6 +1313,9 @@ static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) - - pthread_mutex_lock(&lo->mutex); - elem = lo_map_get(&lo->dirp_map, fi->fh); -+ if (elem) { -+ g_atomic_int_inc(&elem->dirp->refcount); -+ } - pthread_mutex_unlock(&lo->mutex); - if (!elem) { - return NULL; -@@ -1331,6 +1351,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, - d->offset = 0; - d->entry = NULL; - -+ g_atomic_int_set(&d->refcount, 1); /* paired with lo_releasedir() */ - pthread_mutex_lock(&lo->mutex); - fh = lo_add_dirp_mapping(req, d); - pthread_mutex_unlock(&lo->mutex); -@@ -1364,7 +1385,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, struct fuse_file_info *fi, int plus) - { - struct lo_data *lo = lo_data(req); -- struct lo_dirp *d; -+ struct lo_dirp *d = NULL; - struct lo_inode *dinode; - char *buf = NULL; - char *p; -@@ -1454,6 +1475,8 @@ static void 
lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - - err = 0; - error: -+ lo_dirp_put(&d); -+ - /* - * If there's an error, we can only signal it if we haven't stored - * any entries yet - otherwise we'd end up with wrong lookup -@@ -1484,22 +1507,25 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) - { - struct lo_data *lo = lo_data(req); -+ struct lo_map_elem *elem; - struct lo_dirp *d; - - (void)ino; - -- d = lo_dirp(req, fi); -- if (!d) { -+ pthread_mutex_lock(&lo->mutex); -+ elem = lo_map_get(&lo->dirp_map, fi->fh); -+ if (!elem) { -+ pthread_mutex_unlock(&lo->mutex); - fuse_reply_err(req, EBADF); - return; - } - -- pthread_mutex_lock(&lo->mutex); -+ d = elem->dirp; - lo_map_remove(&lo->dirp_map, fi->fh); - pthread_mutex_unlock(&lo->mutex); - -- closedir(d->dp); -- free(d); -+ lo_dirp_put(&d); /* paired with lo_opendir() */ -+ - fuse_reply_err(req, 0); - } - -@@ -1710,6 +1736,9 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - } else { - res = fsync(fd); - } -+ -+ lo_dirp_put(&d); -+ - fuse_reply_err(req, res == -1 ? errno : 0); - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch b/SOURCES/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch deleted file mode 100644 index 056559d..0000000 --- a/SOURCES/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch +++ /dev/null @@ -1,469 +0,0 @@ -From 5c9bbd00e8f8c944d9e8e22e7d1cf08cb8fddd6b Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:37 +0100 -Subject: [PATCH 066/116] virtiofsd: print log only when priority is high - enough -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-63-dgilbert@redhat.com> -Patchwork-id: 93518 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 062/112] virtiofsd: print log only when priority is high enough -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Eryu Guan - -Introduce "-o log_level=" command line option to specify current log -level (priority), valid values are "debug info warn err", e.g. - - ./virtiofsd -o log_level=debug ... - -So only log priority higher than "debug" will be printed to -stderr/syslog. And the default level is info. - -The "-o debug"/"-d" options are kept, and imply debug log level. - -Signed-off-by: Eryu Guan -dgilbert: Reworked for libfuse's log_func -Signed-off-by: Dr. David Alan Gilbert -with fix by: -Signed-off-by: Xiao Yang -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit d240314a1a18a1d914af1b5763fe8c9a572e6409) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 75 ++++++++++--------------- - tools/virtiofsd/fuse_lowlevel.h | 1 + - tools/virtiofsd/helper.c | 8 ++- - tools/virtiofsd/passthrough_ll.c | 118 ++++++++++++++++----------------------- - 4 files changed, 87 insertions(+), 115 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 6ceb33d..a7a1968 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -158,19 +158,17 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, - struct fuse_out_header *out = iov[0].iov_base; - - out->len = iov_length(iov, count); -- if (se->debug) { -- if (out->unique == 0) { -- fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, -- out->len); -- } else if (out->error) { -- fuse_log(FUSE_LOG_DEBUG, -- " unique: %llu, error: %i (%s), outsize: %i\n", -- (unsigned long long)out->unique, out->error, -- strerror(-out->error), 
out->len); -- } else { -- fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", -- (unsigned long long)out->unique, out->len); -- } -+ if (out->unique == 0) { -+ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, -+ out->len); -+ } else if (out->error) { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, error: %i (%s), outsize: %i\n", -+ (unsigned long long)out->unique, out->error, -+ strerror(-out->error), out->len); -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", -+ (unsigned long long)out->unique, out->len); - } - - if (fuse_lowlevel_is_virtio(se)) { -@@ -1662,10 +1660,8 @@ static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, - return; - } - -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -- (unsigned long long)arg->unique); -- } -+ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -+ (unsigned long long)arg->unique); - - req->u.i.unique = arg->unique; - -@@ -1901,13 +1897,10 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, - } - } - -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -- if (arg->major == 7 && arg->minor >= 6) { -- fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -- fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", -- arg->max_readahead); -- } -+ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -+ if (arg->major == 7 && arg->minor >= 6) { -+ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -+ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", arg->max_readahead); - } - se->conn.proto_major = arg->major; - se->conn.proto_minor = arg->minor; -@@ -2116,19 +2109,14 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, - outarg.congestion_threshold = se->conn.congestion_threshold; - outarg.time_gran = se->conn.time_gran; - -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, -- outarg.minor); -- fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -- 
fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", -- outarg.max_readahead); -- fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -- fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", -- outarg.max_background); -- fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -- outarg.congestion_threshold); -- fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); -- } -+ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); -+ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -+ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", outarg.max_readahead); -+ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -+ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", outarg.max_background); -+ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -+ outarg.congestion_threshold); -+ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); - - send_reply_ok(req, &outarg, outargsize); - } -@@ -2407,14 +2395,11 @@ void fuse_session_process_buf_int(struct fuse_session *se, - in = fuse_mbuf_iter_advance(&iter, sizeof(*in)); - assert(in); /* caller guarantees the input buffer is large enough */ - -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, -- "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, " -- "pid: %u\n", -- (unsigned long long)in->unique, -- opname((enum fuse_opcode)in->opcode), in->opcode, -- (unsigned long long)in->nodeid, buf->size, in->pid); -- } -+ fuse_log( -+ FUSE_LOG_DEBUG, -+ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", -+ (unsigned long long)in->unique, opname((enum fuse_opcode)in->opcode), -+ in->opcode, (unsigned long long)in->nodeid, buf->size, in->pid); - - req = fuse_ll_alloc_req(se); - if (req == NULL) { -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index f2750bc..138041e 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1796,6 +1796,7 @@ struct fuse_cmdline_opts { - int show_help; - 
int print_capabilities; - int syslog; -+ int log_level; - unsigned int max_idle_threads; - }; - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 9692ef9..6d50a46 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -34,7 +34,6 @@ - t, offsetof(struct fuse_cmdline_opts, p), v \ - } - -- - static const struct fuse_opt fuse_helper_opts[] = { - FUSE_HELPER_OPT("-h", show_help), - FUSE_HELPER_OPT("--help", show_help), -@@ -55,6 +54,10 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), - FUSE_HELPER_OPT("--syslog", syslog), -+ FUSE_HELPER_OPT_VALUE("log_level=debug", log_level, FUSE_LOG_DEBUG), -+ FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO), -+ FUSE_HELPER_OPT_VALUE("log_level=warn", log_level, FUSE_LOG_WARNING), -+ FUSE_HELPER_OPT_VALUE("log_level=err", log_level, FUSE_LOG_ERR), - FUSE_OPT_END - }; - -@@ -142,6 +145,9 @@ void fuse_cmdline_help(void) - " --syslog log to syslog (default stderr)\n" - " -f foreground operation\n" - " --daemonize run in background\n" -+ " -o log_level= log level, default to \"info\"\n" -+ " level could be one of \"debug, " -+ "info, warn, err\"\n" - " -o max_idle_threads the maximum number of idle worker " - "threads\n" - " allowed (default: 10)\n" -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 0372aca..ff6910f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -37,6 +37,7 @@ - - #include "qemu/osdep.h" - #include "fuse_virtio.h" -+#include "fuse_log.h" - #include "fuse_lowlevel.h" - #include - #include -@@ -140,6 +141,7 @@ static const struct fuse_opt lo_opts[] = { - FUSE_OPT_END - }; - static bool use_syslog = false; -+static int current_log_level; - - static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); - -@@ -458,11 +460,6 @@ static int lo_fd(fuse_req_t 
req, fuse_ino_t ino) - return inode ? inode->fd : -1; - } - --static bool lo_debug(fuse_req_t req) --{ -- return lo_data(req)->debug != 0; --} -- - static void lo_init(void *userdata, struct fuse_conn_info *conn) - { - struct lo_data *lo = (struct lo_data *)userdata; -@@ -472,15 +469,11 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - } - - if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { -- if (lo->debug) { -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); - conn->want |= FUSE_CAP_WRITEBACK_CACHE; - } - if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -- if (lo->debug) { -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } - } -@@ -823,10 +816,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - } - e->ino = inode->fuse_ino; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long)parent, name, (unsigned long long)e->ino); -- } -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, -+ name, (unsigned long long)e->ino); - - return 0; - -@@ -843,10 +834,8 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - struct fuse_entry_param e; - int err; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", -- parent, name); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent, -+ name); - - /* - * Don't use is_safe_path_component(), allow "." and ".." 
for NFS export -@@ -971,10 +960,8 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long)parent, name, (unsigned long long)e.ino); -- } -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, -+ name, (unsigned long long)e.ino); - - fuse_reply_entry(req, &e); - return; -@@ -1074,10 +1061,8 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - pthread_mutex_unlock(&lo->mutex); - e.ino = inode->fuse_ino; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long)parent, name, (unsigned long long)e.ino); -- } -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, -+ name, (unsigned long long)e.ino); - - fuse_reply_entry(req, &e); - return; -@@ -1171,11 +1156,9 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - return; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -- (unsigned long long)ino, (unsigned long long)inode->refcount, -- (unsigned long long)nlookup); -- } -+ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -+ (unsigned long long)ino, (unsigned long long)inode->refcount, -+ (unsigned long long)nlookup); - - unref_inode(lo, inode, nlookup); - } -@@ -1445,10 +1428,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - int err; - struct lo_cred old = {}; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", -- parent, name); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", parent, -+ name); - - if (!is_safe_path_component(name)) { - fuse_reply_err(req, EINVAL); -@@ -1525,10 +1506,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - char buf[64]; - struct lo_data *lo = lo_data(req); - -- if (lo_debug(req)) { -- 
fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, -- fi->flags); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, -+ fi->flags); - - /* - * With writeback cache, kernel may send read requests even -@@ -1644,12 +1623,10 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, - { - struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, -- "lo_read(ino=%" PRIu64 ", size=%zd, " -- "off=%lu)\n", -- ino, size, (unsigned long)offset); -- } -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_read(ino=%" PRIu64 ", size=%zd, " -+ "off=%lu)\n", -+ ino, size, (unsigned long)offset); - - buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; - buf.buf[0].fd = lo_fi_fd(req, fi); -@@ -1671,11 +1648,9 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - out_buf.buf[0].fd = lo_fi_fd(req, fi); - out_buf.buf[0].pos = off; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, -- "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, -- out_buf.buf[0].size, (unsigned long)off); -- } -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, -+ out_buf.buf[0].size, (unsigned long)off); - - /* - * If kill_priv is set, drop CAP_FSETID which should lead to kernel -@@ -1774,11 +1749,8 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, -- "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name, -- size); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", -+ ino, name, size); - - if (inode->is_symlink) { - /* Sorry, no race free way to getxattr on symlink. 
*/ -@@ -1852,10 +1824,8 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", -- ino, size); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, -+ size); - - if (inode->is_symlink) { - /* Sorry, no race free way to listxattr on symlink. */ -@@ -1929,11 +1899,8 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, -- "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", -- ino, name, value, size); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 -+ ", name=%s value=%s size=%zd)\n", ino, name, value, size); - - if (inode->is_symlink) { - /* Sorry, no race free way to setxattr on symlink. */ -@@ -1978,10 +1945,8 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", -- ino, name); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, -+ name); - - if (inode->is_symlink) { - /* Sorry, no race free way to setxattr on symlink. */ -@@ -2303,6 +2268,10 @@ static void setup_nofile_rlimit(void) - - static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) - { -+ if (current_log_level < level) { -+ return; -+ } -+ - if (use_syslog) { - int priority = LOG_ERR; - switch (level) { -@@ -2401,8 +2370,19 @@ int main(int argc, char *argv[]) - return 1; - } - -+ /* -+ * log_level is 0 if not configured via cmd options (0 is LOG_EMERG, -+ * and we don't use this log level). 
-+ */ -+ if (opts.log_level != 0) { -+ current_log_level = opts.log_level; -+ } - lo.debug = opts.debug; -+ if (lo.debug) { -+ current_log_level = FUSE_LOG_DEBUG; -+ } - lo.root.refcount = 2; -+ - if (lo.source) { - struct stat stat; - int res; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-process-requests-in-a-thread-pool.patch b/SOURCES/kvm-virtiofsd-process-requests-in-a-thread-pool.patch deleted file mode 100644 index 87fff99..0000000 --- a/SOURCES/kvm-virtiofsd-process-requests-in-a-thread-pool.patch +++ /dev/null @@ -1,533 +0,0 @@ -From b0db5e666aaa43eadff3e60a1ada704f33b03074 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:19 +0100 -Subject: [PATCH 108/116] virtiofsd: process requests in a thread pool -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-105-dgilbert@redhat.com> -Patchwork-id: 93554 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 104/112] virtiofsd: process requests in a thread pool -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Introduce a thread pool so that fv_queue_thread() just pops -VuVirtqElements and hands them to the thread pool. For the time being -only one worker thread is allowed since passthrough_ll.c is not -thread-safe yet. Future patches will lift this restriction so that -multiple FUSE requests can be processed in parallel. - -The main new concept is struct FVRequest, which contains both -VuVirtqElement and struct fuse_chan. We now have fv_VuDev for a device, -fv_QueueInfo for a virtqueue, and FVRequest for a request. Some of -fv_QueueInfo's fields are moved into FVRequest because they are -per-request. The name FVRequest conforms to QEMU coding style and I -expect the struct fv_* types will be renamed in a future refactoring. - -This patch series is not optimal. 
fbuf reuse is dropped so each request -does malloc(se->bufsize), but there is no clean and cheap way to keep -this with a thread pool. The vq_lock mutex is held for longer than -necessary, especially during the eventfd_write() syscall. Performance -can be improved in the future. - -prctl(2) had to be added to the seccomp whitelist because glib invokes -it. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit a3d756c5aecccc4c0e51060a7e2f1c87bf8f1180) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 359 +++++++++++++++++++++++------------------- - 1 file changed, 201 insertions(+), 158 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index f6242f9..0dcf2ef 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -22,6 +22,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -37,17 +38,28 @@ - struct fv_VuDev; - struct fv_QueueInfo { - pthread_t thread; -+ /* -+ * This lock protects the VuVirtq preventing races between -+ * fv_queue_thread() and fv_queue_worker(). 
-+ */ -+ pthread_mutex_t vq_lock; -+ - struct fv_VuDev *virtio_dev; - - /* Our queue index, corresponds to array position */ - int qidx; - int kick_fd; - int kill_fd; /* For killing the thread */ -+}; - -- /* The element for the command currently being processed */ -- VuVirtqElement *qe; -+/* A FUSE request */ -+typedef struct { -+ VuVirtqElement elem; -+ struct fuse_chan ch; -+ -+ /* Used to complete requests that involve no reply */ - bool reply_sent; --}; -+} FVRequest; - - /* - * We pass the dev element into libvhost-user -@@ -191,8 +203,11 @@ static void copy_iov(struct iovec *src_iov, int src_count, - int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int count) - { -- VuVirtqElement *elem; -- VuVirtq *q; -+ FVRequest *req = container_of(ch, FVRequest, ch); -+ struct fv_QueueInfo *qi = ch->qi; -+ VuDev *dev = &se->virtio_dev->dev; -+ VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ VuVirtqElement *elem = &req->elem; - int ret = 0; - - assert(count >= 1); -@@ -205,11 +220,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - - /* unique == 0 is notification, which we don't support */ - assert(out->unique); -- /* For virtio we always have ch */ -- assert(ch); -- assert(!ch->qi->reply_sent); -- elem = ch->qi->qe; -- q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; -+ assert(!req->reply_sent); - - /* The 'in' part of the elem is to qemu */ - unsigned int in_num = elem->in_num; -@@ -236,9 +247,15 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - } - - copy_iov(iov, count, in_sg, in_num, tosend_len); -- vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); -- vu_queue_notify(&se->virtio_dev->dev, q); -- ch->qi->reply_sent = true; -+ -+ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ pthread_mutex_lock(&qi->vq_lock); -+ vu_queue_push(dev, q, elem, tosend_len); -+ vu_queue_notify(dev, q); -+ pthread_mutex_unlock(&qi->vq_lock); -+ 
pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ -+ req->reply_sent = true; - - err: - return ret; -@@ -254,9 +271,12 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int count, struct fuse_bufvec *buf, - size_t len) - { -+ FVRequest *req = container_of(ch, FVRequest, ch); -+ struct fv_QueueInfo *qi = ch->qi; -+ VuDev *dev = &se->virtio_dev->dev; -+ VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ VuVirtqElement *elem = &req->elem; - int ret = 0; -- VuVirtqElement *elem; -- VuVirtq *q; - - assert(count >= 1); - assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); -@@ -275,11 +295,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - /* unique == 0 is notification which we don't support */ - assert(out->unique); - -- /* For virtio we always have ch */ -- assert(ch); -- assert(!ch->qi->reply_sent); -- elem = ch->qi->qe; -- q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; -+ assert(!req->reply_sent); - - /* The 'in' part of the elem is to qemu */ - unsigned int in_num = elem->in_num; -@@ -395,33 +411,175 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - - ret = 0; - -- vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); -- vu_queue_notify(&se->virtio_dev->dev, q); -+ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ pthread_mutex_lock(&qi->vq_lock); -+ vu_queue_push(dev, q, elem, tosend_len); -+ vu_queue_notify(dev, q); -+ pthread_mutex_unlock(&qi->vq_lock); -+ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); - - err: - if (ret == 0) { -- ch->qi->reply_sent = true; -+ req->reply_sent = true; - } - - return ret; - } - -+/* Process one FVRequest in a thread pool */ -+static void fv_queue_worker(gpointer data, gpointer user_data) -+{ -+ struct fv_QueueInfo *qi = user_data; -+ struct fuse_session *se = qi->virtio_dev->se; -+ struct VuDev *dev = &qi->virtio_dev->dev; -+ FVRequest *req = data; -+ VuVirtqElement *elem = &req->elem; -+ 
struct fuse_buf fbuf = {}; -+ bool allocated_bufv = false; -+ struct fuse_bufvec bufv; -+ struct fuse_bufvec *pbufv; -+ -+ assert(se->bufsize > sizeof(struct fuse_in_header)); -+ -+ /* -+ * An element contains one request and the space to send our response -+ * They're spread over multiple descriptors in a scatter/gather set -+ * and we can't trust the guest to keep them still; so copy in/out. -+ */ -+ fbuf.mem = malloc(se->bufsize); -+ assert(fbuf.mem); -+ -+ fuse_mutex_init(&req->ch.lock); -+ req->ch.fd = -1; -+ req->ch.qi = qi; -+ -+ /* The 'out' part of the elem is from qemu */ -+ unsigned int out_num = elem->out_num; -+ struct iovec *out_sg = elem->out_sg; -+ size_t out_len = iov_size(out_sg, out_num); -+ fuse_log(FUSE_LOG_DEBUG, -+ "%s: elem %d: with %d out desc of length %zd\n", -+ __func__, elem->index, out_num, out_len); -+ -+ /* -+ * The elem should contain a 'fuse_in_header' (in to fuse) -+ * plus the data based on the len in the header. -+ */ -+ if (out_len < sizeof(struct fuse_in_header)) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", -+ __func__, elem->index); -+ assert(0); /* TODO */ -+ } -+ if (out_len > se->bufsize) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", __func__, -+ elem->index); -+ assert(0); /* TODO */ -+ } -+ /* Copy just the first element and look at it */ -+ copy_from_iov(&fbuf, 1, out_sg); -+ -+ pbufv = NULL; /* Compiler thinks an unitialised path */ -+ if (out_num > 2 && -+ out_sg[0].iov_len == sizeof(struct fuse_in_header) && -+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && -+ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { -+ /* -+ * For a write we don't actually need to copy the -+ * data, we can just do it straight out of guest memory -+ * but we must still copy the headers in case the guest -+ * was nasty and changed them while we were using them. 
-+ */ -+ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); -+ -+ /* copy the fuse_write_in header afte rthe fuse_in_header */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; -+ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; -+ -+ /* Allocate the bufv, with space for the rest of the iov */ -+ pbufv = malloc(sizeof(struct fuse_bufvec) + -+ sizeof(struct fuse_buf) * (out_num - 2)); -+ if (!pbufv) { -+ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", -+ __func__); -+ goto out; -+ } -+ -+ allocated_bufv = true; -+ pbufv->count = 1; -+ pbufv->buf[0] = fbuf; -+ -+ size_t iovindex, pbufvindex; -+ iovindex = 2; /* 2 headers, separate iovs */ -+ pbufvindex = 1; /* 2 headers, 1 fusebuf */ -+ -+ for (; iovindex < out_num; iovindex++, pbufvindex++) { -+ pbufv->count++; -+ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ -+ pbufv->buf[pbufvindex].flags = 0; -+ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; -+ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; -+ } -+ } else { -+ /* Normal (non fast write) path */ -+ -+ /* Copy the rest of the buffer */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, out_num - 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; -+ fbuf.size = out_len; -+ -+ /* TODO! 
Endianness of header */ -+ -+ /* TODO: Add checks for fuse_session_exited */ -+ bufv.buf[0] = fbuf; -+ bufv.count = 1; -+ pbufv = &bufv; -+ } -+ pbufv->idx = 0; -+ pbufv->off = 0; -+ fuse_session_process_buf_int(se, pbufv, &req->ch); -+ -+out: -+ if (allocated_bufv) { -+ free(pbufv); -+ } -+ -+ /* If the request has no reply, still recycle the virtqueue element */ -+ if (!req->reply_sent) { -+ struct VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ -+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", __func__, -+ elem->index); -+ -+ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ pthread_mutex_lock(&qi->vq_lock); -+ vu_queue_push(dev, q, elem, 0); -+ vu_queue_notify(dev, q); -+ pthread_mutex_unlock(&qi->vq_lock); -+ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ } -+ -+ pthread_mutex_destroy(&req->ch.lock); -+ free(fbuf.mem); -+ free(req); -+} -+ - /* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { - struct fv_QueueInfo *qi = opaque; - struct VuDev *dev = &qi->virtio_dev->dev; - struct VuVirtq *q = vu_get_queue(dev, qi->qidx); -- struct fuse_session *se = qi->virtio_dev->se; -- struct fuse_chan ch; -- struct fuse_buf fbuf; -+ GThreadPool *pool; - -- fbuf.mem = NULL; -- fbuf.flags = 0; -- -- fuse_mutex_init(&ch.lock); -- ch.fd = (int)0xdaff0d111; -- ch.qi = qi; -+ pool = g_thread_pool_new(fv_queue_worker, qi, 1 /* TODO max_threads */, -+ TRUE, NULL); -+ if (!pool) { -+ fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); -+ return NULL; -+ } - - fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, - qi->qidx, qi->kick_fd); -@@ -478,6 +636,7 @@ static void *fv_queue_thread(void *opaque) - /* Mutual exclusion with virtio_loop() */ - ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); - assert(ret == 0); /* there is no possible error case */ -+ pthread_mutex_lock(&qi->vq_lock); - /* out is from guest, in is too 
guest */ - unsigned int in_bytes, out_bytes; - vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); -@@ -486,141 +645,22 @@ static void *fv_queue_thread(void *opaque) - "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", - __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); - -- - while (1) { -- bool allocated_bufv = false; -- struct fuse_bufvec bufv; -- struct fuse_bufvec *pbufv; -- -- /* -- * An element contains one request and the space to send our -- * response They're spread over multiple descriptors in a -- * scatter/gather set and we can't trust the guest to keep them -- * still; so copy in/out. -- */ -- VuVirtqElement *elem = vu_queue_pop(dev, q, sizeof(VuVirtqElement)); -- if (!elem) { -+ FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest)); -+ if (!req) { - break; - } - -- qi->qe = elem; -- qi->reply_sent = false; -+ req->reply_sent = false; - -- if (!fbuf.mem) { -- fbuf.mem = malloc(se->bufsize); -- assert(fbuf.mem); -- assert(se->bufsize > sizeof(struct fuse_in_header)); -- } -- /* The 'out' part of the elem is from qemu */ -- unsigned int out_num = elem->out_num; -- struct iovec *out_sg = elem->out_sg; -- size_t out_len = iov_size(out_sg, out_num); -- fuse_log(FUSE_LOG_DEBUG, -- "%s: elem %d: with %d out desc of length %zd\n", __func__, -- elem->index, out_num, out_len); -- -- /* -- * The elem should contain a 'fuse_in_header' (in to fuse) -- * plus the data based on the len in the header. 
-- */ -- if (out_len < sizeof(struct fuse_in_header)) { -- fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", -- __func__, elem->index); -- assert(0); /* TODO */ -- } -- if (out_len > se->bufsize) { -- fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", -- __func__, elem->index); -- assert(0); /* TODO */ -- } -- /* Copy just the first element and look at it */ -- copy_from_iov(&fbuf, 1, out_sg); -- -- if (out_num > 2 && -- out_sg[0].iov_len == sizeof(struct fuse_in_header) && -- ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && -- out_sg[1].iov_len == sizeof(struct fuse_write_in)) { -- /* -- * For a write we don't actually need to copy the -- * data, we can just do it straight out of guest memory -- * but we must still copy the headers in case the guest -- * was nasty and changed them while we were using them. -- */ -- fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); -- -- /* copy the fuse_write_in header after the fuse_in_header */ -- fbuf.mem += out_sg->iov_len; -- copy_from_iov(&fbuf, 1, out_sg + 1); -- fbuf.mem -= out_sg->iov_len; -- fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; -- -- /* Allocate the bufv, with space for the rest of the iov */ -- allocated_bufv = true; -- pbufv = malloc(sizeof(struct fuse_bufvec) + -- sizeof(struct fuse_buf) * (out_num - 2)); -- if (!pbufv) { -- vu_queue_unpop(dev, q, elem, 0); -- free(elem); -- fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", -- __func__); -- goto out; -- } -- -- pbufv->count = 1; -- pbufv->buf[0] = fbuf; -- -- size_t iovindex, pbufvindex; -- iovindex = 2; /* 2 headers, separate iovs */ -- pbufvindex = 1; /* 2 headers, 1 fusebuf */ -- -- for (; iovindex < out_num; iovindex++, pbufvindex++) { -- pbufv->count++; -- pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ -- pbufv->buf[pbufvindex].flags = 0; -- pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; -- pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; -- } -- } else { -- /* Normal (non fast write) 
path */ -- -- /* Copy the rest of the buffer */ -- fbuf.mem += out_sg->iov_len; -- copy_from_iov(&fbuf, out_num - 1, out_sg + 1); -- fbuf.mem -= out_sg->iov_len; -- fbuf.size = out_len; -- -- /* TODO! Endianness of header */ -- -- /* TODO: Add checks for fuse_session_exited */ -- bufv.buf[0] = fbuf; -- bufv.count = 1; -- pbufv = &bufv; -- } -- pbufv->idx = 0; -- pbufv->off = 0; -- fuse_session_process_buf_int(se, pbufv, &ch); -- -- if (allocated_bufv) { -- free(pbufv); -- } -- -- if (!qi->reply_sent) { -- fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", -- __func__, elem->index); -- /* I think we've still got to recycle the element */ -- vu_queue_push(dev, q, elem, 0); -- vu_queue_notify(dev, q); -- } -- qi->qe = NULL; -- free(elem); -- elem = NULL; -+ g_thread_pool_push(pool, req, NULL); - } - -+ pthread_mutex_unlock(&qi->vq_lock); - pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); - } --out: -- pthread_mutex_destroy(&ch.lock); -- free(fbuf.mem); -+ -+ g_thread_pool_free(pool, FALSE, TRUE); - - return NULL; - } -@@ -643,6 +683,7 @@ static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) - fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n", - __func__, qidx, ret); - } -+ pthread_mutex_destroy(&ourqi->vq_lock); - close(ourqi->kill_fd); - ourqi->kick_fd = -1; - free(vud->qi[qidx]); -@@ -696,6 +737,8 @@ static void fv_queue_set_started(VuDev *dev, int qidx, bool started) - - ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE); - assert(ourqi->kill_fd != -1); -+ pthread_mutex_init(&ourqi->vq_lock, NULL); -+ - if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { - fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", - __func__, qidx); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch b/SOURCES/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch deleted file mode 100644 index 181e32d..0000000 --- 
a/SOURCES/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch +++ /dev/null @@ -1,159 +0,0 @@ -From a8a1835a82510be7d2d6edcc28a60e506a2cedad Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:46 +0100 -Subject: [PATCH 015/116] virtiofsd: remove mountpoint dummy argument -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-12-dgilbert@redhat.com> -Patchwork-id: 93466 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 011/112] virtiofsd: remove mountpoint dummy argument -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Classic FUSE file system daemons take a mountpoint argument but -virtiofsd exposes a vhost-user UNIX domain socket instead. The -mountpoint argument is not used by virtiofsd but the user is still -required to pass a dummy argument on the command-line. - -Remove the mountpoint argument to clean up the command-line. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 67aab02272f6cb47c56420f60b370c184961b5ca) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 2 +- - tools/virtiofsd/fuse_lowlevel.h | 4 +--- - tools/virtiofsd/helper.c | 20 +++----------------- - tools/virtiofsd/passthrough_ll.c | 12 ++---------- - 4 files changed, 7 insertions(+), 31 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 5c9cb52..2f32c68 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2455,7 +2455,7 @@ out1: - return NULL; - } - --int fuse_session_mount(struct fuse_session *se, const char *mountpoint) -+int fuse_session_mount(struct fuse_session *se) - { - int fd; - -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index adb9054..8d8909b 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1863,7 +1863,6 @@ struct fuse_cmdline_opts { - int foreground; - int debug; - int nodefault_subtype; -- char *mountpoint; - int show_version; - int show_help; - unsigned int max_idle_threads; -@@ -1924,12 +1923,11 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - /** - * Mount a FUSE file system. - * -- * @param mountpoint the mount point path - * @param se session object - * - * @return 0 on success, -1 on failure. - **/ --int fuse_session_mount(struct fuse_session *se, const char *mountpoint); -+int fuse_session_mount(struct fuse_session *se); - - /** - * Enter a single threaded, blocking event loop. 
-diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5711dd2..5e6f205 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -140,27 +140,13 @@ void fuse_cmdline_help(void) - static int fuse_helper_opt_proc(void *data, const char *arg, int key, - struct fuse_args *outargs) - { -+ (void)data; - (void)outargs; -- struct fuse_cmdline_opts *opts = data; - - switch (key) { - case FUSE_OPT_KEY_NONOPT: -- if (!opts->mountpoint) { -- if (fuse_mnt_parse_fuse_fd(arg) != -1) { -- return fuse_opt_add_opt(&opts->mountpoint, arg); -- } -- -- char mountpoint[PATH_MAX] = ""; -- if (realpath(arg, mountpoint) == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: bad mount point `%s': %s\n", arg, -- strerror(errno)); -- return -1; -- } -- return fuse_opt_add_opt(&opts->mountpoint, mountpoint); -- } else { -- fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -- return -1; -- } -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -+ return -1; - - default: - /* Pass through unknown options */ -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index c5850ef..9377718 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1297,7 +1297,7 @@ int main(int argc, char *argv[]) - return 1; - } - if (opts.show_help) { -- printf("usage: %s [options] \n\n", argv[0]); -+ printf("usage: %s [options]\n\n", argv[0]); - fuse_cmdline_help(); - fuse_lowlevel_help(); - ret = 0; -@@ -1308,13 +1308,6 @@ int main(int argc, char *argv[]) - goto err_out1; - } - -- if (opts.mountpoint == NULL) { -- printf("usage: %s [options] \n", argv[0]); -- printf(" %s --help\n", argv[0]); -- ret = 1; -- goto err_out1; -- } -- - if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { - return 1; - } -@@ -1374,7 +1367,7 @@ int main(int argc, char *argv[]) - goto err_out2; - } - -- if (fuse_session_mount(se, opts.mountpoint) != 0) { -+ if (fuse_session_mount(se) != 0) { - goto err_out3; - } - -@@ -1393,7 
+1386,6 @@ err_out3: - err_out2: - fuse_session_destroy(se); - err_out1: -- free(opts.mountpoint); - fuse_opt_free_args(&args); - - if (lo.root.fd >= 0) { --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-remove-unused-notify-reply-support.patch b/SOURCES/kvm-virtiofsd-remove-unused-notify-reply-support.patch deleted file mode 100644 index 98fb968..0000000 --- a/SOURCES/kvm-virtiofsd-remove-unused-notify-reply-support.patch +++ /dev/null @@ -1,294 +0,0 @@ -From e5534c0d4b866f61dbafa8d2422a24ab956189c1 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:47 +0100 -Subject: [PATCH 016/116] virtiofsd: remove unused notify reply support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-13-dgilbert@redhat.com> -Patchwork-id: 93467 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 012/112] virtiofsd: remove unused notify reply support -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Notify reply support is unused by virtiofsd. The code would need to be -updated to validate input buffer sizes. Remove this unused code since -changes to it are untestable. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 64c6f408a29ef03e9b8da9f5a5d8fd511b0d801e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 147 +--------------------------------------- - tools/virtiofsd/fuse_lowlevel.h | 47 ------------- - 2 files changed, 1 insertion(+), 193 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 2f32c68..eb0ec49 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -31,12 +31,6 @@ - #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) - #define OFFSET_MAX 0x7fffffffffffffffLL - --#define container_of(ptr, type, member) \ -- ({ \ -- const typeof(((type *)0)->member) *__mptr = (ptr); \ -- (type *)((char *)__mptr - offsetof(type, member)); \ -- }) -- - struct fuse_pollhandle { - uint64_t kh; - struct fuse_session *se; -@@ -1862,52 +1856,6 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - send_reply_ok(req, NULL, 0); - } - --static void list_del_nreq(struct fuse_notify_req *nreq) --{ -- struct fuse_notify_req *prev = nreq->prev; -- struct fuse_notify_req *next = nreq->next; -- prev->next = next; -- next->prev = prev; --} -- --static void list_add_nreq(struct fuse_notify_req *nreq, -- struct fuse_notify_req *next) --{ -- struct fuse_notify_req *prev = next->prev; -- nreq->next = next; -- nreq->prev = prev; -- prev->next = nreq; -- next->prev = nreq; --} -- --static void list_init_nreq(struct fuse_notify_req *nreq) --{ -- nreq->next = nreq; -- nreq->prev = nreq; --} -- --static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg, const struct fuse_buf *buf) --{ -- struct fuse_session *se = req->se; -- struct fuse_notify_req *nreq; -- struct fuse_notify_req *head; -- -- pthread_mutex_lock(&se->lock); -- head = &se->notify_list; -- for (nreq = head->next; nreq != head; nreq = nreq->next) { -- if (nreq->unique == req->unique) { -- list_del_nreq(nreq); -- break; -- } -- } 
-- pthread_mutex_unlock(&se->lock); -- -- if (nreq != head) { -- nreq->reply(nreq, req, nodeid, inarg, buf); -- } --} -- - static int send_notify_iov(struct fuse_session *se, int notify_code, - struct iovec *iov, int count) - { -@@ -2059,95 +2007,6 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - return res; - } - --struct fuse_retrieve_req { -- struct fuse_notify_req nreq; -- void *cookie; --}; -- --static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, fuse_req_t req, -- fuse_ino_t ino, const void *inarg, -- const struct fuse_buf *ibuf) --{ -- struct fuse_session *se = req->se; -- struct fuse_retrieve_req *rreq = -- container_of(nreq, struct fuse_retrieve_req, nreq); -- const struct fuse_notify_retrieve_in *arg = inarg; -- struct fuse_bufvec bufv = { -- .buf[0] = *ibuf, -- .count = 1, -- }; -- -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -- bufv.buf[0].mem = PARAM(arg); -- } -- -- bufv.buf[0].size -= -- sizeof(struct fuse_in_header) + sizeof(struct fuse_notify_retrieve_in); -- -- if (bufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); -- fuse_reply_none(req); -- goto out; -- } -- bufv.buf[0].size = arg->size; -- -- if (se->op.retrieve_reply) { -- se->op.retrieve_reply(req, rreq->cookie, ino, arg->offset, &bufv); -- } else { -- fuse_reply_none(req); -- } --out: -- free(rreq); --} -- --int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -- size_t size, off_t offset, void *cookie) --{ -- struct fuse_notify_retrieve_out outarg; -- struct iovec iov[2]; -- struct fuse_retrieve_req *rreq; -- int err; -- -- if (!se) { -- return -EINVAL; -- } -- -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { -- return -ENOSYS; -- } -- -- rreq = malloc(sizeof(*rreq)); -- if (rreq == NULL) { -- return -ENOMEM; -- } -- -- pthread_mutex_lock(&se->lock); -- rreq->cookie = cookie; -- rreq->nreq.unique = se->notify_ctr++; -- rreq->nreq.reply = 
fuse_ll_retrieve_reply; -- list_add_nreq(&rreq->nreq, &se->notify_list); -- pthread_mutex_unlock(&se->lock); -- -- outarg.notify_unique = rreq->nreq.unique; -- outarg.nodeid = ino; -- outarg.offset = offset; -- outarg.size = size; -- outarg.padding = 0; -- -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -- -- err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); -- if (err) { -- pthread_mutex_lock(&se->lock); -- list_del_nreq(&rreq->nreq); -- pthread_mutex_unlock(&se->lock); -- free(rreq); -- } -- -- return err; --} -- - void *fuse_req_userdata(fuse_req_t req) - { - return req->se->userdata; -@@ -2226,7 +2085,7 @@ static struct { - [FUSE_POLL] = { do_poll, "POLL" }, - [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, - [FUSE_DESTROY] = { do_destroy, "DESTROY" }, -- [FUSE_NOTIFY_REPLY] = { (void *)1, "NOTIFY_REPLY" }, -+ [FUSE_NOTIFY_REPLY] = { NULL, "NOTIFY_REPLY" }, - [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, - [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" }, - [FUSE_RENAME2] = { do_rename2, "RENAME2" }, -@@ -2333,8 +2192,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, - inarg = (void *)&in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) { - do_write_buf(req, in->nodeid, inarg, buf); -- } else if (in->opcode == FUSE_NOTIFY_REPLY) { -- do_notify_reply(req, in->nodeid, inarg, buf); - } else { - fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); - } -@@ -2437,8 +2294,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - - list_init_req(&se->list); - list_init_req(&se->interrupts); -- list_init_nreq(&se->notify_list); -- se->notify_ctr = 1; - fuse_mutex_init(&se->lock); - - memcpy(&se->op, op, op_size); -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 8d8909b..12a84b4 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1085,21 +1085,6 @@ struct fuse_lowlevel_ops { - off_t off, struct fuse_file_info *fi); - - 
/** -- * Callback function for the retrieve request -- * -- * Valid replies: -- * fuse_reply_none -- * -- * @param req request handle -- * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() -- * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() -- * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() -- * @param bufv the buffer containing the returned data -- */ -- void (*retrieve_reply)(fuse_req_t req, void *cookie, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv); -- -- /** - * Forget about multiple inodes - * - * See description of the forget function for more -@@ -1726,38 +1711,6 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - off_t offset, struct fuse_bufvec *bufv, - enum fuse_buf_copy_flags flags); --/** -- * Retrieve data from the kernel buffers -- * -- * Retrieve data in the kernel buffers belonging to the given inode. -- * If successful then the retrieve_reply() method will be called with -- * the returned data. -- * -- * Only present pages are returned in the retrieve reply. Retrieving -- * stops when it finds a non-present page and only data prior to that -- * is returned. -- * -- * If this function returns an error, then the retrieve will not be -- * completed and no reply will be sent. -- * -- * This function doesn't change the dirty state of pages in the kernel -- * buffer. For dirty pages the write() method will be called -- * regardless of having been retrieved previously. -- * -- * Added in FUSE protocol version 7.15. If the kernel does not support -- * this (or a newer) version, the function will return -ENOSYS and do -- * nothing. 
-- * -- * @param se the session object -- * @param ino the inode number -- * @param size the number of bytes to retrieve -- * @param offset the starting offset into the file to retrieve from -- * @param cookie user data to supply to the reply callback -- * @return zero for success, -errno for failure -- */ --int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -- size_t size, off_t offset, void *cookie); -- - - /* - * Utility functions --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch b/SOURCES/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch deleted file mode 100644 index 97a0db3..0000000 --- a/SOURCES/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch +++ /dev/null @@ -1,139 +0,0 @@ -From e01a6e68d799ed2af0ca3b04d75818ba62b18682 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:08 +0100 -Subject: [PATCH 097/116] virtiofsd: rename inode->refcount to inode->nlookup -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-94-dgilbert@redhat.com> -Patchwork-id: 93547 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 093/112] virtiofsd: rename inode->refcount to inode->nlookup -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -This reference counter plays a specific role in the FUSE protocol. It's -not a generic object reference counter and the FUSE kernel code calls it -"nlookup". - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 1222f015558fc34cea02aa3a5a92de608c82cec8) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 37 +++++++++++++++++++++++++------------ - 1 file changed, 25 insertions(+), 12 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 2d703b5..c819b5f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -99,7 +99,20 @@ struct lo_inode { - int fd; - bool is_symlink; - struct lo_key key; -- uint64_t refcount; /* protected by lo->mutex */ -+ -+ /* -+ * This counter keeps the inode alive during the FUSE session. -+ * Incremented when the FUSE inode number is sent in a reply -+ * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is -+ * released by requests like FUSE_FORGET, FUSE_RMDIR, FUSE_RENAME, etc. -+ * -+ * Note that this value is untrusted because the client can manipulate -+ * it arbitrarily using FUSE_FORGET requests. -+ * -+ * Protected by lo->mutex. 
-+ */ -+ uint64_t nlookup; -+ - fuse_ino_t fuse_ino; - pthread_mutex_t plock_mutex; - GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ -@@ -568,7 +581,7 @@ retry: - if (last == path) { - p = &lo->root; - pthread_mutex_lock(&lo->mutex); -- p->refcount++; -+ p->nlookup++; - pthread_mutex_unlock(&lo->mutex); - } else { - *last = '\0'; -@@ -786,8 +799,8 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - pthread_mutex_lock(&lo->mutex); - p = g_hash_table_lookup(lo->inodes, &key); - if (p) { -- assert(p->refcount > 0); -- p->refcount++; -+ assert(p->nlookup > 0); -+ p->nlookup++; - } - pthread_mutex_unlock(&lo->mutex); - -@@ -855,7 +868,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - inode->is_symlink = S_ISLNK(e->attr.st_mode); -- inode->refcount = 1; -+ inode->nlookup = 1; - inode->fd = newfd; - newfd = -1; - inode->key.ino = e->attr.st_ino; -@@ -1112,7 +1125,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - } - - pthread_mutex_lock(&lo->mutex); -- inode->refcount++; -+ inode->nlookup++; - pthread_mutex_unlock(&lo->mutex); - e.ino = inode->fuse_ino; - -@@ -1193,9 +1206,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - } - - pthread_mutex_lock(&lo->mutex); -- assert(inode->refcount >= n); -- inode->refcount -= n; -- if (!inode->refcount) { -+ assert(inode->nlookup >= n); -+ inode->nlookup -= n; -+ if (!inode->nlookup) { - lo_map_remove(&lo->ino_map, inode->fuse_ino); - g_hash_table_remove(lo->inodes, &inode->key); - if (g_hash_table_size(inode->posix_locks)) { -@@ -1216,7 +1229,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) - struct lo_inode *inode = value; - struct lo_data *lo = user_data; - -- inode->refcount = 0; -+ inode->nlookup = 0; - lo_map_remove(&lo->ino_map, inode->fuse_ino); - close(inode->fd); - -@@ -1241,7 +1254,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, 
uint64_t nlookup) - } - - fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -- (unsigned long long)ino, (unsigned long long)inode->refcount, -+ (unsigned long long)ino, (unsigned long long)inode->nlookup, - (unsigned long long)nlookup); - - unref_inode_lolocked(lo, inode, nlookup); -@@ -2609,7 +2622,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) - root->fd = fd; - root->key.ino = stat.st_ino; - root->key.dev = stat.st_dev; -- root->refcount = 2; -+ root->nlookup = 2; - } - - static guint lo_key_hash(gconstpointer key) --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch b/SOURCES/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch deleted file mode 100644 index 95858f8..0000000 --- a/SOURCES/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch +++ /dev/null @@ -1,94 +0,0 @@ -From cfa4550f926e7a07757853f94273f2d1589cb9d3 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:48 +0100 -Subject: [PATCH 077/116] virtiofsd: rename unref_inode() to - unref_inode_lolocked() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-74-dgilbert@redhat.com> -Patchwork-id: 93526 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 073/112] virtiofsd: rename unref_inode() to unref_inode_lolocked() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Signed-off-by: Miklos Szeredi -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 95d2715791c60b5dc2d22e4eb7b83217273296fa) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 8b1784f..de12e75 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -148,8 +148,8 @@ static const struct fuse_opt lo_opts[] = { - }; - static bool use_syslog = false; - static int current_log_level; -- --static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); -+static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -+ uint64_t n); - - static struct { - pthread_mutex_t mutex; -@@ -586,7 +586,7 @@ retry: - return 0; - - fail_unref: -- unref_inode(lo, p, 1); -+ unref_inode_lolocked(lo, p, 1); - fail: - if (retries) { - retries--; -@@ -624,7 +624,7 @@ fallback: - res = lo_parent_and_name(lo, inode, path, &parent); - if (res != -1) { - res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); -- unref_inode(lo, parent, 1); -+ unref_inode_lolocked(lo, parent, 1); - } - - return res; -@@ -1027,7 +1027,7 @@ fallback: - res = lo_parent_and_name(lo, inode, path, &parent); - if (res != -1) { - res = linkat(parent->fd, path, dfd, name, 0); -- unref_inode(lo, parent, 1); -+ unref_inode_lolocked(lo, parent, 1); - } - - return res; -@@ -1141,7 +1141,8 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - fuse_reply_err(req, res == -1 ? 
errno : 0); - } - --static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) -+static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -+ uint64_t n) - { - if (!inode) { - return; -@@ -1181,7 +1182,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - (unsigned long long)ino, (unsigned long long)inode->refcount, - (unsigned long long)nlookup); - -- unref_inode(lo, inode, nlookup); -+ unref_inode_lolocked(lo, inode, nlookup); - } - - static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-sandbox-mount-namespace.patch b/SOURCES/kvm-virtiofsd-sandbox-mount-namespace.patch deleted file mode 100644 index ab6f751..0000000 --- a/SOURCES/kvm-virtiofsd-sandbox-mount-namespace.patch +++ /dev/null @@ -1,166 +0,0 @@ -From c7ae38df696e4be432fd418c670dcea892b910a7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:27 +0100 -Subject: [PATCH 056/116] virtiofsd: sandbox mount namespace -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-53-dgilbert@redhat.com> -Patchwork-id: 93504 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 052/112] virtiofsd: sandbox mount namespace -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Use a mount namespace with the shared directory tree mounted at "/" and -no other mounts. - -This prevents symlink escape attacks because symlink targets are -resolved only against the shared directory and cannot go outside it. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Peng Tao -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 5baa3b8e95064c2434bd9e2f312edd5e9ae275dc) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 89 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 89 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e2e2211..0570453 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -50,6 +50,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1943,6 +1944,58 @@ static void print_capabilities(void) - printf("}\n"); - } - -+/* This magic is based on lxc's lxc_pivot_root() */ -+static void setup_pivot_root(const char *source) -+{ -+ int oldroot; -+ int newroot; -+ -+ oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); -+ if (oldroot < 0) { -+ fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); -+ exit(1); -+ } -+ -+ newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC); -+ if (newroot < 0) { -+ fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source); -+ exit(1); -+ } -+ -+ if (fchdir(newroot) < 0) { -+ fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); -+ exit(1); -+ } -+ -+ if (syscall(__NR_pivot_root, ".", ".") < 0) { -+ fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n"); -+ exit(1); -+ } -+ -+ if (fchdir(oldroot) < 0) { -+ fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n"); -+ exit(1); -+ } -+ -+ if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n"); -+ exit(1); -+ } -+ -+ if (umount2(".", MNT_DETACH) < 0) { -+ fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n"); -+ exit(1); -+ } -+ -+ if (fchdir(newroot) < 0) { -+ fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); -+ exit(1); -+ } -+ -+ close(newroot); -+ close(oldroot); -+} -+ - static void setup_proc_self_fd(struct lo_data *lo) - { - lo->proc_self_fd = open("/proc/self/fd", O_PATH); -@@ -1952,6 +2005,39 @@ static void setup_proc_self_fd(struct lo_data *lo) - } - } - -+/* 
-+ * Make the source directory our root so symlinks cannot escape and no other -+ * files are accessible. -+ */ -+static void setup_mount_namespace(const char *source) -+{ -+ if (unshare(CLONE_NEWNS) != 0) { -+ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n"); -+ exit(1); -+ } -+ -+ if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n"); -+ exit(1); -+ } -+ -+ if (mount(source, source, NULL, MS_BIND, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); -+ exit(1); -+ } -+ -+ setup_pivot_root(source); -+} -+ -+/* -+ * Lock down this process to prevent access to other processes or files outside -+ * source directory. This reduces the impact of arbitrary code execution bugs. -+ */ -+static void setup_sandbox(struct lo_data *lo) -+{ -+ setup_mount_namespace(lo->source); -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2052,6 +2138,7 @@ int main(int argc, char *argv[]) - } - - lo.root.fd = open(lo.source, O_PATH); -+ - if (lo.root.fd == -1) { - fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); - exit(1); -@@ -2075,6 +2162,8 @@ int main(int argc, char *argv[]) - /* Must be after daemonize to get the right /proc/self/fd */ - setup_proc_self_fd(&lo); - -+ setup_sandbox(&lo); -+ - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch b/SOURCES/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch deleted file mode 100644 index e54248c..0000000 --- a/SOURCES/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 4cc435b3a8a9a419cc85ee883d5184f810f91e52 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:34 +0100 -Subject: [PATCH 063/116] virtiofsd: set maximum RLIMIT_NOFILE limit -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-60-dgilbert@redhat.com> -Patchwork-id: 93516 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 059/112] virtiofsd: set maximum RLIMIT_NOFILE limit -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -virtiofsd can exceed the default open file descriptor limit easily on -most systems. Take advantage of the fact that it runs as root to raise -the limit. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 01a6dc95ec7f71eeff9963fe3cb03d85225fba3e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 32 ++++++++++++++++++++++++++++++++ - 1 file changed, 32 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index d53cb1e..c281d81 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -53,6 +53,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -2268,6 +2269,35 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) - setup_seccomp(); - } - -+/* Raise the maximum number of open file descriptors */ -+static void setup_nofile_rlimit(void) -+{ -+ const rlim_t max_fds = 1000000; -+ struct rlimit rlim; -+ -+ if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { -+ fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n"); -+ exit(1); -+ } -+ -+ if (rlim.rlim_cur >= max_fds) { -+ return; /* nothing to do */ -+ } -+ -+ rlim.rlim_cur = max_fds; -+ rlim.rlim_max = max_fds; -+ -+ if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { -+ /* Ignore SELinux 
denials */ -+ if (errno == EPERM) { -+ return; -+ } -+ -+ fuse_log(FUSE_LOG_ERR, "setrlimit(RLIMIT_NOFILE): %m\n"); -+ exit(1); -+ } -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2389,6 +2419,8 @@ int main(int argc, char *argv[]) - - fuse_daemonize(opts.foreground); - -+ setup_nofile_rlimit(); -+ - /* Must be before sandbox since it wants /proc */ - setup_capng(); - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-stay-below-fs.file-max-sysctl-value-CVE-20.patch b/SOURCES/kvm-virtiofsd-stay-below-fs.file-max-sysctl-value-CVE-20.patch deleted file mode 100644 index ce74f4d..0000000 --- a/SOURCES/kvm-virtiofsd-stay-below-fs.file-max-sysctl-value-CVE-20.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 301f19f2ebd617e43e3a8e7bdcf694de580fe689 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 5 May 2020 16:35:56 +0100 -Subject: [PATCH 5/9] virtiofsd: stay below fs.file-max sysctl value - (CVE-2020-10717) - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200505163600.22956-4-dgilbert@redhat.com> -Patchwork-id: 96271 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 3/7] virtiofsd: stay below fs.file-max sysctl value (CVE-2020-10717) -Bugzilla: 1817445 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Michael S. Tsirkin - -From: Stefan Hajnoczi - -The system-wide fs.file-max sysctl value determines how many files can -be open. It defaults to a value calculated based on the machine's RAM -size. Previously virtiofsd would try to set RLIMIT_NOFILE to 1,000,000 -and this allowed the FUSE client to exhaust the number of open files -system-wide on Linux hosts with less than 10 GB of RAM! - -Take fs.file-max into account when choosing the default RLIMIT_NOFILE -value. - -Fixes: CVE-2020-10717 -Reported-by: Yuval Avrahami -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Dr. David Alan Gilbert -Message-Id: <20200501140644.220940-3-stefanha@redhat.com> -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 8c1d353d107b4fc344e27f2f08ea7fa25de2eea2) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/helper.c | 26 +++++++++++++++++++++++++- - 1 file changed, 25 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 9b3eddc..5b222ea 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -176,7 +176,8 @@ void fuse_cmdline_help(void) - " default: no_xattr\n" - " --rlimit-nofile= set maximum number of file descriptors\n" - " (0 leaves rlimit unchanged)\n" -- " default: 1,000,000 if the current rlimit is lower\n" -+ " default: min(1000000, fs.file-max - 16384)\n" -+ " if the current rlimit is lower\n" - ); - } - -@@ -199,9 +200,32 @@ static int fuse_helper_opt_proc(void *data, const char *arg, int key, - - static unsigned long get_default_rlimit_nofile(void) - { -+ g_autofree gchar *file_max_str = NULL; -+ const rlim_t reserved_fds = 16384; /* leave at least this many fds free */ - rlim_t max_fds = 1000000; /* our default RLIMIT_NOFILE target */ -+ rlim_t file_max; - struct rlimit rlim; - -+ /* -+ * Reduce max_fds below the system-wide maximum, if necessary. This -+ * ensures there are fds available for other processes so we don't -+ * cause resource exhaustion. 
-+ */ -+ if (!g_file_get_contents("/proc/sys/fs/file-max", &file_max_str, -+ NULL, NULL)) { -+ fuse_log(FUSE_LOG_ERR, "can't read /proc/sys/fs/file-max\n"); -+ exit(1); -+ } -+ file_max = g_ascii_strtoull(file_max_str, NULL, 10); -+ if (file_max < 2 * reserved_fds) { -+ fuse_log(FUSE_LOG_ERR, -+ "The fs.file-max sysctl is too low (%lu) to allow a " -+ "reasonable number of open files.\n", -+ (unsigned long)file_max); -+ exit(1); -+ } -+ max_fds = MIN(file_max - reserved_fds, max_fds); -+ - if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { - fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n"); - exit(1); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch b/SOURCES/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch deleted file mode 100644 index be6b244..0000000 --- a/SOURCES/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 06a24b54c94345b436d888a48b92fafa967c3d58 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:25 +0100 -Subject: [PATCH 114/116] virtiofsd: stop all queue threads on exit in - virtio_loop() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-111-dgilbert@redhat.com> -Patchwork-id: 93564 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 110/112] virtiofsd: stop all queue threads on exit in virtio_loop() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Eryu Guan - -On guest graceful shutdown, virtiofsd receives VHOST_USER_GET_VRING_BASE -request from VMM and shuts down virtqueues by calling fv_set_started(), -which joins fv_queue_thread() threads. So when virtio_loop() returns, -there should be no thread is still accessing data in fuse session and/or -virtio dev. - -But on abnormal exit, e.g. 
guest got killed for whatever reason, -vhost-user socket is closed and virtio_loop() breaks out the main loop -and returns to main(). But it's possible fv_queue_worker()s are still -working and accessing fuse session and virtio dev, which results in -crash or use-after-free. - -Fix it by stopping fv_queue_thread()s before virtio_loop() returns, -to make sure there's no-one could access fuse session and virtio dev. - -Reported-by: Qingming Su -Signed-off-by: Eryu Guan -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 9883df8ccae6d744a0c8d9cbf9d62b1797d70ebd) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 9f65823..80a6e92 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -815,6 +815,19 @@ int virtio_loop(struct fuse_session *se) - } - } - -+ /* -+ * Make sure all fv_queue_thread()s quit on exit, as we're about to -+ * free virtio dev and fuse session, no one should access them anymore. -+ */ -+ for (int i = 0; i < se->virtio_dev->nqueues; i++) { -+ if (!se->virtio_dev->qi[i]) { -+ continue; -+ } -+ -+ fuse_log(FUSE_LOG_INFO, "%s: Stopping queue %d thread\n", __func__, i); -+ fv_queue_cleanup_thread(se->virtio_dev, i); -+ } -+ - fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); - - return 0; --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch b/SOURCES/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch deleted file mode 100644 index f595ffa..0000000 --- a/SOURCES/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 1744329bcba4a3e1a82cec3b1a34b3fbf0a9d7cf Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:54 +0100 -Subject: [PATCH 083/116] virtiofsd: support nanosecond resolution for file - timestamp -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-80-dgilbert@redhat.com> -Patchwork-id: 93535 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 079/112] virtiofsd: support nanosecond resolution for file timestamp -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Jiufei Xue - -Define HAVE_STRUCT_STAT_ST_ATIM to 1 if `st_atim' is member of `struct -stat' which means support nanosecond resolution for the file timestamp -fields. - -Signed-off-by: Jiufei Xue -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 8a792b034d4b315251fd842bb4c73a133aa1368f) -Signed-off-by: Miroslav Rezanina ---- - configure | 16 ++++++++++++++++ - tools/virtiofsd/fuse_misc.h | 1 + - 2 files changed, 17 insertions(+) - -diff --git a/configure b/configure -index 7831618..5120c14 100755 ---- a/configure -+++ b/configure -@@ -5218,6 +5218,19 @@ if compile_prog "" "" ; then - strchrnul=yes - fi - -+######################################### -+# check if we have st_atim -+ -+st_atim=no -+cat > $TMPC << EOF -+#include -+#include -+int main(void) { return offsetof(struct stat, st_atim); } -+EOF -+if compile_prog "" "" ; then -+ st_atim=yes -+fi -+ - ########################################## - # check if trace backend exists - -@@ -6919,6 +6932,9 @@ fi - if test "$strchrnul" = "yes" ; then - echo "HAVE_STRCHRNUL=y" >> $config_host_mak - fi -+if test "$st_atim" = "yes" ; then -+ echo "HAVE_STRUCT_STAT_ST_ATIM=y" >> $config_host_mak -+fi - if test "$byteswap_h" = "yes" ; then - echo "CONFIG_BYTESWAP_H=y" >> $config_host_mak - fi -diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h -index f252baa..5c618ce 100644 
---- a/tools/virtiofsd/fuse_misc.h -+++ b/tools/virtiofsd/fuse_misc.h -@@ -7,6 +7,7 @@ - */ - - #include -+#include "config-host.h" - - /* - * Versioned symbols cannot be used in some cases because it --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch b/SOURCES/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch deleted file mode 100644 index 1bae1bf..0000000 --- a/SOURCES/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 7bc27a767bc8c78b1bca46bbe5e1d53dcd7173b4 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:18 +0100 -Subject: [PATCH 107/116] virtiofsd: use fuse_buf_writev to replace - fuse_buf_write for better performance -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-104-dgilbert@redhat.com> -Patchwork-id: 93558 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 103/112] virtiofsd: use fuse_buf_writev to replace fuse_buf_write for better performance -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: piaojun - -fuse_buf_writev() only handles the normal write in which src is buffer -and dest is fd. Specially if src buffer represents guest physical -address that can't be mapped by the daemon process, IO must be bounced -back to the VMM to do it by fuse_buf_copy(). - -Signed-off-by: Jun Piao -Suggested-by: Dr. David Alan Gilbert -Suggested-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit c465bba2c90a810f6e71e4f2646b1b4ee4b478de) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 20 ++++++++++++++++++-- - 1 file changed, 18 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 37befeb..27c1377 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -34,7 +34,6 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv) - return size; - } - --__attribute__((unused)) - static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, - struct fuse_bufvec *in_buf) - { -@@ -262,12 +261,29 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) - - ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) - { -- size_t copied = 0; -+ size_t copied = 0, i; - - if (dstv == srcv) { - return fuse_buf_size(dstv); - } - -+ /* -+ * use writev to improve bandwidth when all the -+ * src buffers already mapped by the daemon -+ * process -+ */ -+ for (i = 0; i < srcv->count; i++) { -+ if (srcv->buf[i].flags & FUSE_BUF_IS_FD) { -+ break; -+ } -+ } -+ if ((i == srcv->count) && (dstv->count == 1) && -+ (dstv->idx == 0) && -+ (dstv->buf[0].flags & FUSE_BUF_IS_FD)) { -+ dstv->buf[0].pos += dstv->off; -+ return fuse_buf_writev(&dstv->buf[0], srcv); -+ } -+ - for (;;) { - const struct fuse_buf *src = fuse_bufvec_current(srcv); - const struct fuse_buf *dst = fuse_bufvec_current(dstv); --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch b/SOURCES/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch deleted file mode 100644 index feffb5e..0000000 --- a/SOURCES/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 1724f54070d33d8070ba2d22c8fac87ea65814c1 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:04 +0100 -Subject: [PATCH 093/116] virtiofsd: use fuse_lowlevel_is_virtio() in - fuse_session_destroy() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-90-dgilbert@redhat.com> -Patchwork-id: 93540 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 089/112] virtiofsd: use fuse_lowlevel_is_virtio() in fuse_session_destroy() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -vu_socket_path is NULL when --fd=FDNUM was used. Use -fuse_lowlevel_is_virtio() instead. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 620e9d8d9cee6df7fe71168dea950dba0cc21a4a) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 70568d2..dab6a31 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2537,12 +2537,13 @@ void fuse_session_destroy(struct fuse_session *se) - close(se->fd); - } - -- if (se->vu_socket_path) { -+ if (fuse_lowlevel_is_virtio(se)) { - virtio_session_close(se); -- free(se->vu_socket_path); -- se->vu_socket_path = NULL; - } - -+ free(se->vu_socket_path); -+ se->vu_socket_path = NULL; -+ - free(se); - } - --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch b/SOURCES/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch deleted file mode 100644 index f250ed7..0000000 --- a/SOURCES/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch +++ /dev/null @@ -1,390 +0,0 @@ -From bce5070d1aada88154b811a08eec1586ab24fce5 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:26 +0100 -Subject: [PATCH 055/116] virtiofsd: use /proc/self/fd/ O_PATH file descriptor -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-52-dgilbert@redhat.com> -Patchwork-id: 93506 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 051/112] virtiofsd: use /proc/self/fd/ O_PATH file descriptor -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Sandboxing will remove /proc from the mount namespace so we can no -longer build string paths into "/proc/self/fd/...". - -Keep an O_PATH file descriptor so we can still re-open fds via -/proc/self/fd. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 9f59d175e2ca96f0b87f534dba69ea547dd35945) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 130 +++++++++++++++++++++++++++++++-------- - 1 file changed, 103 insertions(+), 27 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e3d65c3..e2e2211 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -110,6 +110,9 @@ struct lo_data { - struct lo_map ino_map; /* protected by lo->mutex */ - struct lo_map dirp_map; /* protected by lo->mutex */ - struct lo_map fd_map; /* protected by lo->mutex */ -+ -+ /* An O_PATH file descriptor to /proc/self/fd/ */ -+ int proc_self_fd; - }; - - static const struct fuse_opt lo_opts[] = { -@@ -379,9 +382,9 @@ static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, - int res; - - retry: -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); - -- res = readlink(procname, path, PATH_MAX); -+ res = readlinkat(lo->proc_self_fd, procname, path, PATH_MAX); 
- if (res < 0) { - fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__); - goto fail_noretry; -@@ -477,9 +480,9 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, - } - return res; - } -- sprintf(path, "/proc/self/fd/%i", inode->fd); -+ sprintf(path, "%i", inode->fd); - -- return utimensat(AT_FDCWD, path, tv, 0); -+ return utimensat(lo->proc_self_fd, path, tv, 0); - - fallback: - res = lo_parent_and_name(lo, inode, path, &parent); -@@ -535,8 +538,8 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - if (fi) { - res = fchmod(fd, attr->st_mode); - } else { -- sprintf(procname, "/proc/self/fd/%i", ifd); -- res = chmod(procname, attr->st_mode); -+ sprintf(procname, "%i", ifd); -+ res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0); - } - if (res == -1) { - goto out_err; -@@ -552,11 +555,23 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - } - } - if (valid & FUSE_SET_ATTR_SIZE) { -+ int truncfd; -+ - if (fi) { -- res = ftruncate(fd, attr->st_size); -+ truncfd = fd; - } else { -- sprintf(procname, "/proc/self/fd/%i", ifd); -- res = truncate(procname, attr->st_size); -+ sprintf(procname, "%i", ifd); -+ truncfd = openat(lo->proc_self_fd, procname, O_RDWR); -+ if (truncfd < 0) { -+ goto out_err; -+ } -+ } -+ -+ res = ftruncate(truncfd, attr->st_size); -+ if (!fi) { -+ saverr = errno; -+ close(truncfd); -+ errno = saverr; - } - if (res == -1) { - goto out_err; -@@ -874,9 +889,9 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, - return res; - } - -- sprintf(path, "/proc/self/fd/%i", inode->fd); -+ sprintf(path, "%i", inode->fd); - -- return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW); -+ return linkat(lo->proc_self_fd, path, dfd, name, AT_SYMLINK_FOLLOW); - - fallback: - res = lo_parent_and_name(lo, inode, path, &parent); -@@ -1404,8 +1419,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - fi->flags &= 
~O_APPEND; - } - -- sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); -- fd = open(buf, fi->flags & ~O_NOFOLLOW); -+ sprintf(buf, "%i", lo_fd(req, ino)); -+ fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); - if (fd == -1) { - return (void)fuse_reply_err(req, errno); - } -@@ -1458,7 +1473,6 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - struct fuse_file_info *fi) - { - int res; -- (void)ino; - int fd; - char *buf; - -@@ -1466,12 +1480,14 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - (void *)fi); - - if (!fi) { -- res = asprintf(&buf, "/proc/self/fd/%i", lo_fd(req, ino)); -+ struct lo_data *lo = lo_data(req); -+ -+ res = asprintf(&buf, "%i", lo_fd(req, ino)); - if (res == -1) { - return (void)fuse_reply_err(req, errno); - } - -- fd = open(buf, O_RDWR); -+ fd = openat(lo->proc_self_fd, buf, O_RDWR); - free(buf); - if (fd == -1) { - return (void)fuse_reply_err(req, errno); -@@ -1587,11 +1603,13 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - size_t size) - { -+ struct lo_data *lo = lo_data(req); - char *value = NULL; - char procname[64]; - struct lo_inode *inode; - ssize_t ret; - int saverr; -+ int fd = -1; - - inode = lo_inode(req, ino); - if (!inode) { -@@ -1616,7 +1634,11 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ goto out_err; -+ } - - if (size) { - value = malloc(size); -@@ -1624,7 +1646,7 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out_err; - } - -- ret = getxattr(procname, name, value, size); -+ ret = fgetxattr(fd, name, value, size); - if (ret == -1) { - goto out_err; - } -@@ -1635,7 +1657,7 @@ static void lo_getxattr(fuse_req_t req, 
fuse_ino_t ino, const char *name, - - fuse_reply_buf(req, value, ret); - } else { -- ret = getxattr(procname, name, NULL, 0); -+ ret = fgetxattr(fd, name, NULL, 0); - if (ret == -1) { - goto out_err; - } -@@ -1644,6 +1666,10 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - } - out_free: - free(value); -+ -+ if (fd >= 0) { -+ close(fd); -+ } - return; - - out_err: -@@ -1655,11 +1681,13 @@ out: - - static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - { -+ struct lo_data *lo = lo_data(req); - char *value = NULL; - char procname[64]; - struct lo_inode *inode; - ssize_t ret; - int saverr; -+ int fd = -1; - - inode = lo_inode(req, ino); - if (!inode) { -@@ -1683,7 +1711,11 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - goto out; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ goto out_err; -+ } - - if (size) { - value = malloc(size); -@@ -1691,7 +1723,7 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - goto out_err; - } - -- ret = listxattr(procname, value, size); -+ ret = flistxattr(fd, value, size); - if (ret == -1) { - goto out_err; - } -@@ -1702,7 +1734,7 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - - fuse_reply_buf(req, value, ret); - } else { -- ret = listxattr(procname, NULL, 0); -+ ret = flistxattr(fd, NULL, 0); - if (ret == -1) { - goto out_err; - } -@@ -1711,6 +1743,10 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - } - out_free: - free(value); -+ -+ if (fd >= 0) { -+ close(fd); -+ } - return; - - out_err: -@@ -1724,9 +1760,11 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - const char *value, size_t size, int flags) - { - char procname[64]; -+ struct lo_data *lo = lo_data(req); - struct lo_inode *inode; - ssize_t ret; - int saverr; -+ int fd = -1; - - 
inode = lo_inode(req, ino); - if (!inode) { -@@ -1751,21 +1789,31 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); -+ fd = openat(lo->proc_self_fd, procname, O_RDWR); -+ if (fd < 0) { -+ saverr = errno; -+ goto out; -+ } - -- ret = setxattr(procname, name, value, size, flags); -+ ret = fsetxattr(fd, name, value, size, flags); - saverr = ret == -1 ? errno : 0; - - out: -+ if (fd >= 0) { -+ close(fd); -+ } - fuse_reply_err(req, saverr); - } - - static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - { - char procname[64]; -+ struct lo_data *lo = lo_data(req); - struct lo_inode *inode; - ssize_t ret; - int saverr; -+ int fd = -1; - - inode = lo_inode(req, ino); - if (!inode) { -@@ -1789,12 +1837,20 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - goto out; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); -+ fd = openat(lo->proc_self_fd, procname, O_RDWR); -+ if (fd < 0) { -+ saverr = errno; -+ goto out; -+ } - -- ret = removexattr(procname, name); -+ ret = fremovexattr(fd, name); - saverr = ret == -1 ? 
errno : 0; - - out: -+ if (fd >= 0) { -+ close(fd); -+ } - fuse_reply_err(req, saverr); - } - -@@ -1887,12 +1943,25 @@ static void print_capabilities(void) - printf("}\n"); - } - -+static void setup_proc_self_fd(struct lo_data *lo) -+{ -+ lo->proc_self_fd = open("/proc/self/fd", O_PATH); -+ if (lo->proc_self_fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); -+ exit(1); -+ } -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); - struct fuse_session *se; - struct fuse_cmdline_opts opts; -- struct lo_data lo = { .debug = 0, .writeback = 0 }; -+ struct lo_data lo = { -+ .debug = 0, -+ .writeback = 0, -+ .proc_self_fd = -1, -+ }; - struct lo_map_elem *root_elem; - int ret = -1; - -@@ -2003,6 +2072,9 @@ int main(int argc, char *argv[]) - - fuse_daemonize(opts.foreground); - -+ /* Must be after daemonize to get the right /proc/self/fd */ -+ setup_proc_self_fd(&lo); -+ - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); - -@@ -2018,6 +2090,10 @@ err_out1: - lo_map_destroy(&lo.dirp_map); - lo_map_destroy(&lo.ino_map); - -+ if (lo.proc_self_fd >= 0) { -+ close(lo.proc_self_fd); -+ } -+ - if (lo.root.fd >= 0) { - close(lo.root.fd); - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch b/SOURCES/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch deleted file mode 100644 index d60a902..0000000 --- a/SOURCES/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch +++ /dev/null @@ -1,137 +0,0 @@ -From 6877a6c456178d6c1ca9a0ffaabaa7e51105b2ac Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:22 +0100 -Subject: [PATCH 051/116] virtiofsd: validate input buffer sizes in - do_write_buf() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-48-dgilbert@redhat.com> -Patchwork-id: 93501 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 047/112] virtiofsd: validate input buffer sizes in do_write_buf() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -There is a small change in behavior: if fuse_write_in->size doesn't -match the input buffer size then the request is failed. Previously -write requests with 1 fuse_buf element would truncate to -fuse_write_in->size. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 0ba8c3c6fce8fe949d59c1fd84d98d220ef9e759) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 49 +++++++++++++++++++++++++---------------- - 1 file changed, 30 insertions(+), 19 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 7e10995..611e8b0 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1003,8 +1003,8 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, -- struct fuse_bufvec *ibufv) -+static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter, struct fuse_bufvec *ibufv) - { - struct fuse_session *se = req->se; - struct fuse_bufvec *pbufv = ibufv; -@@ -1012,28 +1012,27 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, - .buf[0] = ibufv->buf[0], - .count = 1, - }; -- struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -+ struct fuse_write_in *arg; -+ size_t arg_size = sizeof(*arg); - struct fuse_file_info fi; - - memset(&fi, 0, sizeof(fi)); -+ -+ arg = fuse_mbuf_iter_advance(iter, arg_size); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ fi.lock_owner = 
arg->lock_owner; -+ fi.flags = arg->flags; - fi.fh = arg->fh; - fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; - - if (ibufv->count == 1) { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- if (!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)) { -- tmpbufv.buf[0].mem = PARAM(arg); -- } -- tmpbufv.buf[0].size -= -- sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); -- if (tmpbufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: do_write_buf: buffer size too small\n"); -- fuse_reply_err(req, EIO); -- return; -- } -- tmpbufv.buf[0].size = arg->size; -+ assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)); -+ tmpbufv.buf[0].mem = ((char *)arg) + arg_size; -+ tmpbufv.buf[0].size -= sizeof(struct fuse_in_header) + arg_size; - pbufv = &tmpbufv; - } else { - /* -@@ -1043,6 +1042,13 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, - ibufv->buf[0].size = 0; - } - -+ if (fuse_buf_size(pbufv) != arg->size) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: do_write_buf: buffer size doesn't match arg->size\n"); -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ - se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); - } - -@@ -2052,12 +2058,17 @@ void fuse_session_process_buf_int(struct fuse_session *se, - struct fuse_chan *ch) - { - const struct fuse_buf *buf = bufv->buf; -+ struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf); - struct fuse_in_header *in; - const void *inarg; - struct fuse_req *req; - int err; - -- in = buf->mem; -+ /* The first buffer must be a memory buffer */ -+ assert(!(buf->flags & FUSE_BUF_IS_FD)); -+ -+ in = fuse_mbuf_iter_advance(&iter, sizeof(*in)); -+ assert(in); /* caller guarantees the input buffer is large enough */ - - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, -@@ -2129,7 +2140,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, - - inarg = (void *)&in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) { -- do_write_buf(req, in->nodeid, inarg, bufv); -+ do_write_buf(req, 
in->nodeid, &iter, bufv); - } else { - fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); - } --- -1.8.3.1 - diff --git a/SOURCES/kvm-virtiofsd-validate-path-components.patch b/SOURCES/kvm-virtiofsd-validate-path-components.patch deleted file mode 100644 index b35aed7..0000000 --- a/SOURCES/kvm-virtiofsd-validate-path-components.patch +++ /dev/null @@ -1,164 +0,0 @@ -From 69ac47502848c37ca3ede00f432c0675d9eef42c Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:18 +0100 -Subject: [PATCH 047/116] virtiofsd: validate path components -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-44-dgilbert@redhat.com> -Patchwork-id: 93498 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 043/112] virtiofsd: validate path components -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Several FUSE requests contain single path components. A correct FUSE -client sends well-formed path components but there is currently no input -validation in case something went wrong or the client is malicious. - -Refuse ".", "..", and paths containing '/' when we expect a path -component. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 25dae28c58d7e706b5d5db99042c9db3cef2e657) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 59 ++++++++++++++++++++++++++++++++++++---- - 1 file changed, 53 insertions(+), 6 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index ac380ef..e375406 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -133,6 +133,21 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); - - static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); - -+static int is_dot_or_dotdot(const char *name) -+{ -+ return name[0] == '.' && -+ (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); -+} -+ -+/* Is `path` a single path component that is not "." or ".."? */ -+static int is_safe_path_component(const char *path) -+{ -+ if (strchr(path, '/')) { -+ return 0; -+ } -+ -+ return !is_dot_or_dotdot(path); -+} - - static struct lo_data *lo_data(fuse_req_t req) - { -@@ -681,6 +696,15 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - parent, name); - } - -+ /* -+ * Don't use is_safe_path_component(), allow "." and ".." for NFS export -+ * support. 
-+ */ -+ if (strchr(name, '/')) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - err = lo_do_lookup(req, parent, name, &e); - if (err) { - fuse_reply_err(req, err); -@@ -762,6 +786,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - struct fuse_entry_param e; - struct lo_cred old = {}; - -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - dir = lo_inode(req, parent); - if (!dir) { - fuse_reply_err(req, EBADF); -@@ -863,6 +892,11 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - struct fuse_entry_param e; - int saverr; - -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - inode = lo_inode(req, ino); - if (!inode) { - fuse_reply_err(req, EBADF); -@@ -904,6 +938,10 @@ out_err: - static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) - { - int res; -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); - -@@ -916,6 +954,11 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - { - int res; - -+ if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - if (flags) { - fuse_reply_err(req, EINVAL); - return; -@@ -930,6 +973,11 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - { - int res; - -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - res = unlinkat(lo_fd(req, parent), name, 0); - - fuse_reply_err(req, res == -1 ? errno : 0); -@@ -1093,12 +1141,6 @@ out_err: - fuse_reply_err(req, error); - } - --static int is_dot_or_dotdot(const char *name) --{ -- return name[0] == '.' && -- (name[1] == '\0' || (name[1] == '.' 
&& name[2] == '\0')); --} -- - static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, struct fuse_file_info *fi, int plus) - { -@@ -1248,6 +1290,11 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - parent, name); - } - -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - err = lo_change_cred(req, &old); - if (err) { - goto out; --- -1.8.3.1 - diff --git a/SOURCES/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch b/SOURCES/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch deleted file mode 100644 index 20add81..0000000 --- a/SOURCES/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 247987aa987b7332eb501e00c440079b9e8e1fe7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:52 +0100 -Subject: [PATCH 021/116] vitriofsd/passthrough_ll: fix fallocate() ifdefs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-18-dgilbert@redhat.com> -Patchwork-id: 93471 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 017/112] vitriofsd/passthrough_ll: fix fallocate() ifdefs -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Xiao Yang - -1) Use correct CONFIG_FALLOCATE macro to check if fallocate() is supported.(i.e configure - script sets CONFIG_FALLOCATE intead of HAVE_FALLOCATE if fallocate() is supported) -2) Replace HAVE_POSIX_FALLOCATE with CONFIG_POSIX_FALLOCATE. - -Signed-off-by: Xiao Yang -Signed-off-by: Dr. 
David Alan Gilbert - Merged from two of Xiao Yang's patches -(cherry picked from commit 9776457ca6f05d5900e27decb1dba2ffddf95a22) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 322a889..6c4da18 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -975,13 +975,13 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, - int err = EOPNOTSUPP; - (void)ino; - --#ifdef HAVE_FALLOCATE -+#ifdef CONFIG_FALLOCATE - err = fallocate(fi->fh, mode, offset, length); - if (err < 0) { - err = errno; - } - --#elif defined(HAVE_POSIX_FALLOCATE) -+#elif defined(CONFIG_POSIX_FALLOCATE) - if (mode) { - fuse_reply_err(req, EOPNOTSUPP); - return; --- -1.8.3.1 - diff --git a/SOURCES/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch b/SOURCES/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch new file mode 100644 index 0000000..56ecea7 --- /dev/null +++ b/SOURCES/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch @@ -0,0 +1,68 @@ +From 31530bf621dc28689142ffa83d025ec4a4f110c1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 11 Jan 2022 18:29:31 +0000 +Subject: [PATCH 2/2] x86: Add q35 RHEL 8.6.0 machine type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 99: x86: Add q35 RHEL 8.6.0 machine type +RH-Commit: [1/1] a694724b6fa972e312bb76b5569bc979d6c596ef +RH-Bugzilla: 2031035 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Cornelia Huck + +Add the new 8.6.0 machine type; note that while the -AV +notation has gone in the product naming, just keep the smbios +definitions the same for consistency. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/i386/pc_q35.c | 21 ++++++++++++++++++++- + 1 file changed, 20 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index f6e77bca0e..5559261d9e 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -646,6 +646,24 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + } + ++static void pc_q35_init_rhel860(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel860_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.6.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, ++ pc_q35_machine_rhel860_options); ++ ++ + static void pc_q35_init_rhel850(MachineState *machine) + { + pc_q35_init(machine); +@@ -654,8 +672,9 @@ static void pc_q35_init_rhel850(MachineState *machine) + static void pc_q35_machine_rhel850_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel860_options(m); + m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.5.0"; + compat_props_add(m->compat_props, hw_compat_rhel_8_5, +-- +2.27.0 + diff --git a/SOURCES/kvm-x86-cpu-Enable-AVX512_VP2INTERSECT-cpu-feature.patch b/SOURCES/kvm-x86-cpu-Enable-AVX512_VP2INTERSECT-cpu-feature.patch deleted file mode 100644 index dbcf2a7..0000000 --- a/SOURCES/kvm-x86-cpu-Enable-AVX512_VP2INTERSECT-cpu-feature.patch +++ /dev/null @@ -1,63 +0,0 @@ -From ad50e0e2d310277f06a9c512fe6e31da183ead6e Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Wed, 24 Feb 2021 11:30:34 -0500 -Subject: [PATCH 1/4] x86/cpu: Enable AVX512_VP2INTERSECT cpu feature - -RH-Author: Dr. David Alan Gilbert -Message-id: <20210224113037.15599-2-dgilbert@redhat.com> -Patchwork-id: 101203 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/4] x86/cpu: Enable AVX512_VP2INTERSECT cpu feature -Bugzilla: 1790620 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Peter Xu - -From: Cathy Zhang - -AVX512_VP2INTERSECT compute vector pair intersection to a pair -of mask registers, which is introduced with intel Tiger Lake, -defining as CPUID.(EAX=7,ECX=0):EDX[bit 08]. - -Refer to the following release spec: -https://software.intel.com/sites/default/files/managed/c5/15/\ -architecture-instruction-set-extensions-programming-reference.pdf - -Signed-off-by: Cathy Zhang -Message-Id: <1586760758-13638-1-git-send-email-cathy.zhang@intel.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 353f98c9ad52ff4b8cfe553c90be04f747a14c98) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 2 +- - target/i386/cpu.h | 2 ++ - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index ff39fc9905..67dab94aa5 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1078,7 +1078,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .feat_names = { - NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", - NULL, NULL, NULL, NULL, -- NULL, NULL, "md-clear", NULL, -+ "avx512-vp2intersect", NULL, "md-clear", NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL /* pconfig */, NULL, - NULL, NULL, NULL, NULL, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index f3da25cb8a..8e2e52ed31 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -770,6 +770,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; - #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) - /* AVX512 Multiply Accumulation Single Precision */ - #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) -+/* AVX512 Vector Pair Intersection to a Pair of Mask Registers */ -+#define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8) - /* Speculation Control */ - #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) - /* Single Thread Indirect Branch Predictors */ --- -2.27.0 - diff --git a/SOURCES/kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch b/SOURCES/kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch deleted file mode 100644 index 9ef6d04..0000000 --- a/SOURCES/kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch +++ /dev/null @@ -1,91 +0,0 @@ -From 655e723a5190206302f6cc4f2e794563b8e1c226 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 24 Feb 2021 11:30:36 -0500 -Subject: [PATCH 3/4] x86/cpu: Populate SVM CPUID feature bits - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20210224113037.15599-4-dgilbert@redhat.com> -Patchwork-id: 101200 -O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 3/4] x86/cpu: Populate SVM CPUID feature bits -Bugzilla: 1790620 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Peter Xu - -From: Wei Huang - -Newer AMD CPUs will add CPUID_0x8000000A_EDX[28] bit, which indicates -that SVM instructions (VMRUN/VMSAVE/VMLOAD) will trigger #VMEXIT before -CPU checking their EAX against reserved memory regions. This change will -allow the hypervisor to avoid intercepting #GP and emulating SVM -instructions. KVM turns on this CPUID bit for nested VMs. In order to -support it, let us populate this bit, along with other SVM feature bits, -in FEAT_SVM. - -Signed-off-by: Wei Huang -Message-Id: <20210126202456.589932-1-wei.huang2@amd.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 5447089c2b3b084b51670af36fc86ee3979e04be) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 6 +++--- - target/i386/cpu.h | 24 ++++++++++++++---------- - 2 files changed, 17 insertions(+), 13 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index f6a9ed84b3..7227c803c3 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1026,11 +1026,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "npt", "lbrv", "svm-lock", "nrip-save", - "tsc-scale", "vmcb-clean", "flushbyasid", "decodeassists", - NULL, NULL, "pause-filter", NULL, -- "pfthreshold", NULL, NULL, NULL, -- NULL, NULL, NULL, NULL, -- NULL, NULL, NULL, NULL, -+ "pfthreshold", "avic", NULL, "v-vmsave-vmload", -+ "vgif", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -+ "svme-addr-chk", NULL, NULL, NULL, - }, - .cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, - .tcg_features = TCG_SVM_FEATURES, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index f5a4efcec6..e1b67910c2 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -667,16 +667,20 @@ 
typedef uint64_t FeatureWordArray[FEATURE_WORDS]; - #define CPUID_EXT3_PERFCORE (1U << 23) - #define CPUID_EXT3_PERFNB (1U << 24) - --#define CPUID_SVM_NPT (1U << 0) --#define CPUID_SVM_LBRV (1U << 1) --#define CPUID_SVM_SVMLOCK (1U << 2) --#define CPUID_SVM_NRIPSAVE (1U << 3) --#define CPUID_SVM_TSCSCALE (1U << 4) --#define CPUID_SVM_VMCBCLEAN (1U << 5) --#define CPUID_SVM_FLUSHASID (1U << 6) --#define CPUID_SVM_DECODEASSIST (1U << 7) --#define CPUID_SVM_PAUSEFILTER (1U << 10) --#define CPUID_SVM_PFTHRESHOLD (1U << 12) -+#define CPUID_SVM_NPT (1U << 0) -+#define CPUID_SVM_LBRV (1U << 1) -+#define CPUID_SVM_SVMLOCK (1U << 2) -+#define CPUID_SVM_NRIPSAVE (1U << 3) -+#define CPUID_SVM_TSCSCALE (1U << 4) -+#define CPUID_SVM_VMCBCLEAN (1U << 5) -+#define CPUID_SVM_FLUSHASID (1U << 6) -+#define CPUID_SVM_DECODEASSIST (1U << 7) -+#define CPUID_SVM_PAUSEFILTER (1U << 10) -+#define CPUID_SVM_PFTHRESHOLD (1U << 12) -+#define CPUID_SVM_AVIC (1U << 13) -+#define CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15) -+#define CPUID_SVM_VGIF (1U << 16) -+#define CPUID_SVM_SVME_ADDR_CHK (1U << 28) - - /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ - #define CPUID_7_0_EBX_FSGSBASE (1U << 0) --- -2.27.0 - diff --git a/SOURCES/kvm-xhci-fix-valid.max_access_size-to-access-address-reg.patch b/SOURCES/kvm-xhci-fix-valid.max_access_size-to-access-address-reg.patch deleted file mode 100644 index aabe041..0000000 --- a/SOURCES/kvm-xhci-fix-valid.max_access_size-to-access-address-reg.patch +++ /dev/null @@ -1,76 +0,0 @@ -From f38f51d422e82d1241b678960dd6a033ffa398da Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 21 Apr 2021 22:30:05 -0400 -Subject: [PATCH 6/7] xhci: fix valid.max_access_size to access address - registers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210421223006.19650-6-jmaloy@redhat.com> -Patchwork-id: 101483 -O-Subject: [RHEL-8.5.0 qemu-kvm PATCH v2 5/6] xhci: fix valid.max_access_size to 
access address registers -Bugzilla: 1842478 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laszlo Ersek - -From: Laurent Vivier - -QEMU XHCI advertises AC64 (64-bit addressing) but doesn't allow -64-bit mode access in "runtime" and "operational" MemoryRegionOps. - -Set the max_access_size based on sizeof(dma_addr_t) as AC64 is set. - -XHCI specs: -"If the xHC supports 64-bit addressing (AC64 = ‘1’), then software -should write 64-bit registers using only Qword accesses. If a -system is incapable of issuing Qword accesses, then writes to the -64-bit address fields shall be performed using 2 Dword accesses; -low Dword-first, high-Dword second. If the xHC supports 32-bit -addressing (AC64 = ‘0’), then the high Dword of registers containing -64-bit address fields are unused and software should write addresses -using only Dword accesses" - -The problem has been detected with SLOF, as linux kernel always accesses -registers using 32-bit access even if AC64 is set and revealed by -5d971f9e6725 ("memory: Revert "memory: accept mismatching sizes in memory_region_access_valid"") - -Suggested-by: Alexey Kardashevskiy -Signed-off-by: Laurent Vivier -Message-id: 20200721083322.90651-1-lvivier@redhat.com -Signed-off-by: Gerd Hoffmann - -(cherry picked from commit 8e67fda2dd6202ccec093fda561107ba14830a17) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/usb/hcd-xhci.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 646c78cde9..ab449bb003 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -3183,7 +3183,7 @@ static const MemoryRegionOps xhci_oper_ops = { - .read = xhci_oper_read, - .write = xhci_oper_write, - .valid.min_access_size = 4, -- .valid.max_access_size = 4, -+ .valid.max_access_size = sizeof(dma_addr_t), - .endianness = DEVICE_LITTLE_ENDIAN, - }; - -@@ -3199,7 +3199,7 @@ static const MemoryRegionOps xhci_runtime_ops = { - .read = xhci_runtime_read, - .write = xhci_runtime_write, - .valid.min_access_size = 4, -- .valid.max_access_size = 4, -+ .valid.max_access_size = sizeof(dma_addr_t), - .endianness = DEVICE_LITTLE_ENDIAN, - }; - --- -2.27.0 - diff --git a/SOURCES/kvm-xhci-recheck-slot-status.patch b/SOURCES/kvm-xhci-recheck-slot-status.patch deleted file mode 100644 index 8bcbc2c..0000000 --- a/SOURCES/kvm-xhci-recheck-slot-status.patch +++ /dev/null @@ -1,77 +0,0 @@ -From ab87c0ed2a8f0a626099261a3028bc34cfac3929 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 14 Jan 2020 20:23:31 +0000 -Subject: [PATCH 5/5] xhci: recheck slot status -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200114202331.51831-3-dgilbert@redhat.com> -Patchwork-id: 93345 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] xhci: recheck slot status -Bugzilla: 1790844 -RH-Acked-by: Peter Xu -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Gerd Hoffmann - -From: Gerd Hoffmann - -Factor out slot status check into a helper function. Add an additional -check after completing transfers. This is needed in case a guest -queues multiple transfers in a row and a device unplug happens while -qemu processes them. 
- -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1786413 -Signed-off-by: Gerd Hoffmann -Reviewed-by: Philippe Mathieu-Daudé -Message-id: 20200107083606.12393-1-kraxel@redhat.com -(cherry picked from commit 236846a019c4f7aa3111026fc9a1fe09684c8978) -Signed-off-by: Danilo C. L. de Paula ---- - hw/usb/hcd-xhci.c | 15 ++++++++++++--- - 1 file changed, 12 insertions(+), 3 deletions(-) - -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index d2b9744..646c78c 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -1861,6 +1861,13 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, - xhci_kick_epctx(epctx, streamid); - } - -+static bool xhci_slot_ok(XHCIState *xhci, int slotid) -+{ -+ return (xhci->slots[slotid - 1].uport && -+ xhci->slots[slotid - 1].uport->dev && -+ xhci->slots[slotid - 1].uport->dev->attached); -+} -+ - static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) - { - XHCIState *xhci = epctx->xhci; -@@ -1878,9 +1885,7 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) - - /* If the device has been detached, but the guest has not noticed this - yet the 2 above checks will succeed, but we must NOT continue */ -- if (!xhci->slots[epctx->slotid - 1].uport || -- !xhci->slots[epctx->slotid - 1].uport->dev || -- !xhci->slots[epctx->slotid - 1].uport->dev->attached) { -+ if (!xhci_slot_ok(xhci, epctx->slotid)) { - return; - } - -@@ -1987,6 +1992,10 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) - } else { - xhci_fire_transfer(xhci, xfer, epctx); - } -+ if (!xhci_slot_ok(xhci, epctx->slotid)) { -+ /* surprise removal -> stop processing */ -+ break; -+ } - if (xfer->complete) { - /* update ring dequeue ptr */ - xhci_set_ep_state(xhci, epctx, stctx, epctx->state); --- -1.8.3.1 - diff --git a/SOURCES/kvm-xics-Don-t-deassert-outputs.patch b/SOURCES/kvm-xics-Don-t-deassert-outputs.patch deleted file mode 100644 index 08ed724..0000000 --- 
a/SOURCES/kvm-xics-Don-t-deassert-outputs.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 99b6ee4b7f63ea49e5b73f61bbf68f67252f27da Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Tue, 21 Jan 2020 05:16:12 +0000 -Subject: [PATCH 02/15] xics: Don't deassert outputs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20200121051613.388295-3-dgibson@redhat.com> -Patchwork-id: 93430 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 2/3] xics: Don't deassert outputs -Bugzilla: 1776638 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: Greg Kurz - -The correct way to do this is to deassert the input pins on the CPU side. -This is the case since a previous change. - -Signed-off-by: Greg Kurz -Message-Id: <157548862298.3650476.1228720391270249433.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit 4febcdd88f08422a66a1aa0dc55e1472abed3c4b) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/intc/xics.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/hw/intc/xics.c b/hw/intc/xics.c -index e7ac9ba..72c5dca 100644 ---- a/hw/intc/xics.c -+++ b/hw/intc/xics.c -@@ -289,9 +289,6 @@ void icp_reset(ICPState *icp) - icp->pending_priority = 0xff; - icp->mfrr = 0xff; - -- /* Make all outputs are deasserted */ -- qemu_set_irq(icp->output, 0); -- - if (kvm_irqchip_in_kernel()) { - Error *local_err = NULL; - --- -1.8.3.1 - diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index 22e8f20..e12e7e7 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -1,14 +1,24 @@ %global SLOF_gittagdate 20191022 + %global SLOF_gittagcommit 899d9883 %global have_usbredir 1 %global have_spice 1 %global have_opengl 1 -%global have_fdt 0 +%global have_fdt 1 %global have_gluster 1 %global have_kvm_setup 0 %global have_memlock_limits 0 + + +# Release candidate version tracking +# global rcver rc4 +%if 0%{?rcver:1} +%global rcrel .%{rcver} +%global rcstr -%{rcver} +%endif + %ifnarch %{ix86} x86_64 %global have_usbredir 0 %endif @@ -31,7 +41,6 @@ %endif %ifarch %{power64} %global kvm_target ppc64 - %global have_fdt 1 %global have_kvm_setup 1 %global have_memlock_limits 1 %endif @@ -41,20 +50,27 @@ %endif %ifarch ppc %global kvm_target ppc - %global have_fdt 1 %endif %ifarch aarch64 %global kvm_target aarch64 - %global have_fdt 1 %endif #Versions of various parts: %global requires_all_modules \ +%if %{have_spice} \ +Requires: %{name}-ui-spice = %{epoch}:%{version}-%{release} \ +%endif \ +%if %{have_opengl} \ +Requires: %{name}-ui-opengl = %{epoch}:%{version}-%{release} \ +%endif \ Requires: %{name}-block-curl = %{epoch}:%{version}-%{release} \ %if %{have_gluster} \ Requires: %{name}-block-gluster = %{epoch}:%{version}-%{release} \ %endif \ +%if %{have_usbredir} \ +Requires: %{name}-hw-usbredir = %{epoch}:%{version}-%{release} \ +%endif \ Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-rbd = 
%{epoch}:%{version}-%{release} \ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} @@ -66,8 +82,8 @@ Obsoletes: %1-rhev <= %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 4.2.0 -Release: 58%{?dist} +Version: 6.2.0 +Release: 8%{?rcrel}%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -76,7 +92,7 @@ URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -Source0: http://wiki.qemu.org/download/qemu-4.2.0.tar.xz +Source0: http://wiki.qemu.org/download/qemu-6.2.0.tar.xz # KSM control scripts Source4: ksm.service @@ -104,6 +120,7 @@ Source35: udev-kvm-check.c Source36: README.tests +Patch0001: 0001-redhat-Adding-slirp-to-the-exploded-tree.patch Patch0005: 0005-Initial-redhat-build.patch Patch0006: 0006-Enable-disable-devices-for-RHEL.patch Patch0007: 0007-Machine-type-related-general-changes.patch @@ -115,1140 +132,81 @@ Patch0012: 0012-Enable-make-check.patch Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch Patch0014: 0014-Add-support-statement-to-help-output.patch Patch0015: 0015-globally-limit-the-maximum-number-of-CPUs.patch -Patch0016: 0016-Add-support-for-simpletrace.patch -Patch0017: 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch -Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch -Patch0020: 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0021: 0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch -# For bz#1741345 - Remove the "cpu64-rhel6" CPU from qemu-kvm -Patch22: kvm-i386-Remove-cpu64-rhel6-CPU-model.patch -# For bz#1772774 - qemu-kvm core dump during migration+reboot ( Assertion `mem->dirty_bmap' failed ) -Patch23: kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch -# For bz#1733893 - Boot a guest with "-prom-env 'auto-boot?=false'", 
SLOF failed to enter the boot entry after input "boot" followed by "0 > " on VNC -Patch24: kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch -# For bz#1782678 - qemu core dump after hot-unplugging the XXV710/XL710 PF -Patch25: kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch -# For bz#1789301 - virtio-blk/scsi: fix notification suppression during AioContext polling -Patch26: kvm-virtio-don-t-enable-notifications-during-polling.patch -# For bz#1790844 - USB related fixes -Patch27: kvm-usbredir-Prevent-recursion-in-usbredir_write.patch -# For bz#1790844 - USB related fixes -Patch28: kvm-xhci-recheck-slot-status.patch -# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] -Patch29: kvm-tcp_emu-Fix-oob-access.patch -# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] -Patch30: kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch -# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] -Patch31: kvm-slirp-use-correct-size-while-emulating-commands.patch -# For bz#1559846 - Nested KVM: limit VMX features according to CPU models - Fast Train -Patch32: kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch -# For bz#1725084 - aarch64: support dumping SVE registers -Patch33: kvm-target-arm-arch_dump-Add-SVE-notes.patch -# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic -Patch34: kvm-vhost-Add-names-to-section-rounded-warning.patch -# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic -Patch35: kvm-vhost-Only-align-sections-for-vhost-user.patch -# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic -Patch36: kvm-vhost-coding-style-fix.patch -# For bz#1694164 - 
virtio-fs: host<->guest shared file system (qemu) -Patch37: kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch38: kvm-vhost-user-fs-remove-vhostfd-property.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch39: kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch40: kvm-virtiofsd-Pull-in-upstream-headers.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch41: kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch42: kvm-virtiofsd-Add-auxiliary-.c-s.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch43: kvm-virtiofsd-Add-fuse_lowlevel.c.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch44: kvm-virtiofsd-Add-passthrough_ll.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch45: kvm-virtiofsd-Trim-down-imported-files.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch46: kvm-virtiofsd-Format-imported-files-to-qemu-style.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch47: kvm-virtiofsd-remove-mountpoint-dummy-argument.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch48: kvm-virtiofsd-remove-unused-notify-reply-support.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch49: kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch50: kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch51: kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system 
(qemu) -Patch52: kvm-virtiofsd-Trim-out-compatibility-code.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch53: kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch54: kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch55: kvm-virtiofsd-Add-options-for-virtio.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch56: kvm-virtiofsd-add-o-source-PATH-to-help-output.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch57: kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch58: kvm-virtiofsd-Start-wiring-up-vhost-user.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch59: kvm-virtiofsd-Add-main-virtio-loop.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch60: kvm-virtiofsd-get-set-features-callbacks.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch61: kvm-virtiofsd-Start-queue-threads.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch62: kvm-virtiofsd-Poll-kick_fd-for-queue.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch63: kvm-virtiofsd-Start-reading-commands-from-queue.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch64: kvm-virtiofsd-Send-replies-to-messages.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch65: kvm-virtiofsd-Keep-track-of-replies.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch66: kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch67: 
kvm-virtiofsd-Fast-path-for-virtio-read.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch68: kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch69: kvm-virtiofsd-make-f-foreground-the-default.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch70: kvm-virtiofsd-add-vhost-user.json-file.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch71: kvm-virtiofsd-add-print-capabilities-option.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch72: kvm-virtiofs-Add-maintainers-entry.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch73: kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch74: kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch75: kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch76: kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch77: kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch78: kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch79: kvm-virtiofsd-validate-path-components.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch80: kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch81: kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch -# For bz#1694164 - virtio-fs: host<->guest shared file 
system (qemu) -Patch82: kvm-virtiofsd-add-fuse_mbuf_iter-API.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch83: kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch84: kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch85: kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch86: kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch87: kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch88: kvm-virtiofsd-sandbox-mount-namespace.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch89: kvm-virtiofsd-move-to-an-empty-network-namespace.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch90: kvm-virtiofsd-move-to-a-new-pid-namespace.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch91: kvm-virtiofsd-add-seccomp-whitelist.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch92: kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch93: kvm-virtiofsd-cap-ng-helpers.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch94: kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch95: kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch96: kvm-virtiofsd-fix-libfuse-information-leaks.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch97: 
kvm-virtiofsd-add-syslog-command-line-option.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch98: kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch99: kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch100: kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch101: kvm-virtiofsd-Handle-reinit.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch102: kvm-virtiofsd-Handle-hard-reboot.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch103: kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch104: kvm-vhost-user-Print-unexpected-slave-message-types.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch105: kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch106: kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch107: kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch108: kvm-virtiofsd-passthrough_ll-control-readdirplus.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch109: kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch110: kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch111: kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch -# For bz#1694164 
- virtio-fs: host<->guest shared file system (qemu) -Patch112: kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch113: kvm-virtiofsd-passthrough_ll-use-hashtable.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch114: kvm-virtiofsd-Clean-up-inodes-on-destroy.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch115: kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch116: kvm-virtiofsd-fix-error-handling-in-main.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch117: kvm-virtiofsd-cleanup-allocated-resource-in-se.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch118: kvm-virtiofsd-fix-memory-leak-on-lo.source.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch119: kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch120: kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch121: kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch122: kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch123: kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch124: kvm-virtiofsd-Support-remote-posix-locks.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch125: kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch126: 
kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch127: kvm-virtiofsd-make-lo_release-atomic.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch128: kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch129: kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch130: kvm-libvhost-user-Fix-some-memtable-remap-cases.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch131: kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch132: kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch133: kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch134: kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch135: kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch136: kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch137: kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch138: kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch139: kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch140: 
kvm-virtiofsd-process-requests-in-a-thread-pool.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch141: kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch142: kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch143: kvm-virtiofsd-add-thread-pool-size-NUM-option.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch144: kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch145: kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch146: kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch147: kvm-virtiofsd-add-some-options-to-the-help-message.patch -# For bz#1776638 - Guest failed to boot up after system_reset 20 times -Patch148: kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch -# For bz#1776638 - Guest failed to boot up after system_reset 20 times -Patch149: kvm-xics-Don-t-deassert-outputs.patch -# For bz#1776638 - Guest failed to boot up after system_reset 20 times -Patch150: kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch -# For bz#1787395 - qemu-trace-stap list : TypeError: startswith first arg must be bytes or a tuple of bytes, not str -Patch151: kvm-trace-update-qemu-trace-stap-to-Python-3.patch -# For bz#1794503 - CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-av-8.2.0] -Patch153: kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch -# For bz#1787444 - Broken postcopy migration with vTPM device -Patch154: kvm-tpm-ppi-page-align-PPI-RAM.patch -# For bz#1647366 - aarch64: Add support for the 
kvm-no-adjvtime ARM CPU feature -Patch155: kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch -# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature -Patch156: kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch -# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature -Patch157: kvm-tests-arm-cpu-features-Check-feature-default-values.patch -# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature -Patch158: kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch -# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature -Patch159: kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch -# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs -Patch160: kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch -# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs -Patch161: kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch -# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs -Patch162: kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch -# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) -# For bz#1787291 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) [rhel-8.1.0.z] -# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) -# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) -Patch163: kvm-i386-Resolve-CPU-models-to-v1-by-default.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch164: 
kvm-iotests-Support-job-complete-in-run_job.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch165: kvm-iotests-Create-VM.blockdev_create.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch166: kvm-block-Activate-recursively-even-for-already-active-n.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch167: kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch168: kvm-iotests-Test-external-snapshot-with-VM-state.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch169: kvm-iotests.py-Let-wait_migration-wait-even-more.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch170: kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when 
add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch171: kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch172: kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch173: kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For 
bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch174: kvm-backup-top-Begin-drain-earlier.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch175: kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch176: kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch -# For bz#1745606 - Qemu hang 
when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch177: kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch178: kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch -# For bz#1801320 - aarch64: backport query-cpu-model-expansion and adjvtime document fixes -Patch179: kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch -# For bz#1801320 - aarch64: backport query-cpu-model-expansion and adjvtime document fixes -Patch180: kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch -# For bz#1796240 - Enable hw accelerated cache-count-flush by default for POWER9 DD2.3 cpus -Patch181: kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch -# For bz#1798994 
- CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0] -Patch182: kvm-util-add-slirp_fmt-helpers.patch -# For bz#1798994 - CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0] -Patch183: kvm-tcp_emu-fix-unsafe-snprintf-usages.patch -# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device -Patch184: kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch -# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device -Patch185: kvm-virtio-make-virtio_delete_queue-idempotent.patch -# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device -Patch186: kvm-virtio-reset-region-cache-when-on-queue-deletion.patch -# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device -Patch187: kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch -# For bz#1805334 - vhost-user/50-qemu-gpu.json is not valid JSON -Patch188: kvm-vhost-user-gpu-Drop-trailing-json-comma.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch189: kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch190: kvm-target-i386-add-a-ucode-rev-property.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch191: kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch192: kvm-target-i386-fix-TCG-UCODE_REV-access.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch193: kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch -# For bz#1791648 - [RFE] 
Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch194: kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch -# For bz#1703907 - [upstream]QEMU coredump when converting to qcow2: external data file images on block devices with copy_offloading -Patch195: kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch -# For bz#1794692 - Mirror block job stops making progress -Patch196: kvm-mirror-Store-MirrorOp.co-for-debuggability.patch -# For bz#1794692 - Mirror block job stops making progress -Patch197: kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch -# For bz#1782529 - Windows Update Enablement with default smbios strings in qemu -Patch198: kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch199: kvm-migration-multifd-clean-pages-after-filling-packet.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch200: kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch201: kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch202: kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch203: kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch204: 
kvm-qemu-file-Don-t-do-IO-after-shutdown.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch205: kvm-migration-Don-t-send-data-if-we-have-stopped.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch206: kvm-migration-Create-migration_is_running.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch207: kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch208: kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch -# For bz#1797064 - virtiofsd: Fixes -Patch209: kvm-virtiofsd-Remove-fuse_req_getgroups.patch -# For bz#1797064 - virtiofsd: Fixes -Patch210: kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch -# For bz#1797064 - virtiofsd: Fixes -Patch211: kvm-virtiofsd-load_capng-missing-unlock.patch -# For bz#1797064 - virtiofsd: Fixes -Patch212: kvm-virtiofsd-do_read-missing-NULL-check.patch -# For bz#1797064 - virtiofsd: Fixes -Patch213: kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch -# For bz#1797064 - virtiofsd: Fixes -Patch214: kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch -# For bz#1797064 - virtiofsd: Fixes -Patch215: kvm-virtiofsd-Fix-xattr-operations.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch216: kvm-block-nbd-Fix-hang-in-.bdrv_close.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch217: kvm-block-Generic-file-creation-fallback.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch218: 
kvm-file-posix-Drop-hdev_co_create_opts.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch219: kvm-iscsi-Drop-iscsi_co_create_opts.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch220: kvm-iotests-Add-test-for-image-creation-fallback.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch221: kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch222: kvm-iotests-Use-complete_and_wait-in-155.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch223: kvm-block-Introduce-bdrv_reopen_commit_post-step.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch224: kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch225: kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch226: kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch227: kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for 
shallow blockdev-mirror -Patch228: kvm-block-Make-bdrv_get_cumulative_perm-public.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch229: kvm-block-Relax-restrictions-for-blockdev-snapshot.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch230: kvm-iotests-Fix-run_job-with-use_log-False.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch231: kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch232: kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch233: kvm-iotests-Add-iothread-cases-to-155.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch234: kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch -# For bz#1809380 - guest hang during reboot process after migration from RHEl7.8 to RHEL8.2.0. 
-Patch235: kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch -# For bz#1814336 - [POWER9] QEMU migration-test triggers a kernel warning -Patch236: kvm-migration-Rate-limit-inside-host-pages.patch -# For bz#1811670 - Unneeded qemu-guest-agent dependency on pixman -Patch237: kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch -# For bz#1816007 - qemu-img convert failed to convert with block device as target -Patch238: kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch -# For bz#1816007 - qemu-img convert failed to convert with block device as target -Patch239: kvm-block-trickle-down-the-fallback-image-creation-funct.patch -# For bz#1794692 - Mirror block job stops making progress -Patch240: kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch -# For bz#1794692 - Mirror block job stops making progress -Patch241: kvm-mirror-Wait-only-for-in-flight-operations.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch242: kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch243: kvm-replication-assert-we-own-context-before-job_cancel_.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch244: kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch245: kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch246: kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch247: kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch -# For bz#1822682 - QEMU-4.2 fails to start a VM on Azure -Patch248: kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch -# For bz#1790899 - [RFE] QEMU devices should have the 
option to enable/disable hotplug/unplug -Patch249: kvm-pcie_root_port-Add-hotplug-disabling-option.patch -# For bz#1816793 - 'edid' compat handling missing for virtio-gpu-ccw -Patch250: kvm-compat-disable-edid-for-virtio-gpu-ccw.patch -# For bz#1820531 - qmp command query-pci get wrong result after hotplug device under hotplug=off controller -Patch251: kvm-hw-pci-pcie-Forbid-hot-plug-if-it-s-disabled-on-the-.patch -# For bz#1820531 - qmp command query-pci get wrong result after hotplug device under hotplug=off controller -Patch252: kvm-hw-pci-pcie-Replace-PCI_DEVICE-casts-with-existing-v.patch -# For bz#1817445 - CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8] -Patch253: kvm-tools-virtiofsd-passthrough_ll-Fix-double-close.patch -# For bz#1817445 - CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8] -Patch254: kvm-virtiofsd-add-rlimit-nofile-NUM-option.patch -# For bz#1817445 - CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8] -Patch255: kvm-virtiofsd-stay-below-fs.file-max-sysctl-value-CVE-20.patch -# For bz#1817445 - CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8] -Patch256: kvm-virtiofsd-jail-lo-proc_self_fd.patch -# For bz#1817445 - CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8] -Patch257: kvm-virtiofsd-Show-submounts.patch -# For bz#1817445 - CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8] -Patch258: kvm-virtiofsd-only-retain-file-system-capabilities.patch -# For bz#1817445 - CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8] -Patch259: kvm-virtiofsd-drop-all-capabilities-in-the-wait-parent-p.patch -# For 
bz#1775462 - Creating luks-inside-qcow2 images with cluster_size=2k/4k will get a corrupted image -Patch260: kvm-block-always-fill-entire-LUKS-header-space-with-zero.patch -# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train -Patch261: kvm-numa-remove-not-needed-check.patch -# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train -Patch262: kvm-numa-properly-check-if-numa-is-supported.patch -# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train -Patch263: kvm-numa-Extend-CLI-to-provide-initiator-information-for.patch -# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train -Patch264: kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch -# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train -Patch265: kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch -# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train -Patch266: kvm-hmat-acpi-Build-Memory-Proximity-Domain-Attributes-S.patch -# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train -Patch267: kvm-hmat-acpi-Build-System-Locality-Latency-and-Bandwidt.patch -# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train -Patch268: kvm-hmat-acpi-Build-Memory-Side-Cache-Information-Struct.patch -# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train -Patch269: kvm-tests-numa-Add-case-for-QMP-build-HMAT.patch -# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train -Patch270: kvm-tests-bios-tables-test-add-test-cases-for-ACPI-HMAT.patch -# For bz#1600217 - [Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train -Patch271: kvm-ACPI-add-expected-files-for-HMAT-tests-acpihmat.patch -# For bz#1813940 - CVE-2020-10702 virt:8.1/qemu-kvm: qemu: weak signature generation in Pointer Authentication support for ARM [rhel-av-8] -Patch272: 
kvm-target-arm-Fix-PAuth-sbox-functions.patch -# For bz#1749737 - CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8] -Patch273: kvm-Don-t-leak-memory-when-reallocation-fails.patch -# For bz#1749737 - CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8] -Patch274: kvm-Replace-remaining-malloc-free-user-with-glib.patch -# For bz#1839030 - RFE: enable the "memfd" memory backend -Patch275: kvm-Revert-RHEL-disable-hostmem-memfd.patch -# For bz#1827630 - volume creation leaving uncleaned stuff behind on error (vol-clone/libvirt/qemu-kvm) -Patch276: kvm-block-introducing-bdrv_co_delete_file-interface.patch -# For bz#1827630 - volume creation leaving uncleaned stuff behind on error (vol-clone/libvirt/qemu-kvm) -Patch277: kvm-block.c-adding-bdrv_co_delete_file.patch -# For bz#1827630 - volume creation leaving uncleaned stuff behind on error (vol-clone/libvirt/qemu-kvm) -Patch278: kvm-crypto.c-cleanup-created-file-when-block_crypto_co_c.patch -# For bz#1513681 - [Intel 8.2.1 Feat] qemu-kvm PT VMX -- Fast Train -Patch279: kvm-target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch -# For bz#1841038 - qemu-img: /var/tmp/v2vovl56bced.qcow2: CURL: Error opening file: Server does not support 'range' (byte ranges) with HTTP/2 server in VMware ESXi 7 -Patch280: kvm-block-curl-HTTP-header-fields-allow-whitespace-aroun.patch -# For bz#1841038 - qemu-img: /var/tmp/v2vovl56bced.qcow2: CURL: Error opening file: Server does not support 'range' (byte ranges) with HTTP/2 server in VMware ESXi 7 -Patch281: kvm-block-curl-HTTP-header-field-names-are-case-insensit.patch -# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert -# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 -Patch282: kvm-MAINTAINERS-fix-qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch -# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert -# For bz#1779904 - RFE: ability to estimate bitmap space utilization for 
qcow2 -Patch283: kvm-iotests-Let-_make_test_img-parse-its-parameters.patch -# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert -# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 -Patch284: kvm-qemu_img-add-cvtnum_full-to-print-error-reports.patch -# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert -# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 -Patch285: kvm-block-Make-it-easier-to-learn-which-BDS-support-bitm.patch -# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert -# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 -Patch286: kvm-blockdev-Promote-several-bitmap-functions-to-non-sta.patch -# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert -# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 -Patch287: kvm-blockdev-Split-off-basic-bitmap-operations-for-qemu-.patch -# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert -# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 -Patch288: kvm-qemu-img-Add-bitmap-sub-command.patch -# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert -# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 -Patch289: kvm-iotests-Fix-test-178.patch -# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert -# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 -Patch290: kvm-qcow2-Expose-bitmaps-size-during-measure.patch -# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert -# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 -Patch291: kvm-qemu-img-Factor-out-code-for-merging-bitmaps.patch -# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert -# For bz#1779904 - RFE: ability to estimate bitmap space utilization for qcow2 -Patch292: kvm-qemu-img-Add-convert-bitmaps-option.patch -# For bz#1779893 - RFE: Copy bitmaps with qemu-img convert -# For bz#1779904 - RFE: ability 
to estimate bitmap space utilization for qcow2 -Patch293: kvm-iotests-Add-test-291-to-for-qemu-img-bitmap-coverage.patch -# For bz#1778593 - Qemu coredump when backup to a existing small size image -Patch294: kvm-iotests-Add-more-skip_if_unsupported-statements-to-t.patch -# For bz#1778593 - Qemu coredump when backup to a existing small size image -Patch295: kvm-iotests-don-t-use-format-for-drive_add.patch -# For bz#1778593 - Qemu coredump when backup to a existing small size image -Patch296: kvm-iotests-055-refactor-compressed-backup-to-vmdk.patch -# For bz#1778593 - Qemu coredump when backup to a existing small size image -Patch297: kvm-iotests-055-skip-vmdk-target-tests-if-vmdk-is-not-wh.patch -# For bz#1778593 - Qemu coredump when backup to a existing small size image -Patch298: kvm-backup-Improve-error-for-bdrv_getlength-failure.patch -# For bz#1778593 - Qemu coredump when backup to a existing small size image -Patch299: kvm-backup-Make-sure-that-source-and-target-size-match.patch -# For bz#1778593 - Qemu coredump when backup to a existing small size image -Patch300: kvm-iotests-Backup-with-different-source-target-size.patch -# For bz#1778593 - Qemu coredump when backup to a existing small size image -Patch301: kvm-iotests-109-Don-t-mirror-with-mismatched-size.patch -# For bz#1778593 - Qemu coredump when backup to a existing small size image -Patch302: kvm-iotests-229-Use-blkdebug-to-inject-an-error.patch -# For bz#1778593 - Qemu coredump when backup to a existing small size image -Patch303: kvm-mirror-Make-sure-that-source-and-target-size-match.patch -# For bz#1778593 - Qemu coredump when backup to a existing small size image -Patch304: kvm-iotests-Mirror-with-different-source-target-size.patch -# For bz#1841068 - RFE: please support the "ramfb" display device model -Patch305: kvm-enable-ramfb.patch -# For bz#1780574 - Data corruption with resizing short overlay over longer backing files -Patch306: kvm-block-Add-flags-to-BlockDriver.bdrv_co_truncate.patch -# 
For bz#1780574 - Data corruption with resizing short overlay over longer backing files -Patch307: kvm-block-Add-flags-to-bdrv-_co-_truncate.patch -# For bz#1780574 - Data corruption with resizing short overlay over longer backing files -Patch308: kvm-block-backend-Add-flags-to-blk_truncate.patch -# For bz#1780574 - Data corruption with resizing short overlay over longer backing files -Patch309: kvm-qcow2-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch -# For bz#1780574 - Data corruption with resizing short overlay over longer backing files -Patch310: kvm-raw-format-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch -# For bz#1780574 - Data corruption with resizing short overlay over longer backing files -Patch311: kvm-file-posix-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch -# For bz#1780574 - Data corruption with resizing short overlay over longer backing files -Patch312: kvm-block-truncate-Don-t-make-backing-file-data-visible.patch -# For bz#1780574 - Data corruption with resizing short overlay over longer backing files -Patch313: kvm-iotests-Add-qemu_io_log.patch -# For bz#1780574 - Data corruption with resizing short overlay over longer backing files -Patch314: kvm-iotests-Filter-testfiles-out-in-filter_img_info.patch -# For bz#1780574 - Data corruption with resizing short overlay over longer backing files -Patch315: kvm-iotests-Test-committing-to-short-backing-file.patch -# For bz#1780574 - Data corruption with resizing short overlay over longer backing files -Patch316: kvm-qcow2-Forward-ZERO_WRITE-flag-for-full-preallocation.patch -# For bz#1769912 - [Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train -Patch317: kvm-i386-Add-MSR-feature-bit-for-MDS-NO.patch -# For bz#1769912 - [Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train -Patch318: kvm-i386-Add-macro-for-stibp.patch -# For bz#1769912 - [Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train -Patch319: 
kvm-target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch -# For bz#1769912 - [Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train -Patch320: kvm-i386-Add-new-CPU-model-Cooperlake.patch -# For bz#1769912 - [Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train -Patch321: kvm-target-i386-Add-missed-features-to-Cooperlake-CPU-mo.patch -# For bz#1845384 - CVE-2020-10761 virt:8.2/qemu-kvm: QEMU: nbd: reachable assertion failure in nbd_negotiate_send_rep_verr via remote client [rhel-av-8] -Patch322: kvm-nbd-server-Avoid-long-error-message-assertions-CVE-2.patch -# For bz#1845384 - CVE-2020-10761 virt:8.2/qemu-kvm: QEMU: nbd: reachable assertion failure in nbd_negotiate_send_rep_verr via remote client [rhel-av-8] -Patch323: kvm-block-Call-attention-to-truncation-of-long-NBD-expor.patch -# For bz#1820531 - qmp command query-pci get wrong result after hotplug device under hotplug=off controller -Patch324: kvm-hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch -# For bz#1840342 - [Intel 8.2.1 Bug] qemu-kvm Add ARCH_CAPABILITIES to Icelake-Server cpu model - Fast Train -Patch325: kvm-target-i386-Add-ARCH_CAPABILITIES-related-bits-into-.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch327: kvm-linux-headers-update-kvm.h.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch328: kvm-s390x-Don-t-do-a-normal-reset-on-the-initial-cpu.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch329: kvm-s390x-Move-reset-normal-to-shared-reset-handler.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch330: kvm-s390x-Move-initial-reset.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch331: kvm-s390x-Move-clear-reset.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - 
qemu part -Patch332: kvm-s390x-Beautify-diag308-handling.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch333: kvm-s390x-kvm-Make-kvm_sclp_service_call-void.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch334: kvm-s390x-Fix-cpu-normal-reset-ri-clearing.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch335: kvm-tests-boot-sector-Fix-the-bad-s390x-assembler-code.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch336: kvm-pc-bios-s390x-Fix-reset-psw-mask.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch337: kvm-s390x-Properly-fetch-and-test-the-short-psw-on-diag3.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch338: kvm-s390x-Rename-and-use-constants-for-short-PSW-address.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch339: kvm-s390x-Add-missing-vcpu-reset-functions.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch340: kvm-s390-sclp-improve-special-wait-psw-logic.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch341: kvm-pc-bios-s390x-Save-iplb-location-in-lowcore.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch342: kvm-s390-ipl-sync-back-loadparm.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch343: kvm-s390-ipl-fix-off-by-one-in-update_machine_ipl_proper.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch344: kvm-s390x-ipl-Consolidate-iplb-validity-check-into-one-f.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch345: 
kvm-vhost-correctly-turn-on-VIRTIO_F_IOMMU_PLATFORM.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch346: kvm-s390x-Move-diagnose-308-subcodes-and-rcs-into-ipl.h.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch347: kvm-s390x-protvirt-Support-unpack-facility.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch348: kvm-s390x-protvirt-Add-migration-blocker.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch349: kvm-s390x-protvirt-Inhibit-balloon-when-switching-to-pro.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch350: kvm-s390x-protvirt-KVM-intercept-changes.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch351: kvm-s390x-Add-SIDA-memory-ops.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch352: kvm-s390x-protvirt-Move-STSI-data-over-SIDAD.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch353: kvm-s390x-protvirt-SCLP-interpretation.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch354: kvm-s390x-protvirt-Set-guest-IPL-PSW.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch355: kvm-s390x-protvirt-Move-diag-308-data-over-SIDA.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch356: kvm-s390x-protvirt-Disable-address-checks-for-PV-guest-I.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch357: kvm-s390x-protvirt-Move-IO-control-structures-over-SIDA.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch358: 
kvm-s390x-protvirt-Handle-SIGP-store-status-correctly.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch359: kvm-s390x-Add-unpack-facility-feature-to-GA1.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch360: kvm-s390x-protvirt-Fix-stray-error_report_err-in-s390_ma.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch361: kvm-s390x-pv-Retry-ioctls-on-EINTR.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch362: kvm-s390x-s390-virtio-ccw-Fix-build-on-systems-without-K.patch -# For bz#1828317 - [IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part -Patch363: kvm-s390x-pv-Fix-KVM_PV_PREP_RESET-command-wrapper-name.patch -# For bz#1756946 - [zKVM] Re-enable KVM_CAP_S390_AIS for new machine types -Patch364: kvm-spapr-Pass-the-maximum-number-of-vCPUs-to-the-KVM-in.patch -# For bz#1756946 - [zKVM] Re-enable KVM_CAP_S390_AIS for new machine types -Patch365: kvm-introduce-kvm_kernel_irqchip_-functions.patch -# For bz#1756946 - [zKVM] Re-enable KVM_CAP_S390_AIS for new machine types -Patch366: kvm-target-s390x-kvm-Enable-adapter-interruption-suppres.patch -# For bz#1823275 - RHEL8.1 - GPU Numa nodes not visible in guest post the pass-through. 
-Patch367: kvm-vfio-nvlink-Remove-exec-permission-to-avoid-SELinux-.patch -# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part -Patch368: kvm-vfio-ccw-Fix-error-message.patch -# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part -Patch369: kvm-vfio-ccw-allow-non-prefetch-ORBs.patch -# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part -Patch370: kvm-linux-headers-support-vfio-ccw-features.patch -# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part -Patch371: kvm-vfio-ccw-Refactor-cleanup-of-regions.patch -# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part -Patch372: kvm-vfio-ccw-Add-support-for-the-schib-region.patch -# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part -Patch373: kvm-vfio-ccw-Refactor-ccw-irq-handler.patch -# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part -Patch374: kvm-s390x-css-Refactor-the-css_queue_crw-routine.patch -# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part -Patch375: kvm-vfio-ccw-Add-support-for-the-CRW-region-and-IRQ.patch -# For bz#1660916 - [IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part -Patch376: kvm-config-enable-VFIO_CCW.patch -Patch377: kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch -Patch378: kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch -# For bz#1838070 - CVE-2020-1983 virt:rhel/qemu-kvm: QEMU: slirp: use-after-free in ip_reass() function in ip_input.c [rhel-8] -Patch379: kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch -# For bz#1835390 - qemu promote host does not support 'EDX.npt' and 'EDX.nrip-save' when test with Q35 machine type on EPYC host -Patch380: kvm-i386-Mask-SVM-features-if-nested-SVM-is-disabled.patch -# For bz#1854092 - kvm-unit-tests: tcg smp FAIL -Patch381: kvm-s390x-sigp-Fix-sense-running-reporting.patch -# For 
bz#1854092 - kvm-unit-tests: tcg smp FAIL -Patch382: kvm-s390x-tcg-clear-local-interrupts-on-reset-normal.patch -Patch383: kvm-virtio-net-fix-removal-of-failover-device.patch -# For bz#1807057 - qcow2_alloc_cluster_abort() frees preallocated zero clusters -Patch384: kvm-qcow2-Fix-alloc_cluster_abort-for-pre-existing-clust.patch -# For bz#1807057 - qcow2_alloc_cluster_abort() frees preallocated zero clusters -Patch385: kvm-iotests-026-Test-EIO-on-preallocated-zero-cluster.patch -# For bz#1807057 - qcow2_alloc_cluster_abort() frees preallocated zero clusters -Patch386: kvm-iotests-026-Test-EIO-on-allocation-in-a-data-file.patch -# For bz#1807057 - qcow2_alloc_cluster_abort() frees preallocated zero clusters -Patch387: kvm-iotests-026-Move-v3-exclusive-test-to-new-file.patch -# For bz#1780385 - [RFE] AMD EPYC-Rome support for KVM / QEMU guest -Patch388: kvm-i386-Add-2nd-Generation-AMD-EPYC-processors.patch -# For bz#1689341 - QEMU should report an error and return failure if AMD SEV is not enabled in the kernel -Patch389: kvm-target-i386-sev-provide-proper-error-reporting-for-q.patch -# For bz#1689341 - QEMU should report an error and return failure if AMD SEV is not enabled in the kernel -Patch390: kvm-target-i386-sev-fail-query-sev-capabilities-if-QEMU-.patch -# For bz#1863034 - RHEL8.3 Beta - Secure Execution: Unable to start Qemu with "-no-reboot" option (qemu-kvm) -Patch391: kvm-s390x-protvirt-allow-to-IPL-secure-guests-with-no-re.patch -# For bz#1869710 - CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-8.3.0] -Patch392: kvm-usb-fix-setup_len-init-CVE-2020-14364.patch -# For bz#1755075 - [qemu-guest-agent] fsinfo doesn't return disk info on s390x -Patch393: kvm-qga-commands-posix-Rework-build_guest_fsinfo_for_rea.patch -# For bz#1755075 - [qemu-guest-agent] fsinfo doesn't return disk info on s390x -Patch394: kvm-qga-commands-posix-Move-the-udev-code-from-the-pci-t.patch -# For bz#1755075 - [qemu-guest-agent] 
fsinfo doesn't return disk info on s390x -Patch395: kvm-qga-commands-posix-Support-fsinfo-for-non-PCI-virtio.patch -# For bz#1874780 - -prom-env does not validate input -Patch396: kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch -# For bz#1846975 - Failed to boot up a s390x guest with virtio-blk-ccw if attaching a virtio-scsi-ccw bus in previous -Patch397: kvm-pc-bios-s390-ccw-Makefile-Compile-with-std-gnu99-fwr.patch -# For bz#1846975 - Failed to boot up a s390x guest with virtio-blk-ccw if attaching a virtio-scsi-ccw bus in previous -Patch398: kvm-pc-bios-s390-ccw-Move-ipl-related-code-from-main-int.patch -# For bz#1846975 - Failed to boot up a s390x guest with virtio-blk-ccw if attaching a virtio-scsi-ccw bus in previous -Patch399: kvm-pc-bios-s390-ccw-Introduce-ENODEV-define-and-remove-.patch -# For bz#1846975 - Failed to boot up a s390x guest with virtio-blk-ccw if attaching a virtio-scsi-ccw bus in previous -Patch400: kvm-pc-bios-s390-ccw-Move-the-inner-logic-of-find_subch-.patch -# For bz#1846975 - Failed to boot up a s390x guest with virtio-blk-ccw if attaching a virtio-scsi-ccw bus in previous -Patch401: kvm-pc-bios-s390-ccw-Do-not-bail-out-early-if-not-findin.patch -# For bz#1846975 - Failed to boot up a s390x guest with virtio-blk-ccw if attaching a virtio-scsi-ccw bus in previous -Patch402: kvm-pc-bios-s390-ccw-Scan-through-all-devices-if-no-boot.patch -# For bz#1846975 - Failed to boot up a s390x guest with virtio-blk-ccw if attaching a virtio-scsi-ccw bus in previous -Patch403: kvm-pc-bios-s390-ccw-Allow-booting-in-case-the-first-vir.patch -# For bz#1846975 - Failed to boot up a s390x guest with virtio-blk-ccw if attaching a virtio-scsi-ccw bus in previous -Patch404: kvm-pc-bios-s390-ccw-main-Remove-superfluous-call-to-ena.patch -# For bz#1846975 - Failed to boot up a s390x guest with virtio-blk-ccw if attaching a virtio-scsi-ccw bus in previous -Patch405: kvm-aio-posix-completely-stop-polling-when-disabled.patch -# For bz#1884531 - 
qemu-ga aborts after guest-shutdown command -Patch406: kvm-qga-fix-assert-regression-on-guest-shutdown.patch -# For bz#1857733 - [IBM 8.4 FEAT] KVM: Add support for virtio-fs on s390x - qemu part -Patch407: kvm-libvhost-user-handle-endianness-as-mandated-by-the-s.patch -# For bz#1857733 - [IBM 8.4 FEAT] KVM: Add support for virtio-fs on s390x - qemu part -Patch408: kvm-virtio-add-vhost-user-fs-ccw-device.patch -# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part -Patch410: kvm-s390x-sclp.c-remove-unneeded-label-in-sclp_service_c.patch -# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part -Patch411: kvm-s390-sclp-get-machine-once-during-read-scp-cpu-info.patch -# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part -Patch412: kvm-s390-sclp-rework-sclp-boundary-checks.patch -# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part -Patch413: kvm-s390-sclp-read-sccb-from-mem-based-on-provided-lengt.patch -# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part -Patch414: kvm-s390-sclp-check-sccb-len-before-filling-in-data.patch -# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part -Patch415: kvm-s390-sclp-use-cpu-offset-to-locate-cpu-entries.patch -# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part -Patch416: kvm-s390-sclp-add-extended-length-sccb-support-for-kvm-g.patch -# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part -Patch417: kvm-linux-headers-Partial-update-against-Linux-5.9-rc4.patch -# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part -Patch418: kvm-misc-Replace-zero-length-arrays-with-flexible-array-.patch -# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part -Patch419: kvm-s390-guest-support-for-diagnose-0x318.patch -# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs 
- qemu part -Patch420: kvm-s390x-pv-Remove-sclp-boundary-checks.patch -# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part -Patch421: kvm-s390x-pv-Fix-diag318-PV-fencing.patch -# For bz#1659412 - [IBM 8.4 FEAT] KVM enablement for enhanced hardware diagnose data of guest kernel on s390x - qemu part -Patch422: kvm-s390-kvm-fix-diag318-propagation-and-reset-functiona.patch -# For bz#1898700 - qemu-kvm for RHEL-8.4 doesn't build due to a possible incompatibility with systemtap-sdt-devel-4.4-1 -Patch423: kvm-trace-use-STAP_SDT_V2-to-work-around-symbol-visibili.patch -# For bz#1860994 - CVE-2020-16092 virt:rhel/qemu-kvm: QEMU: reachable assertion failure in net_tx_pkt_add_raw_fragment() in hw/net/net_tx_pkt.c [rhel-8] -Patch424: kvm-hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch -# For bz#1880546 - qemu use SCMP_ACT_TRAP even SCMP_ACT_KILL_PROCESS is available -Patch425: kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch -# For bz#1903135 - RHEL8.3 - KVM Distro install to vfio_ccw backed DASD gets error at the reboot step (qemu-kvm) -Patch426: kvm-pc-bios-s390x-Rename-PSW_MASK_ZMODE-to-PSW_MASK_64.patch -# For bz#1903135 - RHEL8.3 - KVM Distro install to vfio_ccw backed DASD gets error at the reboot step (qemu-kvm) -Patch427: kvm-pc-bios-s390x-Use-PSW-masks-where-possible-and-intro.patch -# For bz#1903135 - RHEL8.3 - KVM Distro install to vfio_ccw backed DASD gets error at the reboot step (qemu-kvm) -Patch428: kvm-pc-bios-s390x-Ensure-Read-IPL-memory-is-clean.patch -# For bz#1903135 - RHEL8.3 - KVM Distro install to vfio_ccw backed DASD gets error at the reboot step (qemu-kvm) -Patch429: kvm-pc-bios-s390x-Clear-out-leftover-S390EP-string.patch -# For bz#1901837 - Failed to hotunplug pc-dimm device -Patch430: kvm-ppc-spapr-Add-hotremovable-flag-on-DIMM-LMBs-on-drme.patch -# For bz#1901837 - Failed to hotunplug pc-dimm device -Patch431: kvm-ppc-spapr-re-assert-IRQs-during-event-scan-if-there-.patch -# For bz#1902237 - 
CVE-2020-29129 CVE-2020-29130 virt:rhel/qemu-kvm: QEMU: slirp: out-of-bounds access while processing ARP/NCSI packets [rhel-8] -Patch432: kvm-slirp-check-pkt_len-before-reading-protocol-header.patch -# For bz#1905386 - RHEL8.3 - s390x/s390-virtio-ccw: Reset PCI devices during subsystem reset (qemu-kvm) -Patch433: kvm-s390x-s390-virtio-ccw-Reset-PCI-devices-during-subsy.patch -# For bz#1859494 - Report logical_name for disks without mounted file-system -Patch434: kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch -# For bz#1859494 - Report logical_name for disks without mounted file-system -Patch435: kvm-error-Fix-examples-in-error.h-s-big-comment.patch -# For bz#1859494 - Report logical_name for disks without mounted file-system -Patch436: kvm-error-Improve-error.h-s-big-comment.patch -# For bz#1859494 - Report logical_name for disks without mounted file-system -Patch437: kvm-error-Document-Error-API-usage-rules.patch -# For bz#1859494 - Report logical_name for disks without mounted file-system -Patch438: kvm-error-New-macro-ERRP_GUARD.patch -# For bz#1859494 - Report logical_name for disks without mounted file-system -Patch439: kvm-qga-add-command-guest-get-disks.patch -# For bz#1859494 - Report logical_name for disks without mounted file-system -Patch440: kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch -# For bz#1859494 - Report logical_name for disks without mounted file-system -Patch441: kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch -# For bz#1859494 - Report logical_name for disks without mounted file-system -Patch442: kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch -# For bz#1859494 - Report logical_name for disks without mounted file-system -Patch443: kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch -# For bz#1910267 - There is no soft link '/etc/qemu-kvm/fsfreeze-hook' -Patch444: kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch -# For bz#1910326 - Incorrect hostname returned by qga 
command 'guest-get-host-name' -Patch445: kvm-qga-rename-Error-parameter-to-more-common-errp.patch -# For bz#1910326 - Incorrect hostname returned by qga command 'guest-get-host-name' -Patch446: kvm-util-Introduce-qemu_get_host_name.patch -# For bz#1910326 - Incorrect hostname returned by qga command 'guest-get-host-name' -Patch447: kvm-qga-Use-qemu_get_host_name-instead-of-g_get_host_nam.patch -# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. -Patch449: kvm-hw-arm-smmu-common-Factorize-some-code-in-smmu_ptw_6.patch -# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. -Patch450: kvm-hw-arm-smmu-common-Add-IOTLB-helpers.patch -# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. -Patch451: kvm-hw-arm-smmu-Introduce-smmu_get_iotlb_key.patch -# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. -Patch452: kvm-hw-arm-smmu-Introduce-SMMUTLBEntry-for-PTW-and-IOTLB.patch -# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. -Patch453: kvm-hw-arm-smmu-common-Manage-IOTLB-block-entries.patch -# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. 
-Patch454: kvm-hw-arm-smmuv3-Introduce-smmuv3_s1_range_inval-helper.patch -# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. -Patch455: kvm-hw-arm-smmuv3-Get-prepared-for-range-invalidation.patch -# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. -Patch456: kvm-hw-arm-smmuv3-Fix-potential-integer-overflow-CID-143.patch -# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. -Patch457: kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch -# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. -Patch458: kvm-memory-Add-IOMMUTLBEvent.patch -# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. -Patch459: kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch -# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. -Patch460: kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch -# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. 
-Patch461: kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch -# For bz#1904393 - CVE-2020-27821 virt:rhel/qemu-kvm: QEMU: heap buffer overflow in msix_table_mmio_write() in hw/pci/msix.c [rhel-8] -Patch462: kvm-memory-clamp-cached-translation-in-case-it-points-to.patch -# For bz#1898628 - CVE-2020-25723 virt:rhel/qemu-kvm: QEMU: assertion failure through usb_packet_unmap() in hw/usb/hcd-ehci.c [rhel-8] -Patch463: kvm-hw-ehci-check-return-value-of-usb_packet_map.patch -# For bz#1903070 - CVE-2020-25707 CVE-2020-28916 virt:rhel/qemu-kvm: various flaws [rhel-8] -Patch464: kvm-hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch -# For bz#1905391 - RHEL8.4 - s390x/pci: Honor vfio DMA limiting (qemu-kvm) -Patch465: kvm-linux-headers-add-vfio-DMA-available-capability.patch -# For bz#1905391 - RHEL8.4 - s390x/pci: Honor vfio DMA limiting (qemu-kvm) -Patch466: kvm-s390x-pci-Move-header-files-to-include-hw-s390x.patch -# For bz#1905391 - RHEL8.4 - s390x/pci: Honor vfio DMA limiting (qemu-kvm) -Patch467: kvm-vfio-Create-shared-routine-for-scanning-info-capabil.patch -# For bz#1905391 - RHEL8.4 - s390x/pci: Honor vfio DMA limiting (qemu-kvm) -Patch468: kvm-vfio-Find-DMA-available-capability.patch -# For bz#1905391 - RHEL8.4 - s390x/pci: Honor vfio DMA limiting (qemu-kvm) -Patch469: kvm-s390x-pci-Add-routine-to-get-the-vfio-dma-available-.patch -# For bz#1905391 - RHEL8.4 - s390x/pci: Honor vfio DMA limiting (qemu-kvm) -Patch470: kvm-s390x-pci-Honor-DMA-limits-set-by-vfio.patch -# For bz#1905391 - RHEL8.4 - s390x/pci: Honor vfio DMA limiting (qemu-kvm) -Patch471: kvm-s390x-fix-build-for-without-default-devices.patch -# For bz#1918054 - CVE-2020-10756 virt:rhel/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-8.4.0] -Patch472: kvm-Drop-bogus-IPv6-messages.patch -# For bz#1901837 - Failed to hotunplug pc-dimm device -Patch473: kvm-spapr-Improve-handling-of-fdt-buffer-size.patch -# For bz#1901837 - Failed to 
hotunplug pc-dimm device -Patch474: kvm-spapr-Fold-h_cas_compose_response-into-h_client_arch.patch -# For bz#1901837 - Failed to hotunplug pc-dimm device -Patch475: kvm-spapr-Don-t-use-spapr_drc_needed-in-CAS-code.patch -# For bz#1901837 - Failed to hotunplug pc-dimm device -Patch476: kvm-spapr-Fix-handling-of-unplugged-devices-during-CAS-a.patch -# For bz#1901837 - Failed to hotunplug pc-dimm device -Patch477: kvm-spapr-Allow-memory-unplug-to-always-succeed.patch -# For bz#1901837 - Failed to hotunplug pc-dimm device -Patch478: kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch -# For bz#1834281 - qemu-img convert abort when converting image with unaligned size -Patch479: kvm-block-Require-aligned-image-size-to-avoid-assertion-.patch -# For bz#1834281 - qemu-img convert abort when converting image with unaligned size -Patch480: kvm-file-posix-Allow-byte-aligned-O_DIRECT-with-NFS.patch -# For bz#1912974 - CVE-2020-11947 virt:rhel/qemu-kvm: QEMU: heap buffer overflow in iscsi_aio_ioctl_cb() in block/iscsi.c may lead to information disclosure [rhel-8] -Patch481: kvm-block-iscsi-fix-heap-buffer-overflow-in-iscsi_aio_io.patch -# For bz#1919111 - CVE-2020-35517 virt:rhel/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-8.4.0] -Patch482: kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch -# For bz#1919111 - CVE-2020-35517 virt:rhel/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-8.4.0] -Patch483: kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch -# For bz#1919111 - CVE-2020-35517 virt:rhel/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-8.4.0] -Patch484: kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch -# For bz#1912891 - [ppc64le] --disk cdimage.iso,bus=usb fails to boot -Patch486: kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch -# For bz#1790620 - [RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Slow Train 
-Patch487: kvm-x86-cpu-Enable-AVX512_VP2INTERSECT-cpu-feature.patch -# For bz#1790620 - [RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Slow Train -Patch488: kvm-target-i386-add-fast-short-REP-MOV-support.patch -# For bz#1790620 - [RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Slow Train -Patch489: kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch -# For bz#1790620 - [RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Slow Train -Patch490: kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch -# For bz#1917451 - CVE-2020-29443 virt:rhel/qemu-kvm: QEMU: ide: atapi: OOB access while processing read commands [rhel-8.4.0] -Patch491: kvm-ide-atapi-check-logical-block-address-and-read-size-.patch -# For bz#1892350 - CVE-2020-27617 virt:rhel/qemu-kvm: QEMU: net: an assert failure via eth_get_gso_type [rhel-8.5.0] -Patch492: kvm-net-remove-an-assert-call-in-eth_get_gso_type.patch -# For bz#1930092 - CVE-2021-20257 virt:rhel/qemu-kvm: QEMU: net: e1000: infinite loop while processing transmit descriptors [rhel-8.5.0] -Patch493: kvm-e1000-fail-early-for-evil-descriptor.patch -# For bz#1859175 - CVE-2020-15859 virt:rhel/qemu-kvm: QEMU: net: e1000e: use-after-free while sending packets [rhel-8] -Patch494: kvm-net-forbid-the-reentrant-RX.patch -# For bz#1855250 - qemu-img convert uses possibly slow pre-zeroing on block storage -Patch495: kvm-qemu-img-convert-Don-t-pre-zero-images.patch -# For bz#1932823 - after upgrade from 4.3 to 4.4 audio stops working in guests after couple of seconds -Patch496: kvm-audio-audio_generic_get_buffer_in-should-honor-size.patch -# For bz#1925430 - CVE-2021-20221 virt:rhel/qemu-kvm: qemu: out-of-bound heap buffer access via an interrupt ID field [rhel-8.5.0] -Patch497: kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch -# For bz#1842478 - CVE-2020-13754 virt:rhel/qemu-kvm: QEMU: msix: OOB access during mmio operations may lead to DoS [rhel-8.5.0] -Patch498: 
kvm-libqos-usb-hcd-ehci-use-32-bit-write-for-config-regi.patch -# For bz#1842478 - CVE-2020-13754 virt:rhel/qemu-kvm: QEMU: msix: OOB access during mmio operations may lead to DoS [rhel-8.5.0] -Patch499: kvm-libqos-pci-pc-use-32-bit-write-for-EJ-register.patch -# For bz#1842478 - CVE-2020-13754 virt:rhel/qemu-kvm: QEMU: msix: OOB access during mmio operations may lead to DoS [rhel-8.5.0] -Patch500: kvm-memory-Revert-memory-accept-mismatching-sizes-in-mem.patch -# For bz#1842478 - CVE-2020-13754 virt:rhel/qemu-kvm: QEMU: msix: OOB access during mmio operations may lead to DoS [rhel-8.5.0] -Patch501: kvm-acpi-accept-byte-and-word-access-to-core-ACPI-regist.patch -# For bz#1842478 - CVE-2020-13754 virt:rhel/qemu-kvm: QEMU: msix: OOB access during mmio operations may lead to DoS [rhel-8.5.0] -Patch502: kvm-xhci-fix-valid.max_access_size-to-access-address-reg.patch -# For bz#1842478 - CVE-2020-13754 virt:rhel/qemu-kvm: QEMU: msix: OOB access during mmio operations may lead to DoS [rhel-8.5.0] -Patch503: kvm-softmmu-memory-Log-invalid-memory-accesses.patch -# For bz#1940450 - RHEL8.5 - Mediated Device already in use by same domain we are booting (vfio-ccw/Multipath Testing) (kvm) - qemu-kvm part (also has kernel and libvirt parts) -Patch504: kvm-linux-headers-Add-VFIO_CCW_REQ_IRQ_INDEX.patch -# For bz#1940450 - RHEL8.5 - Mediated Device already in use by same domain we are booting (vfio-ccw/Multipath Testing) (kvm) - qemu-kvm part (also has kernel and libvirt parts) -Patch505: kvm-vfio-ccw-Connect-the-device-request-notifier.patch -# For bz#1942880 - RHEL8.4 Nightly[0322] - KVM guest fails to find zipl boot menu index (qemu-kvm) -Patch506: kvm-pc-bios-s390-ccw-fix-off-by-one-error.patch -# For bz#1942880 - RHEL8.4 Nightly[0322] - KVM guest fails to find zipl boot menu index (qemu-kvm) -Patch507: kvm-pc-bios-s390-ccw-break-loop-if-a-null-block-number-i.patch -# For bz#1942880 - RHEL8.4 Nightly[0322] - KVM guest fails to find zipl boot menu index (qemu-kvm) -Patch508: 
kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch -# For bz#1877163 - [FJ 8.3 Bug] The progress bar of the "virt-clone --nonsparse" command shows the progress rate exceeding 100%. -Patch509: kvm-file-posix-Mitigate-file-fragmentation-with-extent-s.patch -# For bz#1944861 - Qemu-img convert fails when source image is on gpfs -Patch510: kvm-block-file-posix-Fix-problem-with-fallocate-PUNCH_HO.patch -# For bz#1969768 - [ppc64le] Hotplug vcpu device hit call trace:[qemu output] KVM: unknown exit, hardware reason 7fff9ce87ed8 -Patch511: kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch -# For bz#1969768 - [ppc64le] Hotplug vcpu device hit call trace:[qemu output] KVM: unknown exit, hardware reason 7fff9ce87ed8 -Patch512: kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch -# For bz#1967914 - [virtio-fs] virtiofsd quit when coping file to a folder in virtio-fs mounted volume(windows guest) -Patch513: kvm-virtiofsd-Whitelist-fchmod.patch -# For bz#1957866 - RHEL8.4 - EEH capability disabled on KVM guest and recovery of PCI passthru device fails(CX5 / mlx5_core) (qemu-kvm) -Patch514: kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch -# For bz#1970912 - Deployment fails with "Invalid or missing agent token received" -Patch515: kvm-Compress-lines-for-immediate-return.patch -# For bz#1970912 - Deployment fails with "Invalid or missing agent token received" -Patch516: kvm-file-posix-Handle-EINVAL-fallocate-return-value.patch -# For bz#1932917 - CVE-2021-3416 virt:rhel/qemu-kvm: QEMU: net: infinite loop in loopback mode may lead to stack overflow -Patch517: kvm-net-introduce-qemu_receive_packet.patch -# For bz#1932917 - CVE-2021-3416 virt:rhel/qemu-kvm: QEMU: net: infinite loop in loopback mode may lead to stack overflow -Patch518: kvm-e1000-switch-to-use-qemu_receive_packet-for-loopback.patch -# For bz#1932917 - CVE-2021-3416 virt:rhel/qemu-kvm: QEMU: net: infinite loop in loopback mode may lead to stack overflow -Patch519: 
kvm-dp8393x-switch-to-use-qemu_receive_packet-for-loopba.patch -# For bz#1932917 - CVE-2021-3416 virt:rhel/qemu-kvm: QEMU: net: infinite loop in loopback mode may lead to stack overflow -Patch520: kvm-sungem-switch-to-use-qemu_receive_packet-for-loopbac.patch -# For bz#1932917 - CVE-2021-3416 virt:rhel/qemu-kvm: QEMU: net: infinite loop in loopback mode may lead to stack overflow -Patch521: kvm-tx_pkt-switch-to-use-qemu_receive_packet_iov-for-loo.patch -# For bz#1932917 - CVE-2021-3416 virt:rhel/qemu-kvm: QEMU: net: infinite loop in loopback mode may lead to stack overflow -Patch522: kvm-rtl8139-switch-to-use-qemu_receive_packet-for-loopba.patch -# For bz#1932917 - CVE-2021-3416 virt:rhel/qemu-kvm: QEMU: net: infinite loop in loopback mode may lead to stack overflow -Patch523: kvm-pcnet-switch-to-use-qemu_receive_packet-for-loopback.patch -# For bz#1932917 - CVE-2021-3416 virt:rhel/qemu-kvm: QEMU: net: infinite loop in loopback mode may lead to stack overflow -Patch524: kvm-cadence_gem-switch-to-use-qemu_receive_packet-for-lo.patch -# For bz#1932917 - CVE-2021-3416 virt:rhel/qemu-kvm: QEMU: net: infinite loop in loopback mode may lead to stack overflow -Patch525: kvm-lan9118-switch-to-use-qemu_receive_packet-for-loopba.patch -# For bz#1967716 - RFE: rebuild guest agent to include public ssh injection api support -Patch526: kvm-glib-compat-add-g_unix_get_passwd_entry_qemu.patch -# For bz#1967716 - RFE: rebuild guest agent to include public ssh injection api support -Patch527: kvm-qga-add-ssh-add-remove-authorized-keys.patch -# For bz#1967716 - RFE: rebuild guest agent to include public ssh injection api support -Patch528: kvm-qga-add-reset-argument-to-ssh-add-authorized-keys.patch -# For bz#1967716 - RFE: rebuild guest agent to include public ssh injection api support -Patch529: kvm-qga-add-ssh-get-authorized-keys.patch -# For bz#1970819 - CVE-2021-3592 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (bootp) 
[rhel-8] -# For bz#1970835 - CVE-2021-3593 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp6) [rhel-8] -# For bz#1970843 - CVE-2021-3595 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (tftp) [rhel-8] -# For bz#1970853 - CVE-2021-3594 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp) [rhel-8] -Patch530: kvm-Add-mtod_check.patch -# For bz#1970819 - CVE-2021-3592 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (bootp) [rhel-8] -# For bz#1970835 - CVE-2021-3593 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp6) [rhel-8] -# For bz#1970843 - CVE-2021-3595 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (tftp) [rhel-8] -# For bz#1970853 - CVE-2021-3594 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp) [rhel-8] -Patch531: kvm-bootp-limit-vendor-specific-area-to-input-packet-mem.patch -# For bz#1970819 - CVE-2021-3592 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (bootp) [rhel-8] -Patch532: kvm-bootp-check-bootp_input-buffer-size.patch -# For bz#1970835 - CVE-2021-3593 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp6) [rhel-8] -Patch533: kvm-upd6-check-udp6_input-buffer-size.patch -# For bz#1970843 - CVE-2021-3595 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (tftp) [rhel-8] -Patch534: kvm-tftp-check-tftp_input-buffer-size.patch -# For bz#1970819 - CVE-2021-3592 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (bootp) [rhel-8] -# For bz#1970835 - CVE-2021-3593 virt:rhel/qemu-kvm: QEMU: slirp: 
invalid pointer initialization may lead to information disclosure (udp6) [rhel-8] -# For bz#1970843 - CVE-2021-3595 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (tftp) [rhel-8] -# For bz#1970853 - CVE-2021-3594 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp) [rhel-8] -Patch535: kvm-tftp-introduce-a-header-structure.patch -# For bz#1970853 - CVE-2021-3594 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp) [rhel-8] -Patch536: kvm-udp-check-upd_input-buffer-size.patch -# For bz#1970819 - CVE-2021-3592 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (bootp) [rhel-8] -# For bz#1970835 - CVE-2021-3593 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp6) [rhel-8] -# For bz#1970843 - CVE-2021-3595 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (tftp) [rhel-8] -# For bz#1970853 - CVE-2021-3594 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp) [rhel-8] -Patch537: kvm-Fix-DHCP-broken-in-libslirp-v4.6.0.patch -# For bz#1982134 - QEMU core dump while booting guest with a non-exist fd on tap -Patch538: kvm-net-check-if-the-file-descriptor-is-valid-before-usi.patch -# For bz#1982134 - QEMU core dump while booting guest with a non-exist fd on tap -Patch539: kvm-net-detect-errors-from-probing-vnet-hdr-flag-for-TAP.patch -# For bz#1969848 - qemu-img convert hangs on aarch64 -Patch540: kvm-aio-wait-delegate-polling-of-main-AioContext-if-BQL-.patch -# For bz#1969848 - qemu-img convert hangs on aarch64 -Patch541: kvm-async-use-explicit-memory-barriers.patch -# For bz#1967496 - [virtio-fs] nfs/xfstest generic/089 generic/478 generic/632 failed -Patch542: kvm-virtiofsd-Disable-remote-posix-locks-by-default.patch -# For 
bz#1967496 - [virtio-fs] nfs/xfstest generic/089 generic/478 generic/632 failed -Patch543: kvm-virtiofsd-Fix-the-help-message-of-posix-lock.patch +Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0017: 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0018: 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0019: 0019-compat-Update-hw_compat_rhel_8_5.patch +Patch0020: 0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch +Patch0021: 0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch +Patch0022: 0022-Fix-virtio-net-pci-vectors-compat.patch +Patch0023: 0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch +Patch0024: 0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch +Patch0025: 0025-redhat-Add-s390x-machine-type-compatibility-handling.patch +# For bz#2005325 - Fix CPU Model for new IBM Z Hardware - qemu part +Patch26: kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch +# For bz#2031041 - Add rhel-8.6.0 machine types for RHEL 8.6 [ppc64le] +Patch27: kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch28: kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch29: kvm-hw-arm-virt-Register-its-as-a-class-property.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch30: kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch31: kvm-hw-arm-virt-Add-8.6-machine-type.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch32: kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch +# For bz#2029582 - [8.6] machine types: 6.2: Fix prefer_sockets +Patch33: kvm-rhel-machine-types-x86-set-prefer_sockets.patch +# For bz#2036580 - CVE-2021-4158 virt:rhel/qemu-kvm: QEMU: NULL 
pointer dereference in pci_write() in hw/acpi/pcihp.c [rhel-8] +Patch34: kvm-acpi-validate-hotplug-selector-on-access.patch +# For bz#2031035 - Add rhel-8.6.0 machine types for RHEL 8.6 [x86] +Patch35: kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch +# For bz#2046198 - CVE-2022-0358 virt:av/qemu-kvm: QEMU: virtiofsd: potential privilege escalation via CVE-2018-13405 [rhel-8.6] +Patch36: kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch +# For bz#2033279 - [wrb][qemu-kvm 6.2] The hot-unplugged device can not be hot-plugged back +Patch37: kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch +# For bz#2021778 - Qemu core dump when do full backup during system reset +# For bz#2036178 - Qemu core dumped when do block-stream to a snapshot node on non-enough space storage +Patch38: kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch +# For bz#2021778 - Qemu core dump when do full backup during system reset +# For bz#2036178 - Qemu core dumped when do block-stream to a snapshot node on non-enough space storage +Patch39: kvm-iotests-stream-error-on-reset-New-test.patch +# For bz#2037135 - Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD +Patch40: kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch +# For bz#2037135 - Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD +Patch41: kvm-block-rbd-workaround-for-ceph-issue-53784.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch42: kvm-numa-Enable-numa-for-SGX-EPC-sections.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch43: kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch44: kvm-doc-Add-the-SGX-numa-description.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) 
support +Patch45: kvm-Enable-SGX-RH-Only.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch46: kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch +# For bz#2041480 - [incremental_backup] Inconsistent block status reply in qemu-nbd +Patch47: kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch +# For bz#2041480 - [incremental_backup] Inconsistent block status reply in qemu-nbd +Patch48: kvm-iotests-block-status-cache-New-test.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch49: kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch50: kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch51: kvm-iotests.py-Add-QemuStorageDaemon-class.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch52: kvm-iotests-281-Test-lingering-timers.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch53: kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch54: kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch BuildRequires: wget BuildRequires: rpm-build +BuildRequires: ninja-build +#BuildRequires: meson >= 0.58.2 BuildRequires: zlib-devel BuildRequires: glib2-devel BuildRequires: which @@ -1262,7 +220,7 @@ BuildRequires: pciutils-devel BuildRequires: libiscsi-devel BuildRequires: ncurses-devel BuildRequires: libattr-devel -BuildRequires: libusbx-devel >= 1.0.22 +BuildRequires: libusbx-devel >= 1.0.23 %if %{have_usbredir} BuildRequires: usbredir-devel >= 0.7.1 %endif @@ -1293,8 +251,6 @@ BuildRequires: systemtap-sdt-devel BuildRequires: 
libpng-devel # For uuid generation BuildRequires: libuuid-devel -# For BlueZ device support -BuildRequires: bluez-libs-devel # For Braille device support BuildRequires: brlapi-devel # For test suite @@ -1351,14 +307,12 @@ BuildRequires: binutils >= 2.27-16 BuildRequires: pkgconfig(epoxy) BuildRequires: pkgconfig(libdrm) BuildRequires: pkgconfig(gbm) -Requires: mesa-libGL -Requires: mesa-libEGL -Requires: mesa-dri-drivers %endif BuildRequires: perl-Test-Harness Requires: qemu-kvm-core = %{epoch}:%{version}-%{release} +Requires: qemu-kvm-docs = %{epoch}:%{version}-%{release} %rhev_ma_conflicts qemu-kvm %{requires_all_modules} @@ -1374,24 +328,18 @@ hardware for a full system such as a PC and its associated peripherals. %package -n qemu-kvm-core Summary: qemu-kvm core components +Requires: %{name}-common = %{epoch}:%{version}-%{release} Requires: qemu-img = %{epoch}:%{version}-%{release} %ifarch %{ix86} x86_64 -Requires: seabios-bin >= 1.10.2-1 -Requires: sgabios-bin Requires: edk2-ovmf %endif %ifarch aarch64 Requires: edk2-aarch64 %endif -%ifnarch aarch64 s390x -Requires: seavgabios-bin >= 1.12.0-3 -Requires: ipxe-roms-qemu >= 20170123-1 -%endif %ifarch %{power64} Requires: SLOF >= %{SLOF_gittagdate}-1.git%{SLOF_gittagcommit} %endif -Requires: %{name}-common = %{epoch}:%{version}-%{release} Requires: libseccomp >= 2.4.0 # For compressed guest memory dumps Requires: lzo snappy @@ -1402,10 +350,7 @@ Requires(preun): systemd-units Requires: powerpc-utils %endif %endif -Requires: libusbx >= 1.0.19 -%if %{have_usbredir} -Requires: usbredir >= 0.7.1 -%endif +Requires: libusbx >= 1.0.23 %if %{have_fdt} Requires: libfdt >= 1.6.0 %endif @@ -1418,6 +363,11 @@ emulation for the KVM hypervisor. qemu-kvm acts as a virtual machine monitor together with the KVM kernel modules, and emulates the hardware for a full system such as a PC and its associated peripherals. 
+%package -n qemu-kvm-docs +Summary: qemu-kvm documentation + +%description -n qemu-kvm-docs +qemu-kvm-docs provides documentation files regarding qemu-kvm. %package -n qemu-img Summary: QEMU command line tool for manipulating disk images @@ -1437,6 +387,14 @@ Requires(post): /usr/sbin/useradd Requires(post): systemd-units Requires(preun): systemd-units Requires(postun): systemd-units +%ifarch %{ix86} x86_64 +Requires: seabios-bin >= 1.10.2-1 +Requires: sgabios-bin +%endif +%ifnarch aarch64 s390x +Requires: seavgabios-bin >= 1.12.0-3 +Requires: ipxe-roms-qemu >= 20170123-1 +%endif %rhev_ma_conflicts qemu-kvm-common @@ -1529,10 +487,53 @@ Install this package if you want to access remote disks using the Secure Shell (SSH) protocol. +%if %{have_spice} +%package ui-spice +Summary: QEMU spice support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%if %{have_opengl} +Requires: %{name}-ui-opengl%{?_isa} = %{epoch}:%{version}-%{release} +%endif + +%description ui-spice +This package provides spice support. +%endif + + +%if %{have_opengl} +%package ui-opengl +Summary: QEMU opengl support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: mesa-libGL +Requires: mesa-libEGL +Requires: mesa-dri-drivers + +%description ui-opengl +This package provides opengl support. +%endif + +%if %{have_usbredir} +%package hw-usbredir +Summary: QEMU usbredir support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: usbredir >= 0.7.1 + +%description hw-usbredir +This package provides usbredir support. 
+%endif + + %prep -%setup -n qemu-%{version} +%setup -q -n qemu-%{version}%{?rcstr} +# Remove slirp content in scratchbuilds because it's being applied as a patch +rm -fr slirp +mkdir slirp %autopatch -p1 +%global qemu_kvm_build qemu_kvm_build +mkdir -p %{qemu_kvm_build} + + %build %global buildarch %{kvm_target}-softmmu @@ -1545,171 +546,238 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" %global block_drivers_list %{block_drivers_list},gluster %endif -./configure \ - --prefix="%{_prefix}" \ - --libdir="%{_libdir}" \ - --sysconfdir="%{_sysconfdir}" \ - --interp-prefix=%{_prefix}/qemu-%M \ - --localstatedir="%{_localstatedir}" \ - --docdir="%{qemudocdir}" \ - --libexecdir="%{_libexecdir}" \ - --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ - --extra-cflags="%{optflags}" \ - --with-pkgversion="%{name}-%{version}-%{release}" \ - --with-confsuffix=/"%{name}" \ - --firmwarepath=%{_prefix}/share/qemu-firmware \ -%if 0%{have_fdt} - --enable-fdt \ -%else - --disable-fdt \ - %endif -%if 0%{have_gluster} - --enable-glusterfs \ -%else - --disable-glusterfs \ -%endif - --enable-guest-agent \ -%ifnarch s390x - --enable-numa \ -%else - --disable-numa \ -%endif - --enable-rbd \ -%if 0%{have_librdma} - --enable-rdma \ -%else - --disable-rdma \ -%endif - --disable-pvrdma \ - --enable-seccomp \ -%if 0%{have_spice} - --enable-spice \ - --enable-smartcard \ -%else - --disable-spice \ - --disable-smartcard \ -%endif -%if 0%{have_opengl} - --enable-opengl \ -%else - --disable-opengl \ -%endif -%if 0%{have_usbredir} - --enable-usb-redir \ -%else - --disable-usb-redir \ -%endif - --disable-tcmalloc \ -%ifarch x86_64 - --enable-libpmem \ -%else - --disable-libpmem \ -%endif - --enable-vhost-user \ -%ifarch s390x - --enable-vhost-user-fs \ -%endif -%ifarch %{ix86} x86_64 - --enable-avx2 \ -%else - --disable-avx2 \ -%endif - --python=%{__python3} \ + +%define disable_everything \\\ + --disable-alsa \\\ + --disable-attr \\\ + --disable-auth-pam \\\ + --disable-avx2 \\\ + 
--disable-avx512f \\\ + --disable-bochs \\\ + --disable-bpf \\\ + --disable-brlapi \\\ + --disable-bsd-user \\\ + --disable-bzip2 \\\ + --disable-cap-ng \\\ + --disable-capstone \\\ + --disable-cfi \\\ + --disable-cfi-debug \\\ + --disable-cloop \\\ + --disable-cocoa \\\ + --disable-coreaudio \\\ + --disable-coroutine-pool \\\ + --disable-crypto-afalg \\\ + --disable-curl \\\ + --disable-curses \\\ + --disable-debug-info \\\ + --disable-debug-mutex \\\ + --disable-debug-tcg \\\ + --disable-dmg \\\ + --disable-docs \\\ + --disable-dsound \\\ + --disable-fdt \\\ + --disable-fuse \\\ + --disable-fuse-lseek \\\ + --disable-gcrypt \\\ + --disable-gettext \\\ + --disable-gio \\\ + --disable-glusterfs \\\ + --disable-gnutls \\\ + --disable-gtk \\\ + --disable-guest-agent \\\ + --disable-guest-agent-msi \\\ + --disable-hax \\\ + --disable-hvf \\\ + --disable-iconv \\\ + --disable-jack \\\ + --disable-kvm \\\ + --disable-l2tpv3 \\\ + --disable-libdaxctl \\\ + --disable-libiscsi \\\ + --disable-libnfs \\\ + --disable-libpmem \\\ + --disable-libssh \\\ + --disable-libudev \\\ + --disable-libusb \\\ + --disable-libxml2 \\\ + --disable-linux-aio \\\ + --disable-linux-io-uring \\\ + --disable-linux-user \\\ + --disable-live-block-migration \\\ + --disable-lto \\\ + --disable-lzfse \\\ + --disable-lzo \\\ + --disable-malloc-trim \\\ + --disable-membarrier \\\ + --disable-modules \\\ + --disable-module-upgrades \\\ + --disable-mpath \\\ + --disable-multiprocess \\\ + --disable-netmap \\\ + --disable-nettle \\\ + --disable-numa \\\ + --disable-nvmm \\\ + --disable-opengl \\\ + --disable-oss \\\ + --disable-pa \\\ + --disable-parallels \\\ + --disable-pie \\\ + --disable-pvrdma \\\ + --disable-qcow1 \\\ + --disable-qed \\\ + --disable-qom-cast-debug \\\ + --disable-rbd \\\ + --disable-rdma \\\ + --disable-replication \\\ + --disable-rng-none \\\ + --disable-safe-stack \\\ + --disable-sanitizers \\\ + --disable-sdl \\\ + --disable-sdl-image \\\ + --disable-seccomp \\\ + 
--disable-selinux \\\ + --disable-slirp-smbd \\\ + --disable-smartcard \\\ + --disable-snappy \\\ + --disable-sparse \\\ + --disable-spice \\\ + --disable-spice-protocol \\\ + --disable-strip \\\ + --disable-system \\\ + --disable-tcg \\\ + --disable-tools \\\ + --disable-tpm \\\ + --disable-u2f \\\ + --disable-usb-redir \\\ + --disable-user \\\ + --disable-vde \\\ + --disable-vdi \\\ + --disable-vhost-crypto \\\ + --disable-vhost-kernel \\\ + --disable-vhost-net \\\ + --disable-vhost-scsi \\\ + --disable-vhost-user \\\ + --disable-vhost-user-blk-server \\\ + --disable-vhost-vdpa \\\ + --disable-vhost-vsock \\\ + --disable-virglrenderer \\\ + --disable-virtfs \\\ + --disable-virtiofsd \\\ + --disable-vnc \\\ + --disable-vnc-jpeg \\\ + --disable-vnc-png \\\ + --disable-vnc-sasl \\\ + --disable-vte \\\ + --disable-vvfat \\\ + --disable-werror \\\ + --disable-whpx \\\ + --disable-xen \\\ + --disable-xen-pci-passthrough \\\ + --disable-xfsctl \\\ + --disable-xkbcommon \\\ + --disable-zstd \\\ + --with-git-submodules=ignore + +pushd %{qemu_kvm_build} +../configure \ + --prefix="%{_prefix}" \ + --libdir="%{_libdir}" \ + --datadir="%{_datadir}" \ + --sysconfdir="%{_sysconfdir}" \ + --interp-prefix=%{_prefix}/qemu-%M \ + --localstatedir="%{_localstatedir}" \ + --docdir="%{_docdir}" \ + --libexecdir="%{_libexecdir}" \ + --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ + --extra-cflags="%{optflags}" \ + --with-pkgversion="%{name}-%{version}-%{release}" \ + --with-suffix="%{name}" \ + --firmwarepath=%{_prefix}/share/qemu-firmware \ + --meson="git" \ --target-list="%{buildarch}" \ --block-drv-rw-whitelist=%{block_drivers_list} \ --audio-drv-list= \ --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ --with-coroutine=ucontext \ + --with-git=git \ --tls-priority=@QEMU,SYSTEM \ - --disable-bluez \ - --disable-brlapi \ + %{disable_everything} \ + --enable-attr \ +%ifarch %{ix86} x86_64 + --enable-avx2 \ +%endif --enable-cap-ng \ + --enable-capstone=internal \ 
--enable-coroutine-pool \ --enable-curl \ - --disable-curses \ - --disable-debug-tcg \ + --enable-debug-info \ --enable-docs \ - --disable-gtk \ +%if 0%{have_fdt} + --enable-fdt=system \ +%endif + --enable-gcrypt \ +%if 0%{have_gluster} + --enable-glusterfs \ +%endif + --enable-gnutls \ + --enable-guest-agent \ + --enable-iconv \ --enable-kvm \ --enable-libiscsi \ - --disable-libnfs \ +%ifarch x86_64 + --enable-libpmem \ +%endif --enable-libssh \ --enable-libusb \ - --disable-bzip2 \ + --enable-libudev \ --enable-linux-aio \ - --disable-live-block-migration \ --enable-lzo \ + --enable-malloc-trim \ + --enable-modules \ + --enable-mpath \ +%ifnarch s390x + --enable-numa \ +%endif +%if 0%{have_opengl} + --enable-opengl \ +%endif --enable-pie \ - --disable-qom-cast-debug \ - --disable-sdl \ + --enable-rbd \ +%if 0%{have_librdma} + --enable-rdma \ +%endif + --enable-seccomp \ --enable-snappy \ - --disable-sparse \ - --disable-strip \ - --enable-tpm \ - --enable-trace-backend=dtrace \ - --disable-vde \ - --disable-vhost-scsi \ - --disable-vxhs \ - --disable-virtfs \ - --disable-vnc-jpeg \ - --disable-vte \ - --enable-vnc-png \ - --enable-vnc-sasl \ - --enable-werror \ - --disable-xen \ - --disable-xfsctl \ - --enable-gnutls \ - --enable-gcrypt \ - --disable-nettle \ - --enable-attr \ - --disable-bsd-user \ - --disable-cocoa \ - --enable-debug-info \ - --disable-guest-agent-msi \ - --disable-hax \ - --disable-jemalloc \ - --disable-linux-user \ - --enable-modules \ - --disable-netmap \ - --disable-replication \ +%if 0%{have_spice} + --enable-smartcard \ + --enable-spice \ + --enable-spice-protocol \ +%endif --enable-system \ + --enable-tcg \ --enable-tools \ - --disable-user \ + --enable-tpm \ + --enable-trace-backend=dtrace \ +%if 0%{have_usbredir} + --enable-usb-redir \ +%endif + --enable-virtiofsd \ + --enable-vhost-kernel \ --enable-vhost-net \ + --enable-vhost-user \ + --enable-vhost-user-blk-server \ + --enable-vhost-vdpa \ --enable-vhost-vsock \ --enable-vnc \ - 
--enable-mpath \ - --disable-xen-pci-passthrough \ - --enable-tcg \ - --with-git=git \ - --disable-sanitizers \ - --disable-hvf \ - --disable-whpx \ - --enable-malloc-trim \ - --disable-membarrier \ - --disable-vhost-crypto \ - --disable-libxml2 \ - --enable-capstone \ - --disable-git-update \ - --disable-crypto-afalg \ - --disable-debug-mutex \ - --disable-bochs \ - --disable-cloop \ - --disable-dmg \ - --disable-qcow1 \ - --disable-vdi \ - --disable-vvfat \ - --disable-qed \ - --disable-parallels \ - --disable-sheepdog \ - --disable-auth-pam \ - --enable-iconv \ - --disable-lzfse \ - --enable-vhost-kernel \ - --disable-virglrenderer \ - --without-default-devices + --enable-vnc-png \ + --enable-vnc-sasl \ + --enable-werror \ + --enable-xkbcommon \ + --without-default-devices \ + --with-devices-%{kvm_target}=%{kvm_target}-rh-devices + -echo "config-host.mak contents:" +echo "qemu-kvm config-host.mak contents:" echo "===" cat config-host.mak echo "===" @@ -1719,15 +787,15 @@ make V=1 %{?_smp_mflags} $buildldflags # Setup back compat qemu-kvm binary %{__python3} scripts/tracetool.py --backend dtrace --format stap \ --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ - trace-events-all > qemu-kvm.stp + trace/trace-events-all qemu-kvm.stp %{__python3} scripts/tracetool.py --backends=dtrace --format=log-stap \ --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ - trace-events-all > qemu-kvm-log.stp + trace/trace-events-all qemu-kvm-log.stp %{__python3} scripts/tracetool.py --backend dtrace --format simpletrace-stap \ --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ - trace-events-all > qemu-kvm-simpletrace.stp + trace/trace-events-all qemu-kvm-simpletrace.stp cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm @@ -1739,7 +807,10 @@ gcc %{SOURCE35} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o udev-kvm-check cp pc-bios/s390-ccw/s390-ccw.img pc-bios/s390-ccw/s390-netboot.img pc-bios/ %endif +popd + %install +pushd 
%{qemu_kvm_build} %define _udevdir %(pkg-config --variable=udevdir udev) %define _udevrulesdir %{_udevdir}/rules.d @@ -1768,7 +839,7 @@ mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name} # Create new directories and put them all under tests-src mkdir -p $RPM_BUILD_ROOT%{testsdir}/python mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests -mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/acceptance +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/avocado mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests mkdir -p $RPM_BUILD_ROOT%{testsdir}/scripts/qmp @@ -1779,15 +850,16 @@ install -m 0644 scripts/dump-guest-memory.py \ $RPM_BUILD_ROOT%{_datadir}/%{name} # Install avocado_qemu tests -cp -R tests/acceptance/* $RPM_BUILD_ROOT%{testsdir}/tests/acceptance/ +cp -R tests/avocado/* $RPM_BUILD_ROOT%{testsdir}/tests/avocado/ # Install qemu.py and qmp/ scripts required to run avocado_qemu tests cp -R python/qemu $RPM_BUILD_ROOT%{testsdir}/python cp -R scripts/qmp/* $RPM_BUILD_ROOT%{testsdir}/scripts/qmp -install -p -m 0755 tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ +install -p -m 0755 ../tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ # Install qemu-iotests -cp -R tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ +cp -R ../tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ +cp -ur tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ # Avoid ambiguous 'python' interpreter name find $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env \(python\|python3\)+%{__python3}+' {} \; find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env \(python\|python3\)+%{__python3}+' {} \; @@ -1802,6 +874,10 @@ make DESTDIR=$RPM_BUILD_ROOT \ mkdir -p $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset +# Move vhost-user JSON files to the standard "qemu" directory +mkdir -p $RPM_BUILD_ROOT%{_datadir}/qemu +mv $RPM_BUILD_ROOT%{_datadir}/%{name}/vhost-user 
$RPM_BUILD_ROOT%{_datadir}/qemu/ + # Install qemu-guest-agent service and udev rules install -m 0644 %{_sourcedir}/qemu-guest-agent.service %{buildroot}%{_unitdir} install -m 0644 %{_sourcedir}/qemu-ga.sysconfig %{buildroot}%{_sysconfdir}/sysconfig/qemu-ga @@ -1830,14 +906,19 @@ install --preserve-timestamps --mode=0644 \ mkdir -p -v $RPM_BUILD_ROOT%{_localstatedir}/log/qemu-ga/ mkdir -p $RPM_BUILD_ROOT%{_bindir} -install -c -m 0755 qemu-ga ${RPM_BUILD_ROOT}%{_bindir}/qemu-ga +install -c -m 0755 qga/qemu-ga ${RPM_BUILD_ROOT}%{_bindir}/qemu-ga mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8 -install -m 0755 qemu-kvm $RPM_BUILD_ROOT%{_libexecdir}/ +install -m 0755 %{kvm_target}-softmmu/qemu-system-%{kvm_target} $RPM_BUILD_ROOT%{_libexecdir}/qemu-kvm install -m 0644 qemu-kvm.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ install -m 0644 qemu-kvm-log.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ install -m 0644 qemu-kvm-simpletrace.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -d -m 0755 "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/script.d" +install -c -m 0644 scripts/systemtap/script.d/qemu_kvm.stp "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/script.d/" +install -d -m 0755 "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/conf.d" +install -c -m 0644 scripts/systemtap/conf.d/qemu_kvm.conf "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/conf.d/" + rm $RPM_BUILD_ROOT/%{_datadir}/applications/qemu.desktop rm $RPM_BUILD_ROOT%{_bindir}/qemu-system-%{kvm_target} @@ -1849,7 +930,6 @@ rm $RPM_BUILD_ROOT%{_bindir}/elf2dmp # Install simpletrace install -m 0755 scripts/simpletrace.py $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py # Avoid ambiguous 'python' interpreter name -sed -i -e '1 s/python/python3/' $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool scripts/tracetool/*.py mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend @@ -1858,11 
+938,26 @@ mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py mkdir -p $RPM_BUILD_ROOT%{qemudocdir} -install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} Changelog README.rst README.systemtap COPYING COPYING.LIB LICENSE docs/interop/qmp-spec.txt +install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} ../README.rst ../README.systemtap ../COPYING ../COPYING.LIB ../LICENSE ../docs/interop/qmp-spec.txt + +# Rename man page +pushd ${RPM_BUILD_ROOT}%{_mandir}/man1/ +for fn in qemu.1*; do + mv $fn "qemu-kvm${fn#qemu}" +done +popd chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man1/* chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man8/* -install -D -p -m 0644 qemu.sasl $RPM_BUILD_ROOT%{_sysconfdir}/sasl2/%{name}.conf +install -D -p -m 0644 ../qemu.sasl $RPM_BUILD_ROOT%{_sysconfdir}/sasl2/%{name}.conf + +# Install keymaps +pushd pc-bios/keymaps +for kmp in *; do + install $kmp ${RPM_BUILD_ROOT}%{_datadir}/%{name}/keymaps/ +done +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/keymaps/*.stamp +popd # Provided by package openbios rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-ppc @@ -1880,6 +975,7 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-zipl.rom rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot.e500 rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu_vga.ndrv rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/skiboot.lid +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qboot.rom rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-ccw.img rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-netboot.img @@ -1891,23 +987,34 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/firmware rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-*.fd rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-licenses.txt +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-sifive_u-fw_jump.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-virt-fw_jump.bin +rm -rf 
${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-generic-fw_dynamic.* rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-sifive_u-fw_jump.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-generic-fw_dynamic.* rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu-nsis.bmp +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/npcm7xx_bootrom.bin rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/ui-spice-app.so +# Remove virtfs-proxy-helper files +rm -rf ${RPM_BUILD_ROOT}%{_libexecdir}/virtfs-proxy-helper +rm -rf ${RPM_BUILD_ROOT}%{_mandir}/man1/virtfs-proxy-helper* + %ifarch s390x # Use the s390-*.imgs that we've just built, not the pre-built ones install -m 0644 pc-bios/s390-ccw/s390-ccw.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ install -m 0644 pc-bios/s390-ccw/s390-netboot.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ +%else + rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/hw-s390x-virtio-gpu-ccw.so %endif %ifnarch x86_64 rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/kvmvapic.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/linuxboot.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/multiboot.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/multiboot_dma.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pvh.bin %endif @@ -1986,20 +1093,39 @@ install -m 0644 %{_sourcedir}/qemu-pr-helper.socket %{buildroot}%{_unitdir} find $RPM_BUILD_ROOT -name '*.la' -or -name '*.a' | xargs rm -f -# We need to make the block device modules executable else -# RPM won't pick up their dependencies. -chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/block-*.so +# We need to make the block device modules and other qemu SO files executable +# otherwise RPM won't pick up their dependencies. 
+chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/*.so # Remove buildinfo rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/system/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/tools/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/user/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/devel/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/.buildinfo # Remove spec rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs +popd + %check +pushd %{qemu_kvm_build} +echo "Testing qemu-kvm-build" export DIFF=diff; make check V=1 +popd + +%post -n qemu-kvm-common +%systemd_post ksm.service +%systemd_post ksmtuned.service + +getent group kvm >/dev/null || groupadd -g 36 -r kvm +getent group qemu >/dev/null || groupadd -g 107 -r qemu +getent passwd qemu >/dev/null || \ +useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ + -c "qemu user" qemu -%post -n qemu-kvm-core # load kvm modules now, so we can make sure no reboot is needed. # If there's already a kvm module installed, we don't mess with it %udev_rules_update @@ -2013,24 +1139,12 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : fi %endif -%if %{have_kvm_setup} -%preun -n qemu-kvm-core -%systemd_preun kvm-setup.service -%endif - -%post -n qemu-kvm-common -%systemd_post ksm.service -%systemd_post ksmtuned.service - -getent group kvm >/dev/null || groupadd -g 36 -r kvm -getent group qemu >/dev/null || groupadd -g 107 -r qemu -getent passwd qemu >/dev/null || \ -useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - -c "qemu user" qemu - %preun -n qemu-kvm-common %systemd_preun ksm.service %systemd_preun ksmtuned.service +%if %{have_kvm_setup} +%systemd_preun kvm-setup.service +%endif %postun -n qemu-kvm-common %systemd_postun_with_restart ksm.service @@ -2046,24 +1160,30 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %files # Deliberately empty - -%files -n qemu-kvm-common +%files -n qemu-kvm-docs %defattr(-,root,root) %dir %{qemudocdir} -%doc 
%{qemudocdir}/Changelog +%doc %{qemudocdir}/genindex.html +%doc %{qemudocdir}/search.html +%doc %{qemudocdir}/objects.inv +%doc %{qemudocdir}/searchindex.js %doc %{qemudocdir}/README.rst -%doc %{qemudocdir}/qemu-doc.html %doc %{qemudocdir}/COPYING %doc %{qemudocdir}/COPYING.LIB %doc %{qemudocdir}/LICENSE %doc %{qemudocdir}/README.systemtap %doc %{qemudocdir}/qmp-spec.txt -%doc %{qemudocdir}/qemu-doc.txt -%doc %{qemudocdir}/qemu-ga-ref.html -%doc %{qemudocdir}/qemu-ga-ref.txt -%doc %{qemudocdir}/qemu-qmp-ref.html -%doc %{qemudocdir}/qemu-qmp-ref.txt %doc %{qemudocdir}/interop/* +%doc %{qemudocdir}/index.html +%doc %{qemudocdir}/about/* +%doc %{qemudocdir}/system/* +%doc %{qemudocdir}/tools/* +%doc %{qemudocdir}/user/* +%doc %{qemudocdir}/devel/* +%doc %{qemudocdir}/_static/* + +%files -n qemu-kvm-common +%defattr(-,root,root) %{_mandir}/man7/qemu-qmp-ref.7* %{_mandir}/man7/qemu-cpu-models.7* %{_bindir}/qemu-keymap @@ -2073,6 +1193,8 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_unitdir}/qemu-pr-helper.service %{_unitdir}/qemu-pr-helper.socket %{_mandir}/man7/qemu-ga-ref.7* +%{_mandir}/man8/qemu-pr-helper.8* +%{_mandir}/man1/virtiofsd.1* %dir %{_datadir}/%{name}/ %{_datadir}/%{name}/keymaps/ @@ -2099,13 +1221,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/tracetool/backend/*.py* %{_datadir}/%{name}/tracetool/format/*.py* -%files -n qemu-kvm-core -%defattr(-,root,root) %ifarch x86_64 %{_datadir}/%{name}/bios.bin %{_datadir}/%{name}/bios-256k.bin %{_datadir}/%{name}/linuxboot.bin %{_datadir}/%{name}/multiboot.bin + %{_datadir}/%{name}/multiboot_dma.bin %{_datadir}/%{name}/kvmvapic.bin %{_datadir}/%{name}/sgabios.bin %{_datadir}/%{name}/pvh.bin @@ -2129,17 +1250,24 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/efi-pcnet.rom %{_datadir}/%{name}/efi-rtl8139.rom %{_datadir}/%{name}/efi-ne2k_pci.rom + %{_libdir}/qemu-kvm/hw-display-virtio-vga.so %endif + 
%{_libdir}/%{name}/hw-display-virtio-gpu-gl.so +%ifnarch s390x + %{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so +%endif +%ifarch x86_64 %{power64} + %{_libdir}/%{name}/hw-display-virtio-vga-gl.so +%endif + %{_libdir}/%{name}/accel-qtest-%{kvm_target}.so +%ifarch x86_64 + %{_libdir}/%{name}/accel-tcg-%{kvm_target}.so +%endif +%{_libdir}/%{name}/hw-usb-host.so %{_datadir}/icons/* %{_datadir}/%{name}/linuxboot_dma.bin %{_datadir}/%{name}/dump-guest-memory.py* -%{_libexecdir}/qemu-kvm -%{_datadir}/systemtap/tapset/qemu-kvm.stp -%{_datadir}/systemtap/tapset/qemu-kvm-log.stp %{_datadir}/%{name}/trace-events-all -%{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp -%{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp -%{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf %if 0%{have_kvm_setup} %{_prefix}/lib/systemd/kvm-setup %{_unitdir}/kvm-setup.service @@ -2149,15 +1277,38 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_sysconfdir}/security/limits.d/95-kvm-memlock.conf %endif %{_libexecdir}/virtiofsd -%{_datadir}/%{name}/vhost-user/50-qemu-virtiofsd.json + +# This is the standard location for vhost-user JSON files defined in the +# vhost-user specification for interoperability with other software. Unlike +# most other paths we use it's "qemu" instead of "qemu-kvm". 
+%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json + +%files -n qemu-kvm-core +%defattr(-,root,root) +%{_libexecdir}/qemu-kvm +%{_datadir}/systemtap/tapset/qemu-kvm.stp +%{_datadir}/systemtap/tapset/qemu-kvm-log.stp +%{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp +%{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp +%{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf + +%{_libdir}/qemu-kvm/hw-display-virtio-gpu.so +%ifarch s390x + %{_libdir}/qemu-kvm/hw-s390x-virtio-gpu-ccw.so +%else + %{_libdir}/qemu-kvm/hw-display-virtio-gpu-pci.so +%endif %files -n qemu-img %defattr(-,root,root) %{_bindir}/qemu-img %{_bindir}/qemu-io %{_bindir}/qemu-nbd +%{_bindir}/qemu-storage-daemon %{_mandir}/man1/qemu-img.1* %{_mandir}/man8/qemu-nbd.8* +%{_mandir}/man1/qemu-storage-daemon.1* +%{_mandir}/man7/qemu-storage-daemon-qmp-ref.7* %files -n qemu-guest-agent %defattr(-,root,root,-) @@ -2192,1161 +1343,1826 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %files block-ssh %{_libdir}/qemu-kvm/block-ssh.so +%if 0%{have_spice} +%files ui-spice + %{_libdir}/qemu-kvm/hw-usb-smartcard.so + %{_libdir}/qemu-kvm/audio-spice.so + %{_libdir}/qemu-kvm/ui-spice-core.so + %{_libdir}/qemu-kvm/chardev-spice.so +%ifarch x86_64 + %{_libdir}/qemu-kvm/hw-display-qxl.so +%endif +%endif -%changelog -* Wed Aug 18 2021 Danilo Cesar Lemes de Paula - 4.2.0-58.el8 -- kvm-virtiofsd-Disable-remote-posix-locks-by-default.patch [bz#1967496] -- kvm-virtiofsd-Fix-the-help-message-of-posix-lock.patch [bz#1967496] -- Resolves: bz#1967496 - ([virtio-fs] nfs/xfstest generic/089 generic/478 generic/632 failed) - -* Wed Aug 04 2021 Miroslav Rezanina - 4.2.0-57 -- kvm-aio-wait-delegate-polling-of-main-AioContext-if-BQL-.patch [bz#1969848] -- kvm-async-use-explicit-memory-barriers.patch [bz#1969848] -- Resolves: bz#1969848 - (qemu-img convert hangs on aarch64) - -* Thu Jul 29 2021 Miroslav Rezanina - 4.2.0-56 -- kvm-glib-compat-add-g_unix_get_passwd_entry_qemu.patch [bz#1967716] -- 
kvm-qga-add-ssh-add-remove-authorized-keys.patch [bz#1967716] -- kvm-qga-add-reset-argument-to-ssh-add-authorized-keys.patch [bz#1967716] -- kvm-qga-add-ssh-get-authorized-keys.patch [bz#1967716] -- kvm-Add-mtod_check.patch [bz#1970819 bz#1970835 bz#1970843 bz#1970853] -- kvm-bootp-limit-vendor-specific-area-to-input-packet-mem.patch [bz#1970819 bz#1970835 bz#1970843 bz#1970853] -- kvm-bootp-check-bootp_input-buffer-size.patch [bz#1970819] -- kvm-upd6-check-udp6_input-buffer-size.patch [bz#1970835] -- kvm-tftp-check-tftp_input-buffer-size.patch [bz#1970843] -- kvm-tftp-introduce-a-header-structure.patch [bz#1970819 bz#1970835 bz#1970843 bz#1970853] -- kvm-udp-check-upd_input-buffer-size.patch [bz#1970853] -- kvm-Fix-DHCP-broken-in-libslirp-v4.6.0.patch [bz#1970819 bz#1970835 bz#1970843 bz#1970853] -- kvm-net-check-if-the-file-descriptor-is-valid-before-usi.patch [bz#1982134] -- kvm-net-detect-errors-from-probing-vnet-hdr-flag-for-TAP.patch [bz#1982134] -- Resolves: bz#1967716 - (RFE: rebuild guest agent to include public ssh injection api support) -- Resolves: bz#1970819 - (CVE-2021-3592 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (bootp) [rhel-8]) -- Resolves: bz#1970835 - (CVE-2021-3593 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp6) [rhel-8]) -- Resolves: bz#1970843 - (CVE-2021-3595 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (tftp) [rhel-8]) -- Resolves: bz#1970853 - (CVE-2021-3594 virt:rhel/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp) [rhel-8]) -- Resolves: bz#1982134 - (QEMU core dump while booting guest with a non-exist fd on tap) - -* Fri Jul 23 2021 Danilo Cesar Lemes de Paula - 4.2.0-55.el8 -- kvm-net-introduce-qemu_receive_packet.patch [bz#1932917] -- kvm-e1000-switch-to-use-qemu_receive_packet-for-loopback.patch [bz#1932917] -- 
kvm-dp8393x-switch-to-use-qemu_receive_packet-for-loopba.patch [bz#1932917] -- kvm-sungem-switch-to-use-qemu_receive_packet-for-loopbac.patch [bz#1932917] -- kvm-tx_pkt-switch-to-use-qemu_receive_packet_iov-for-loo.patch [bz#1932917] -- kvm-rtl8139-switch-to-use-qemu_receive_packet-for-loopba.patch [bz#1932917] -- kvm-pcnet-switch-to-use-qemu_receive_packet-for-loopback.patch [bz#1932917] -- kvm-cadence_gem-switch-to-use-qemu_receive_packet-for-lo.patch [bz#1932917] -- kvm-lan9118-switch-to-use-qemu_receive_packet-for-loopba.patch [bz#1932917] -- Resolves: bz#1932917 - (CVE-2021-3416 virt:rhel/qemu-kvm: QEMU: net: infinite loop in loopback mode may lead to stack overflow) - -* Thu Jul 22 2021 Danilo Cesar Lemes de Paula - 4.2.0-54.el8 -- kvm-redhat-Fix-unversioned-Obsoletes-warning.patch [bz#1967329] -- Resolves: bz#1967329 - (Make qemu-kvm use versioned obsoletes for qemu-kvm-ma and qemu-kvm-rhev) +%if 0%{have_opengl} +%files ui-opengl + %{_libdir}/qemu-kvm/ui-egl-headless.so + %{_libdir}/qemu-kvm/ui-opengl.so +%endif -* Fri Jul 02 2021 Danilo Cesar Lemes de Paula - 4.2.0-53.el8 -- kvm-virtiofsd-Whitelist-fchmod.patch [bz#1967914] -- kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch [bz#1957866] -- kvm-Compress-lines-for-immediate-return.patch [bz#1970912] -- kvm-file-posix-Handle-EINVAL-fallocate-return-value.patch [bz#1970912] -- Resolves: bz#1967914 - ([virtio-fs] virtiofsd quit when coping file to a folder in virtio-fs mounted volume(windows guest)) -- Resolves: bz#1957866 - (RHEL8.4 - EEH capability disabled on KVM guest and recovery of PCI passthru device fails(CX5 / mlx5_core) (qemu-kvm)) -- Resolves: bz#1970912 - (Deployment fails with "Invalid or missing agent token received") - -* Fri Jun 11 2021 Danilo Cesar Lemes de Paula - 4.2.0-52.el8 -- kvm-file-posix-Mitigate-file-fragmentation-with-extent-s.patch [bz#1877163] -- kvm-block-file-posix-Fix-problem-with-fallocate-PUNCH_HO.patch [bz#1944861] -- 
kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch [bz#1969768] -- kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch [bz#1969768] -- Resolves: bz#1877163 - ([FJ 8.3 Bug] The progress bar of the "virt-clone --nonsparse" command shows the progress rate exceeding 100%.) -- Resolves: bz#1944861 - (Qemu-img convert fails when source image is on gpfs) -- Resolves: bz#1969768 - ([ppc64le] Hotplug vcpu device hit call trace:[qemu output] KVM: unknown exit, hardware reason 7fff9ce87ed8) +%if %{have_usbredir} +%files hw-usbredir + %{_libdir}/qemu-kvm/hw-usb-redirect.so +%endif -* Tue May 25 2021 Danilo Cesar Lemes de Paula - 4.2.0-51.el8 -- kvm-linux-headers-Add-VFIO_CCW_REQ_IRQ_INDEX.patch [bz#1940450] -- kvm-vfio-ccw-Connect-the-device-request-notifier.patch [bz#1940450] -- kvm-pc-bios-s390-ccw-fix-off-by-one-error.patch [bz#1942880] -- kvm-pc-bios-s390-ccw-break-loop-if-a-null-block-number-i.patch [bz#1942880] -- kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch [bz#1942880] -- Resolves: bz#1940450 - (RHEL8.5 - Mediated Device already in use by same domain we are booting (vfio-ccw/Multipath Testing) (kvm) - qemu-kvm part (also has kernel and libvirt parts)) -- Resolves: bz#1942880 - (RHEL8.4 Nightly[0322] - KVM guest fails to find zipl boot menu index (qemu-kvm)) - -* Wed May 05 2021 Danilo Cesar Lemes de Paula - 4.2.0-50.el8 -- kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch [bz#1925430] -- kvm-libqos-usb-hcd-ehci-use-32-bit-write-for-config-regi.patch [bz#1842478] -- kvm-libqos-pci-pc-use-32-bit-write-for-EJ-register.patch [bz#1842478] -- kvm-memory-Revert-memory-accept-mismatching-sizes-in-mem.patch [bz#1842478] -- kvm-acpi-accept-byte-and-word-access-to-core-ACPI-regist.patch [bz#1842478] -- kvm-xhci-fix-valid.max_access_size-to-access-address-reg.patch [bz#1842478] -- kvm-softmmu-memory-Log-invalid-memory-accesses.patch [bz#1842478] -- Resolves: bz#1925430 - (CVE-2021-20221 virt:rhel/qemu-kvm: qemu: out-of-bound 
heap buffer access via an interrupt ID field [rhel-8.5.0]) -- Resolves: bz#1842478 - (CVE-2020-13754 virt:rhel/qemu-kvm: QEMU: msix: OOB access during mmio operations may lead to DoS [rhel-8.5.0]) - -* Wed Apr 28 2021 Danilo Cesar Lemes de Paula - 4.2.0-49.el8 -- kvm-net-remove-an-assert-call-in-eth_get_gso_type.patch [bz#1892350] -- kvm-e1000-fail-early-for-evil-descriptor.patch [bz#1930092] -- kvm-net-forbid-the-reentrant-RX.patch [bz#1859175] -- kvm-qemu-img-convert-Don-t-pre-zero-images.patch [bz#1855250] -- kvm-audio-audio_generic_get_buffer_in-should-honor-size.patch [bz#1932823] -- Resolves: bz#1892350 - (CVE-2020-27617 virt:rhel/qemu-kvm: QEMU: net: an assert failure via eth_get_gso_type [rhel-8.5.0]) + +%changelog +* Tue Feb 22 2022 Jon Maloy - 6.2.0-8 +- kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch [bz#2035185] +- kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch [bz#2035185] +- kvm-iotests.py-Add-QemuStorageDaemon-class.patch [bz#2035185] +- kvm-iotests-281-Test-lingering-timers.patch [bz#2035185] +- kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch [bz#2035185] +- kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch [bz#2035185] +- Resolves: bz#2035185 + (Qemu core dump when start guest with nbd node or do block jobs to nbd node) + +* Tue Feb 15 2022 Jon Maloy - 6.2.0-7 +- kvm-numa-Enable-numa-for-SGX-EPC-sections.patch [bz#1518984] +- kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch [bz#1518984] +- kvm-doc-Add-the-SGX-numa-description.patch [bz#1518984] +- kvm-Enable-SGX-RH-Only.patch [bz#1518984] +- kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch [bz#1518984] +- kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch [bz#2041480] +- kvm-iotests-block-status-cache-New-test.patch [bz#2041480] +- Resolves: bz#1518984 + ([Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support) +- Resolves: bz#2041480 + ([incremental_backup] Inconsistent block status reply in qemu-nbd) + +* Tue 
Feb 08 2022 Jon Maloy - 6.2.0-6 +- kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch [bz#2046198] +- kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch [bz#2033279] +- kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch [bz#2021778 bz#2036178] +- kvm-iotests-stream-error-on-reset-New-test.patch [bz#2021778 bz#2036178] +- kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch [bz#2037135] +- kvm-block-rbd-workaround-for-ceph-issue-53784.patch [bz#2037135] +- Resolves: bz#2046198 + (CVE-2022-0358 virt:av/qemu-kvm: QEMU: virtiofsd: potential privilege escalation via CVE-2018-13405 [rhel-8.6]) +- Resolves: bz#2033279 + ([wrb][qemu-kvm 6.2] The hot-unplugged device can not be hot-plugged back) +- Resolves: bz#2021778 + (Qemu core dump when do full backup during system reset) +- Resolves: bz#2036178 + (Qemu core dumped when do block-stream to a snapshot node on non-enough space storage) +- Resolves: bz#2037135 + (Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD) + +* Tue Jan 25 2022 Jon Maloy - 6.2.0-5 +- kvm-acpi-validate-hotplug-selector-on-access.patch [bz#2036580] +- kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch [bz#2031035] +- Resolves: bz#2036580 + (CVE-2021-4158 virt:rhel/qemu-kvm: QEMU: NULL pointer dereference in pci_write() in hw/acpi/pcihp.c [rhel-8]) +- Resolves: bz#2031035 + (Add rhel-8.6.0 machine types for RHEL 8.6 [x86]) + +* Mon Jan 17 2022 Jon Maloy - 6.2.0-4 +- kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch [bz#2031039] +- kvm-hw-arm-virt-Register-its-as-a-class-property.patch [bz#2031039] +- kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch [bz#2031039] +- kvm-hw-arm-virt-Add-8.6-machine-type.patch [bz#2031039] +- kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch [bz#2031039] +- kvm-rhel-machine-types-x86-set-prefer_sockets.patch [bz#2029582] +- Resolves: bz#2031039 + (Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64]) +- Resolves: bz#2029582 + ([8.6] 
machine types: 6.2: Fix prefer_sockets) + +* Mon Jan 03 2022 Jon Maloy - 6.2.0-2 +- kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch [bz#2005325] +- kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch [bz#2031041] +- Resolves: bz#2005325 + (Fix CPU Model for new IBM Z Hardware - qemu part) +- Resolves: bz#2031041 + (Add rhel-8.6.0 machine types for RHEL 8.6 [ppc64le]) + +* Thu Dec 16 2021 Jon Maloy - 6.2.0-1.el8 +- Rebase to qemu-kvm 6.2.0 +- Resolves bz#2027716 + +* Mon Nov 22 2021 Jon Maloy - 6.1.0-5 +- kvm-e1000-fix-tx-re-entrancy-problem.patch [bz#1930092] +- kvm-hw-scsi-scsi-disk-MODE_PAGE_ALLS-not-allowed-in-MODE.patch [bz#2020720] - Resolves: bz#1930092 (CVE-2021-20257 virt:rhel/qemu-kvm: QEMU: net: e1000: infinite loop while processing transmit descriptors [rhel-8.5.0]) -- Resolves: bz#1859175 - (CVE-2020-15859 virt:rhel/qemu-kvm: QEMU: net: e1000e: use-after-free while sending packets [rhel-8]) -- Resolves: bz#1855250 - (qemu-img convert uses possibly slow pre-zeroing on block storage) -- Resolves: bz#1932823 - (after upgrade from 4.3 to 4.4 audio stops working in guests after couple of seconds) - -* Tue Mar 16 2021 Danilo Cesar Lemes de Paula - 4.2.0-48.el8 -- kvm-ide-atapi-check-logical-block-address-and-read-size-.patch [bz#1917451] -- Resolves: bz#1917451 - (CVE-2020-29443 virt:rhel/qemu-kvm: QEMU: ide: atapi: OOB access while processing read commands [rhel-8.4.0]) - -* Mon Mar 08 2021 Danilo Cesar Lemes de Paula - 4.2.0-47.el8 -- kvm-x86-cpu-Enable-AVX512_VP2INTERSECT-cpu-feature.patch [bz#1790620] -- kvm-target-i386-add-fast-short-REP-MOV-support.patch [bz#1790620] -- kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch [bz#1790620] -- kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch [bz#1790620] -- Resolves: bz#1790620 - ([RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Slow Train) - -* Wed Mar 03 2021 Danilo Cesar Lemes de Paula - 4.2.0-46.el8 -- kvm-redhat-makes-qemu-respect-system-s-crypto-profile.patch [bz#1902960] 
-- kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch [bz#1912891] -- Resolves: bz#1902960 +- Resolves: bz#2020720 + (CVE-2021-3930 virt:rhel/qemu-kvm: QEMU: off-by-one error in mode_sense_page() in hw/scsi/scsi-disk.c [rhel-8]) + +* Thu Oct 21 2021 Jon Maloy - 6.1.0-4 +- kvm-spec-Remove-qemu-kiwi-build.patch [bz#2002694] +- kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch [bz#1998947] +- Resolves: bz#2002694 + (remove qemu-kiwi rpm from qemu-kvm sources in rhel-8.6) +- Resolves: bz#1998947 + (Add machine type compatibility update for 6.1 rebase [aarch64]) + +* Tue Oct 12 2021 Jon Maloy - 6.1.0-3 +- kvm-virtio-net-fix-use-after-unmap-free-for-sg.patch [bz#1999221] +- Resolves: bz#1999221 + (CVE-2021-3748 virt:rhel/qemu-kvm: QEMU: virtio-net: heap use-after-free in virtio_net_receive_rcu [rhel-8]) + +* Fri Oct 01 2021 Jon Maloy - 6.1.0-2 +- kvm-qxl-fix-pre-save-logic.patch [bz#2002907] +- kvm-redhat-Define-hw_compat_rhel_8_5.patch [bz#1998949] +- kvm-redhat-Update-pseries-rhel8.5.0.patch [bz#1998949] +- kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch [bz#1998950] +- Resolves: bz#2002907 + (Unexpectedly failed when managedsave the guest which has qxl video device) +- Resolves: bz#1998949 + (Add machine type compatibility update for 6.1 rebase [ppc64le]) +- Resolves: bz#1998950 + (Add machine type compatibility update for 6.1 rebase [s390x]) + +* Wed Aug 25 2021 Danilo Cesar Lemes de Paula - 6.0.0-29.el8 +- kvm-file-posix-Cap-max_iov-at-IOV_MAX.patch [bz#1994494] +- kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch [bz#1974366] +- Resolves: bz#1994494 + (VM remains in paused state when trying to write on a resized disk resides on iscsi) +- Resolves: bz#1974366 + (Fail to set migrate incoming for 2nd time after the first time failed) + +* Wed Aug 18 2021 Danilo Cesar Lemes de Paula - 6.0.0-28.el8 +- kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch [bz#1946084] +- 
kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch [bz#1946084] +- kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch [bz#1946084] +- kvm-audio-Never-send-migration-section.patch [bz#1991671] +- Resolves: bz#1946084 + (qemu-img convert --bitmaps fail if a bitmap is inconsistent) +- Resolves: bz#1991671 + (vmstate differs between -audiodev and QEMU_AUDIO_DRV when no sound frontends devs present.) + +* Wed Aug 04 2021 Miroslav Rezanina - 6.0.0-27 +- kvm-migration-move-wait-unplug-loop-to-its-own-function.patch [bz#1976852] +- kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch [bz#1976852] +- kvm-aarch64-Add-USB-storage-devices.patch [bz#1974579] +- Resolves: bz#1976852 + ([failover vf migration] The failover vf will be unregistered if canceling the migration whose status is "wait-unplug") +- Resolves: bz#1974579 + (It's not possible to start installation from a virtual USB device on aarch64) + +* Thu Jul 29 2021 Miroslav Rezanina - 6.0.0-26 +- kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch [bz#1977798] +- kvm-migration-failover-reset-partially_hotplugged.patch [bz#1787194] +- kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch [bz#1959676] +- kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch [bz#1959729] +- kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch [bz#1924822] +- kvm-ratelimit-protect-with-a-mutex.patch [bz#1838221] +- kvm-Update-Linux-headers-to-5.13-rc4.patch [bz#1838221] +- kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch [bz#1838221] +- kvm-iothread-generalize-iothread_set_param-iothread_get_.patch [bz#1930286] +- kvm-iothread-add-aio-max-batch-parameter.patch [bz#1930286] +- kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch [bz#1930286] +- kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch [bz#1848881] +- Resolves: bz#1977798 + (RHEL8.5 guest network interface name changed after upgrade to qemu-6.0) +- Resolves: bz#1787194 + (After 
canceling the migration of a vm with VF which enables failover, using "migrate -d tcp:invalid uri" to re-migrating the vm will cause the VF in vm to be hot-unplug.) +- Resolves: bz#1959676 + (guest status is paused after loadvm on rhel8.5.0) +- Resolves: bz#1959729 + (SAP/3TB VM migration slowness [idle db]) +- Resolves: bz#1924822 + ([Intel 8.5 FEAT] qemu-kvm AVX2 VNNI - Fast Train) +- Resolves: bz#1838221 + ([Intel 8.5 FEAT] qemu-kvm Bus Lock VM Exit - Fast Train) +- Resolves: bz#1930286 + (randread and randrw regression with virtio-blk multi-queue) +- Resolves: bz#1848881 + (nvme:// block driver can exhaust IOMMU DMAs, hanging the VM, possible data loss) + +* Tue Jul 20 2021 Danilo Cesar Lemes de Paula - 6.0.0-25.el8 +- kvm-s390x-cpumodel-add-3931-and-3932.patch [bz#1976171] +- kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch [bz#1943653] +- kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch [bz#1943653] +- kvm-osdep-provide-ROUND_DOWN-macro.patch [bz#1943653] +- kvm-block-backend-align-max_transfer-to-request-alignmen.patch [bz#1943653] +- kvm-block-add-max_hw_transfer-to-BlockLimits.patch [bz#1943653] +- kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch [bz#1943653] +- Resolves: bz#1976171 + ([IBM 8.5 FEAT] CPU Model for new IBM Z Hardware - qemu part) +- Resolves: bz#1943653 + (RHV VM pauses due to 'qemu-kvm' getting EINVAL on i/o to a direct lun with scsi passthrough enabled) + +* Fri Jul 16 2021 Danilo Cesar Lemes de Paula - 6.0.0-24.el8 +- kvm-s390x-css-Introduce-an-ESW-struct.patch [bz#1968326] +- kvm-s390x-css-Split-out-the-IRB-sense-data.patch [bz#1968326] +- kvm-s390x-css-Refactor-IRB-construction.patch [bz#1968326] +- kvm-s390x-css-Add-passthrough-IRB.patch [bz#1968326] +- kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- 
kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-Fix-backends-without-multiqueue-support.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- Resolves: bz#1968326 + ([vfio_ccw] I/O error when checking format - dasdfmt requires --force in quick mode when passed through) +- Resolves: bz#1935014 + (qemu crash when attach vhost-user-blk-pci with option queue-size=4096) +- Resolves: bz#1935019 + (qemu guest failed boot when attach vhost-user-blk-pci with option iommu_platform=on) +- Resolves: bz#1935020 + (qemu guest failed boot when attach vhost-user-blk-pci with option packed=on) +- Resolves: bz#1935031 + (qemu guest failed boot when attach vhost-user-blk-pci with unmatched num-queues with qsd) + +* Thu Jul 08 2021 Danilo Cesar Lemes de Paula - 6.0.0-23.el8 +- kvm-Add-mtod_check.patch [bz#1970823 bz#1970842 bz#1970850 bz#1970858] +- kvm-bootp-limit-vendor-specific-area-to-input-packet-mem.patch [bz#1970823 bz#1970842 bz#1970850 bz#1970858] +- kvm-bootp-check-bootp_input-buffer-size.patch [bz#1970823] +- kvm-upd6-check-udp6_input-buffer-size.patch [bz#1970842] +- kvm-tftp-check-tftp_input-buffer-size.patch [bz#1970850] +- kvm-tftp-introduce-a-header-structure.patch [bz#1970823 bz#1970842 bz#1970850 bz#1970858] +- kvm-udp-check-upd_input-buffer-size.patch [bz#1970858] +- kvm-Fix-DHCP-broken-in-libslirp-v4.6.0.patch [bz#1970823 bz#1970842 bz#1970850 bz#1970858] +- kvm-redhat-use-the-standard-vhost-user-JSON-path.patch [bz#1804196] +- Resolves: bz#1970823 + 
(CVE-2021-3592 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (bootp) [rhel-av-8]) +- Resolves: bz#1970842 + (CVE-2021-3593 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp6) [rhel-av-8]) +- Resolves: bz#1970850 + (CVE-2021-3595 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (tftp) [rhel-av-8]) +- Resolves: bz#1970858 + (CVE-2021-3594 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp) [rhel-av-8]) +- Resolves: bz#1804196 + (inconsistent paths for interop json files) + +* Fri Jul 02 2021 Danilo Cesar Lemes de Paula - 6.0.0-22.el8 +- kvm-redhat-Expose-upstream-machines-pc-4.2-and-pc-2.11.patch [bz#1897923] +- kvm-redhat-Enable-FDC-device-for-upstream-machines-too.patch [bz#1897923] +- kvm-redhat-Add-hw_compat_4_2_extra-and-apply-to-upstream.patch [bz#1897923] +- kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch [bz#1789757] +- kvm-virtio-gpu-handle-partial-maps-properly.patch [bz#1932279] +- kvm-redhat-Fix-unversioned-Obsoletes-warning.patch [bz#1950405 bz#1967330] +- kvm-redhat-Move-qemu-kvm-docs-dependency-to-qemu-kvm.patch [bz#1950405 bz#1967330] +- kvm-redhat-introducting-qemu-kvm-hw-usbredir.patch [bz#1950405 bz#1967330] +- kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch [bz#1976015] +- Resolves: bz#1897923 + (support Live Migration from Ubuntu 18.04 i440fx to RHEL) +- Resolves: bz#1789757 + ([IBM 8.5 FEAT] Add machine option to enable secure VM support) +- Resolves: bz#1932279 + ([aarch64] qemu core dumped when using smmuv3 and iommu_platform enabling at virtio-gpu-pci) +- Resolves: bz#1950405 + (review qemu-kvm-core dependencies) +- Resolves: bz#1967330 + (Make qemu-kvm use versioned obsoletes for qemu-kvm-ma and qemu-kvm-rhev) +- Resolves: bz#1976015 + (spapr: Fix EEH capability issue on KVM guest for PCI passthru) + +* Wed Jun 23 
2021 Danilo Cesar Lemes de Paula - 6.0.0-21.el8 +- kvm-block-backend-add-drained_poll.patch [bz#1960137] +- kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch [bz#1960137] +- kvm-disable-CONFIG_USB_STORAGE_BOT.patch [bz#1866133] +- kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch [bz#1954750] +- kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch [bz#1954750] +- kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch [bz#1954750] +- Resolves: bz#1960137 + ([incremental backup] qemu-kvm hangs when Rebooting the VM during full backup) +- Resolves: bz#1866133 + (Disable usb-bot device in QEMU (unsupported)) +- Resolves: bz#1954750 + (firmware scheme for sev-es) + +* Mon Jun 21 2021 Danilo Cesar Lemes de Paula - 6.0.0-20.el8 +- kvm-x86-Add-x86-rhel8.5-machine-types.patch [bz#1957838] +- kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch [bz#1967603] +- kvm-yank-Unregister-function-when-using-TLS-migration.patch [bz#1964326] +- Resolves: bz#1957838 + (8.5 machine types for x86) +- Resolves: bz#1967603 + (Enable interrupt based asynchronous page fault mechanism by default) +- Resolves: bz#1964326 + (Qemu core dump when do tls migration via tcp protocol) + +* Fri Jun 11 2021 Danilo Cesar Lemes de Paula - 6.0.0-19.el8 +- kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch [bz#1965626] +- kvm-redhat-Install-the-s390-netboot.img-that-we-ve-built.patch [bz#1966463] +- kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch [bz#1967177] +- kvm-target-i386-sev-add-support-to-query-the-attestation.patch [bz#1957022] +- kvm-spapr-Don-t-hijack-current_machine-boot_order.patch [bz#1960119] +- kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch [bz#1942914] +- kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch [bz#1940731] +- kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch [bz#1940731] +- Resolves: bz#1965626 + (RHEL8.2 - QEMU BIOS fails to read stage2 loader (kvm)) +- Resolves: bz#1966463 + 
(Rebuild the s390-netboot.img for downstream instead of shipping the upstream image) +- Resolves: bz#1967177 + (QEMU 6.0.0 socket_get_fd() fails with the error "socket_get_fd: too many connections") +- Resolves: bz#1957022 + (SEV: Add support to query the attestation report) +- Resolves: bz#1960119 + ([regression]Failed to reset guest) +- Resolves: bz#1942914 + ([Hyper-V][RHEL8.4]Nested Hyper-V on KVM: On Intel CPU L1 2016 can not start with cpu model Skylake-Server-noTSX-IBRS or Skylake-Client-noTSX-IBRS) +- Resolves: bz#1940731 + ([ppc64le] Hotplug vcpu device hit call trace:[qemu output] KVM: unknown exit, hardware reason 7fff9ce87ed8) + +* Tue Jun 01 2021 Danilo Cesar Lemes de Paula - 6.0.0-18.el8 +- kvm-virtio-net-failover-add-missing-remove_migration_sta.patch [bz#1953045] +- kvm-hw-arm-virt-Add-8.5-machine-type.patch [bz#1957667] +- kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch [bz#1957667] +- kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch [bz#1957667] +- kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch [bz#1927108] +- kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch [bz#1927108] +- kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch [bz#1927108] +- kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch [bz#1927108] +- kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch [bz#1929720] +- Resolves: bz#1953045 + (qemu-kvm NULL pointer de-reference during migration at migrate_fd_connect ->...-> notifier_list_notify) +- Resolves: bz#1957667 + ([aarch64] Add 8.5 machine type) +- Resolves: bz#1927108 + (It's too slow to load scsi disk when use 384 vcpus) +- Resolves: bz#1929720 + ([aarch64] Handle vsmmuv3 IOTLB invalidation with non power of 2 size) + +* Tue May 25 2021 Danilo Cesar Lemes de Paula - 6.0.0-17.el8 +- kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch [bz#1951476] +- kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch [bz#1957834] +- 
kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch [bz#1957834] +- Resolves: bz#1951476 + ([s390x] RHEL AV 8.5 new machine type for s390x) +- Resolves: bz#1957834 + ([ppc64le] RHEL AV 8.5 new machine type for ppc64le) + +* Mon May 03 2021 Danilo Cesar Lemes de Paula - 6.0.0-16.el8 +- Rebase to qemu-kvm 6.0.0 + +* Wed Apr 28 2021 Danilo Cesar Lemes de Paula - 5.2.0-16.el8 +- kvm-virtio-pci-compat-page-aligned-ATS.patch [bz#1942362] +- Resolves: bz#1942362 + (Live migration with iommu from rhel8.3.1 to rhel8.4 fails: qemu-kvm: get_pci_config_device: Bad config data) + +* Mon Apr 12 2021 Danilo Cesar Lemes de Paula - 5.2.0-15.el8_4 +- kvm-block-Simplify-qmp_block_resize-error-paths.patch [bz#1903511] +- kvm-block-Fix-locking-in-qmp_block_resize.patch [bz#1903511] +- kvm-block-Fix-deadlock-in-bdrv_co_yield_to_drain.patch [bz#1903511] +- Resolves: bz#1903511 + (no response on QMP command 'block_resize') + +* Sat Mar 20 2021 Danilo Cesar Lemes de Paula - 5.2.0-14.el8 +- kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch [bz#1937004] +- kvm-block-export-fix-blk_size-double-byteswap.patch [bz#1937004] +- kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch [bz#1937004] +- kvm-block-export-fix-vhost-user-blk-export-sector-number.patch [bz#1937004] +- kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch [bz#1937004] +- kvm-block-export-port-virtio-blk-read-write-range-check.patch [bz#1937004] +- kvm-spec-ui-spice-sub-package.patch [bz#1936373] +- kvm-spec-ui-opengl-sub-package.patch [bz#1936373] +- Resolves: bz#1937004 + (vhost-user-blk server endianness and input validation fixes) +- Resolves: bz#1936373 + (move spice & opengl modules to rpm subpackages) + +* Tue Mar 16 2021 Danilo Cesar Lemes de Paula - 5.2.0-13.el8 +- kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch [bz#1934158] +- Resolves: bz#1934158 + (Windows guest loses network connectivity when NIC was configured with static IP) + +* Mon Mar 15 2021 Danilo Cesar Lemes de Paula - 
5.2.0-12.el8 +- kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch [bz#1927530] +- kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch [bz#1927530] +- kvm-scsi-introduce-scsi_sense_from_errno.patch [bz#1927530] +- kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch [bz#1927530] +- kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch [bz#1927530] +- kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch [bz#1936948] +- Resolves: bz#1927530 + (RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning) +- Resolves: bz#1936948 + (CVE-2021-20221 virt:av/qemu-kvm: qemu: out-of-bound heap buffer access via an interrupt ID field [rhel-av-8.4.0]) + +* Mon Mar 08 2021 Danilo Cesar Lemes de Paula - 5.2.0-11.el8 +- kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch [bz#1932190] +- kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch [bz#1932190] +- kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch [bz#1935071] +- kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch [bz#1935071] +- Resolves: bz#1932190 + (Timeout when dump the screen from 2nd VGA) +- Resolves: bz#1935071 + (CVE-2021-20263 virt:8.4/qemu-kvm: QEMU: virtiofsd: 'security.capabilities' is not dropped with xattrmap option [rhel-av-8]) + +* Wed Mar 03 2021 Danilo Cesar Lemes de Paula - 5.2.0-10.el8 +- kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch [bz#1930757] +- kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch [bz#1930757] +- kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch [bz#1930757] +- kvm-failover-fix-indentantion.patch [bz#1819991] +- kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch [bz#1819991] +- kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch [bz#1819991] +- kvm-failover-Remove-unused-parameter.patch [bz#1819991] +- 
kvm-failover-Remove-external-partially_hotplugged-proper.patch [bz#1819991] +- kvm-failover-qdev_device_add-returns-err-or-dev-set.patch [bz#1819991] +- kvm-failover-Rename-bool-to-failover_primary_hidden.patch [bz#1819991] +- kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch [bz#1819991] +- kvm-failover-Remove-primary_device_opts.patch [bz#1819991] +- kvm-failover-remove-standby_id-variable.patch [bz#1819991] +- kvm-failover-Remove-primary_device_dict.patch [bz#1819991] +- kvm-failover-Remove-memory-leak.patch [bz#1819991] +- kvm-failover-simplify-virtio_net_find_primary.patch [bz#1819991] +- kvm-failover-should_be_hidden-should-take-a-bool.patch [bz#1819991] +- kvm-failover-Rename-function-to-hide_device.patch [bz#1819991] +- kvm-failover-virtio_net_connect_failover_devices-does-no.patch [bz#1819991] +- kvm-failover-Rename-to-failover_find_primary_device.patch [bz#1819991] +- kvm-failover-simplify-qdev_device_add-failover-case.patch [bz#1819991] +- kvm-failover-simplify-qdev_device_add.patch [bz#1819991] +- kvm-failover-make-sure-that-id-always-exist.patch [bz#1819991] +- kvm-failover-remove-failover_find_primary_device-error-p.patch [bz#1819991] +- kvm-failover-split-failover_find_primary_device_id.patch [bz#1819991] +- kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch [bz#1819991] +- kvm-failover-Caller-of-this-two-functions-already-have-p.patch [bz#1819991] +- kvm-failover-simplify-failover_unplug_primary.patch [bz#1819991] +- kvm-failover-Remove-primary_dev-member.patch [bz#1819991] +- kvm-virtio-net-add-missing-object_unref.patch [bz#1819991] +- kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch [bz#1926785] +- kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch [bz#1926785] +- Resolves: bz#1930757 + (Allow control of block-dirty-bitmap persistence via 'block-bitmap-mapping') +- Resolves: bz#1819991 + (Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug) +- Resolves: 
bz#1926785 + ([RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Fast Train) + +* Mon Mar 01 2021 Danilo Cesar Lemes de Paula - 5.2.0-9.el8 +- kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch [bz#1901323] +- kvm-docs-add-qemu-storage-daemon-1-man-page.patch [bz#1901323] +- kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch [bz#1901323] +- kvm-qemu-storage-daemon-Enable-object-add.patch [bz#1901323] +- kvm-spec-Package-qemu-storage-daemon.patch [bz#1901323] +- kvm-default-configs-Enable-vhost-user-blk.patch [bz#1930033] +- kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch [bz#1925345] +- kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch [bz#1917654] +- Resolves: bz#1901323 + (QSD (QEMU Storage Daemon): basic support - TechPreview) +- Resolves: bz#1930033 + (enable vhost-user-blk device) +- Resolves: bz#1925345 + (qemu-nbd needs larger backlog for Unix socket listen()) +- Resolves: bz#1917654 + ([failover vf migration][RHEL84 vm] After start a vm with a failover vf + a failover virtio net device, the failover vf do not exist in the vm) + +* Fri Feb 19 2021 Eduardo Lima (Etrunko) - 5.2.0-8.el8 +- kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch [bz#1887883] +- kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch [bz#1887883] +- kvm-nbd-make-nbd_read-return-EIO-on-error.patch [bz#1887883] +- kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch [bz#1907255] +- kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch [bz#1920740] +- kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch [bz#1920740] +- kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch [bz#1920740] +- kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch [bz#1920941] +- kvm-pci-reject-too-large-ROMs.patch [bz#1917830] +- kvm-pci-add-romsize-property.patch [bz#1917830] +- kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch [bz#1917826] +- 
kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch [bz#1880299] +- Resolves: bz#1887883 + (qemu blocks client progress with various NBD actions) +- Resolves: bz#1907255 + (Migrate failed with vhost-vsock-pci from RHEL-AV 8.3.1 to RHEL-AV 8.2.1) +- Resolves: bz#1920740 + (CVE-2020-35517 virt:8.4/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-av-8.4.0]) +- Resolves: bz#1920941 + ([ppc64le] [AV]--disk cdimage.iso,bus=usb fails to boot) +- Resolves: bz#1917830 + (Add romsize property to qemu-kvm) +- Resolves: bz#1917826 + (Add extra device support to qemu-kvm, but not to rhel machine types) +- Resolves: bz#1880299 + (vhost-user mq connection fails to restart after kill host testpmd which acts as vhost-user client) + +* Fri Feb 12 2021 Eduardo Lima (Etrunko) - 5.2.0-7.el8 +- kvm-virtio-Add-corresponding-memory_listener_unregister-.patch [bz#1903521] +- kvm-block-Honor-blk_set_aio_context-context-requirements.patch [bz#1918966 bz#1918968] +- kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch [bz#1918966 bz#1918968] +- kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch [bz#1918966 bz#1918968] +- kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch [bz#1918966 bz#1918968] +- kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch [bz#1918966 bz#1918968] +- Resolves: bz#1903521 + (hot unplug vhost-user cause qemu crash: qemu-kvm: ../softmmu/memory.c:2818: do_address_space_destroy: Assertion `QTAILQ_EMPTY(&as->listeners)' failed.) 
+- Resolves: bz#1918966 + ([incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'") +- Resolves: bz#1918968 + ([incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all()) + +* Tue Feb 09 2021 Eduardo Lima (Etrunko) - 5.2.0-6.el8 +- kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch [bz#1854811] +- kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch [bz#1907264] +- kvm-redhat-moving-all-documentation-files-to-qemu-kvm-do.patch [bz#1881170 bz#1924766] +- kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch [bz#1834152] +- kvm-redhat-makes-qemu-respect-system-s-crypto-profile.patch [bz#1902219] +- kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch [bz#1925028] +- kvm-docs-set-CONFDIR-when-running-sphinx.patch [bz#1902537] +- Resolves: bz#1854811 + (scsi-bus.c: use-after-free due to race between device unplug and I/O operation causes guest crash) +- Resolves: bz#1907264 + (systemtap: invalid or missing conversion specifier at the trace event vhost_vdpa_set_log_base) +- Resolves: bz#1881170 + (split documentation from the qemu-kvm-core package to its own subpackage) +- Resolves: bz#1924766 + (split documentation from the qemu-kvm-core package to its own subpackage [av-8.4.0]) +- Resolves: bz#1834152 + ([aarch64] QEMU SMMUv3 device: Support range invalidation) +- Resolves: bz#1902219 (QEMU doesn't honour system crypto policies) -- Resolves: bz#1912891 - ([ppc64le] --disk cdimage.iso,bus=usb fails to boot) - -* Wed Feb 10 2021 Jon Maloy - 4.2.0-45.el8 -- kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch [bz#1919111] -- kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch [bz#1919111] -- kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch [bz#1919111] -- Resolves: bz#1919111 - (CVE-2020-35517 virt:rhel/qemu-kvm: QEMU: virtiofsd: potential privileged host device 
access from guest [rhel-8.4.0]) - -* Tue Feb 02 2021 Jon Maloy - 4.2.0-44.el8 -- kvm-spapr-Improve-handling-of-fdt-buffer-size.patch [bz#1901837] -- kvm-spapr-Fold-h_cas_compose_response-into-h_client_arch.patch [bz#1901837] -- kvm-spapr-Don-t-use-spapr_drc_needed-in-CAS-code.patch [bz#1901837] -- kvm-spapr-Fix-handling-of-unplugged-devices-during-CAS-a.patch [bz#1901837] -- kvm-spapr-Allow-memory-unplug-to-always-succeed.patch [bz#1901837] -- kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch [bz#1901837] -- kvm-block-Require-aligned-image-size-to-avoid-assertion-.patch [bz#1834281] -- kvm-file-posix-Allow-byte-aligned-O_DIRECT-with-NFS.patch [bz#1834281] -- kvm-block-iscsi-fix-heap-buffer-overflow-in-iscsi_aio_io.patch [bz#1912974] -- Resolves: bz#1834281 - (qemu-img convert abort when converting image with unaligned size) -- Resolves: bz#1901837 - (Failed to hotunplug pc-dimm device) -- Resolves: bz#1912974 - (CVE-2020-11947 virt:rhel/qemu-kvm: QEMU: heap buffer overflow in iscsi_aio_ioctl_cb() in block/iscsi.c may lead to information disclosure [rhel-8]) - -* Wed Jan 27 2021 Danilo Cesar Lemes de Paula - 4.2.0-43.el8 -- kvm-Drop-bogus-IPv6-messages.patch [bz#1918054] -- Resolves: bz#1918054 - (CVE-2020-10756 virt:rhel/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-8.4.0]) - -* Thu Jan 21 2021 Danilo Cesar Lemes de Paula - 4.2.0-42.el8 -- kvm-linux-headers-add-vfio-DMA-available-capability.patch [bz#1905391] -- kvm-s390x-pci-Move-header-files-to-include-hw-s390x.patch [bz#1905391] -- kvm-vfio-Create-shared-routine-for-scanning-info-capabil.patch [bz#1905391] -- kvm-vfio-Find-DMA-available-capability.patch [bz#1905391] -- kvm-s390x-pci-Add-routine-to-get-the-vfio-dma-available-.patch [bz#1905391] -- kvm-s390x-pci-Honor-DMA-limits-set-by-vfio.patch [bz#1905391] -- kvm-s390x-fix-build-for-without-default-devices.patch [bz#1905391] -- Resolves: bz#1905391 - (RHEL8.4 - s390x/pci: Honor vfio DMA limiting 
(qemu-kvm)) - -* Mon Jan 18 2021 Danilo Cesar Lemes de Paula - 4.2.0-41.el8 -- kvm-udev-kvm-check-remove-the-exceeded-subscription-limi.patch [bz#1909244] -- kvm-hw-arm-smmu-common-Factorize-some-code-in-smmu_ptw_6.patch [bz#1843852] -- kvm-hw-arm-smmu-common-Add-IOTLB-helpers.patch [bz#1843852] -- kvm-hw-arm-smmu-Introduce-smmu_get_iotlb_key.patch [bz#1843852] -- kvm-hw-arm-smmu-Introduce-SMMUTLBEntry-for-PTW-and-IOTLB.patch [bz#1843852] -- kvm-hw-arm-smmu-common-Manage-IOTLB-block-entries.patch [bz#1843852] -- kvm-hw-arm-smmuv3-Introduce-smmuv3_s1_range_inval-helper.patch [bz#1843852] -- kvm-hw-arm-smmuv3-Get-prepared-for-range-invalidation.patch [bz#1843852] -- kvm-hw-arm-smmuv3-Fix-potential-integer-overflow-CID-143.patch [bz#1843852] -- kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch [bz#1843852] -- kvm-memory-Add-IOMMUTLBEvent.patch [bz#1843852] -- kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch [bz#1843852] -- kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch [bz#1843852] -- kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch [bz#1843852] -- kvm-memory-clamp-cached-translation-in-case-it-points-to.patch [bz#1904393] -- kvm-hw-ehci-check-return-value-of-usb_packet_map.patch [bz#1898628] -- kvm-hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch [bz#1903070] -- Resolves: bz#1909244 +- Resolves: bz#1925028 + (vsmmuv3/vhost and virtio-iommu/vhost regression) +- Resolves: bz#1902537 + (The default fsfreeze-hook path from man page and qemu-ga --help command are different) + +* Tue Feb 02 2021 Eduardo Lima (Etrunko) - 5.2.0-5.el8 +- kvm-spapr-Allow-memory-unplug-to-always-succeed.patch [bz#1914069] +- kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch [bz#1914069] +- kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch [bz#1838738] +- kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch [bz#1904268] +- kvm-config-enable-VFIO_CCW.patch [bz#1922170] +- Resolves: bz#1914069 + ([ppc64le] have 
this fix for rhel8.4 av (spapr: Allow memory unplug to always succeed)) +- Resolves: bz#1838738 + ([Intel 8.4 FEAT] qemu-kvm Sapphire Rapids (SPR) New Instructions (NIs) - Fast Train) +- Resolves: bz#1904268 + ([RFE] [HPEMC] qemu-kvm: support up to 710 VCPUs) +- Resolves: bz#1922170 + (Enable vfio-ccw in AV) + +* Wed Jan 27 2021 Danilo Cesar Lemes de Paula - 5.2.0-4.el8 +- kvm-Drop-bogus-IPv6-messages.patch [bz#1918061] +- Resolves: bz#1918061 + (CVE-2020-10756 virt:rhel/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + +* Mon Jan 18 2021 Danilo Cesar Lemes de Paula - 5.2.0-3.el8 +- kvm-block-nvme-Implement-fake-truncate-coroutine.patch [bz#1848834] +- kvm-spec-find-system-python-via-meson.patch [bz#1899619] +- kvm-build-system-use-b_staticpic-false.patch [bz#1899619] +- kvm-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch [bz#1908693] +- kvm-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch [bz#1912846] +- kvm-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch [bz#1755075] +- kvm-AArch64-machine-types-cleanup.patch [bz#1895276] +- kvm-hw-arm-virt-Add-8.4-Machine-type.patch [bz#1895276] +- kvm-udev-kvm-check-remove-the-exceeded-subscription-limi.patch [bz#1914463] +- kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch [bz#1845758] +- kvm-memory-Add-IOMMUTLBEvent.patch [bz#1845758] +- kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch [bz#1845758] +- kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch [bz#1845758] +- kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch [bz#1845758] +- kvm-RHEL-Switch-pvpanic-test-to-q35.patch [bz#1885555] +- kvm-8.4-x86-machine-type.patch [bz#1885555] +- kvm-memory-clamp-cached-translation-in-case-it-points-to.patch [bz#1904392] +- Resolves: bz#1848834 + (Failed to create luks format image on NVMe device) +- Resolves: bz#1899619 + (QEMU 5.2 is built with PIC objects instead of PIE) +- Resolves: bz#1908693 + 
([ppc64le]boot up a guest with 128 numa nodes ,qemu got coredump) +- Resolves: bz#1912846 + (qemu-kvm: Failed to load xhci:parent_obj during migration) +- Resolves: bz#1755075 + ([qemu-guest-agent] fsinfo doesn't return disk info on s390x) +- Resolves: bz#1895276 + (Machine types update for aarch64 for QEMU 5.2.0) +- Resolves: bz#1914463 (Remove KVM guest count and limit info message) -- Resolves: bz#1843852 +- Resolves: bz#1845758 (qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed.) -- Resolves: bz#1904393 - (CVE-2020-27821 virt:rhel/qemu-kvm: QEMU: heap buffer overflow in msix_table_mmio_write() in hw/pci/msix.c [rhel-8]) -- Resolves: bz#1898628 - (CVE-2020-25723 virt:rhel/qemu-kvm: QEMU: assertion failure through usb_packet_unmap() in hw/usb/hcd-ehci.c [rhel-8]) -- Resolves: bz#1903070 - (CVE-2020-25707 CVE-2020-28916 virt:rhel/qemu-kvm: various flaws [rhel-8]) - -* Mon Jan 04 2021 Danilo Cesar Lemes de Paula - 4.2.0-40.el8 -- kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch [bz#1910267] -- kvm-qga-rename-Error-parameter-to-more-common-errp.patch [bz#1910326] -- kvm-util-Introduce-qemu_get_host_name.patch [bz#1910326] -- kvm-qga-Use-qemu_get_host_name-instead-of-g_get_host_nam.patch [bz#1910326] -- kvm-redhat-add-un-pre-install-systemd-hooks-for-qemu-ga.patch [bz#1910220] -- Resolves: bz#1910267 - (There is no soft link '/etc/qemu-kvm/fsfreeze-hook') -- Resolves: bz#1910326 - (Incorrect hostname returned by qga command 'guest-get-host-name') -- Resolves: bz#1910220 +- Resolves: bz#1885555 + (8.4 machine types for x86) +- Resolves: bz#1904392 + (CVE-2020-27821 virt:8.4/qemu-kvm: QEMU: heap buffer overflow in msix_table_mmio_write() in hw/pci/msix.c [rhel-av-8]) + +* Tue Dec 15 2020 Danilo Cesar Lemes de Paula - 5.2.0-2.el8 +- kvm-redhat-Define-hw_compat_8_3.patch [bz#1893935] +- 
kvm-redhat-Add-spapr_machine_rhel_default_class_options.patch [bz#1893935] +- kvm-redhat-Define-pseries-rhel8.4.0-machine-type.patch [bz#1893935] +- kvm-redhat-s390x-add-rhel-8.4.0-compat-machine.patch [bz#1836282] +- Resolves: bz#1836282 + (New machine type for qemu-kvm on s390x in RHEL-AV) +- Resolves: bz#1893935 + (New machine type on RHEL-AV 8.4 for ppc64le) + +* Wed Dec 09 2020 Miroslav Rezanina - 5.2.0-1.el8 +- Rebase to QEMU 5.2.0 [bz#1905933] +- Resolves: bz#1905933 + (Rebase qemu-kvm to version 5.2.0) + +* Tue Dec 01 2020 Danilo Cesar Lemes de Paula - 5.1.0-16.el8 +- kvm-redhat-introduces-disable_everything-macro-into-the-.patch [bz#1884611] +- kvm-redhat-scripts-extract_build_cmd.py-Avoid-listing-em.patch [bz#1884611] +- kvm-redhat-Removing-unecessary-configurations.patch [bz#1884611] +- kvm-redhat-Fixing-rh-local-build.patch [bz#1884611] +- kvm-redhat-allow-Makefile-rh-prep-builddep-to-fail.patch [bz#1884611] +- kvm-redhat-adding-rh-rpm-target.patch [bz#1884611] +- kvm-redhat-move-shareable-files-from-qemu-kvm-core-to-qe.patch [bz#1884611] +- kvm-redhat-Add-qemu-kiwi-subpackage.patch [bz#1884611] +- Resolves: bz#1884611 + (Build kata-specific version of qemu) + +* Mon Nov 16 2020 Danilo Cesar Lemes de Paula - 5.1.0-15.el8 +- kvm-redhat-add-un-pre-install-systemd-hooks-for-qemu-ga.patch [bz#1882719] +- kvm-rcu-Implement-drain_call_rcu.patch [bz#1812399 bz#1866707] +- kvm-libqtest-Rename-qmp_assert_error_class-to-qmp_expect.patch [bz#1812399 bz#1866707] +- kvm-qtest-rename-qtest_qmp_receive-to-qtest_qmp_receive_.patch [bz#1812399 bz#1866707] +- kvm-qtest-Reintroduce-qtest_qmp_receive-with-QMP-event-b.patch [bz#1812399 bz#1866707] +- kvm-qtest-remove-qtest_qmp_receive_success.patch [bz#1812399 bz#1866707] +- kvm-device-plug-test-use-qtest_qmp-to-send-the-device_de.patch [bz#1812399 bz#1866707] +- kvm-qtest-switch-users-back-to-qtest_qmp_receive.patch [bz#1812399 bz#1866707] +- kvm-qtest-check-that-drives-are-really-appearing-and-dis.patch [bz#1812399 
bz#1866707] +- kvm-qemu-iotests-qtest-rewrite-test-067-as-a-qtest.patch [bz#1812399 bz#1866707] +- kvm-qdev-add-check-if-address-free-callback-for-buses.patch [bz#1812399 bz#1866707] +- kvm-scsi-scsi_bus-switch-search-direction-in-scsi_device.patch [bz#1812399 bz#1866707] +- kvm-device_core-use-drain_call_rcu-in-in-qmp_device_add.patch [bz#1812399 bz#1866707] +- kvm-device-core-use-RCU-for-list-of-children-of-a-bus.patch [bz#1812399 bz#1866707] +- kvm-scsi-switch-to-bus-check_address.patch [bz#1812399 bz#1866707] +- kvm-device-core-use-atomic_set-on-.realized-property.patch [bz#1812399 bz#1866707] +- kvm-scsi-scsi-bus-scsi_device_find-don-t-return-unrealiz.patch [bz#1812399] +- kvm-scsi-scsi_bus-Add-scsi_device_get.patch [bz#1812399 bz#1866707] +- kvm-virtio-scsi-use-scsi_device_get.patch [bz#1812399 bz#1866707] +- kvm-scsi-scsi_bus-fix-races-in-REPORT-LUNS.patch [bz#1812399 bz#1866707] +- kvm-tests-migration-fix-memleak-in-wait_command-wait_com.patch [bz#1812399 bz#1866707] +- kvm-libqtest-fix-the-order-of-buffered-events.patch [bz#1812399 bz#1866707] +- kvm-libqtest-fix-memory-leak-in-the-qtest_qmp_event_ref.patch [bz#1812399 bz#1866707] +- kvm-iotests-add-filter_qmp_virtio_scsi-function.patch [bz#1812399 bz#1866707] +- kvm-iotests-rewrite-iotest-240-in-python.patch [bz#1812399 bz#1866707] +- Resolves: bz#1812399 + (Qemu crash when detach disk with cache="none" discard="ignore" io="native") +- Resolves: bz#1866707 + (qemu-kvm is crashing with error "scsi_target_emulate_report_luns: Assertion `i == n + 8' failed") +- Resolves: bz#1882719 (qemu-ga service still active and can work after qemu-guest-agent been removed) -* Wed Dec 23 2020 Danilo Cesar Lemes de Paula - 4.2.0-39.el8 -- kvm-ppc-spapr-Add-hotremovable-flag-on-DIMM-LMBs-on-drme.patch [bz#1901837] -- kvm-ppc-spapr-re-assert-IRQs-during-event-scan-if-there-.patch [bz#1901837] -- kvm-slirp-check-pkt_len-before-reading-protocol-header.patch [bz#1902237] -- 
kvm-s390x-s390-virtio-ccw-Reset-PCI-devices-during-subsy.patch [bz#1905386] -- kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch [bz#1859494] -- kvm-error-Fix-examples-in-error.h-s-big-comment.patch [bz#1859494] -- kvm-error-Improve-error.h-s-big-comment.patch [bz#1859494] -- kvm-error-Document-Error-API-usage-rules.patch [bz#1859494] -- kvm-error-New-macro-ERRP_GUARD.patch [bz#1859494] -- kvm-qga-add-command-guest-get-disks.patch [bz#1859494] -- kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch [bz#1859494] -- kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch [bz#1859494] -- kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch [bz#1859494] -- kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch [bz#1859494] -- Resolves: bz#1859494 - (Report logical_name for disks without mounted file-system) -- Resolves: bz#1901837 - (Failed to hotunplug pc-dimm device) -- Resolves: bz#1902237 - (CVE-2020-29129 CVE-2020-29130 virt:rhel/qemu-kvm: QEMU: slirp: out-of-bounds access while processing ARP/NCSI packets [rhel-8]) -- Resolves: bz#1905386 - (RHEL8.3 - s390x/s390-virtio-ccw: Reset PCI devices during subsystem reset (qemu-kvm)) - -* Fri Dec 11 2020 Danilo Cesar Lemes de Paula - 4.2.0-38.el8 -- kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch [bz#1880546] -- kvm-pc-bios-s390x-Rename-PSW_MASK_ZMODE-to-PSW_MASK_64.patch [bz#1903135] -- kvm-pc-bios-s390x-Use-PSW-masks-where-possible-and-intro.patch [bz#1903135] -- kvm-pc-bios-s390x-Ensure-Read-IPL-memory-is-clean.patch [bz#1903135] -- kvm-pc-bios-s390x-Clear-out-leftover-S390EP-string.patch [bz#1903135] -- Resolves: bz#1880546 +* Tue Oct 13 2020 Danilo Cesar Lemes de Paula - 5.1.0-14.el8_3 +- kvm-virtiofsd-avoid-proc-self-fd-tempdir.patch [bz#1884276] +- Resolves: bz#1884276 + (Pod with kata-runtime won't start, QEMU: "vhost_user_dev init failed, Operation not permitted" [mkdtemp failing in sandboxing]) + +* Thu Oct 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-13.el8_3 +- 
kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch [bz#1846886] +- kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch [bz#1846886] +- kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch [bz#1846886] +- Resolves: bz#1846886 + (Guest hit soft lockup or reboots if hotplug vcpu under ovmf) + +* Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-12.el8_3 +- kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch [bz#1868449] +- kvm-vhost-vsock-pci-force-virtio-version-1.patch [bz#1868449] +- kvm-vhost-user-vsock-pci-force-virtio-version-1.patch [bz#1868449] +- kvm-vhost-vsock-ccw-force-virtio-version-1.patch [bz#1868449] +- Resolves: bz#1868449 + (vhost_vsock error: device is modern-only, use disable-legacy=on) + +* Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-11.el8_3 +- kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch [bz#1874004] +- kvm-redhat-Make-all-generated-so-files-executable-not-on.patch [bz#1876635] +- Resolves: bz#1874004 + (Live migration performance is poor during guest installation process on power host) +- Resolves: bz#1876635 + (VM fails to start with a passthrough smartcard) + +* Mon Sep 28 2020 Danilo Cesar Lemes de Paula - 5.1.0-10.el8 +- kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch [bz#1877209] +- Resolves: bz#1877209 + ('qemu-img bitmaps --merge' failed when trying to merge top volume bitmap to base volume bitmap) + +* Mon Sep 21 2020 Danilo Cesar Lemes de Paula - 5.1.0-9.el8 +- kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch [bz#1688978] +- Resolves: bz#1688978 + (RFE: forward host preferences for cipher suites and CA certs to guest firmware) + +* Thu Sep 17 2020 Danilo Cesar Lemes de Paula - 5.1.0-8.el8 +- kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch [bz#1738820] +- kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch [bz#1752376] +- kvm-Revert-Drop-bogus-IPv6-messages.patch [bz#1867075] +- 
kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch [bz#1821528] +- Resolves: bz#1738820 + ('-F' option of qemu-ga command cause the guest-fsfreeze-freeze command doesn't work) +- Resolves: bz#1752376 (qemu use SCMP_ACT_TRAP even SCMP_ACT_KILL_PROCESS is available) -- Resolves: bz#1903135 - (RHEL8.3 - KVM Distro install to vfio_ccw backed DASD gets error at the reboot step (qemu-kvm)) - -* Mon Nov 23 2020 Danilo Cesar Lemes de Paula - 4.2.0-37.el8 -- kvm-hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch [bz#1860994] -- Resolves: bz#1860994 - (CVE-2020-16092 virt:rhel/qemu-kvm: QEMU: reachable assertion failure in net_tx_pkt_add_raw_fragment() in hw/net/net_tx_pkt.c [rhel-8]) - -* Fri Nov 20 2020 Danilo Cesar Lemes de Paula - 4.2.0-36.el8 -- kvm-qga-fix-assert-regression-on-guest-shutdown.patch [bz#1884531] -- kvm-libvhost-user-handle-endianness-as-mandated-by-the-s.patch [bz#1857733] -- kvm-virtio-add-vhost-user-fs-ccw-device.patch [bz#1857733] -- kvm-Ensure-vhost-user-fs-is-enabled-on-s390x.patch [bz#1857733] -- kvm-s390x-sclp.c-remove-unneeded-label-in-sclp_service_c.patch [bz#1798506] -- kvm-s390-sclp-get-machine-once-during-read-scp-cpu-info.patch [bz#1798506] -- kvm-s390-sclp-rework-sclp-boundary-checks.patch [bz#1798506] -- kvm-s390-sclp-read-sccb-from-mem-based-on-provided-lengt.patch [bz#1798506] -- kvm-s390-sclp-check-sccb-len-before-filling-in-data.patch [bz#1798506] -- kvm-s390-sclp-use-cpu-offset-to-locate-cpu-entries.patch [bz#1798506] -- kvm-s390-sclp-add-extended-length-sccb-support-for-kvm-g.patch [bz#1798506] -- kvm-linux-headers-Partial-update-against-Linux-5.9-rc4.patch [bz#1798506] -- kvm-misc-Replace-zero-length-arrays-with-flexible-array-.patch [bz#1798506] -- kvm-s390-guest-support-for-diagnose-0x318.patch [bz#1798506] -- kvm-s390x-pv-Remove-sclp-boundary-checks.patch [bz#1798506] -- kvm-s390x-pv-Fix-diag318-PV-fencing.patch [bz#1798506] -- kvm-s390-kvm-fix-diag318-propagation-and-reset-functiona.patch [bz#1659412] -- 
kvm-trace-use-STAP_SDT_V2-to-work-around-symbol-visibili.patch [bz#1898700] -- Resolves: bz#1659412 - ([IBM 8.4 FEAT] KVM enablement for enhanced hardware diagnose data of guest kernel on s390x - qemu part) -- Resolves: bz#1798506 - ([IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part) -- Resolves: bz#1857733 - ([IBM 8.4 FEAT] KVM: Add support for virtio-fs on s390x - qemu part) -- Resolves: bz#1884531 - (qemu-ga aborts after guest-shutdown command) -- Resolves: bz#1898700 - (qemu-kvm for RHEL-8.4 doesn't build due to a possible incompatibility with systemtap-sdt-devel-4.4-1) - -* Mon Oct 19 2020 Danilo Cesar Lemes de Paula - 4.2.0-35.el8 -- kvm-qga-commands-posix-Rework-build_guest_fsinfo_for_rea.patch [bz#1755075] -- kvm-qga-commands-posix-Move-the-udev-code-from-the-pci-t.patch [bz#1755075] -- kvm-qga-commands-posix-Support-fsinfo-for-non-PCI-virtio.patch [bz#1755075] -- kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch [bz#1874780] -- kvm-pc-bios-s390-ccw-Makefile-Compile-with-std-gnu99-fwr.patch [bz#1846975] -- kvm-pc-bios-s390-ccw-Move-ipl-related-code-from-main-int.patch [bz#1846975] -- kvm-pc-bios-s390-ccw-Introduce-ENODEV-define-and-remove-.patch [bz#1846975] -- kvm-pc-bios-s390-ccw-Move-the-inner-logic-of-find_subch-.patch [bz#1846975] -- kvm-pc-bios-s390-ccw-Do-not-bail-out-early-if-not-findin.patch [bz#1846975] -- kvm-pc-bios-s390-ccw-Scan-through-all-devices-if-no-boot.patch [bz#1846975] -- kvm-pc-bios-s390-ccw-Allow-booting-in-case-the-first-vir.patch [bz#1846975] -- kvm-pc-bios-s390-ccw-main-Remove-superfluous-call-to-ena.patch [bz#1846975] -- kvm-aio-posix-completely-stop-polling-when-disabled.patch [bz#1846975] -- kvm-Remove-explicit-glusterfs-api-dependency.patch [bz#1872854] -- Resolves: bz#1755075 - ([qemu-guest-agent] fsinfo doesn't return disk info on s390x) -- Resolves: bz#1846975 - (Failed to boot up a s390x guest with virtio-blk-ccw if attaching a virtio-scsi-ccw bus in previous) -- Resolves: bz#1872854 - (move the 
glusterfs dependency out of qemu-kvm-core to the glusterfs module) -- Resolves: bz#1874780 +- Resolves: bz#1821528 + (missing namespace attribute when access the rbd image with namespace) +- Resolves: bz#1867075 + (CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + +* Tue Sep 15 2020 Danilo Cesar Lemes de Paula - 5.1.0-7.el8 +- kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch [bz#1789757 bz#1870384] +- kvm-target-arm-Move-start-powered-off-property-to-generi.patch [bz#1849483] +- kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch [bz#1849483] +- kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch [bz#1849483] +- Resolves: bz#1789757 + ([IBM 8.4 FEAT] Add machine option to enable secure VM support) +- Resolves: bz#1849483 + (Failed to boot up guest when hotplugging vcpus on bios stage) +- Resolves: bz#1870384 + ([IBM 8.3 FEAT] Add interim/unsupported machine option to enable secure VM support for testing purposes) + +* Thu Sep 10 2020 Danilo Cesar Lemes de Paula - 5.1.0-6.el8 +- kvm-spec-Move-qemu-pr-helper-back-to-usr-bin.patch [bz#1869635] +- kvm-Bump-required-libusbx-version.patch [bz#1856591] +- Resolves: bz#1856591 + (libusbx isn't updated with qemu-kvm) +- Resolves: bz#1869635 + ('/usr/bin/qemu-pr-helper' is not a suitable pr helper: No such file or directory) + +* Tue Sep 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-5.el8 +- kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch [bz#1873417] +- kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch [bz#1873417] +- kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch [bz#1873417] +- kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch [bz#1873417] +- kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch [bz#1873417] +- kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch [bz#1873417] +- kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch 
[bz#1873417] +- kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch [bz#1867739] +- kvm-usb-fix-setup_len-init-CVE-2020-14364.patch [bz#1869715] +- kvm-Remove-explicit-glusterfs-api-dependency.patch [bz#1872853] +- kvm-disable-virgl.patch [bz#1831271] +- Resolves: bz#1831271 + (Drop virgil acceleration support and remove virglrenderer dependency) +- Resolves: bz#1867739 (-prom-env does not validate input) - -* Tue Sep 08 2020 Danilo Cesar Lemes de Paula - 4.2.0-34.el8 -- kvm-usb-fix-setup_len-init-CVE-2020-14364.patch [bz#1869710] -- Resolves: bz#1869710 - (CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-8.3.0]) - -* Wed Aug 19 2020 Danilo Cesar Lemes de Paula - 4.2.0-33.el8 +- Resolves: bz#1869715 + (CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-av-8.3.0]) +- Resolves: bz#1872853 + (move the glusterfs dependency out of qemu-kvm-core to the glusterfs module) +- Resolves: bz#1873417 + (AMD/NUMA topology - revert 5.1 changes) + +* Thu Aug 27 2020 Danilo Cesar Lemes de Paula - 5.1.0-4.el8 +- kvm-Drop-bogus-IPv6-messages.patch [bz#1867075] +- kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch [bz#1849707] +- kvm-machine_types-numa-compatibility-for-auto_enable_num.patch [bz#1849707] +- kvm-migration-Add-block-bitmap-mapping-parameter.patch [bz#1790492] +- kvm-iotests.py-Let-wait_migration-return-on-failure.patch [bz#1790492] +- kvm-iotests-Test-node-bitmap-aliases-during-migration.patch [bz#1790492] +- Resolves: bz#1790492 + ('dirty-bitmaps' migration capability should allow configuring target nodenames) +- Resolves: bz#1849707 + (8.3 machine types for x86 - 5.1 update) +- Resolves: bz#1867075 + (CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + +* Wed Aug 19 2020 Danilo Cesar Lemes de Paula - 5.1.0-3.el8 +- kvm-redhat-Update-hw_compat_8_2.patch [bz#1843348] +- 
kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch [bz#1843348] +- kvm-Disable-TPM-passthrough-backend-on-ARM.patch [bz#1801242] - kvm-Require-libfdt-1.6.0.patch [bz#1867847] +- Resolves: bz#1801242 + ([aarch64] vTPM support in machvirt) +- Resolves: bz#1843348 + (8.3 machine types for POWER) - Resolves: bz#1867847 ([ppc] virt module 7629: /usr/libexec/qemu-kvm: undefined symbol: fdt_check_full, version LIBFDT_1.2) -* Mon Aug 10 2020 Danilo Cesar Lemes de Paula - 4.2.0-32.el8 -- kvm-i386-Add-2nd-Generation-AMD-EPYC-processors.patch [bz#1780385] -- kvm-target-i386-sev-provide-proper-error-reporting-for-q.patch [bz#1689341] -- kvm-target-i386-sev-fail-query-sev-capabilities-if-QEMU-.patch [bz#1689341] -- kvm-s390x-protvirt-allow-to-IPL-secure-guests-with-no-re.patch [bz#1863034] -- Resolves: bz#1689341 - (QEMU should report an error and return failure if AMD SEV is not enabled in the kernel) -- Resolves: bz#1780385 - ([RFE] AMD EPYC-Rome support for KVM / QEMU guest) -- Resolves: bz#1863034 - (RHEL8.3 Beta - Secure Execution: Unable to start Qemu with "-no-reboot" option (qemu-kvm)) - -* Wed Jul 22 2020 Danilo Cesar Lemes de Paula - 4.2.0-31.el8 -- kvm-qcow2-Fix-alloc_cluster_abort-for-pre-existing-clust.patch [bz#1807057] -- kvm-iotests-026-Test-EIO-on-preallocated-zero-cluster.patch [bz#1807057] -- kvm-iotests-026-Test-EIO-on-allocation-in-a-data-file.patch [bz#1807057] -- kvm-iotests-026-Move-v3-exclusive-test-to-new-file.patch [bz#1807057] -- Resolves: bz#1807057 - (qcow2_alloc_cluster_abort() frees preallocated zero clusters) - -* Tue Jul 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-30.el8 -- kvm-i386-Mask-SVM-features-if-nested-SVM-is-disabled.patch [bz#1835390] -- kvm-s390x-sigp-Fix-sense-running-reporting.patch [bz#1854092] -- kvm-s390x-tcg-clear-local-interrupts-on-reset-normal.patch [bz#1854092] -- kvm-virtio-net-fix-removal-of-failover-device.patch [] -- Resolves: bz#1835390 - (qemu promote host does not support 'EDX.npt' and 'EDX.nrip-save' when test 
with Q35 machine type on EPYC host) -- Resolves: bz#1854092 - (kvm-unit-tests: tcg smp FAIL) - -* Sun Jun 28 2020 Danilo Cesar Lemes de Paula - 4.2.0-29.el8 -- kvm-vfio-ccw-Fix-error-message.patch [bz#1660916] -- kvm-vfio-ccw-allow-non-prefetch-ORBs.patch [bz#1660916] -- kvm-linux-headers-support-vfio-ccw-features.patch [bz#1660916] -- kvm-vfio-ccw-Refactor-cleanup-of-regions.patch [bz#1660916] -- kvm-vfio-ccw-Add-support-for-the-schib-region.patch [bz#1660916] -- kvm-vfio-ccw-Refactor-ccw-irq-handler.patch [bz#1660916] -- kvm-s390x-css-Refactor-the-css_queue_crw-routine.patch [bz#1660916] -- kvm-vfio-ccw-Add-support-for-the-CRW-region-and-IRQ.patch [bz#1660916] -- kvm-config-enable-VFIO_CCW.patch [bz#1660916] -- kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch [] -- kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch [] -- kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch [bz#1838070] -- Resolves: bz#1660916 - ([IBM 8.3 FEAT] KVM s390x: DASD passthrough support - qemu part) -- Resolves: bz#1838070 - (CVE-2020-1983 virt:rhel/qemu-kvm: QEMU: slirp: use-after-free in ip_reass() function in ip_input.c [rhel-8]) - -* Fri Jun 19 2020 Danilo Cesar Lemes de Paula - 4.2.0-28.el8 -- kvm-redhat-Install-the-s390-netboot.img-that-we-ve-built.patch [bz#1828317] -- kvm-linux-headers-update-kvm.h.patch [bz#1828317] -- kvm-s390x-Don-t-do-a-normal-reset-on-the-initial-cpu.patch [bz#1828317] -- kvm-s390x-Move-reset-normal-to-shared-reset-handler.patch [bz#1828317] -- kvm-s390x-Move-initial-reset.patch [bz#1828317] -- kvm-s390x-Move-clear-reset.patch [bz#1828317] -- kvm-s390x-Beautify-diag308-handling.patch [bz#1828317] -- kvm-s390x-kvm-Make-kvm_sclp_service_call-void.patch [bz#1828317] -- kvm-s390x-Fix-cpu-normal-reset-ri-clearing.patch [bz#1828317] -- kvm-tests-boot-sector-Fix-the-bad-s390x-assembler-code.patch [bz#1828317] -- kvm-pc-bios-s390x-Fix-reset-psw-mask.patch [bz#1828317] -- kvm-s390x-Properly-fetch-and-test-the-short-psw-on-diag3.patch 
[bz#1828317] -- kvm-s390x-Rename-and-use-constants-for-short-PSW-address.patch [bz#1828317] -- kvm-s390x-Add-missing-vcpu-reset-functions.patch [bz#1828317] -- kvm-s390-sclp-improve-special-wait-psw-logic.patch [bz#1828317] -- kvm-pc-bios-s390x-Save-iplb-location-in-lowcore.patch [bz#1828317] -- kvm-s390-ipl-sync-back-loadparm.patch [bz#1828317] -- kvm-s390-ipl-fix-off-by-one-in-update_machine_ipl_proper.patch [bz#1828317] -- kvm-s390x-ipl-Consolidate-iplb-validity-check-into-one-f.patch [bz#1828317] -- kvm-vhost-correctly-turn-on-VIRTIO_F_IOMMU_PLATFORM.patch [bz#1828317] -- kvm-s390x-Move-diagnose-308-subcodes-and-rcs-into-ipl.h.patch [bz#1828317] -- kvm-s390x-protvirt-Support-unpack-facility.patch [bz#1828317] -- kvm-s390x-protvirt-Add-migration-blocker.patch [bz#1828317] -- kvm-s390x-protvirt-Inhibit-balloon-when-switching-to-pro.patch [bz#1828317] -- kvm-s390x-protvirt-KVM-intercept-changes.patch [bz#1828317] -- kvm-s390x-Add-SIDA-memory-ops.patch [bz#1828317] -- kvm-s390x-protvirt-Move-STSI-data-over-SIDAD.patch [bz#1828317] -- kvm-s390x-protvirt-SCLP-interpretation.patch [bz#1828317] -- kvm-s390x-protvirt-Set-guest-IPL-PSW.patch [bz#1828317] -- kvm-s390x-protvirt-Move-diag-308-data-over-SIDA.patch [bz#1828317] -- kvm-s390x-protvirt-Disable-address-checks-for-PV-guest-I.patch [bz#1828317] -- kvm-s390x-protvirt-Move-IO-control-structures-over-SIDA.patch [bz#1828317] -- kvm-s390x-protvirt-Handle-SIGP-store-status-correctly.patch [bz#1828317] -- kvm-s390x-Add-unpack-facility-feature-to-GA1.patch [bz#1828317] -- kvm-s390x-protvirt-Fix-stray-error_report_err-in-s390_ma.patch [bz#1828317] -- kvm-s390x-pv-Retry-ioctls-on-EINTR.patch [bz#1828317] -- kvm-s390x-s390-virtio-ccw-Fix-build-on-systems-without-K.patch [bz#1828317] -- kvm-s390x-pv-Fix-KVM_PV_PREP_RESET-command-wrapper-name.patch [bz#1828317] -- kvm-spapr-Pass-the-maximum-number-of-vCPUs-to-the-KVM-in.patch [bz#1756946] -- kvm-introduce-kvm_kernel_irqchip_-functions.patch [bz#1756946] -- 
kvm-target-s390x-kvm-Enable-adapter-interruption-suppres.patch [bz#1756946] -- kvm-vfio-nvlink-Remove-exec-permission-to-avoid-SELinux-.patch [bz#1823275] -- Resolves: bz#1756946 - ([zKVM] Re-enable KVM_CAP_S390_AIS for new machine types) -- Resolves: bz#1823275 - (RHEL8.1 - GPU Numa nodes not visible in guest post the pass-through.) -- Resolves: bz#1828317 - ([IBM 8.3 FEAT] s390x: Base KVM setup for secure guests - qemu part) - -* Fri Jun 19 2020 Danilo C. L. de Paula - 4.2.0 -- Resolves: bz#1810193 -(Upgrade components in virt:rhel module:stream for RHEL-8.3 release) - -* Tue Jun 09 2020 Danilo C. L. de Paula - 4.2.0-25 -- Resolves: bz#1810193 - (Upgrade components in virt:rhel module:stream for RHEL-8.3 release) - Another sync - -* Thu Jun 04 2020 Danilo C. L. de Paula - 4.2.0-23.el8 -- Resolves: bz#1810193 - (Upgrade components in virt:rhel module:stream for RHEL-8.3 release) - Another syncronization - -* Mon Apr 27 2020 Danilo C. L. de Paula - 4.2.0 -- Resolves: bz#1810193 - (Upgrade components in virt:rhel module:stream for RHEL-8.3 release) - -* Fri Feb 21 2020 Danilo Cesar Lemes de Paula - 2.12.0-99.el8 -- kvm-slirp-disable-tcp_emu.patch [bz#1791677] -- kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch [bz#1790308] -- Resolves: bz#1790308 - (qemu-kvm core dump when do L1 guest live migration with L2 guest running) -- Resolves: bz#1791677 - (QEMU: Slirp: disable emulation of tcp programs like ftp IRC etc. 
[rhel-8]) - -* Mon Feb 10 2020 Danilo Cesar Lemes de Paula - 2.12.0-98.el8 -- kvm-iscsi-Avoid-potential-for-get_status-overflow.patch [bz#1794501] -- kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch [bz#1794501] -- kvm-clean-up-callback-when-del-virtqueue.patch [bz#1708480] -- kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch [bz#1708480] -- kvm-virtio-reset-region-cache-when-on-queue-deletion.patch [bz#1708480] -- kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch [bz#1708480] -- Resolves: bz#1708480 +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-2.el8 +- kvm-redhat-define-hw_compat_8_2.patch [bz#1853265] +- Resolves: bz#1853265 + (Forward and backward migration from rhel-av-8.3.0(qemu-kvm-5.0.0) to rhel-av-8.2.1(qemu-kvm-4.2.0) failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'") + +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-1.el8 +- Quick changelog fix to reflect the current fixes: +- Resolve: bz#1781911 +- Resolve: bz#1841529 +- Resolve: bz#1842902 +- Resolve: bz#1818843 +- Resolve: bz#1819292 +- Resolve: bz#1801242 + +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-0.el8 +- Rebase to 5.1.0 +- Resolves: bz#1809650 + +* Tue Jul 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-29.el8 +- kvm-virtio-net-fix-removal-of-failover-device.patch [bz#1820120] +- Resolves: bz#1820120 + (After hotunplugging the vitrio device and netdev, hotunpluging the failover VF will cause qemu core dump) + +* Sun Jun 28 2020 Danilo Cesar Lemes de Paula - 4.2.0-28.el8 +- kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch [bz#1812765] +- kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch [bz#1812765] +- kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch [bz#1838082] +- Resolves: bz#1812765 + (qemu with iothreads enabled crashes on resume after enospc pause for disk extension) +- Resolves: bz#1838082 + (CVE-2020-1983 virt:8.2/qemu-kvm: QEMU: slirp: use-after-free in 
ip_reass() function in ip_input.c [rhel-av-8]) + +* Thu Jun 18 2020 Eduardo Lima (Etrunko) - 4.2.0-27.el8 +- kvm-hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch [bz#1820531] +- kvm-spec-Fix-python-shenigans-for-tests.patch [bz#1845779] +- kvm-target-i386-Add-ARCH_CAPABILITIES-related-bits-into-.patch [bz#1840342] +- Resolves: bz#1820531 + (qmp command query-pci get wrong result after hotplug device under hotplug=off controller) +- Resolves: bz#1840342 + ([Intel 8.2.1 Bug] qemu-kvm Add ARCH_CAPABILITIES to Icelake-Server cpu model - Fast Train) +- Resolves: bz#1845779 + (Install 'qemu-kvm-tests' failed as nothing provides /usr/libexec/platform-python3 - virt module 6972) + +* Wed Jun 17 2020 Eduardo Lima (Etrunko) - 4.2.0-26.el8 +- kvm-nbd-server-Avoid-long-error-message-assertions-CVE-2.patch [bz#1845384] +- kvm-block-Call-attention-to-truncation-of-long-NBD-expor.patch [bz#1845384] +- Resolves: bz#1845384 + (CVE-2020-10761 virt:8.2/qemu-kvm: QEMU: nbd: reachable assertion failure in nbd_negotiate_send_rep_verr via remote client [rhel-av-8]) + +* Tue Jun 09 2020 Danilo Cesar Lemes de Paula - 4.2.0-25.el8 +- kvm-enable-ramfb.patch [bz#1841068] +- kvm-block-Add-flags-to-BlockDriver.bdrv_co_truncate.patch [bz#1780574] +- kvm-block-Add-flags-to-bdrv-_co-_truncate.patch [bz#1780574] +- kvm-block-backend-Add-flags-to-blk_truncate.patch [bz#1780574] +- kvm-qcow2-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-raw-format-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-file-posix-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-block-truncate-Don-t-make-backing-file-data-visible.patch [bz#1780574] +- kvm-iotests-Add-qemu_io_log.patch [bz#1780574] +- kvm-iotests-Filter-testfiles-out-in-filter_img_info.patch [bz#1780574] +- kvm-iotests-Test-committing-to-short-backing-file.patch [bz#1780574] +- kvm-qcow2-Forward-ZERO_WRITE-flag-for-full-preallocation.patch [bz#1780574] +- 
kvm-i386-Add-MSR-feature-bit-for-MDS-NO.patch [bz#1769912] +- kvm-i386-Add-macro-for-stibp.patch [bz#1769912] +- kvm-target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch [bz#1769912] +- kvm-i386-Add-new-CPU-model-Cooperlake.patch [bz#1769912] +- kvm-target-i386-Add-missed-features-to-Cooperlake-CPU-mo.patch [bz#1769912] +- Resolves: bz#1769912 + ([Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train) +- Resolves: bz#1780574 + (Data corruption with resizing short overlay over longer backing files) +- Resolves: bz#1841068 + (RFE: please support the "ramfb" display device model) + +* Mon Jun 08 2020 Danilo Cesar Lemes de Paula - 4.2.0-24.el8 +- kvm-target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch [bz#1513681] +- kvm-block-curl-HTTP-header-fields-allow-whitespace-aroun.patch [bz#1841038] +- kvm-block-curl-HTTP-header-field-names-are-case-insensit.patch [bz#1841038] +- kvm-MAINTAINERS-fix-qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch [bz#1779893 bz#1779904] +- kvm-iotests-Let-_make_test_img-parse-its-parameters.patch [bz#1779893 bz#1779904] +- kvm-qemu_img-add-cvtnum_full-to-print-error-reports.patch [bz#1779893 bz#1779904] +- kvm-block-Make-it-easier-to-learn-which-BDS-support-bitm.patch [bz#1779893 bz#1779904] +- kvm-blockdev-Promote-several-bitmap-functions-to-non-sta.patch [bz#1779893 bz#1779904] +- kvm-blockdev-Split-off-basic-bitmap-operations-for-qemu-.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Add-bitmap-sub-command.patch [bz#1779893 bz#1779904] +- kvm-iotests-Fix-test-178.patch [bz#1779893 bz#1779904] +- kvm-qcow2-Expose-bitmaps-size-during-measure.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Factor-out-code-for-merging-bitmaps.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Add-convert-bitmaps-option.patch [bz#1779893 bz#1779904] +- kvm-iotests-Add-test-291-to-for-qemu-img-bitmap-coverage.patch [bz#1779893 bz#1779904] +- kvm-iotests-Add-more-skip_if_unsupported-statements-to-t.patch [bz#1778593] +- 
kvm-iotests-don-t-use-format-for-drive_add.patch [bz#1778593] +- kvm-iotests-055-refactor-compressed-backup-to-vmdk.patch [bz#1778593] +- kvm-iotests-055-skip-vmdk-target-tests-if-vmdk-is-not-wh.patch [bz#1778593] +- kvm-backup-Improve-error-for-bdrv_getlength-failure.patch [bz#1778593] +- kvm-backup-Make-sure-that-source-and-target-size-match.patch [bz#1778593] +- kvm-iotests-Backup-with-different-source-target-size.patch [bz#1778593] +- kvm-iotests-109-Don-t-mirror-with-mismatched-size.patch [bz#1778593] +- kvm-iotests-229-Use-blkdebug-to-inject-an-error.patch [bz#1778593] +- kvm-mirror-Make-sure-that-source-and-target-size-match.patch [bz#1778593] +- kvm-iotests-Mirror-with-different-source-target-size.patch [bz#1778593] +- Resolves: bz#1513681 + ([Intel 8.2.1 Feat] qemu-kvm PT VMX -- Fast Train) +- Resolves: bz#1778593 + (Qemu coredump when backup to a existing small size image) +- Resolves: bz#1779893 + (RFE: Copy bitmaps with qemu-img convert) +- Resolves: bz#1779904 + (RFE: ability to estimate bitmap space utilization for qcow2) +- Resolves: bz#1841038 + (qemu-img: /var/tmp/v2vovl56bced.qcow2: CURL: Error opening file: Server does not support 'range' (byte ranges) with HTTP/2 server in VMware ESXi 7) + +* Thu Jun 04 2020 Danilo Cesar Lemes de Paula - 4.2.0-23.el8 +- kvm-target-arm-Fix-PAuth-sbox-functions.patch [bz#1813940] +- kvm-Don-t-leak-memory-when-reallocation-fails.patch [bz#1749737] +- kvm-Replace-remaining-malloc-free-user-with-glib.patch [bz#1749737] +- kvm-Revert-RHEL-disable-hostmem-memfd.patch [bz#1839030] +- kvm-block-introducing-bdrv_co_delete_file-interface.patch [bz#1827630] +- kvm-block.c-adding-bdrv_co_delete_file.patch [bz#1827630] +- kvm-crypto.c-cleanup-created-file-when-block_crypto_co_c.patch [bz#1827630] +- Resolves: bz#1749737 + (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8]) +- Resolves: bz#1813940 + (CVE-2020-10702 virt:8.1/qemu-kvm: qemu: weak signature generation in Pointer 
Authentication support for ARM [rhel-av-8]) +- Resolves: bz#1827630 + (volume creation leaving uncleaned stuff behind on error (vol-clone/libvirt/qemu-kvm)) +- Resolves: bz#1839030 + (RFE: enable the "memfd" memory backend) + +* Mon May 25 2020 Danilo Cesar Lemes de Paula - 4.2.0-22.el8 +- kvm-block-always-fill-entire-LUKS-header-space-with-zero.patch [bz#1775462] +- kvm-numa-remove-not-needed-check.patch [bz#1600217] +- kvm-numa-properly-check-if-numa-is-supported.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-initiator-information-for.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch [bz#1600217] +- kvm-hmat-acpi-Build-Memory-Proximity-Domain-Attributes-S.patch [bz#1600217] +- kvm-hmat-acpi-Build-System-Locality-Latency-and-Bandwidt.patch [bz#1600217] +- kvm-hmat-acpi-Build-Memory-Side-Cache-Information-Struct.patch [bz#1600217] +- kvm-tests-numa-Add-case-for-QMP-build-HMAT.patch [bz#1600217] +- kvm-tests-bios-tables-test-add-test-cases-for-ACPI-HMAT.patch [bz#1600217] +- kvm-ACPI-add-expected-files-for-HMAT-tests-acpihmat.patch [bz#1600217] +- Resolves: bz#1600217 + ([Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train) +- Resolves: bz#1775462 + (Creating luks-inside-qcow2 images with cluster_size=2k/4k will get a corrupted image) + +* Mon May 11 2020 Danilo Cesar Lemes de Paula - 4.2.0-21.el8 +- kvm-hw-pci-pcie-Forbid-hot-plug-if-it-s-disabled-on-the-.patch [bz#1820531] +- kvm-hw-pci-pcie-Replace-PCI_DEVICE-casts-with-existing-v.patch [bz#1820531] +- kvm-tools-virtiofsd-passthrough_ll-Fix-double-close.patch [bz#1817445] +- kvm-virtiofsd-add-rlimit-nofile-NUM-option.patch [bz#1817445] +- kvm-virtiofsd-stay-below-fs.file-max-sysctl-value-CVE-20.patch [bz#1817445] +- kvm-virtiofsd-jail-lo-proc_self_fd.patch [bz#1817445] +- kvm-virtiofsd-Show-submounts.patch [bz#1817445] +- kvm-virtiofsd-only-retain-file-system-capabilities.patch 
[bz#1817445] +- kvm-virtiofsd-drop-all-capabilities-in-the-wait-parent-p.patch [bz#1817445] +- Resolves: bz#1817445 + (CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8]) +- Resolves: bz#1820531 + (qmp command query-pci get wrong result after hotplug device under hotplug=off controller) + +* Fri May 01 2020 Jon Maloy - 4.2.0-20.el8 +- kvm-pcie_root_port-Add-hotplug-disabling-option.patch [bz#1790899] +- kvm-compat-disable-edid-for-virtio-gpu-ccw.patch [bz#1816793] +- Resolves: bz#1790899 + ([RFE] QEMU devices should have the option to enable/disable hotplug/unplug) +- Resolves: bz#1816793 + ('edid' compat handling missing for virtio-gpu-ccw) + +* Tue Apr 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-19.el8_2 +- kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch [bz#1822682] +- Resolves: bz#1822682 + (QEMU-4.2 fails to start a VM on Azure) + +* Thu Apr 09 2020 Danilo Cesar Lemes de Paula - 4.2.0-18.el8_2 +- kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch [bz#1817621] +- kvm-replication-assert-we-own-context-before-job_cancel_.patch [bz#1817621] +- kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch [bz#1817621] +- kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch [bz#1817621] +- kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch [bz#1817621] +- kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch [bz#1817621] +- Resolves: bz#1817621 + (Crash and deadlock with block jobs when using io-threads) + +* Mon Mar 30 2020 Danilo Cesar Lemes de Paula - 4.2.0-17.el8 +- kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch [bz#1816007] +- kvm-block-trickle-down-the-fallback-image-creation-funct.patch [bz#1816007] +- kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch [bz#1794692] +- kvm-mirror-Wait-only-for-in-flight-operations.patch [bz#1794692] +- Resolves: bz#1794692 + (Mirror block job stops making progress) +- Resolves: 
bz#1816007 + (qemu-img convert failed to convert with block device as target) + +* Tue Mar 24 2020 Danilo Cesar Lemes de Paula - 4.2.0-16.el8 +- kvm-migration-Rate-limit-inside-host-pages.patch [bz#1814336] +- kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch [bz#1811670] +- Resolves: bz#1811670 + (Unneeded qemu-guest-agent dependency on pixman) +- Resolves: bz#1814336 + ([POWER9] QEMU migration-test triggers a kernel warning) + +* Tue Mar 17 2020 Danilo Cesar Lemes de Paula - 4.2.0-15.el8 +- kvm-block-nbd-Fix-hang-in-.bdrv_close.patch [bz#1640894] +- kvm-block-Generic-file-creation-fallback.patch [bz#1640894] +- kvm-file-posix-Drop-hdev_co_create_opts.patch [bz#1640894] +- kvm-iscsi-Drop-iscsi_co_create_opts.patch [bz#1640894] +- kvm-iotests-Add-test-for-image-creation-fallback.patch [bz#1640894] +- kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch [bz#1640894] +- kvm-iotests-Use-complete_and_wait-in-155.patch [bz#1790482 bz#1805143] +- kvm-block-Introduce-bdrv_reopen_commit_post-step.patch [bz#1790482 bz#1805143] +- kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch [bz#1790482 bz#1805143] +- kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch [bz#1790482 bz#1805143] +- kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch [bz#1790482 bz#1805143] +- kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch [bz#1790482 bz#1805143] +- kvm-block-Make-bdrv_get_cumulative_perm-public.patch [bz#1790482 bz#1805143] +- kvm-block-Relax-restrictions-for-blockdev-snapshot.patch [bz#1790482 bz#1805143] +- kvm-iotests-Fix-run_job-with-use_log-False.patch [bz#1790482 bz#1805143] +- kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch [bz#1790482 bz#1805143] +- kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch [bz#1790482 bz#1805143] +- kvm-iotests-Add-iothread-cases-to-155.patch [bz#1790482 bz#1805143] +- kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch [bz#1790482 bz#1805143] +- 
kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch [bz#1809380] +- Resolves: bz#1640894 + (Fix generic file creation fallback for qemu-img nvme:// image creation support) +- Resolves: bz#1790482 + (bitmaps in backing images can't be modified) +- Resolves: bz#1805143 + (allow late/lazy opening of backing chain for shallow blockdev-mirror) +- Resolves: bz#1809380 + (guest hang during reboot process after migration from RHEl7.8 to RHEL8.2.0.) + +* Wed Mar 11 2020 Danilo Cesar Lemes de Paula - 4.2.0-14.el8 +- kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch [bz#1782529] +- kvm-migration-multifd-clean-pages-after-filling-packet.patch [bz#1738451] +- kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch [bz#1738451] +- kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch [bz#1738451] +- kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch [bz#1738451] +- kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch [bz#1738451] +- kvm-qemu-file-Don-t-do-IO-after-shutdown.patch [bz#1738451] +- kvm-migration-Don-t-send-data-if-we-have-stopped.patch [bz#1738451] +- kvm-migration-Create-migration_is_running.patch [bz#1738451] +- kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch [bz#1738451] +- kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch [bz#1738451] +- kvm-virtiofsd-Remove-fuse_req_getgroups.patch [bz#1797064] +- kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch [bz#1797064] +- kvm-virtiofsd-load_capng-missing-unlock.patch [bz#1797064] +- kvm-virtiofsd-do_read-missing-NULL-check.patch [bz#1797064] +- kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch [bz#1797064] +- kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch [bz#1797064] +- kvm-virtiofsd-Fix-xattr-operations.patch [bz#1797064] +- Resolves: bz#1738451 + (qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel)) +- Resolves: bz#1782529 + 
(Windows Update Enablement with default smbios strings in qemu) +- Resolves: bz#1797064 + (virtiofsd: Fixes) + +* Sat Feb 29 2020 Danilo Cesar Lemes de Paula - 4.2.0-13.el8 +- kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch [bz#1791648] +- kvm-target-i386-add-a-ucode-rev-property.patch [bz#1791648] +- kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch [bz#1791648] +- kvm-target-i386-fix-TCG-UCODE_REV-access.patch [bz#1791648] +- kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch [bz#1791648] +- kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch [bz#1791648] +- kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch [bz#1703907] +- kvm-mirror-Store-MirrorOp.co-for-debuggability.patch [bz#1794692] +- kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch [bz#1794692] +- Resolves: bz#1703907 + ([upstream]QEMU coredump when converting to qcow2: external data file images on block devices with copy_offloading) +- Resolves: bz#1791648 + ([RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough) +- Resolves: bz#1794692 + (Mirror block job stops making progress) + +* Mon Feb 24 2020 Danilo Cesar Lemes de Paula - 4.2.0-12.el8 +- kvm-vhost-user-gpu-Drop-trailing-json-comma.patch [bz#1805334] +- Resolves: bz#1805334 + (vhost-user/50-qemu-gpu.json is not valid JSON) + +* Sun Feb 23 2020 Danilo Cesar Lemes de Paula - 4.2.0-11.el8 +- kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch [bz#1796240] +- kvm-util-add-slirp_fmt-helpers.patch [bz#1798994] +- kvm-tcp_emu-fix-unsafe-snprintf-usages.patch [bz#1798994] +- kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch [bz#1791590] +- kvm-virtio-make-virtio_delete_queue-idempotent.patch [bz#1791590] +- kvm-virtio-reset-region-cache-when-on-queue-deletion.patch [bz#1791590] +- kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch [bz#1791590] +- Resolves: bz#1791590 ([Q35] No "DEVICE_DELETED" event in qmp after unplug 
virtio-net-pci device) -- Resolves: bz#1794501 - (CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-8.2.0]) - -* Fri Jan 24 2020 Miroslav Rezanina - 2.12.0-97.el8 -- kvm-exec-Fix-MAP_RAM-for-cached-access.patch [bz#1769613] -- kvm-virtio-Return-true-from-virtio_queue_empty-if-broken.patch [bz#1769613] -- kvm-usbredir-Prevent-recursion-in-usbredir_write.patch [bz#1752320] -- kvm-xhci-recheck-slot-status.patch [bz#1752320] -- kvm-tcp_emu-Fix-oob-access.patch [bz#1791566] -- kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch [bz#1791566] -- kvm-slirp-use-correct-size-while-emulating-commands.patch [bz#1791566] -- Resolves: bz#1752320 - (vm gets stuck when migrate vm back and forth with remote-viewer trying to connect) -- Resolves: bz#1769613 - ([SEV] kexec mays hang at "[sda] Synchronizing SCSI cache " before switching to new kernel) -- Resolves: bz#1791566 - (CVE-2020-7039 virt:rhel/qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-8.2.0]) - -* Tue Jan 07 2020 Danilo Cesar Lemes de Paula - 2.12.0-96.el8 -- kvm-i386-Remove-cpu64-rhel6-CPU-model.patch [bz#1741346] -- Resolves: bz#1741346 +- Resolves: bz#1796240 + (Enable hw accelerated cache-count-flush by default for POWER9 DD2.3 cpus) +- Resolves: bz#1798994 + (CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0]) + +* Fri Feb 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-10.el8 +- kvm-i386-Resolve-CPU-models-to-v1-by-default.patch [bz#1779078 bz#1787291 bz#1779078 bz#1779078] +- kvm-iotests-Support-job-complete-in-run_job.patch [bz#1781637] +- kvm-iotests-Create-VM.blockdev_create.patch [bz#1781637] +- kvm-block-Activate-recursively-even-for-already-active-n.patch [bz#1781637] +- kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch [bz#1781637] +- kvm-iotests-Test-external-snapshot-with-VM-state.patch [bz#1781637] +- 
kvm-iotests.py-Let-wait_migration-wait-even-more.patch [bz#1781637] +- kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-backup-top-Begin-drain-earlier.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch [bz#1801320] +- kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch [bz#1801320] +- Resolves: bz#1745606 + (Qemu hang when do incremental live backup in transaction mode without bitmap) +- Resolves: bz#1746217 + (Src qemu hang when do storage vm migration during guest installation) +- Resolves: bz#1773517 + (Src qemu hang when do storage vm migration with dataplane enable) +- Resolves: bz#1779036 + (Qemu coredump when do snapshot in transaction mode with one snapshot path not exist) +- Resolves: bz#1779078 + (RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit 
message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm)) +- Resolves: bz#1781637 + (qemu crashed when do mem and disk snapshot) +- Resolves: bz#1782111 + (Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable)) +- Resolves: bz#1782175 + (Qemu core dump when add persistent bitmap(data plane enable)) +- Resolves: bz#1783965 + (Qemu core dump when do backup with sync: bitmap and no bitmap provided) +- Resolves: bz#1787291 + (RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) [rhel-8.1.0.z]) +- Resolves: bz#1801320 + (aarch64: backport query-cpu-model-expansion and adjvtime document fixes) + +* Mon Feb 10 2020 Danilo Cesar Lemes de Paula - 4.2.0-9.el8 +- kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch [bz#1776638] +- kvm-xics-Don-t-deassert-outputs.patch [bz#1776638] +- kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch [bz#1776638] +- kvm-trace-update-qemu-trace-stap-to-Python-3.patch [bz#1787395] +- kvm-redhat-Remove-redundant-fix-for-qemu-trace-stap.patch [bz#1787395] +- kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch [bz#1794503] +- kvm-tpm-ppi-page-align-PPI-RAM.patch [bz#1787444] +- kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch [bz#1647366] +- kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch [bz#1647366] +- kvm-tests-arm-cpu-features-Check-feature-default-values.patch [bz#1647366] +- kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch [bz#1647366] +- kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch [bz#1647366] +- kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch [bz#1529231] +- kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch [bz#1529231] +- kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch [bz#1529231] +- Resolves: bz#1529231 + ([q35] VM hangs 
after migration with 200 vCPUs) +- Resolves: bz#1647366 + (aarch64: Add support for the kvm-no-adjvtime ARM CPU feature) +- Resolves: bz#1776638 + (Guest failed to boot up after system_reset 20 times) +- Resolves: bz#1787395 + (qemu-trace-stap list : TypeError: startswith first arg must be bytes or a tuple of bytes, not str) +- Resolves: bz#1787444 + (Broken postcopy migration with vTPM device) +- Resolves: bz#1794503 + (CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-av-8.2.0]) + +* Fri Jan 31 2020 Miroslav Rezanina - 4.2.0-8.el8 +- kvm-target-arm-arch_dump-Add-SVE-notes.patch [bz#1725084] +- kvm-vhost-Add-names-to-section-rounded-warning.patch [bz#1779041] +- kvm-vhost-Only-align-sections-for-vhost-user.patch [bz#1779041] +- kvm-vhost-coding-style-fix.patch [bz#1779041] +- kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch [bz#1694164] +- kvm-vhost-user-fs-remove-vhostfd-property.patch [bz#1694164] +- kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch [bz#1694164] +- kvm-virtiofsd-Pull-in-upstream-headers.patch [bz#1694164] +- kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch [bz#1694164] +- kvm-virtiofsd-Add-auxiliary-.c-s.patch [bz#1694164] +- kvm-virtiofsd-Add-fuse_lowlevel.c.patch [bz#1694164] +- kvm-virtiofsd-Add-passthrough_ll.patch [bz#1694164] +- kvm-virtiofsd-Trim-down-imported-files.patch [bz#1694164] +- kvm-virtiofsd-Format-imported-files-to-qemu-style.patch [bz#1694164] +- kvm-virtiofsd-remove-mountpoint-dummy-argument.patch [bz#1694164] +- kvm-virtiofsd-remove-unused-notify-reply-support.patch [bz#1694164] +- kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch [bz#1694164] +- kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch [bz#1694164] +- kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch [bz#1694164] +- kvm-virtiofsd-Trim-out-compatibility-code.patch [bz#1694164] +- kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch [bz#1694164] +- 
kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch [bz#1694164] +- kvm-virtiofsd-Add-options-for-virtio.patch [bz#1694164] +- kvm-virtiofsd-add-o-source-PATH-to-help-output.patch [bz#1694164] +- kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch [bz#1694164] +- kvm-virtiofsd-Start-wiring-up-vhost-user.patch [bz#1694164] +- kvm-virtiofsd-Add-main-virtio-loop.patch [bz#1694164] +- kvm-virtiofsd-get-set-features-callbacks.patch [bz#1694164] +- kvm-virtiofsd-Start-queue-threads.patch [bz#1694164] +- kvm-virtiofsd-Poll-kick_fd-for-queue.patch [bz#1694164] +- kvm-virtiofsd-Start-reading-commands-from-queue.patch [bz#1694164] +- kvm-virtiofsd-Send-replies-to-messages.patch [bz#1694164] +- kvm-virtiofsd-Keep-track-of-replies.patch [bz#1694164] +- kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch [bz#1694164] +- kvm-virtiofsd-Fast-path-for-virtio-read.patch [bz#1694164] +- kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch [bz#1694164] +- kvm-virtiofsd-make-f-foreground-the-default.patch [bz#1694164] +- kvm-virtiofsd-add-vhost-user.json-file.patch [bz#1694164] +- kvm-virtiofsd-add-print-capabilities-option.patch [bz#1694164] +- kvm-virtiofs-Add-maintainers-entry.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch [bz#1694164] +- kvm-virtiofsd-validate-path-components.patch [bz#1694164] +- kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch [bz#1694164] +- kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch [bz#1694164] +- kvm-virtiofsd-add-fuse_mbuf_iter-API.patch [bz#1694164] +- 
kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch [bz#1694164] +- kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch [bz#1694164] +- kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch [bz#1694164] +- kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch [bz#1694164] +- kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch [bz#1694164] +- kvm-virtiofsd-sandbox-mount-namespace.patch [bz#1694164] +- kvm-virtiofsd-move-to-an-empty-network-namespace.patch [bz#1694164] +- kvm-virtiofsd-move-to-a-new-pid-namespace.patch [bz#1694164] +- kvm-virtiofsd-add-seccomp-whitelist.patch [bz#1694164] +- kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch [bz#1694164] +- kvm-virtiofsd-cap-ng-helpers.patch [bz#1694164] +- kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch [bz#1694164] +- kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch [bz#1694164] +- kvm-virtiofsd-fix-libfuse-information-leaks.patch [bz#1694164] +- kvm-virtiofsd-add-syslog-command-line-option.patch [bz#1694164] +- kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch [bz#1694164] +- kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch [bz#1694164] +- kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch [bz#1694164] +- kvm-virtiofsd-Handle-reinit.patch [bz#1694164] +- kvm-virtiofsd-Handle-hard-reboot.patch [bz#1694164] +- kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch [bz#1694164] +- kvm-vhost-user-Print-unexpected-slave-message-types.patch [bz#1694164] +- kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-control-readdirplus.patch [bz#1694164] +- kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch [bz#1694164] +- kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch [bz#1694164] +- 
kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-use-hashtable.patch [bz#1694164] +- kvm-virtiofsd-Clean-up-inodes-on-destroy.patch [bz#1694164] +- kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch [bz#1694164] +- kvm-virtiofsd-fix-error-handling-in-main.patch [bz#1694164] +- kvm-virtiofsd-cleanup-allocated-resource-in-se.patch [bz#1694164] +- kvm-virtiofsd-fix-memory-leak-on-lo.source.patch [bz#1694164] +- kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch [bz#1694164] +- kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch [bz#1694164] +- kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch [bz#1694164] +- kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch [bz#1694164] +- kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch [bz#1694164] +- kvm-virtiofsd-Support-remote-posix-locks.patch [bz#1694164] +- kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch [bz#1694164] +- kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch [bz#1694164] +- kvm-virtiofsd-make-lo_release-atomic.patch [bz#1694164] +- kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch [bz#1694164] +- kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch [bz#1694164] +- kvm-libvhost-user-Fix-some-memtable-remap-cases.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch [bz#1694164] +- kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch [bz#1694164] +- kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch [bz#1694164] +- kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch [bz#1694164] +- kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch [bz#1694164] +- kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch [bz#1694164] +- 
kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch [bz#1694164] +- kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch [bz#1694164] +- kvm-virtiofsd-process-requests-in-a-thread-pool.patch [bz#1694164] +- kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch [bz#1694164] +- kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch [bz#1694164] +- kvm-virtiofsd-add-thread-pool-size-NUM-option.patch [bz#1694164] +- kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch [bz#1694164] +- kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch [bz#1694164] +- kvm-virtiofsd-add-some-options-to-the-help-message.patch [bz#1694164] +- kvm-redhat-ship-virtiofsd-vhost-user-device-backend.patch [bz#1694164] +- Resolves: bz#1694164 + (virtio-fs: host<->guest shared file system (qemu)) +- Resolves: bz#1725084 + (aarch64: support dumping SVE registers) +- Resolves: bz#1779041 + (netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic) + +* Tue Jan 21 2020 Miroslav Rezanina - 4.2.0-7.el8 +- kvm-tcp_emu-Fix-oob-access.patch [bz#1791568] +- kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch [bz#1791568] +- kvm-slirp-use-correct-size-while-emulating-commands.patch [bz#1791568] +- kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch [bz#1559846] +- Resolves: bz#1559846 + (Nested KVM: limit VMX features according to CPU models - Fast Train) +- Resolves: bz#1791568 + (CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0]) + +* Wed Jan 15 2020 Danilo Cesar Lemes de Paula - 4.2.0-6.el8 +- kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch [bz#1733893] +- kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch [bz#1782678] +- kvm-virtio-don-t-enable-notifications-during-polling.patch [bz#1789301] +- kvm-usbredir-Prevent-recursion-in-usbredir_write.patch [bz#1790844] +- 
kvm-xhci-recheck-slot-status.patch [bz#1790844] +- Resolves: bz#1733893 + (Boot a guest with "-prom-env 'auto-boot?=false'", SLOF failed to enter the boot entry after input "boot" followed by "0 > " on VNC) +- Resolves: bz#1782678 + (qemu core dump after hot-unplugging the XXV710/XL710 PF) +- Resolves: bz#1789301 + (virtio-blk/scsi: fix notification suppression during AioContext polling) +- Resolves: bz#1790844 + (USB related fixes) + +* Tue Jan 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-5.el8 +- kvm-i386-Remove-cpu64-rhel6-CPU-model.patch [bz#1741345] +- kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch [bz#1772774] +- Resolves: bz#1741345 (Remove the "cpu64-rhel6" CPU from qemu-kvm) - -* Thu Jan 02 2020 Danilo Cesar Lemes de Paula - 2.12.0-95.el8 -- kvm-virtio-gpu-block-both-2d-and-3d-rendering.patch [bz#1674324] -- kvm-x86-Intel-AVX512_BF16-feature-enabling.patch [bz#1642541] -- Resolves: bz#1642541 - ([Intel 8.2 Feature] qemu-kvm Enable BFloat16 data type support) -- Resolves: bz#1674324 - (With , qemu either refuses to start completely or spice-server crashes afterwards) - -* Wed Dec 18 2019 Danilo Cesar Lemes de Paula - 2.12.0-94.el8 -- kvm-util-mmap-alloc-Add-a-is_pmem-parameter-to-qemu_ram_.patch [bz#1539282] -- kvm-mmap-alloc-unfold-qemu_ram_mmap.patch [bz#1539282] -- kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch [bz#1539282] -- kvm-util-mmap-alloc-support-MAP_SYNC-in-qemu_ram_mmap.patch [bz#1539282] -- kvm-x86-cpu-Enable-MOVDIRI-cpu-feature.patch [bz#1634827] -- kvm-x86-cpu-Enable-MOVDIR64B-cpu-feature.patch [bz#1634827] -- kvm-add-call-to-qemu_add_opts-for-overcommit-option.patch [bz#1634827] -- kvm-support-overcommit-cpu-pm-on-off.patch [bz#1634827] -- kvm-i386-cpu-make-cpu-host-support-monitor-mwait.patch [] -- kvm-x86-cpu-Add-support-for-UMONITOR-UMWAIT-TPAUSE.patch [bz#1634827] -- kvm-target-i386-Add-support-for-save-load-IA32_UMWAIT_CO.patch [bz#1634827] -- Resolves: bz#1539282 - ([Intel 8.2 Feature][Crystal Ridge] Support 
MAP_SYNC - qemu-kvm) -- Resolves: bz#1634827 - ([Intel 8.2 Feat] KVM Enable SnowRidge Accelerator Interface Architecture (AIA) - qemu) - -* Wed Dec 11 2019 Danilo Cesar Lemes de Paula - 2.12.0-93.el8 -- kvm-target-i386-Export-TAA_NO-bit-to-guests.patch [bz#1771971] -- kvm-target-i386-add-support-for-MSR_IA32_TSX_CTRL.patch [bz#1771971] -- Resolves: bz#1771971 - (CVE-2019-11135 virt:rhel/qemu-kvm: hw: TSX Transaction Asynchronous Abort (TAA) [rhel-8.2.0]) - -* Mon Dec 02 2019 Danilo Cesar Lemes de Paula - 2.12.0-92.el8 -- kvm-x86-cpu-use-FeatureWordArray-to-define-filtered_feat.patch [bz#1689270] -- kvm-i386-Add-x-force-features-option-for-testing.patch [bz#1689270] -- kvm-target-i386-define-a-new-MSR-based-feature-word-FEAT.patch [bz#1689270] -- kvm-i386-display-known-CPUID-features-linewrapped-in-alp.patch [bz#1689270] -- kvm-target-i386-kvm-kvm_get_supported_msrs-cleanup.patch [bz#1689270] -- kvm-target-i386-handle-filtered_features-in-a-new-functi.patch [bz#1689270] -- kvm-target-i386-introduce-generic-feature-dependency-mec.patch [bz#1689270] -- kvm-target-i386-expand-feature-words-to-64-bits.patch [bz#1689270] -- kvm-target-i386-add-VMX-definitions.patch [bz#1689270] -- kvm-vmxcap-correct-the-name-of-the-variables.patch [bz#1689270] -- kvm-target-i386-add-VMX-features.patch [bz#1689270] -- kvm-target-i386-work-around-KVM_GET_MSRS-bug-for-seconda.patch [bz#1689270] -- kvm-target-i386-adjust-for-missing-VMX-features.patch [bz#1689270] -- kvm-target-i386-add-VMX-features-to-named-CPU-models.patch [bz#1689270] -- kvm-target-i386-add-VMX-features-to-named-CPU-models-RHE.patch [bz#1689270] -- kvm-vhost-fix-vhost_log-size-overflow-during-migration.patch [bz#1776808] -- Resolves: bz#1689270 - (Nested KVM: limit VMX features according to CPU models - Slow Train) -- Resolves: bz#1776808 - (qemu-kvm crashes when Windows VM is migrated with multiqueue) - -* Wed Nov 27 2019 Danilo Cesar Lemes de Paula - 2.12.0-91.el8 -- 
kvm-qapi-fill-in-CpuInfoFast.arch-in-query-cpus-fast.patch [bz#1730969] -- kvm-curl-Keep-pointer-to-the-CURLState-in-CURLSocket.patch [bz#1744602] -- kvm-curl-Keep-socket-until-the-end-of-curl_sock_cb.patch [bz#1744602] -- kvm-curl-Check-completion-in-curl_multi_do.patch [bz#1744602] -- kvm-curl-Pass-CURLSocket-to-curl_multi_do.patch [bz#1744602] -- kvm-curl-Report-only-ready-sockets.patch [bz#1744602] -- kvm-curl-Handle-success-in-multi_check_completion.patch [bz#1744602] -- kvm-curl-Check-curl_multi_add_handle-s-return-code.patch [bz#1744602] -- Resolves: bz#1730969 - ([ppc] qmp: The 'arch' value returned by the command 'query-cpus-fast' does not match) -- Resolves: bz#1744602 +- Resolves: bz#1772774 + (qemu-kvm core dump during migration+reboot ( Assertion `mem->dirty_bmap' failed )) + +* Fri Dec 13 2019 Danilo Cesar Lemes de Paula - 4.2.0-4.el8 +- Rebase to qemu-4.2 +- Resolves: bz#1783250 + (rebase qemu-kvm to 4.2) + +* Tue Dec 10 2019 Danilo Cesar Lemes de Paula - 4.1.0-18.el8 +- kvm-LUKS-support-preallocation.patch [bz#1534951] +- kvm-nbd-add-empty-.bdrv_reopen_prepare.patch [bz#1718727] +- kvm-qdev-qbus-add-hidden-device-support.patch [bz#1757796] +- kvm-pci-add-option-for-net-failover.patch [bz#1757796] +- kvm-pci-mark-devices-partially-unplugged.patch [bz#1757796] +- kvm-pci-mark-device-having-guest-unplug-request-pending.patch [bz#1757796] +- kvm-qapi-add-unplug-primary-event.patch [bz#1757796] +- kvm-qapi-add-failover-negotiated-event.patch [bz#1757796] +- kvm-migration-allow-unplug-during-migration-for-failover.patch [bz#1757796] +- kvm-migration-add-new-migration-state-wait-unplug.patch [bz#1757796] +- kvm-libqos-tolerate-wait-unplug-migration-state.patch [bz#1757796] +- kvm-net-virtio-add-failover-support.patch [bz#1757796] +- kvm-vfio-unplug-failover-primary-device-before-migration.patch [bz#1757796] +- kvm-net-virtio-fix-dev_unplug_pending.patch [bz#1757796] +- kvm-net-virtio-return-early-when-failover-primary-alread.patch [bz#1757796] +- 
kvm-net-virtio-fix-re-plugging-of-primary-device.patch [bz#1757796] +- kvm-net-virtio-return-error-when-device_opts-arg-is-NULL.patch [bz#1757796] +- kvm-vfio-don-t-ignore-return-value-of-migrate_add_blocke.patch [bz#1757796] +- kvm-hw-vfio-pci-Fix-double-free-of-migration_blocker.patch [bz#1757796] +- Resolves: bz#1534951 + (RFE: Support preallocation mode for luks format) +- Resolves: bz#1718727 + (Committing changes to the backing file over NBD fails with reopening files not supported) +- Resolves: bz#1757796 + (RFE: support for net failover devices in qemu) + +* Mon Dec 02 2019 Danilo Cesar Lemes de Paula - 4.1.0-17.el8 +- kvm-qemu-pr-helper-fix-crash-in-mpath_reconstruct_sense.patch [bz#1772322] +- Resolves: bz#1772322 + (qemu-pr-helper: fix crash in mpath_reconstruct_sense) + +* Wed Nov 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-16.el8 +- kvm-curl-Keep-pointer-to-the-CURLState-in-CURLSocket.patch [bz#1745209] +- kvm-curl-Keep-socket-until-the-end-of-curl_sock_cb.patch [bz#1745209] +- kvm-curl-Check-completion-in-curl_multi_do.patch [bz#1745209] +- kvm-curl-Pass-CURLSocket-to-curl_multi_do.patch [bz#1745209] +- kvm-curl-Report-only-ready-sockets.patch [bz#1745209] +- kvm-curl-Handle-success-in-multi_check_completion.patch [bz#1745209] +- kvm-curl-Check-curl_multi_add_handle-s-return-code.patch [bz#1745209] +- kvm-vhost-user-save-features-if-the-char-dev-is-closed.patch [bz#1738768] +- kvm-block-snapshot-Restrict-set-of-snapshot-nodes.patch [bz#1658981] +- kvm-iotests-Test-internal-snapshots-with-blockdev.patch [bz#1658981] +- kvm-qapi-Add-feature-flags-to-commands-in-qapi-introspec.patch [bz#1658981] +- kvm-qapi-Allow-introspecting-fix-for-savevm-s-cooperatio.patch [bz#1658981] +- kvm-block-Remove-backing-null-from-bs-explicit_-options.patch [bz#1773925] +- kvm-iotests-Test-multiple-blockdev-snapshot-calls.patch [bz#1773925] +- Resolves: bz#1658981 + (qemu failed to create internal snapshot via 'savevm' when using blockdev) +- Resolves: bz#1738768 + (Guest 
fails to recover receiving packets after vhost-user reconnect) +- Resolves: bz#1745209 (qemu-img gets stuck when stream-converting from http) - -* Tue Nov 12 2019 Danilo Cesar Lemes de Paula - 2.12.0-90.el8 -- kvm-i386-Don-t-print-warning-if-phys-bits-was-set-automa.patch [bz#1719127] -- kvm-Disable-CONFIG_I2C-and-CONFIG_IOH3420.patch [bz#1693140] -- kvm-usb-drop-unnecessary-usb_device_post_load-checks.patch [bz#1757482] -- kvm-pc-bios-s390-ccw-define-loadparm-length.patch [bz#1664376] -- kvm-pc-bios-s390-ccw-net-Use-diag308-to-reset-machine-be.patch [bz#1664376] -- kvm-s390-bios-decouple-cio-setup-from-virtio.patch [bz#1664376] -- kvm-s390-bios-decouple-common-boot-logic-from-virtio.patch [bz#1664376] -- kvm-s390-bios-Clean-up-cio.h.patch [bz#1664376] -- kvm-s390-bios-Decouple-channel-i-o-logic-from-virtio.patch [bz#1664376] -- kvm-s390-bios-Map-low-core-memory.patch [bz#1664376] -- kvm-s390-bios-ptr2u32-and-u32toptr.patch [bz#1664376] -- kvm-s390-bios-Support-for-running-format-0-1-channel-pro.patch [bz#1664376] -- kvm-s390-bios-cio-error-handling.patch [bz#1664376] -- kvm-s390-bios-Extend-find_dev-for-non-virtio-devices.patch [bz#1664376] -- kvm-s390-bios-Factor-finding-boot-device-out-of-virtio-c.patch [bz#1664376] -- kvm-s390-bios-Refactor-virtio-to-run-channel-programs-vi.patch [bz#1664376] -- kvm-s390-bios-Use-control-unit-type-to-determine-boot-me.patch [bz#1664376] -- kvm-s390-bios-Add-channel-command-codes-structs-needed-f.patch [bz#1664376] -- kvm-s390-bios-Support-booting-from-real-dasd-device.patch [bz#1664376] -- kvm-s390-bios-Use-control-unit-type-to-find-bootable-dev.patch [bz#1664376] -- kvm-s390x-vfio-ap-Implement-hot-plug-unplug-of-vfio-ap-d.patch [bz#1660906] -- Resolves: bz#1660906 - ([IBM 8.2 FEAT] KVM s390x: Crypto Passthrough Hotplug - qemu part) -- Resolves: bz#1664376 - ([IBM 8.2 FEAT] CCW IPL Support (kvm) - qemu part) -- Resolves: bz#1693140 - (aarch64: qemu: remove smbus_eeprom and i2c from config) -- Resolves: bz#1719127 - ([Intel 8.2 
Bug] warning shown when boot VM with “–cpu host” or “–cpu other mode” on ICX platform (physical)) -- Resolves: bz#1757482 - (Fail to migrate a rhel6.10-mt7.6 guest with dimm device) - -* Mon Oct 14 2019 Danilo Cesar Lemes de Paula - 2.12.0-89.el8 -- kvm-accel-use-g_strsplit-for-parsing-accelerator-names.patch [bz#1749022] -- kvm-opts-don-t-silently-truncate-long-parameter-keys.patch [bz#1749022] -- kvm-opts-don-t-silently-truncate-long-option-values.patch [bz#1749022] -- kvm-i386-fix-regression-parsing-multiboot-initrd-modules.patch [bz#1749022] -- kvm-i386-only-parse-the-initrd_filename-once-for-multibo.patch [bz#1749022] -- kvm-opts-remove-redundant-check-for-NULL-parameter.patch [bz#1749022] -- kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch [bz#1749724] -- kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch [bz#1708459] -- kvm-s390x-cpumodel-Rework-CPU-feature-definition.patch [bz#1660909] -- kvm-s390x-cpumodel-Set-up-CPU-model-for-AQIC-interceptio.patch [bz#1660909] -- kvm-ccid-Fix-dwProtocols-advertisement-of-T-0.patch [bz#1746361] -- kvm-s390-PCI-fix-IOMMU-region-init.patch [bz#1754643] -- kvm-fw_cfg-Improve-error-message-when-can-t-load-splash-.patch [bz#1607367] -- kvm-fw_cfg-Fix-boot-bootsplash-error-checking.patch [bz#1607367] -- kvm-fw_cfg-Fix-boot-reboot-timeout-error-checking.patch [bz#1607367] -- kvm-hw-nvram-fw_cfg-Store-reboot-timeout-as-little-endia.patch [bz#1607367] -- kvm-intel_iommu-Correct-caching-mode-error-message.patch [bz#1738440] -- kvm-intel_iommu-Sanity-check-vfio-pci-config-on-machine-.patch [bz#1738440] -- kvm-qdev-machine-Introduce-hotplug_allowed-hook.patch [bz#1738440] -- kvm-pc-q35-Disallow-vfio-pci-hotplug-without-VT-d-cachin.patch [bz#1738440] -- kvm-intel_iommu-Remove-the-caching-mode-check-during-fla.patch [bz#1738440] -- kvm-pseries-do-not-allow-memory-less-cpu-less-NUMA-node.patch [bz#1651474] -- Resolves: bz#1607367 - (After boot failed, guest should not reboot when set reboot-timeout < -1) -- 
Resolves: bz#1651474 - (RHEL8.0 Beta - [4.18.0-32.el8.ppc64le] Guest VM crashes during vcpu hotplug with specific numa configuration (kvm)) -- Resolves: bz#1660909 - ([IBM 8.2 FEAT] KVM s390x: Crypto Passthrough Interrupt Support - qemu part) -- Resolves: bz#1708459 +- Resolves: bz#1773925 + (Fail to do blockcommit with more than one snapshots) + +* Thu Nov 14 2019 Danilo Cesar Lemes de Paula - 4.1.0-15.el8 +- kvm-virtio-blk-Add-blk_drain-to-virtio_blk_device_unreal.patch [bz#1706759] +- kvm-Revert-qcow2-skip-writing-zero-buffers-to-empty-COW-.patch [bz#1772473] +- kvm-coroutine-Add-qemu_co_mutex_assert_locked.patch [bz#1772473] +- kvm-qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch [bz#1772473] +- Resolves: bz#1706759 + (qemu core dump when unplug a 16T GPT type disk from win2019 guest) +- Resolves: bz#1772473 + (Import fixes from 8.1.0 into 8.1.1 branch) + +* Tue Oct 29 2019 Danilo Cesar Lemes de Paula - 4.1.0-14.el8 +- kvm-Revert-qcow2-skip-writing-zero-buffers-to-empty-COW-.patch [bz#1751934] +- kvm-coroutine-Add-qemu_co_mutex_assert_locked.patch [bz#1764721] +- kvm-qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch [bz#1764721] +- Resolves: bz#1751934 + (Fail to install guest when xfs is the host filesystem) +- Resolves: bz#1764721 + (qcow2 image corruption due to incorrect locking in preallocation detection) + +* Fri Sep 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-13.el8 +- kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch [bz#1748253] +- kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch [bz#1744955] +- Resolves: bz#1744955 + (Qemu hang when block resize a qcow2 image) +- Resolves: bz#1748253 + (QEMU crashes (core dump) when using the integrated NDB server with data-plane) + +* Thu Sep 26 2019 Danilo Cesar Lemes de Paula - 4.1.0-12.el8 +- kvm-block-Use-QEMU_IS_ALIGNED.patch [bz#1745922] +- kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch [bz#1745922] +- kvm-block-qcow2-refactor-encryption-code.patch 
[bz#1745922] +- kvm-qemu-iotests-Add-test-for-bz-1745922.patch [bz#1745922] +- Resolves: bz#1745922 + (Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase') + +* Mon Sep 23 2019 Danilo Cesar Lemes de Paula - 4.1.0-11.el8 +- kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch [bz#1746631] +- kvm-hostmem-file-fix-pmem-file-size-check.patch [bz#1724008 bz#1736788] +- kvm-memory-fetch-pmem-size-in-get_file_size.patch [bz#1724008 bz#1736788] +- kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch [bz#1753992] +- Resolves: bz#1724008 + (QEMU core dumped "memory_region_get_ram_ptr: Assertion `mr->ram_block' failed") +- Resolves: bz#1736788 + (QEMU core dumped if boot guest with nvdimm backed by /dev/dax0.0 and option pmem=off) +- Resolves: bz#1746631 + (Qemu core dump when do block commit under stress) +- Resolves: bz#1753992 + (core dump when testing persistent reservation in guest) + +* Mon Sep 16 2019 Danilo Cesar Lemes de Paula - 4.1.0-10.el8 +- kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch [bz#1748725] +- kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch [bz#1746267] +- kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch [bz#1717321] +- kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch [bz#1749737] +- Resolves: bz#1717321 (qemu-kvm core dumped when repeat "system_reset" multiple times during guest boot) -- Resolves: bz#1738440 - (For intel-iommu, qemu shows conflict behaviors between booting a guest with vfio and hot plugging vfio device) -- Resolves: bz#1746361 - (ccid: Fix incorrect dwProtocol advertisement of T=0) -- Resolves: bz#1749022 - (Please backport 950c4e6c94b1 ("opts: don't silently truncate long option values", 2018-05-09)) -- Resolves: bz#1749724 - (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-8]) -- Resolves: bz#1754643 - (RHEL8.1 Snapshot3 - Passthrough PCI card goes into error state if used in domain (kvm)) - -* Fri Sep 13 2019 Danilo Cesar Lemes de 
Paula - 2.12.0-88.el8 -- Revert fix for bz#1749724 - this got delayed to 8.2 - (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-8]) - -* Tue Sep 03 2019 Danilo Cesar Lemes de Paula - 2.12.0-86.el8 -- kvm-Do-not-run-iotests-on-brew-build.patch [bz#1742819] -- kvm-target-ppc-spapr-Add-workaround-option-to-SPAPR_CAP_.patch [bz#1744415] -- kvm-target-ppc-spapr-Add-SPAPR_CAP_CCF_ASSIST.patch [bz#1744415] -- kvm-i386-x86_cpu_list_feature_names-function.patch [bz#1747185] -- kvm-i386-unavailable-features-QOM-property.patch [bz#1747185] -- kvm-file-posix-Handle-undetectable-alignment.patch [bz#1738839] -- kvm-iotests-Tweak-221-sizing-for-different-hole-granular.patch [bz#1738839] -- kvm-iotests-Filter-175-s-allocation-information.patch [bz#1738839] -- kvm-block-posix-Always-allocate-the-first-block.patch [bz#1738839] -- kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch [bz#1738839] -- Resolves: bz#1738839 +- Resolves: bz#1746267 + (qemu coredump: qemu-kvm: block/create.c:68: qmp_blockdev_create: Assertion `drv' failed) +- Resolves: bz#1748725 + ([ppc][migration][v6.3-rc1-p1ce8930]basic migration failed with "qemu-kvm: KVM_SET_DEVICE_ATTR failed: Group 3 attr 0x0000000000001309: Device or resource busy") +- Resolves: bz#1749737 + (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8]) + +* Tue Sep 10 2019 Danilo Cesar Lemes de Paula - 4.1.0-9.el8 +- kvm-migration-always-initialise-ram_counters-for-a-new-m.patch [bz#1734316] +- kvm-migration-add-qemu_file_update_transfer-interface.patch [bz#1734316] +- kvm-migration-add-speed-limit-for-multifd-migration.patch [bz#1734316] +- kvm-migration-update-ram_counters-for-multifd-sync-packe.patch [bz#1734316] +- kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch [bz#1750200] +- kvm-spapr-pci-Free-MSIs-during-reset.patch [bz#1750200] +- Resolves: bz#1734316 + (multifd migration does not honour speed limits, consumes entire bandwidth of NIC) +- Resolves: 
bz#1750200 + ([RHEL8.1][QEMU4.1]boot up guest with vf device,then system_reset guest,error prompt(qemu-kvm: Can't allocate MSIs for device 2800: IRQ 4904 is not free)) + +* Mon Sep 09 2019 Danilo Cesar Lemes de Paula - 4.1.0-8.el8 +- kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch [bz#1747836] +- kvm-ehci-fix-queue-dev-null-ptr-dereference.patch [bz#1746790] +- kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch [bz#1743477] +- kvm-file-posix-Handle-undetectable-alignment.patch [bz#1749134] +- kvm-block-posix-Always-allocate-the-first-block.patch [bz#1749134] +- kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch [bz#1749134] +- Resolves: bz#1743477 + (Since bd94bc06479a "spapr: change default interrupt mode to 'dual'", QEMU resets the machine to select the appropriate interrupt controller. And -no-reboot prevents that.) +- Resolves: bz#1746790 + (qemu core dump while migrate from RHEL7.6 to RHEL8.1) +- Resolves: bz#1747836 + (Call traces after guest migration due to incorrect handling of the timebase) +- Resolves: bz#1749134 (I/O error when virtio-blk disk is backed by a raw image on 4k disk) + +* Fri Sep 06 2019 Danilo Cesar Lemes de Paula - 4.1.0-7.el8 +- kvm-trace-Clarify-DTrace-SystemTap-help-message.patch [bz#1516220] +- kvm-socket-Add-backlog-parameter-to-socket_listen.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch [bz#1726898] +- kvm-multifd-Use-number-of-channels-as-listen-backlog.patch [bz#1726898] +- kvm-pseries-Fix-compat_pvr-on-reset.patch [bz#1744107] +- kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch [bz#1744107] +- Resolves: bz#1516220 + (-trace help prints an incomplete list of trace events) +- Resolves: bz#1726898 + (Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then) 
+- Resolves: bz#1744107 + (Migration from P8(qemu4.1) to P9(qemu4.1), after migration, qemu crash on destination with error message "qemu-kvm: error while loading state for instance 0x1 of device 'cpu'") + +* Wed Sep 04 2019 Danilo Cesar Lemes de Paula - 4.1.0-6.el8 +- kvm-memory-Refactor-memory_region_clear_coalescing.patch [bz#1743142] +- kvm-memory-Split-zones-when-do-coalesced_io_del.patch [bz#1743142] +- kvm-memory-Remove-has_coalesced_range-counter.patch [bz#1743142] +- kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch [bz#1743142] +- kvm-enable-virgl-for-real-this-time.patch [bz#1559740] +- Resolves: bz#1559740 + ([RFE] Enable virgl as TechPreview (qemu)) +- Resolves: bz#1743142 + (Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device (28)) + +* Tue Aug 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-5.el8 +- kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch [bz#1693772] +- kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch [bz#1693772] +- kvm-enable-virgl.patch [bz#1559740] +- kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch [bz#1744170] +- kvm-Do-not-run-iotests-on-brew-build.patch [bz#1742197 bz#1742819] +- Resolves: bz#1559740 + ([RFE] Enable virgl as TechPreview (qemu)) +- Resolves: bz#1693772 + ([IBM zKVM] RHEL AV 8.1.0 machine type update for s390x) +- Resolves: bz#1742197 + (Remove iotests from qemu-kvm builds [RHEL AV 8.1.0]) - Resolves: bz#1742819 (Remove iotests from qemu-kvm builds [RHEL 8.1.0]) -- Resolves: bz#1744415 - (Backport support for count cache flush Spectre v2 mitigation [slow train]) -- Resolves: bz#1747185 - ("filtered-features" QOM property is not available) - -* Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 2.12.0-85.el8 -- kvm-console-Avoid-segfault-in-screendump.patch [bz#1684383] -- kvm-usb-hub-clear-suspend-on-detach.patch [bz#1619661] -- 
kvm-qemu-img-fix-regression-copying-secrets-during-conve.patch [bz#1727821] -- Resolves: bz#1619661 - (the attach hub on one hub still exits in device manager after unhotplug) -- Resolves: bz#1684383 - (qemu crashed when take screenshot for 2nd head of virtio video device if the display not opened by virt-viewer) -- Resolves: bz#1727821 - (Failed to convert a source image to the qcow2 image encrypted by luks) - -* Fri Aug 16 2019 Danilo Cesar Lemes de Paula - 2.12.0-84.el8 -- kvm-vnc-detect-and-optimize-pageflips.patch [bz#1727033] -- kvm-block-backend-Make-blk_inc-dec_in_flight-public.patch [bz#1716349] -- kvm-virtio-blk-Increase-in_flight-for-request-restart-BH.patch [bz#1716349] -- kvm-block-Fix-AioContext-switch-for-drained-node.patch [bz#1716349] -- kvm-test-bdrv-drain-AioContext-switch-in-drained-section.patch [bz#1716349] -- kvm-block-Use-normal-drain-for-bdrv_set_aio_context.patch [bz#1716349] -- kvm-block-Fix-AioContext-switch-for-bs-drv-NULL.patch [bz#1716347] -- kvm-iothread-fix-crash-with-invalid-properties.patch [bz#1687541] -- kvm-iothread-replace-init_done_cond-with-a-semaphore.patch [bz#1687541] -- kvm-RHEL-disable-hostmem-memfd.patch [bz#1740797] -- Resolves: bz#1687541 - (qemu aborted when start guest with a big iothreads) -- Resolves: bz#1716347 - (Qemu Core dump when quit vm that's in status "paused(io-error)" with data plane enabled) -- Resolves: bz#1716349 - (qemu with iothreads enabled crashes on resume after enospc pause for disk extension) -- Resolves: bz#1727033 - (vnc server should detect page-flips and avoid sending fullscreen updates then.) 
+- Resolves: bz#1744170 + ([IBM Power] New 8.1.0 machine type for pseries) + +* Tue Aug 20 2019 Danilo Cesar Lemes de Paula - 4.1.0-4.el8 +- kvm-RHEL-disable-hostmem-memfd.patch [bz#1738626 bz#1740797] +- Resolves: bz#1738626 + (Disable memfd in QEMU) - Resolves: bz#1740797 (Disable memfd in QEMU) -* Thu Aug 01 2019 Danilo Cesar Lemes de Paula - 2.12.0-83.el8 -- kvm-hw-block-pflash_cfi01-Add-missing-DeviceReset-handle.patch [bz#1707192] -- kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch [bz#1678979] -- kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch [bz#1678979] -- kvm-nbd-client-Lower-min_block-for-block-status-unaligne.patch [bz#1678979] -- kvm-nbd-client-Reject-inaccessible-tail-of-inconsistent-.patch [bz#1678979] -- kvm-nbd-client-Support-qemu-img-convert-from-unaligned-s.patch [bz#1678979] -- kvm-block-Add-bdrv_get_request_alignment.patch [bz#1678979] -- kvm-nbd-server-Advertise-actual-minimum-block-size.patch [bz#1678979] -- kvm-slirp-check-sscanf-result-when-emulating-ident.patch [bz#1727642] -- kvm-slirp-fix-big-little-endian-conversion-in-ident-prot.patch [bz#1727642] -- kvm-slirp-ensure-there-is-enough-space-in-mbuf-to-null-t.patch [bz#1727642] -- kvm-slirp-don-t-manipulate-so_rcv-in-tcp_emu.patch [bz#1727642] -- kvm-tap-set-vhostfd-passed-from-qemu-cli-to-non-blocking.patch [bz#1732642] -- kvm-Fix-heap-overflow-in-ip_reass-on-big-packet-input.patch [bz#1734751] -- Resolves: bz#1678979 - (qemu-img convert abort when converting image with unaligned size (qemu-img: block/io.c:2134: bdrv_co_block_status: Assertion `*pnum && (((*pnum) % (align)) == 0) && align > offset - aligned_offset\' failed)) -- Resolves: bz#1707192 - (implement missing reset handler for cfi.pflash01 - slow train) -- Resolves: bz#1727642 - (CVE-2019-6778 qemu-kvm: QEMU: slirp: heap buffer overflow in tcp_emu()) -- Resolves: bz#1732642 - (enable the virtio-net frontend to work with the vhost-net backend in SEV guests) -- Resolves: bz#1734751 - (CVE-2019-14378 qemu-kvm: 
QEMU: slirp: heap buffer overflow during packet reassembly [rhel-8.1.0]) - -* Tue Jul 23 2019 Danilo Cesar Lemes de Paula - 2.12.0-82.el8 -- kvm-i386-Add-new-model-of-Cascadelake-Server.patch [bz#1629906] -- kvm-i386-Update-stepping-of-Cascadelake-Server.patch [bz#1629906] -- kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch [bz#1629906] -- kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-NEW.patch [bz#1629906] -- kvm-i386-Disable-OSPKE-on-CPU-model-definitions-NEW.patch [bz#1629906] -- kvm-block-ssh-Convert-from-DPRINTF-macro-to-trace-events.patch [bz#1513367] -- kvm-block-ssh-Do-not-report-read-write-flush-errors-to-t.patch [bz#1513367] -- kvm-qemu-iotests-Fix-paths-for-NFS.patch [bz#1513367] -- kvm-qemu-iotests-Filter-NFS-paths.patch [bz#1513367] -- kvm-iotests-Filter-SSH-paths.patch [bz#1513367] -- kvm-block-ssh-Implement-.bdrv_refresh_filename.patch [bz#1513367] -- kvm-iotests-Use-Python-byte-strings-where-appropriate.patch [bz#1513367] -- kvm-iotests-Unify-log-outputs-between-Python-2-and-3.patch [bz#1513367] -- kvm-ssh-switch-from-libssh2-to-libssh.patch [bz#1513367] -- kvm-redhat-switch-from-libssh2-to-libssh.patch [bz#1513367] -- kvm-block-gluster-limit-the-transfer-size-to-512-MiB.patch [bz#1728657] -- kvm-s390-cpumodel-fix-description-for-the-new-vector-fac.patch [bz#1729975] -- kvm-s390x-cpumodel-remove-esort-from-the-default-model.patch [bz#1729975] -- kvm-s390x-cpumodel-also-change-name-of-vxbeh.patch [bz#1729975] -- kvm-s390x-cpumodel-change-internal-name-of-vxpdeh-to-mat.patch [bz#1729975] -- kvm-target-i386-sev-Do-not-unpin-ram-device-memory-regio.patch [bz#1728958] -- kvm-i386-Save-EFER-for-32-bit-targets.patch [bz#1689269] -- kvm-target-i386-rename-HF_SVMI_MASK-to-HF_GUEST_MASK.patch [bz#1689269] -- kvm-target-i386-kvm-add-VMX-migration-blocker.patch [bz#1689269] -- kvm-target-i386-kvm-just-return-after-migrate_add_blocke.patch [bz#1689269] -- kvm-target-i386-kvm-Delete-VMX-migration-blocker-on-vCPU.patch [bz#1689269] -- 
kvm-Introduce-kvm_arch_destroy_vcpu.patch [bz#1689269] -- kvm-target-i386-kvm-Use-symbolic-constant-for-DB-BP-exce.patch [bz#1689269] -- kvm-target-i386-kvm-Re-inject-DB-to-guest-with-updated-D.patch [bz#1689269] -- kvm-target-i386-kvm-Block-migration-for-vCPUs-exposed-wi.patch [bz#1689269] -- kvm-target-i386-kvm-do-not-initialize-padding-fields.patch [bz#1689269] -- kvm-linux-headers-synchronize-generic-and-x86-KVM-header.patch [bz#1689269] -- kvm-vmstate-Add-support-for-kernel-integer-types.patch [bz#1689269] -- kvm-target-i386-kvm-Add-support-for-save-and-restore-nes.patch [bz#1689269] -- kvm-target-i386-kvm-Add-support-for-KVM_CAP_EXCEPTION_PA.patch [bz#1689269] -- kvm-target-i386-kvm-Add-nested-migration-blocker-only-wh.patch [bz#1689269] -- kvm-target-i386-kvm-Demand-nested-migration-kernel-capab.patch [bz#1689269] -- kvm-target-i386-skip-KVM_GET-SET_NESTED_STATE-if-VMX-dis.patch [bz#1689269] -- kvm-i386-kvm-Do-not-sync-nested-state-during-runtime.patch [bz#1689269] -- Resolves: bz#1513367 - (qemu with libssh) -- Resolves: bz#1629906 - ([Intel 8.1 Feat] qemu-kvm Introduce Cascade Lake (CLX) cpu model) -- Resolves: bz#1689269 - (Nested KVM: support for migration of nested hypervisors - Slow Train) -- Resolves: bz#1728657 - ('qemu-io write' to a raw image over libgfapi fails) -- Resolves: bz#1728958 - (Hot unplug vfio-pci NIC devices from sev guest will cause qemu-kvm: sev_ram_block_removed: failed to unregister region) -- Resolves: bz#1729975 - (RHEL 8.1 Pre-Beta - Fix for hardware CPU Model) - -* Mon Jul 08 2019 Miroslav Rezanina - 2.12.0-81.el8 -- kvm-target-i386-add-MDS-NO-feature.patch [bz#1714792] -- kvm-virtio-gpu-pass-down-VirtIOGPU-pointer-to-a-bunch-of.patch [bz#1531543] -- kvm-virtio-gpu-add-iommu-support.patch [bz#1531543] -- kvm-virtio-gpu-fix-unmap-in-error-path.patch [bz#1531543] -- Resolves: bz#1531543 - ([RFE] add iommu support to virtio-gpu) -- Resolves: bz#1714792 - ([Intel 8.1 FEAT] MDS_NO exposure to guest) - -* Tue Jul 02 2019 Danilo Cesar 
Lemes de Paula - 2.12.0-80.el8 -- kvm-qxl-check-release-info-object.patch [bz#1712705] -- kvm-iotests-Make-182-do-without-device_add.patch [bz#1707598] -- Resolves: bz#1707598 - (qemu-iotest 182 fails without device hotplugging support) -- Resolves: bz#1712705 - (CVE-2019-12155 qemu-kvm: QEMU: qxl: null pointer dereference while releasing spice resources [rhel-8]) - -* Fri Jun 28 2019 Danilo de Paula - 15:2.12.0-79 -- Rebuild all virt packages to fix RHEL's upgrade path -- Resolves: rhbz#1695587 - (Ensure modular RPM upgrade path) - -* Thu Jun 20 2019 Miroslav Rezanina - 2.12.0-78.el8 -- kvm-gluster-Handle-changed-glfs_ftruncate-signature.patch [bz#1721983] -- kvm-gluster-the-glfs_io_cbk-callback-function-pointer-ad.patch [bz#1721983] -- Resolves: bz#1721983 - (qemu-kvm can't be build with new gluster version (6.0.6)) - -* Thu Jun 13 2019 Danilo Cesar Lemes de Paula - 2.12.0-77.el8 -- kvm-i386-Make-arch_capabilities-migratable.patch [bz#1709970] -- kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch [bz#1710662] -- kvm-linux-headers-Update-for-NVLink2-passthrough-downstr.patch [bz#1710662] -- kvm-pci-Move-NVIDIA-vendor-id-to-the-rest-of-ids.patch [bz#1710662] -- kvm-vfio-quirks-Add-common-quirk-alloc-helper.patch [bz#1710662] -- kvm-vfio-Make-vfio_get_region_info_cap-public.patch [bz#1710662] -- kvm-spapr-Support-NVIDIA-V100-GPU-with-NVLink2.patch [bz#1710662] -- kvm-qemu-kvm.spec-bump-libseccomp-2.4.0.patch [bz#1719578] -- Resolves: bz#1709970 - ([Intel 8.1 Bug] [KVM][CLX] CPUID_7_0_EDX_ARCH_CAPABILITIES is not enabled in VM - qemu-kvm) -- Resolves: bz#1710662 - ([IBM 8.1 FEAT] POWER9 - Virt: qemu: NVLink2 passthru to guest - Nvidia Volta (GPU) (kvm)) -- Resolves: bz#1719578 +* Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 4.1.0-3.el8 +- kvm-x86-machine-types-pc_rhel_8_0_compat.patch [bz#1719649] +- kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch [bz#1719649] +- kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch [bz#1719649] +- 
kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch [bz#1719649] +- kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch [bz#1719649] +- kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch [bz#1719649] +- Resolves: bz#1719649 + (8.1 machine type for x86) + +* Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 4.1.0-2.el8 +- kvm-spec-Update-seavgabios-dependency.patch [bz#1725664] +- kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch [bz#1741451] +- kvm-display-bochs-fix-pcie-support.patch [bz#1733977 bz#1740692] +- kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch [bz#1733977] +- kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch [bz#1733977] +- kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch [bz#1733977 bz#1740692] +- kvm-Update-version-for-v4.1.0-release.patch [bz#1733977 bz#1740692] +- Resolves: bz#1725664 + (Update seabios dependency) +- Resolves: bz#1733977 + (Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed) +- Resolves: bz#1740692 + (Backport QEMU 4.1.0 rc5 & ga patches) +- Resolves: bz#1741451 + (Failed to hot-plug vcpus) + +* Wed Aug 14 2019 Miroslav Rezanina - 4.1.0-1.el8 +- Rebase to qemu 4.1.0 rc4 [bz#1705235] +- Resolves: bz#1705235 + (Rebase qemu-kvm for RHEL-AV 8.1.0) + +* Tue Jul 23 2019 Danilo Cesar Lemes de Paula - 4.0.0-6.el8 +- kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch [bz#1519013] +- kvm-x86_64-rh-devices-enable-TPM-emulation.patch [bz#1519013] +- kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch [bz#1719823] +- Resolves: bz#1519013 + ([RFE] QEMU Software TPM support (vTPM, or TPM emulation)) +- Resolves: bz#1719823 + ([RHEL 8.1] [RFE] increase the maximum of vfio devices to more than 32 in qemu-kvm) + +* Mon Jul 08 2019 Miroslav Rezanina - 4.0.0-5.el8 +- kvm-qemu-kvm.spec-bump-libseccomp-2.4.0.patch [bz#1720306] +- kvm-qxl-check-release-info-object.patch [bz#1712717] +- kvm-target-i386-add-MDS-NO-feature.patch 
[bz#1722839] +- kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch [bz#1588356] +- kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch [bz#1588356] +- kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch [bz#1707118] +- Resolves: bz#1588356 + (qemu crashed on the source host when do storage migration with source qcow2 disk created by 'qemu-img') +- Resolves: bz#1707118 + (enable device: bochs-display (QEMU)) +- Resolves: bz#1712717 + (CVE-2019-12155 qemu-kvm: QEMU: qxl: null pointer dereference while releasing spice resources [rhel-av-8]) +- Resolves: bz#1720306 (VM failed to start with error "failed to install seccomp syscall filter in the kernel") - -* Tue Jun 11 2019 Danilo Cesar Lemes de Paula - 2.12.0-76.el8 -- kvm-Introduce-new-no_guest_reset-parameter-for-usb-host-.patch [bz#1713677] -- kvm-usb-call-reset-handler-before-updating-state.patch [bz#1713677] -- kvm-usb-host-skip-reset-for-untouched-devices.patch [bz#1713677] -- kvm-usb-host-avoid-libusb_set_configuration-calls.patch [bz#1713677] -- kvm-virtio-scsi-Move-BlockBackend-back-to-the-main-AioCo.patch [bz#1673396 bz#1673401] -- kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch [bz#1673396 bz#1673401] -- kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch [bz#1673396 bz#1673401] -- kvm-Disable-VXHS-support.patch [bz#1714933] -- Resolves: bz#1673396 - (qemu-kvm core dumped after hotplug the deleted disk with iothread parameter) -- Resolves: bz#1673401 - (Qemu core dump when start guest with two disks using same drive) -- Resolves: bz#1713677 +- Resolves: bz#1722839 + ([Intel 8.1 FEAT] MDS_NO exposure to guest - Fast Train) + +* Tue Jun 11 2019 Danilo Cesar Lemes de Paula - 4.0.0-4.el8 +- kvm-Disable-VXHS-support.patch [bz#1714937] +- kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch [bz#1713735] +- kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch [bz#1713735] +- kvm-usb-call-reset-handler-before-updating-state.patch [bz#1713679] +- 
kvm-usb-host-skip-reset-for-untouched-devices.patch [bz#1713679] +- kvm-usb-host-avoid-libusb_set_configuration-calls.patch [bz#1713679] +- kvm-aarch64-Compile-out-IOH3420.patch [bz#1627283] +- kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch [bz#1714891] +- kvm-vl-Document-why-objects-are-delayed.patch [bz#1714891] +- Resolves: bz#1627283 + (Compile out IOH3420 on aarch64) +- Resolves: bz#1713679 (Detached device when trying to upgrade USB device firmware when in doing USB Passthrough via QEMU) -- Resolves: bz#1714933 - (Disable VXHS in qemu-kvm) - -* Fri May 24 2019 Danilo Cesar Lemes de Paula - 2.12.0-75.el8 -- kvm-s390x-cpumodel-enum-type-S390FeatGroup-now-gets-gene.patch [bz#1660912] -- kvm-linux-headers-update-against-Linux-5.2-rc1.patch [bz#1660912] -- kvm-s390x-cpumodel-ignore-csske-for-expansion.patch [bz#1660912] -- kvm-s390x-cpumodel-Miscellaneous-Instruction-Extensions-.patch [bz#1660912] -- kvm-s390x-cpumodel-msa9-facility.patch [bz#1660912] -- kvm-s390x-cpumodel-vector-enhancements.patch [bz#1660912] -- kvm-s390x-cpumodel-enhanced-sort-facility.patch [bz#1660912] -- kvm-s390x-cpumodel-add-Deflate-conversion-facility.patch [bz#1660912] -- kvm-s390x-cpumodel-add-gen15-defintions.patch [bz#1660912] -- kvm-s390x-cpumodel-wire-up-8561-and-8562-as-gen15-machin.patch [bz#1660912] -- kvm-spice-set-device-address-and-device-display-ID-in-QX.patch [bz#1712946] -- kvm-hw-pci-Add-missing-include.patch [bz#1712946] -- Resolves: bz#1660912 - ([IBM 8.1 FEAT] KVM s390x: Add hardware CPU Model - qemu part) -- Resolves: bz#1712946 - (qemu-kvm build is broken due to spice_qxl_set_max_monitors being deprecated) - -* Mon May 20 2019 Danilo Cesar Lemes de Paula - 2.12.0-74.el8 -- kvm-x86-cpu-Enable-CLDEMOTE-Demote-Cache-Line-cpu-featur.patch [bz#1696436] -- kvm-memory-Fix-the-memory-region-type-assignment-order.patch [bz#1667249] -- kvm-target-i386-sev-Do-not-pin-the-ram-device-memory-reg.patch [bz#1667249] -- 
kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch [bz#1673010] -- kvm-target-i386-define-md-clear-bit.patch [bz#1703302 bz#1703308] -- Resolves: bz#1667249 - (Fail to launch AMD SEV VM with assigned PCI device) -- Resolves: bz#1673010 - (Local VM and migrated VM on the same host can run with same RAW file as visual disk source while without shareable configured or lock manager enabled) -- Resolves: bz#1696436 - ([Intel 8.0 Feat] KVM Enabling SnowRidge new NIs - qemu-kvm) -- Resolves: bz#1703302 - (CVE-2018-12130 virt:rhel/qemu-kvm: hardware: Microarchitectural Fill Buffer Data Sampling (MFBDS) [rhel-8]) -- Resolves: bz#1703308 - (CVE-2018-12127 virt:rhel/qemu-kvm: hardware: Micro-architectural Load Port Data Sampling - Information Leak (MLPDS) [rhel-8]) - -* Tue May 14 2019 Danilo Cesar Lemes de Paula - 2.12.0-73.el8 -- kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch [bz#1561761] -- kvm-i386-Disable-OSPKE-on-CPU-model-definitions.patch [bz#1561761] -- Resolves: bz#1561761 - ([Intel 8.1 Feat] qemu-kvm Introduce Icelake cpu model) - -* Tue May 14 2019 Danilo Cesar Lemes de Paula - 2.12.0-72.el8 -- kvm-Use-KVM_GET_MSR_INDEX_LIST-for-MSR_IA32_ARCH_CAP.patch [bz#1707706] -- kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch [bz#1707706] -- Resolves: bz#1707706 - (/builddir/build/BUILD/qemu-2.12.0/target/i386/kvm.c:2031: kvm_put_msrs: Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed.) 
- -* Wed May 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-71.el8 -- kvm-s390-bios-Skip-bootmap-signature-entries.patch [bz#1683275] -- Resolves: bz#1683275 - ([IBM 8.1 FEAT] KVM: Secure Linux Boot Toleration (qemu)) - -* Tue May 07 2019 Danilo Cesar Lemes de Paula - 2.12.0-70.el8 -- kvm-i386-Add-new-MSR-indices-for-IA32_PRED_CMD-and-IA32_.patch [bz#1561761] -- kvm-i386-Add-CPUID-bit-and-feature-words-for-IA32_ARCH_C.patch [bz#1561761] -- kvm-i386-Add-CPUID-bit-for-PCONFIG.patch [bz#1561761] -- kvm-i386-Add-CPUID-bit-for-WBNOINVD.patch [bz#1561761] -- kvm-i386-Add-new-CPU-model-Icelake-Server-Client.patch [bz#1561761] -- kvm-Add-support-to-KVM_GET_MSR_FEATURE_INDEX_LIST-an.patch [bz#1561761] -- kvm-x86-Data-structure-changes-to-support-MSR-based-feat.patch [bz#1561761] -- kvm-x86-define-a-new-MSR-based-feature-word-FEATURE_WORD.patch [bz#1561761] -- kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch [bz#1561761] -- kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch [bz#1561761] -- Resolves: bz#1561761 - ([Intel 8.1 Feat] qemu-kvm Introduce Icelake cpu model) - -* Fri May 03 2019 Danilo Cesar Lemes de Paula - 2.12.0-69.el8 -- kvm-tests-crypto-Use-the-IEC-binary-prefix-definitions.patch [bz#1680231] -- kvm-crypto-expand-algorithm-coverage-for-cipher-benchmar.patch [bz#1680231] -- kvm-crypto-remove-code-duplication-in-tweak-encrypt-decr.patch [bz#1680231] -- kvm-crypto-introduce-a-xts_uint128-data-type.patch [bz#1680231] -- kvm-crypto-convert-xts_tweak_encdec-to-use-xts_uint128-t.patch [bz#1680231] -- kvm-crypto-convert-xts_mult_x-to-use-xts_uint128-type.patch [bz#1680231] -- kvm-crypto-annotate-xts_tweak_encdec-as-inlineable.patch [bz#1680231] -- kvm-crypto-refactor-XTS-cipher-mode-test-suite.patch [bz#1680231] -- kvm-crypto-add-testing-for-unaligned-buffers-with-XTS-ci.patch [bz#1680231] -- Resolves: bz#1680231 - (severe performance impact using luks format) - -* Mon Apr 29 2019 Danilo Cesar Lemes de Paula - 2.12.0-68.el8 -- 
kvm-s390x-ipl-Try-to-detect-Linux-vs-non-Linux-for-initi.patch [bz#1699070] -- kvm-loader-Check-access-size-when-calling-rom_ptr-to-avo.patch [bz#1699070] -- kvm-hw-s390x-Use-the-IEC-binary-prefix-definitions.patch [bz#1699070] -- kvm-s390x-storage-attributes-fix-CMMA_BLOCK_SIZE-usage.patch [bz#1699070] -- kvm-s390x-cpumodel-fix-segmentation-fault-when-baselinin.patch [bz#1699070] -- kvm-hw-s390x-s390-pci-bus-Convert-sysbus-init-function-t.patch [bz#1699070] -- kvm-s390x-pci-properly-fail-if-the-zPCI-device-cannot-be.patch [bz#1699070] -- kvm-s390x-pci-rename-hotplug-handler-callbacks.patch [bz#1699070] -- kvm-s390-avoid-potential-null-dereference-in-s390_pcihos.patch [bz#1699070] -- kvm-s390x-pci-Send-correct-event-on-hotplug.patch [bz#1699070] -- kvm-s390x-pci-Set-the-iommu-region-size-mpcifc-request.patch [bz#1699070] -- kvm-s390x-pci-Always-delete-and-free-the-release_timer.patch [bz#1699070] -- kvm-s390x-pci-Ignore-the-unplug-call-if-we-already-have-.patch [bz#1699070] -- kvm-s390x-pci-Use-hotplug_dev-instead-of-looking-up-the-.patch [bz#1699070] -- kvm-s390x-pci-Move-some-hotplug-checks-to-the-pre_plug-h.patch [bz#1699070] -- kvm-s390x-pci-Introduce-unplug-requests-and-split-unplug.patch [bz#1699070] -- kvm-s390x-pci-Drop-release-timer-and-replace-it-with-a-f.patch [bz#1699070] -- kvm-s390x-pci-mark-zpci-devices-as-unmigratable.patch [bz#1699070] -- kvm-s390x-pci-Fix-primary-bus-number-for-PCI-bridges.patch [bz#1699070] -- kvm-s390x-pci-Fix-hotplugging-of-PCI-bridges.patch [bz#1699070] -- kvm-s390x-pci-Warn-when-adding-PCI-devices-without-the-z.patch [bz#1699070] -- kvm-s390x-pci-Unplug-remaining-requested-devices-on-pcih.patch [bz#1699070] -- kvm-s390x-refactor-reset-reipl-handling.patch [bz#1699070] -- kvm-s390-ipl-fix-ipl-with-no-reboot.patch [bz#1699070] -- Resolves: bz#1699070 - (Backport s390x-related fixes for qemu-kvm) - -* Tue Apr 23 2019 Danilo Cesar Lemes de Paula - 2.12.0-67.el8 -- kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch 
[bz#1693116] -- Resolves: bz#1693116 - (CVE-2018-20815 qemu-kvm: QEMU: device_tree: heap buffer overflow while loading device tree blob [rhel-8.0]) - -* Mon Apr 15 2019 Danilo Cesar Lemes de Paula - 2.12.0-66.el8 -- kvm-iotests-153-Fix-dead-code.patch [bz#1694148] -- kvm-file-posix-Include-filename-in-locking-error-message.patch [bz#1694148] -- kvm-file-posix-Skip-effectiveless-OFD-lock-operations.patch [bz#1694148] -- kvm-file-posix-Drop-s-lock_fd.patch [bz#1694148] -- kvm-tests-Add-unit-tests-for-image-locking.patch [bz#1694148] -- kvm-file-posix-Fix-shared-locks-on-reopen-commit.patch [bz#1694148] -- kvm-iotests-Test-file-posix-locking-and-reopen.patch [bz#1694148] -- kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch [bz#1694148] -- kvm-hostmem-file-remove-object-id-from-pmem-error-messag.patch [bz#1687596] -- kvm-redhat-setting-target-release-to-rhel-8.1.0.patch [] -- kvm-redhat-removing-iotest-182.patch [] -- Resolves: bz#1687596 - ([Intel 8.1 BUG][KVM][Crystal Ridge]object_get_canonical_path_component: assertion failed: (obj->parent != NULL)) -- Resolves: bz#1694148 - (QEMU image locking needn't double open fd number, and it should not fail when attempting to release locks) - -* Tue Apr 09 2019 Danilo Cesar Lemes de Paula - 2.12.0-65.el8 -- kvm-s390x-cpumodel-mepochptff-warn-when-no-mepoch-and-re.patch [bz#1664371] -- kvm-s390x-cpumodel-add-z14-GA2-model.patch [bz#1664371] -- kvm-redhat-s390x-cpumodel-enable-mepoch-by-default-for-z.patch [bz#1664371] -- kvm-intel_iommu-fix-operator-in-vtd_switch_address_space.patch [bz#1662272] -- kvm-intel_iommu-reset-intr_enabled-when-system-reset.patch [bz#1662272] -- kvm-pci-msi-export-msi_is_masked.patch [bz#1662272] -- kvm-i386-kvm-ignore-masked-irqs-when-update-msi-routes.patch [bz#1662272] -- Resolves: bz#1662272 - (Boot guest with device assignment+vIOMMU, qemu prompts "vtd_interrupt_remap_msi: MSI address low 32 bit invalid: 0x0" when first rebooting guest) -- Resolves: bz#1664371 - ([IBM 8.1 FEAT] Update 
hardware CPU Model z14 (kvm) - qemu part) - -* Mon Apr 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-64.el8 -- kvm-doc-fix-the-configuration-path.patch [bz#1645411] -- kvm-Increase-number-of-iotests-being-run-as-a-part-of-RH.patch [bz#1664463] +- Resolves: bz#1713735 + (Allow ARM VIRT iommu option in RHEL8.1 machine) +- Resolves: bz#1714891 + (Guest with persistent reservation manager for a disk fails to start) +- Resolves: bz#1714937 + (Disable VXHS support) + +* Tue May 28 2019 Danilo Cesar Lemes de Paula - 4.0.0-3.el8 +- kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch [bz#1709726] +- kvm-compat-Generic-hw_compat_rhel_8_0.patch [bz#1709726] +- kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch [bz#1709726] +- kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch [bz#1709726] +- Resolves: bz#1709726 + (Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'") + +* Sat May 25 2019 Danilo Cesar Lemes de Paula - 4.0.0-2.el8 +- kvm-target-i386-define-md-clear-bit.patch [bz#1703297 bz#1703304 bz#1703310 bz#1707274] +- Resolves: bz#1703297 + (CVE-2018-12126 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Store Buffer Data Sampling (MSBDS) [rhel-av-8]) +- Resolves: bz#1703304 + (CVE-2018-12130 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Fill Buffer Data Sampling (MFBDS) [rhel-av-8]) +- Resolves: bz#1703310 + (CVE-2018-12127 virt:8.0.0/qemu-kvm: hardware: Micro-architectural Load Port Data Sampling - Information Leak (MLPDS) [rhel-av-8]) +- Resolves: bz#1707274 + (CVE-2019-11091 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Data Sampling Uncacheable Memory (MDSUM) [rhel-av-8.1.0]) + +* Wed May 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-26.el8 +- kvm-target-ppc-spapr-Add-SPAPR_CAP_LARGE_DECREMENTER.patch [bz#1698711] +- kvm-target-ppc-spapr-Add-workaround-option-to-SPAPR_CAP_.patch [bz#1698711] +- kvm-target-ppc-spapr-Add-SPAPR_CAP_CCF_ASSIST.patch [bz#1698711] +- 
kvm-target-ppc-tcg-make-spapr_caps-apply-cap-cfpc-sbbc-i.patch [bz#1698711] +- kvm-target-ppc-spapr-Enable-mitigations-by-default-for-p.patch [bz#1698711] +- kvm-slirp-ensure-there-is-enough-space-in-mbuf-to-null-t.patch [bz#1693076] +- kvm-slirp-don-t-manipulate-so_rcv-in-tcp_emu.patch [bz#1693076] +- Resolves: bz#1693076 + (CVE-2019-6778 qemu-kvm: QEMU: slirp: heap buffer overflow in tcp_emu() [rhel-av-8]) +- Resolves: bz#1698711 + (Enable Spectre / Meltdown mitigations by default in pseries-rhel8.0.0 machine type) + +* Mon May 06 2019 Danilo Cesar Lemes de Paula - 3.1.0-25.el8 +- kvm-redhat-enable-tpmdev-passthrough.patch [bz#1688312] +- kvm-exec-Only-count-mapped-memory-backends-for-qemu_getr.patch [bz#1680492] +- kvm-Enable-libpmem-to-support-nvdimm.patch [bz#1705149] +- Resolves: bz#1680492 + (Qemu quits suddenly while system_reset after hot-plugging unsupported memory by compatible guest on P9 with 1G huge page set) +- Resolves: bz#1688312 + ([RFE] enable TPM passthrough at compile time (qemu-kvm)) +- Resolves: bz#1705149 + (libpmem support is not enabled in qemu-kvm) + +* Fri Apr 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-24.el8 +- kvm-x86-host-phys-bits-limit-option.patch [bz#1688915] +- kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch [bz#1688915] +- Resolves: bz#1688915 + ([Intel 8.0 Alpha] physical bits should <= 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters.) 
+ +* Tue Apr 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-23.el8 +- kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch [bz#1693173] +- Resolves: bz#1693173 + (CVE-2018-20815 qemu-kvm: QEMU: device_tree: heap buffer overflow while loading device tree blob [rhel-av-8]) + +* Mon Apr 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-22.el8 +- kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch [bz#1687578] +- kvm-i386-Make-arch_capabilities-migratable.patch [bz#1687578] +- Resolves: bz#1687578 + (Incorrect CVE vulnerabilities reported on Cascade Lake cpus) + +* Thu Apr 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-21.el8 +- kvm-Remove-7-qcow2-and-luks-iotests-that-are-taking-25-s.patch [bz#1683473] +- kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch [bz#1674438] +- kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch [bz#1655065] +- kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch [bz#1655065] +- kvm-vnc-detect-and-optimize-pageflips.patch [bz#1666206] - kvm-Load-kvm-module-during-boot.patch [bz#1676907 bz#1685995] -- kvm-qemu-kvm.spec.template-Update-pyton-path-to-system-i.patch [] -- Resolves: bz#1645411 - (the "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong) -- Resolves: bz#1664463 - (Modify iotest behavior to include luks and nbd and fail build if iotests fail) +- kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch [bz#1669053] +- kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch [bz#1687582] +- kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch [bz#1652572] +- Resolves: bz#1652572 + (QEMU core dumped if stop nfs service during migration) +- Resolves: bz#1655065 + ([rhel.8.0][fast train]'qemu-img measure' size does not match the real allocated size for luks-inside-qcow2 image) +- Resolves: bz#1666206 + (vnc server should detect page-flips and avoid sending fullscreen updates then.) 
+- Resolves: bz#1669053 + (Guest call trace when boot with nvdimm device backed by /dev/dax) +- Resolves: bz#1674438 + (RHEL8.0 - Guest reboot fails after memory hotplug multiple times (kvm)) - Resolves: bz#1676907 (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) +- Resolves: bz#1683473 + (Remove 7 qcow2 & luks iotests from rhel8 fast train build %check phase) - Resolves: bz#1685995 (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) +- Resolves: bz#1687582 + (QEMU IOTEST 200 fails with 'virtio-scsi-pci is not a valid device model name') + +* Fri Mar 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-20.el8 +- kvm-i386-Add-stibp-flag-name.patch [bz#1686260] +- Resolves: bz#1686260 + (stibp is missing on qemu 3.0 and qemu 3.1) + +* Fri Mar 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-19.el8 +- kvm-migration-Fix-cancel-state.patch [bz#1608649] +- kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch [bz#1608649] +- Resolves: bz#1608649 + (Query-migrate get "failed" status after migrate-cancel) + +* Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-18.el8 +- kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch [bz#1661030] +- kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch [bz#1661515] +- kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch [bz#1661515] +- kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch [bz#1661515] +- Resolves: bz#1661030 + (Remove MPX support from 8.0 machine types) +- Resolves: bz#1661515 + (Remove PCONFIG and INTEL_PT from Icelake-* CPU models) + +* Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-17.el8 +- kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch [bz#1678968] +- Resolves: bz#1678968 + (-blockdev: auto-read-only is ineffective for drivers on read-only whitelist) + +* Mon Feb 25 2019 Danilo Cesar Lemes de Paula - 3.1.0-16.el8 +- kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch 
[bz#1664997] +- kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch [bz#1664997] +- Resolves: bz#1664997 + (Restrict floppy device to RHEL-7 machine types) + +* Wed Feb 13 2019 Danilo Cesar Lemes de Paula - 3.1.0-15.el8 +- kvm-Add-raw-qcow2-nbd-and-luks-iotests-to-run-during-the.patch [bz#1664855] +- kvm-Introduce-the-qemu-kvm-tests-rpm.patch [bz#1669924] +- Resolves: bz#1664855 + (Run iotests in qemu-kvm build %check phase) +- Resolves: bz#1669924 + (qemu-kvm packaging: Package the avocado_qemu tests and qemu-iotests in a new rpm) + +* Tue Feb 12 2019 Danilo Cesar Lemes de Paula - 3.1.0-14.el8 +- kvm-doc-fix-the-configuration-path.patch [bz#1644985] +- Resolves: bz#1644985 + (The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train) + +* Mon Feb 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-13.el8 +- kvm-Acceptance-tests-add-Linux-initrd-checking-test.patch [bz#1669922] +- kvm-mmap-alloc-unfold-qemu_ram_mmap.patch [bz#1671519] +- kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch [bz#1671519] +- kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch [bz#1653590] +- kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch [bz#1673014] +- kvm-virtio-scsi-Move-BlockBackend-back-to-the-main-AioCo.patch [bz#1656276 bz#1662508] +- kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch [bz#1656276 bz#1662508] +- kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch [bz#1656276 bz#1662508] +- Resolves: bz#1653590 + ([Fast train]had better stop qemu immediately while guest was making use of an improper page size) +- Resolves: bz#1656276 + (qemu-kvm core dumped after hotplug the deleted disk with iothread parameter) +- Resolves: bz#1662508 + (Qemu core dump when start guest with two disks using same drive) +- Resolves: bz#1669922 + (Backport avocado-qemu tests for QEMU 3.1) +- Resolves: bz#1671519 + (RHEL8.0 Snapshot3 - qemu doesn't free up hugepage memory when hotplug/hotunplug 
using memory-backend-file (qemu-kvm)) +- Resolves: bz#1673014 + (Local VM and migrated VM on the same host can run with same RAW file as visual disk source while without shareable configured or lock manager enabled) -* Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 2.12.0-63.el8 -- kvm-scsi-generic-avoid-possible-out-of-bounds-access-to-.patch [bz#1668162] -- Resolves: bz#1668162 - (CVE-2019-6501 qemu-kvm: QEMU: scsi-generic: possible OOB access while handling inquiry request [rhel-8]) - -* Mon Feb 25 2019 Danilo Cesar Lemes de Paula - 2.12.0-62.el8 -- kvm-slirp-check-data-length-while-emulating-ident-functi.patch [bz#1669069] -- Resolves: bz#1669069 - (CVE-2019-6778 qemu-kvm: QEMU: slirp: heap buffer overflow in tcp_emu() [rhel-8.0]) - -* Mon Feb 11 2019 Danilo Cesar Lemes de Paula - 2.12.0-61.el8 -- kvm-qemu-ga-make-get-fsinfo-work-over-pci-bridges.patch [bz#1666952] -- kvm-qga-fix-driver-leak-in-guest-get-fsinfo.patch [bz#1666952] -- Resolves: bz#1666952 - (qemu-guest-agent does not parse PCI bridge links in "build_guest_fsinfo_for_real_device" (q35)) - -* Mon Jan 28 2019 Danilo Cesar Lemes de Paula - 2.12.0-60.el8 -- kvm-ne2000-fix-possible-out-of-bound-access-in-ne2000_re.patch [bz#1636784] -- kvm-rtl8139-fix-possible-out-of-bound-access.patch [bz#1636784] -- kvm-pcnet-fix-possible-buffer-overflow.patch [bz#1636784] -- kvm-net-ignore-packet-size-greater-than-INT_MAX.patch [bz#1636784] -- kvm-net-drop-too-large-packet-early.patch [bz#1636784] -- kvm-net-hub-suppress-warnings-of-no-host-network-for-qte.patch [bz#1636784] -- kvm-virtio-net-test-accept-variable-length-argument-in-p.patch [bz#1636784] -- kvm-virtio-net-test-remove-unused-macro.patch [bz#1636784] -- kvm-virtio-net-test-add-large-tx-buffer-test.patch [bz#1636784] -- kvm-s390x-Return-specification-exception-for-unimplement.patch [bz#1668261] -- kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch [bz#1665844] -- Resolves: bz#1636784 - (CVE-2018-17963 qemu-kvm: Qemu: net: ignore packets with large size 
[rhel-8]) -- Resolves: bz#1665844 - (Guest quit with error when hotunplug cpu) -- Resolves: bz#1668261 - ([RHEL8] Backport diag308 stable exception fix (qemu-kvm)) - -* Thu Jan 24 2019 Danilo Cesar Lemes de Paula - 2.12.0-59.el8 -- kvm-hw-scsi-cleanups-before-VPD-BL-emulation.patch [bz#1639957] -- kvm-hw-scsi-centralize-SG_IO-calls-into-single-function.patch [bz#1639957] -- kvm-hw-scsi-add-VPD-Block-Limits-emulation.patch [bz#1639957] -- kvm-scsi-disk-Block-Device-Characteristics-emulation-fix.patch [bz#1639957] -- kvm-scsi-generic-keep-VPD-page-list-sorted.patch [bz#1639957] -- kvm-scsi-generic-avoid-out-of-bounds-access-to-VPD-page-.patch [bz#1639957] -- kvm-scsi-generic-avoid-invalid-access-to-struct-when-emu.patch [bz#1639957] -- kvm-scsi-generic-do-not-do-VPD-emulation-for-sense-other.patch [bz#1639957] -- Resolves: bz#1639957 - ([RHEL.8] scsi host device passthrough limits IO writes - slow train) - -* Mon Jan 21 2019 Danilo Cesar Lemes de Paula - 2.12.0-58.el8 -- kvm-block-Update-flags-in-bdrv_set_read_only.patch [bz#1644996] -- kvm-block-Add-auto-read-only-option.patch [bz#1644996] -- kvm-rbd-Close-image-in-qemu_rbd_open-error-path.patch [bz#1644996] -- kvm-block-Require-auto-read-only-for-existing-fallbacks.patch [bz#1644996] -- kvm-nbd-Support-auto-read-only-option.patch [bz#1644996] -- kvm-file-posix-Support-auto-read-only-option.patch [bz#1644996] -- kvm-curl-Support-auto-read-only-option.patch [bz#1644996] -- kvm-gluster-Support-auto-read-only-option.patch [bz#1644996] -- kvm-iscsi-Support-auto-read-only-option.patch [bz#1644996] -- kvm-block-Make-auto-read-only-on-default-for-drive.patch [bz#1644996] -- kvm-qemu-iotests-Test-auto-read-only-with-drive-and-bloc.patch [bz#1644996] -- kvm-block-Fix-update-of-BDRV_O_AUTO_RDONLY-in-update_fla.patch [bz#1644996] -- kvm-qemu-img-Add-C-option-for-convert-with-copy-offloadi.patch [bz#1623082] -- kvm-iotests-Add-test-for-qemu-img-convert-C-compatibilit.patch [bz#1623082] -- Resolves: bz#1623082 - 
([rhel.8.0]Target files for 'qemu-img convert' do not support thin_provisoning with iscsi/nfs backend) -- Resolves: bz#1644996 - (block-commit can't be used with -blockdev) - -* Fri Jan 11 2019 Danilo Cesar Lemes de Paula - 2.12.0-57.el8 -- kvm-qemu-kvm.spec.template-Update-files-for-tests-rpm-to.patch [bz#1601107] - -* Fri Jan 11 2019 Danilo Cesar Lemes de Paula - 2.12.0-56.el8 -- kvm-Run-iotests-as-part-of-the-build-process.patch [bz#1661026] -- kvm-Introduce-the-qemu-kvm-tests-rpm.patch [bz#1601107] -- Resolves: bz#1601107 - (qemu-kvm packaging: make running qemu-iotests more robust) -- Resolves: bz#1661026 - (Run iotests as part of build process) - -* Tue Jan 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-55.el8 -- kvm-block-Don-t-inactivate-children-before-parents.patch [bz#1659395] -- kvm-iotests-Test-migration-with-blockdev.patch [bz#1659395] -- Resolves: bz#1659395 - (src qemu core dump when do migration ( block device node-name changed after change cdrom) - Slow Train) - -* Tue Jan 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-54.el8 -- kvm-s390x-tcg-avoid-overflows-in-time2tod-tod2time.patch [bz#1653569] -- kvm-s390x-kvm-pass-values-instead-of-pointers-to-kvm_s39.patch [bz#1653569] -- kvm-s390x-tod-factor-out-TOD-into-separate-device.patch [bz#1653569] -- kvm-s390x-tcg-drop-tod_basetime.patch [bz#1653569] -- kvm-s390x-tcg-properly-implement-the-TOD.patch [bz#1653569] -- kvm-s390x-tcg-SET-CLOCK-COMPARATOR-can-clear-CKC-interru.patch [bz#1653569] -- kvm-s390x-tcg-implement-SET-CLOCK.patch [bz#1653569] -- kvm-s390x-tcg-rearm-the-CKC-timer-during-migration.patch [bz#1653569] -- kvm-s390x-tcg-fix-locking-problem-with-tcg_s390_tod_upda.patch [bz#1653569] -- kvm-hw-s390x-Include-the-tod-qemu-also-for-builds-with-d.patch [bz#1653569] -- kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch [bz#1653569] -- kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch [bz#1653569] -- kvm-migration-discard-non-migratable-RAMBlocks.patch [bz#1539285] -- 
kvm-vfio-pci-do-not-set-the-PCIDevice-has_rom-attribute.patch [bz#1539285] -- kvm-memory-exec-Expose-all-memory-block-related-flags.patch [bz#1539285] -- kvm-memory-exec-switch-file-ram-allocation-functions-to-.patch [bz#1539285] -- kvm-configure-add-libpmem-support.patch [bz#1539285] -- kvm-hostmem-file-add-the-pmem-option.patch [bz#1539285] -- kvm-mem-nvdimm-ensure-write-persistence-to-PMEM-in-label.patch [bz#1539285] -- kvm-migration-ram-Add-check-and-info-message-to-nvdimm-p.patch [bz#1539285] -- kvm-migration-ram-ensure-write-persistence-on-loading-al.patch [bz#1539285] -- Resolves: bz#1539285 - ([Intel 8.0 Bug] [KVM][Crystal Ridge] Lack of data persistence guarantee of QEMU writes to host PMEM) -- Resolves: bz#1653569 +* Fri Feb 08 2019 Danilo Cesar Lemes de Paula - 3.1.0-12.el8 +- kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch [bz#1665896] +- kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch [bz#1668248] +- kvm-scsi-disk-Add-device_id-property.patch [bz#1668248] +- Resolves: bz#1665896 + (VNC unix listener socket is deleted after first client quits) +- Resolves: bz#1668248 + ("An unknown error has occurred" when using cdrom to install the system with two blockdev disks.(when choose installation destination)) + +* Thu Jan 31 2019 Danilo Cesar Lemes de Paula - 3.1.0-11.el8 +- kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch [bz#1644985] +- kvm-json-Fix-handling-when-not-interpolating.patch [bz#1668244] +- Resolves: bz#1644985 + (The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train) +- Resolves: bz#1668244 + (qemu-img: /var/tmp/v2vovl9951f8.qcow2: CURL: Error opening file: The requested URL returned error: 404 Not Found) + +* Tue Jan 29 2019 Danilo Cesar Lemes de Paula - 3.1.0-10.el8 +- kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch [bz#1655947] +- kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch [bz#1655947] +- Resolves: bz#1655947 + (qemu-kvm core dumped after 
unplug the device which was set io throttling parameters) + +* Tue Jan 29 2019 Danilo Cesar Lemes de Paula - 3.1.0-9.el8 +- kvm-migration-rdma-unregister-fd-handler.patch [bz#1666601] +- kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch [bz#1659127] +- kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch [bz#1659127] +- Resolves: bz#1659127 (Stress guest and stop it, then do live migration, guest hit call trace on destination end) +- Resolves: bz#1666601 + ([q35] dst qemu core dumped when do rdma migration with Mellanox IB QDR card) + +* Thu Jan 24 2019 Danilo Cesar Lemes de Paula - 3.1.0-7.el8 +- kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch [bz#1653511] +- kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch [bz#1653511] +- Resolves: bz#1653511 + (qemu doesn't report all support cpu features which cause libvirt cannot get the support status of hv_tlbflush) + +* Wed Jan 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-6.el8 +- kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch [bz#1653114] +- kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch [bz#1668205] +- Resolves: bz#1653114 + (Incorrect NUMA nodes passed to qemu-kvm guest in ibm,max-associativity-domains property) +- Resolves: bz#1668205 + (Guest quit with error when hotunplug cpu) -* Tue Jan 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-53.el8 -- kvm-ui-add-qapi-parser-for-display.patch [bz#1652871] -- kvm-ui-switch-trivial-displays-to-qapi-parser.patch [bz#1652871] -- kvm-qapi-Add-rendernode-display-option-for-egl-headless.patch [bz#1652871] -- kvm-ui-Allow-specifying-rendernode-display-option-for-eg.patch [bz#1652871] -- kvm-qapi-add-query-display-options-command.patch [bz#1652871] -- Resolves: bz#1652871 - (QEMU doesn't expose rendernode option for egl-headless display type) - -* Fri Jan 04 2019 Danilo Cesar Lemes de Paula - 2.12.0-52.el8 -- kvm-Add-edk2-Requires-to-qemu-kvm.patch [bz#1654276] -- Resolves: bz#1654276 - (qemu-kvm: Should depend on the 
architecture-appropriate guest firmware) - -* Mon Dec 24 2018 Danilo Cesar Lemes de Paula - 2.12.0-51.el8 -- kvm-x86-host-phys-bits-limit-option.patch [bz#1598284] -- kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch [bz#1598284] -- kvm-i386-do-not-migrate-MSR_SMI_COUNT-on-machine-types-2.patch [bz#1659565] -- kvm-pc-x-migrate-smi-count-to-PC_RHEL_COMPAT.patch [bz#1659565] -- kvm-slow-train-kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF.patch [bz#1656829] -- Resolves: bz#1598284 - ([Intel 8.0 Alpha] physical bits should < 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters.) -- Resolves: bz#1656829 +* Mon Jan 21 2019 Danilo Cesar Lemes de Paula - 3.1.0-5.el8 +- kvm-virtio-Helper-for-registering-virtio-device-types.patch [bz#1648023] +- kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch [bz#1648023] +- kvm-globals-Allow-global-properties-to-be-optional.patch [bz#1648023] +- kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch [bz#1648023] +- kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch [bz#1656504] +- kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch [bz#1656504] +- kvm-aarch64-Use-256MB-ECAM-region-by-default.patch [bz#1656504] +- Resolves: bz#1648023 + (Provide separate device types for transitional virtio PCI devices - Fast Train) +- Resolves: bz#1656504 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (aarch64)) + +* Fri Jan 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-4.el8 +- kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch [bz#1656510] +- kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch [bz#1661967] +- kvm-redhat-Fixing-.gitpublish-to-include-AV-information.patch [] +- Resolves: bz#1656510 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (s390x)) +- Resolves: bz#1661967 + (Kernel prints the message "VPHN is not supported. 
Disabling polling...") + +* Thu Jan 03 2019 Danilo Cesar Lemes de Paula - 3.1.0-3.el8 +- kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch [bz#1656508] +- Resolves: bz#1656508 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (ppc64le)) + +* Fri Dec 21 2018 Danilo Cesar Lemes de Paula - 3.1.0-2.el8 +- kvm-pc-7.5-compat-entries.patch [bz#1655820] +- kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch [bz#1655820] +- kvm-pc-PC_RHEL7_6_COMPAT.patch [bz#1655820] +- kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch [bz#1655820] +- kvm-pc-Add-pc-q35-8.0.0-machine-type.patch [bz#1655820] +- kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch [bz#1655820] +- kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch [bz#1659604] +- kvm-Add-edk2-Requires-to-qemu-kvm.patch [bz#1660208] +- Resolves: bz#1655820 + (Can't migarate between rhel8 and rhel7 when guest has device "video") +- Resolves: bz#1659604 (8->7 migration failed: qemu-kvm: error: failed to set MSR 0x4b564d02 to 0x27fc13285) -- Resolves: bz#1659565 - (machine type: required compat flag x-migrate-smi-count=off) - -* Tue Dec 18 2018 Danilo Cesar Lemes de Paula - 2.12.0-51 -- kvm-Add-edk2-Requires-to-qemu-kvm.patch [bz#1654276] -- Resolves: bz#1654276 +- Resolves: bz#1660208 (qemu-kvm: Should depend on the architecture-appropriate guest firmware) -* Mon Dec 17 2018 Danilo Cesar Lemes de Paula - -- kvm-redhat-enable-tpmdev-passthrough.patch [bz#1654486] -- Resolves: bz#1654486 - ([RFE] enable TPM passthrough at compile time (qemu-kvm)) - -* Fri Dec 14 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-48 -- kvm-redhat-use-autopatch-instead-of-PATCHAPPLY.patch [bz#1613128] -- kvm-redhat-Removing-some-unused-build-flags-in-the-spec-.patch [bz#1613128] -- kvm-redhat-Fixing-rhev-ma-conflicts.patch [bz#1613126] -- kvm-redhat-Remove-_smp_mflags-cleanup-workaround-for-s39.patch [bz#1613128] -- kvm-redhat-Removing-dead-code-from-the-spec-file.patch [bz#1613128] -- kvm-i386-Add-stibp-flag-name.patch 
[bz#1639446] -- kvm-Add-functional-acceptance-tests-infrastructure.patch [bz#1655807] -- kvm-scripts-qemu.py-allow-adding-to-the-list-of-extra-ar.patch [bz#1655807] -- kvm-Acceptance-tests-add-quick-VNC-tests.patch [bz#1655807] -- kvm-scripts-qemu.py-introduce-set_console-method.patch [bz#1655807] -- kvm-Acceptance-tests-add-Linux-kernel-boot-and-console-c.patch [bz#1655807] -- kvm-Bootstrap-Python-venv-for-tests.patch [bz#1655807] -- kvm-Acceptance-tests-add-make-rule-for-running-them.patch [bz#1655807] -- Resolves: bz#1613126 - (Check and fix qemu-kvm-rhev and qemu-kvm-ma conflicts in qemu-kvm for rhel-8) -- Resolves: bz#1613128 - (Spec file clean up) -- Resolves: bz#1639446 - (Cross migration from RHEL7.5 to RHEL8 shouldn't fail with cpu flag stibp [qemu-kvm]) -- Resolves: bz#1655807 - (Backport avocado-qemu tests for QEMU 2.12) +* Thu Dec 13 2018 Danilo Cesar Lemes de Paula - 3.1.0-1.el8 +- Rebase to qemu-kvm 3.1.0 * Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-47 - kvm-Disable-CONFIG_IPMI-and-CONFIG_I2C-for-ppc64.patch [bz#1640044] @@ -3356,7 +3172,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#1640044 (Disable CONFIG_I2C and CONFIG_IPMI in default-configs/ppc64-softmmu.mak) -* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-46 +* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-46 - kvm-qcow2-Give-the-refcount-cache-the-minimum-possible-s.patch [bz#1656507] - kvm-docs-Document-the-new-default-sizes-of-the-qcow2-cac.patch [bz#1656507] - kvm-qcow2-Fix-Coverity-warning-when-calculating-the-refc.patch [bz#1656507] @@ -3409,13 +3225,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#1654651 (Qemu: hw: bt: keep bt/* objects from building [rhel-8.0]) -* Tue Nov 27 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-44 +* Tue Nov 27 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-43 - kvm-block-Make-more-block-drivers-compile-time-configura.patch [bz#1598842 
bz#1598842] - kvm-RHEL8-Add-disable-configure-options-to-qemu-spec-fil.patch [bz#1598842] - Resolves: bz#1598842 (Compile out unused block drivers) * Mon Nov 26 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-43 + - kvm-configure-add-test-for-libudev.patch [bz#1636185] - kvm-qga-linux-report-disk-serial-number.patch [bz#1636185] - kvm-qga-linux-return-disk-device-in-guest-get-fsinfo.patch [bz#1636185] @@ -3482,6 +3299,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ (rbd json format of 7.6 is incompatible with 7.5) * Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-40.el8 + - kvm-vnc-call-sasl_server_init-only-when-required.patch [bz#1609327] - kvm-nbd-server-fix-NBD_CMD_CACHE.patch [bz#1636142] - kvm-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch [bz#1636142] @@ -3540,7 +3358,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#1639374 (qemu-img map 'Aborted (core dumped)' when specifying a plain file) -* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-39.el8 +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - - kvm-linux-headers-update.patch [bz#1508142] - kvm-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch [bz#1508142] - kvm-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch [bz#1508142] @@ -3588,7 +3406,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#1635583 (Quitting VM causes qemu core dump once the block mirror job paused for no enough target space) -* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - 2.12.0-36.el8 +* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-36 - kvm-check-Only-test-ivshm-when-it-is-compiled-in.patch [bz#1621817] - kvm-Disable-ivshmem.patch [bz#1621817] - kvm-mirror-Fail-gracefully-for-source-target.patch [bz#1637963] @@ -3687,14 +3505,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#1615717 (Memory leaks) -* Tue Sep 04 2018 Danilo Cesar Lemes de Paula - 2.12.0-28.el8 -- 
kvm-e1000e-Do-not-auto-clear-ICR-bits-which-aren-t-set-i.patch [bz#1596024] -- kvm-e1000e-Prevent-MSI-MSI-X-storms.patch [bz#1596024] -- kvm-Drop-build_configure.sh-and-Makefile.local-files.patch [] -- kvm-Fix-subject-line-in-.gitpublish.patch [] -- Resolves: bz#1596024 - (The network link can't be detected on guest when the guest uses e1000e model type) - * Wed Aug 29 2018 Danilo Cesar Lemes de Paula - 2.12.0-27.el8 - kvm-Fix-libusb-1.0.22-deprecated-libusb_set_debug-with-l.patch [bz#1622656] - Resolves: bz#1622656 @@ -4185,15 +3995,15 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - kvm-Revert-Defining-a-shebang-for-python-scripts.patch [bz#1571533] - kvm-spec-Fix-ambiguous-python-interpreter-name.patch [bz#1571533] - kvm-qemu-ga-blacklisting-guest-exec-and-guest-exec-statu.patch [bz#1518132] -- kvm-redhat-rewrap-build_configure.sh-cmdline-for-the-rh-.patch -- kvm-redhat-remove-the-VTD-LIVE_BLOCK_OPS-and-RHV-options.patch -- kvm-redhat-fix-the-rh-env-prep-target-s-dependency-on-th.patch -- kvm-redhat-remove-dead-code-related-to-s390-not-s390x.patch -- kvm-redhat-sync-compiler-flags-from-the-spec-file-to-rh-.patch -- kvm-redhat-sync-guest-agent-enablement-and-tcmalloc-usag.patch -- kvm-redhat-fix-up-Python-3-dependency-for-building-QEMU.patch -- kvm-redhat-fix-up-Python-dependency-for-SRPM-generation.patch -- kvm-redhat-disable-glusterfs-dependency-support-temporar.patch +- kvm-redhat-rewrap-build_configure.sh-cmdline-for-the-rh-.patch [] +- kvm-redhat-remove-the-VTD-LIVE_BLOCK_OPS-and-RHV-options.patch [] +- kvm-redhat-fix-the-rh-env-prep-target-s-dependency-on-th.patch [] +- kvm-redhat-remove-dead-code-related-to-s390-not-s390x.patch [] +- kvm-redhat-sync-compiler-flags-from-the-spec-file-to-rh-.patch [] +- kvm-redhat-sync-guest-agent-enablement-and-tcmalloc-usag.patch [] +- kvm-redhat-fix-up-Python-3-dependency-for-building-QEMU.patch [] +- kvm-redhat-fix-up-Python-dependency-for-SRPM-generation.patch [] +- 
kvm-redhat-disable-glusterfs-dependency-support-temporar.patch [] - Resolves: bz#1518132 (Ensure file access RPCs are disabled by default) - Resolves: bz#1571533