diff --git a/.gitignore b/.gitignore index 98eebcb..459c79b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/qemu-7.0.0.tar.xz +SOURCES/qemu-7.2.0.tar.xz diff --git a/.qemu-kvm.metadata b/.qemu-kvm.metadata index 75efcde..ed38614 100644 --- a/.qemu-kvm.metadata +++ b/.qemu-kvm.metadata @@ -1 +1 @@ -c3fd2403106c33d0470bc9ba4fb4b946c0402248 SOURCES/qemu-7.0.0.tar.xz +634a3e4b381cbf13085eb1568accb85cbd9d89c4 SOURCES/qemu-7.2.0.tar.xz diff --git a/SOURCES/0004-Initial-redhat-build.patch b/SOURCES/0004-Initial-redhat-build.patch index 94cf91c..0f9cc55 100644 --- a/SOURCES/0004-Initial-redhat-build.patch +++ b/SOURCES/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From fc113ecd7c99646a7ced0b99570b5927ae6d595f Mon Sep 17 00:00:00 2001 +From ccc4a5bdc8c2f27678312364a7c12aeafd009bb6 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: We disable make check due to issues with some of the tests. 
-This rebase is based on qemu-kvm-6.2.0-13.el9 +This rebase is based on qemu-kvm-7.1.0-7.el9 Signed-off-by: Miroslav Rezanina -- @@ -50,6 +50,22 @@ Rebase changes (7.0.0): - Change permissions on installing tests/Makefile.include - Remove ssh block driver +Rebase changes (7.1.0 rc0): +- --disable-vnc-png renamed to --disable-png (upstream) +- removed --disable-vhost-vsock and --disable-vhost-scsi +- capstone submodule removed +- Temporarily include capstone build + +Rebase changes (7.2.0 rc0): +- Switch --enable-slirp=system to --enable-slirp + +Rebase changes (7.2.0 rc2): +- Added new configure options (blkio and sndio, both disabled) + +Rebase changes (7.2.0): +- Fix SRPM name generation to work on Fedora 37 +- Switch back to system meson + Merged patches (6.0.0): - 605758c902 Limit build on Power to qemu-img and qemu-ga only @@ -142,46 +158,34 @@ Merged patches (7.0.0): - d46d2710b2 spec: Obsolete old usb redir subpackage - 6f52a50b68 spec: Obsolete ssh driver +Merged patches (7.2.0 rc4): +- 8c6834feb6 Remove opengl display device subpackages (C9S MR 124) +- 0ecc97f29e spec: Add requires for packages with additional virtio-gpu variants (C9S MR 124) + Signed-off-by: Miroslav Rezanina + +fix --- - .distro/85-kvm.preset | 5 - .distro/Makefile | 100 + - .distro/Makefile.common | 40 + + .distro/Makefile.common | 41 + .distro/README.tests | 39 + - .distro/ksm.service | 13 - - .distro/ksm.sysconfig | 4 - - .distro/ksmctl.c | 77 - - .distro/ksmtuned | 139 - - .distro/ksmtuned.conf | 21 - - .distro/ksmtuned.service | 12 - - .distro/kvm-setup | 49 - - .distro/kvm-setup.service | 14 - .distro/modules-load.conf | 4 + .distro/qemu-guest-agent.service | 1 - - .distro/qemu-kvm.spec.template | 4034 +++++++++++++++++++++++ + .distro/qemu-kvm.spec.template | 4315 +++++++++++++++++++++++ .distro/rpminspect.yaml | 6 +- .distro/scripts/extract_build_cmd.py | 12 + + .distro/scripts/process-patches.sh | 4 + .gitignore | 1 + README.systemtap | 43 + - meson.build | 4 +- 
scripts/qemu-guest-agent/fsfreeze-hook | 2 +- scripts/systemtap/conf.d/qemu_kvm.conf | 4 + scripts/systemtap/script.d/qemu_kvm.stp | 1 + tests/check-block.sh | 2 + ui/vnc-auth-sasl.c | 2 +- - 25 files changed, 4290 insertions(+), 339 deletions(-) - delete mode 100644 .distro/85-kvm.preset + 16 files changed, 4573 insertions(+), 4 deletions(-) create mode 100644 .distro/Makefile create mode 100644 .distro/Makefile.common create mode 100644 .distro/README.tests - delete mode 100644 .distro/ksm.service - delete mode 100644 .distro/ksm.sysconfig - delete mode 100644 .distro/ksmctl.c - delete mode 100644 .distro/ksmtuned - delete mode 100644 .distro/ksmtuned.conf - delete mode 100644 .distro/ksmtuned.service - delete mode 100644 .distro/kvm-setup - delete mode 100644 .distro/kvm-setup.service create mode 100644 .distro/modules-load.conf create mode 100644 .distro/qemu-kvm.spec.template create mode 100644 README.systemtap @@ -237,21 +241,6 @@ index 0000000000..ad913fc990 + +3. Translate the trace record to readable format. + # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log -diff --git a/meson.build b/meson.build -index 861de93c4f..6f7e430f0f 100644 ---- a/meson.build -+++ b/meson.build -@@ -2394,7 +2394,9 @@ if capstone_opt == 'internal' - # Include all configuration defines via a header file, which will wind up - # as a dependency on the object file, and thus changes here will result - # in a rebuild. 
-- '-include', 'capstone-defs.h' -+ '-include', 'capstone-defs.h', -+ -+ '-Wp,-D_GLIBCXX_ASSERTIONS', - ] - - libcapstone = static_library('capstone', diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook index 13aafd4845..e9b84ec028 100755 --- a/scripts/qemu-guest-agent/fsfreeze-hook @@ -283,11 +272,11 @@ index 0000000000..c04abf9449 @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} diff --git a/tests/check-block.sh b/tests/check-block.sh -index f59496396c..d900d8b35e 100755 +index 5de2c1ba0b..6af743f441 100755 --- a/tests/check-block.sh +++ b/tests/check-block.sh -@@ -48,6 +48,8 @@ if LANG=C bash --version | grep -q 'GNU bash, version [123]' ; then - skip "bash version too old ==> Not running the qemu-iotests." +@@ -22,6 +22,8 @@ if [ -z "$(find . -name 'qemu-system-*' -print)" ]; then + skip "No qemu-system binary available ==> Not running the qemu-iotests." fi +exit 0 diff --git a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch index 1ffbe97..767389f 100644 --- a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch +++ b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch @@ -1,6 +1,6 @@ -From 51ec7495d69fe4b4d0b61642ca6c0e7fd7a1032d Mon Sep 17 00:00:00 2001 +From 90366cd2ead5a5301aaceed56477d2e6d9f1b3cd Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Thu, 15 Jul 2021 03:22:36 -0400 +Date: Wed, 7 Dec 2022 03:05:48 -0500 Subject: Enable/disable devices for RHEL This commit adds all changes related to changes in supported devices. 
@@ -22,6 +22,16 @@ Rebase notes (7.0.0): - Renamed CONFIG_ARM_GIC_TCG to CONFIG_ARM_GICV3_TCG - Removed upstream devices +Rebase notes (7.1.0 rc0): +- Added CONFIG_VHOST_VSOCK and CONFIG_VHOST_USER_VSOCK configs +- Added CONFIG_CXL and CONFIG_CXL_MEM_DEVICE for aarch64 and x86_64 + +Rebase notes (7.1.0 rc3): +- Added CONFIG_VHOST_USER_FS option (all archs) + +Rebase notes (7.2.0 rc20): +- Removed disabling a15mpcore.c as no longer needed + Merged patches (6.1.0): - c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak - 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI @@ -37,17 +47,22 @@ Merged patches (6.2.0): Merged patches (7.0.0): - fd7c45a5a8 redhat: Enable virtio-mem as tech-preview on x86-64 - c9e68ea451 Enable SGX -- RH Only + +Merged patches (7.1.0 rc0): +- 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/acpi/ich9.c chunk) +- 8f663466c6 configs/devices/aarch64-softmmu: Enable CONFIG_VIRTIO_MEM +- 1bf372717a Enable virtio-iommu-pci on aarch64 +- ae3f269458 Enable virtio-iommu-pci on x86_64 --- .distro/qemu-kvm.spec.template | 18 +-- - .../aarch64-softmmu/aarch64-rh-devices.mak | 34 ++++++ - .../ppc64-softmmu/ppc64-rh-devices.mak | 35 ++++++ + .../aarch64-softmmu/aarch64-rh-devices.mak | 41 +++++++ + .../ppc64-softmmu/ppc64-rh-devices.mak | 37 ++++++ configs/devices/rh-virtio.mak | 10 ++ - .../s390x-softmmu/s390x-rh-devices.mak | 15 +++ - .../x86_64-softmmu/x86_64-rh-devices.mak | 103 ++++++++++++++++++ - hw/acpi/ich9.c | 4 +- + .../s390x-softmmu/s390x-rh-devices.mak | 18 +++ + .../x86_64-softmmu/x86_64-rh-devices.mak | 109 ++++++++++++++++++ hw/arm/meson.build | 2 +- hw/block/fdc.c | 10 ++ - hw/cpu/meson.build | 5 +- + hw/cpu/meson.build | 3 +- hw/display/cirrus_vga.c | 5 +- hw/ide/piix.c | 5 +- hw/input/pckbd.c | 2 + @@ -58,7 +73,7 @@ Merged patches (7.0.0): target/ppc/cpu-models.c | 9 ++ target/s390x/cpu_models_sysemu.c | 3 + target/s390x/kvm/kvm.c | 8 ++ - 20 files changed, 269 insertions(+), 15 deletions(-) + 19 files changed, 
283 insertions(+), 13 deletions(-) create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak create mode 100644 configs/devices/rh-virtio.mak @@ -67,10 +82,10 @@ Merged patches (7.0.0): diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..5f6ee1de5b +index 0000000000..720ec0cb57 --- /dev/null +++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -0,0 +1,34 @@ +@@ -0,0 +1,41 @@ +include ../rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y @@ -79,6 +94,8 @@ index 0000000000..5f6ee1de5b +CONFIG_ARM_SMMUV3=y +CONFIG_ARM_V7M=y +CONFIG_ARM_VIRT=y ++CONFIG_CXL=y ++CONFIG_CXL_MEM_DEVICE=y +CONFIG_EDID=y +CONFIG_PCIE_PORT=y +CONFIG_PCI_DEVICES=y @@ -95,6 +112,8 @@ index 0000000000..5f6ee1de5b +CONFIG_VFIO_PCI=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_MEM=y ++CONFIG_VIRTIO_IOMMU=y +CONFIG_XIO3130=y +CONFIG_NVDIMM=y +CONFIG_ACPI_APEI=y @@ -105,12 +124,15 @@ index 0000000000..5f6ee1de5b +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y +CONFIG_PVPANIC_PCI=y +CONFIG_PXB=y ++CONFIG_VHOST_VSOCK=y ++CONFIG_VHOST_USER_VSOCK=y ++CONFIG_VHOST_USER_FS=y diff --git a/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak new file mode 100644 -index 0000000000..6a3e3f0227 +index 0000000000..dbb7d30829 --- /dev/null +++ b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak -@@ -0,0 +1,35 @@ +@@ -0,0 +1,37 @@ +include ../rh-virtio.mak + +CONFIG_DIMM=y @@ -146,6 +168,8 @@ index 0000000000..6a3e3f0227 +CONFIG_TPM=y +CONFIG_TPM_SPAPR=y +CONFIG_TPM_EMULATOR=y ++CONFIG_VHOST_VSOCK=y ++CONFIG_VHOST_USER_VSOCK=y diff --git a/configs/devices/rh-virtio.mak b/configs/devices/rh-virtio.mak new file mode 100644 index 0000000000..94ede1b5f6 @@ -164,10 +188,10 @@ index 0000000000..94ede1b5f6 +CONFIG_VIRTIO_SERIAL=y diff --git 
a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak new file mode 100644 -index 0000000000..d3b38312e1 +index 0000000000..69a799adbd --- /dev/null +++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak -@@ -0,0 +1,15 @@ +@@ -0,0 +1,18 @@ +include ../rh-virtio.mak + +CONFIG_PCI=y @@ -183,12 +207,15 @@ index 0000000000..d3b38312e1 +CONFIG_VHOST_USER=y +CONFIG_VIRTIO_CCW=y +CONFIG_WDT_DIAG288=y ++CONFIG_VHOST_VSOCK=y ++CONFIG_VHOST_USER_VSOCK=y ++CONFIG_VHOST_USER_FS=y diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..d0c9e66641 +index 0000000000..10cb0a14e0 --- /dev/null +++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -0,0 +1,103 @@ +@@ -0,0 +1,109 @@ +include ../rh-virtio.mak + +CONFIG_ACPI=y @@ -204,6 +231,8 @@ index 0000000000..d0c9e66641 +CONFIG_APIC=y +CONFIG_APM=y +CONFIG_BOCHS_DISPLAY=y ++CONFIG_CXL=y ++CONFIG_CXL_MEM_DEVICE=y +CONFIG_DIMM=y +CONFIG_E1000E_PCI_EXPRESS=y +CONFIG_E1000_PCI=y @@ -281,6 +310,7 @@ index 0000000000..d0c9e66641 +CONFIG_VIRTIO_MEM=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VGA=y ++CONFIG_VIRTIO_IOMMU=y +CONFIG_VMMOUSE=y +CONFIG_VMPORT=y +CONFIG_VTD=y @@ -292,26 +322,14 @@ index 0000000000..d0c9e66641 +CONFIG_TPM_TIS_ISA=y +CONFIG_TPM_EMULATOR=y +CONFIG_SGX=y -diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index bd9bbade70..de1e401cdf 100644 ---- a/hw/acpi/ich9.c -+++ b/hw/acpi/ich9.c -@@ -435,8 +435,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) - static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; - pm->acpi_memory_hotplug.is_enabled = true; - pm->cpu_hotplug_legacy = true; -- pm->disable_s3 = 0; -- pm->disable_s4 = 0; -+ pm->disable_s3 = 1; -+ pm->disable_s4 = 1; - pm->s4_val = 2; - pm->use_acpi_hotplug_bridge = true; - pm->keep_pci_slot_hpc = true; ++CONFIG_VHOST_VSOCK=y ++CONFIG_VHOST_USER_VSOCK=y ++CONFIG_VHOST_USER_FS=y diff --git 
a/hw/arm/meson.build b/hw/arm/meson.build -index 721a8eb8be..87ed4dd914 100644 +index 92f9f6e000..c5e94c997c 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build -@@ -31,7 +31,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) +@@ -30,7 +30,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) @@ -321,7 +339,7 @@ index 721a8eb8be..87ed4dd914 100644 arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 347875a0cd..ca1776121f 100644 +index 64ae4a6899..9b8e782c19 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -49,6 +49,8 @@ @@ -333,7 +351,7 @@ index 347875a0cd..ca1776121f 100644 /********************************************************/ /* debug Floppy devices */ -@@ -2338,6 +2340,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) +@@ -2346,6 +2348,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) FDrive *drive; static int command_tables_inited = 0; @@ -349,24 +367,21 @@ index 347875a0cd..ca1776121f 100644 error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); return; diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build -index 9e52fee9e7..bb71c9f3e7 100644 +index 9e52fee9e7..87c209a754 100644 --- a/hw/cpu/meson.build +++ b/hw/cpu/meson.build -@@ -1,6 +1,7 @@ +@@ -1,4 +1,5 @@ -softmmu_ss.add(files('core.c', 'cluster.c')) +#softmmu_ss.add(files('core.c', 'cluster.c')) +softmmu_ss.add(files('core.c')) specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) - specific_ss.add(when: 'CONFIG_A9MPCORE', if_true: files('a9mpcore.c')) --specific_ss.add(when: 'CONFIG_A15MPCORE', 
if_true: files('a15mpcore.c')) -+#specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 3bb6a58698..6447fdb02e 100644 +index 6e8c747c46..1948ebee8e 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2945,7 +2945,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -2946,7 +2946,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; @@ -379,10 +394,10 @@ index 3bb6a58698..6447fdb02e 100644 * Also accept 8 MB/16 MB for backward compatibility. */ diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index ce89fd0aa3..fbcf802b13 100644 +index 267dbf37db..87fcda4062 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -232,7 +232,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -199,7 +199,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -392,7 +407,7 @@ index ce89fd0aa3..fbcf802b13 100644 } static const TypeInfo piix3_ide_info = { -@@ -261,6 +262,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -222,6 +223,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -402,12 +417,12 @@ index ce89fd0aa3..fbcf802b13 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index 4efdf75620..5143ebaa27 100644 +index b92b63bedc..3b6235dde6 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -814,6 +814,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) +@@ -957,6 +957,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) dc->vmsd = &vmstate_kbd_isa; - 
isa->build_aml = i8042_build_aml; + adevc->build_dev_aml = i8042_build_aml; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + /* Disabled for Red Hat Enterprise Linux: */ + dc->user_creatable = false; @@ -415,10 +430,10 @@ index 4efdf75620..5143ebaa27 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index f5bc81296d..282d01e374 100644 +index e26e0a64c1..41492fae79 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1821,6 +1821,7 @@ static const E1000Info e1000_devices[] = { +@@ -1824,6 +1824,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -426,7 +441,7 @@ index f5bc81296d..282d01e374 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1833,6 +1834,7 @@ static const E1000Info e1000_devices[] = { +@@ -1836,6 +1837,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -452,7 +467,7 @@ index 8a4861f45a..fcb5dfe792 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/meson.build b/hw/usb/meson.build -index de853d780d..0776ae6a20 100644 +index 793df42e21..cd3c305471 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build @@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade @@ -465,10 +480,10 @@ index de853d780d..0776ae6a20 100644 endif diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index 13d0e9b195..3826fa5122 100644 +index 9a2cef7d05..a528ff9a3d 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c -@@ -22,6 +22,7 @@ +@@ -151,6 +151,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) /* CPU models. These are not needed for the AArch64 linux-user build. 
*/ #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) @@ -476,31 +491,31 @@ index 13d0e9b195..3826fa5122 100644 #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { -@@ -375,6 +376,7 @@ static void cortex_a9_initfn(Object *obj) - cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */ +@@ -504,6 +505,7 @@ static void cortex_a9_initfn(Object *obj) + cpu->isar.reset_pmcr_el0 = 0x41093000; define_arm_cp_regs(cpu, cortexa9_cp_reginfo); } +#endif /* disabled for RHEL */ #ifndef CONFIG_USER_ONLY static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -400,6 +402,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { - REGINFO_SENTINEL +@@ -528,6 +530,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, }; +#if 0 /* Disabled for Red Hat Enterprise Linux */ static void cortex_a7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -445,6 +448,7 @@ static void cortex_a7_initfn(Object *obj) - cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ +@@ -576,6 +579,7 @@ static void cortex_a7_initfn(Object *obj) + cpu->isar.reset_pmcr_el0 = 0x41072000; define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ } +#endif /* disabled for RHEL */ static void cortex_a15_initfn(Object *obj) { -@@ -488,6 +492,7 @@ static void cortex_a15_initfn(Object *obj) +@@ -624,6 +628,7 @@ static void cortex_a15_initfn(Object *obj) define_arm_cp_regs(cpu, cortexa15_cp_reginfo); } @@ -508,7 +523,7 @@ index 13d0e9b195..3826fa5122 100644 static void cortex_m0_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -928,6 +933,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) +@@ -1065,6 +1070,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) cc->gdb_core_xml_file = "arm-m-profile.xml"; } @@ -516,7 +531,7 @@ index 13d0e9b195..3826fa5122 100644 #ifndef TARGET_AARCH64 /* -@@ -1007,6 +1013,7 @@ 
static void arm_max_initfn(Object *obj) +@@ -1132,6 +1138,7 @@ static void arm_max_initfn(Object *obj) #endif /* !TARGET_AARCH64 */ static const ARMCPUInfo arm_tcg_cpus[] = { @@ -524,7 +539,7 @@ index 13d0e9b195..3826fa5122 100644 { .name = "arm926", .initfn = arm926_initfn }, { .name = "arm946", .initfn = arm946_initfn }, { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1022,7 +1029,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1147,7 +1154,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "cortex-a7", .initfn = cortex_a7_initfn }, { .name = "cortex-a8", .initfn = cortex_a8_initfn }, { .name = "cortex-a9", .initfn = cortex_a9_initfn }, @@ -534,7 +549,7 @@ index 13d0e9b195..3826fa5122 100644 { .name = "cortex-m0", .initfn = cortex_m0_initfn, .class_init = arm_v7m_class_init }, { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1053,6 +1062,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1178,6 +1187,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, @@ -543,7 +558,7 @@ index 13d0e9b195..3826fa5122 100644 { .name = "max", .initfn = arm_max_initfn }, #endif diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 976be5e0d1..dd78883410 100644 +index 912b037c63..cd3ff700ac 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -573,7 +588,7 @@ index 976be5e0d1..dd78883410 100644 POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, -@@ -897,12 +901,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -896,12 +900,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "7447a", "7447a_v1.2" }, { "7457a", "7457a_v1.2" }, { "apollo7pm", "7457a_v1.0" }, @@ -589,7 +604,7 @@ index 976be5e0d1..dd78883410 100644 { "power7", "power7_v2.3" }, { "power7+", "power7+_v2.1" }, { "power8e", 
"power8e_v2.1" }, -@@ -912,6 +919,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -911,12 +918,14 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "power10", "power10_v2.0" }, #endif @@ -597,18 +612,18 @@ index 976be5e0d1..dd78883410 100644 /* Generic PowerPCs */ #if defined(TARGET_PPC64) { "ppc64", "970fx_v3.1" }, -@@ -919,5 +927,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + #endif { "ppc32", "604" }, { "ppc", "604" }, - { "default", "604" }, +#endif + { NULL, NULL } }; diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c -index 05c3ccaaff..6a04ccab1b 100644 +index d8a141a023..d086b1c39c 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c -@@ -36,6 +36,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, +@@ -35,6 +35,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, (max_model->def->gen == model->def->gen && max_model->def->ec_ga < model->def->ec_ga)) { list_add_feat("type", unavailable); @@ -619,10 +634,10 @@ index 05c3ccaaff..6a04ccab1b 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c -index 6acf14d5ec..74f089d87f 100644 +index 3ac7ec9acf..97da1a6424 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c -@@ -2512,6 +2512,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2529,6 +2529,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } diff --git a/SOURCES/0006-Machine-type-related-general-changes.patch b/SOURCES/0006-Machine-type-related-general-changes.patch index c3b08a4..fc2a89d 100644 --- a/SOURCES/0006-Machine-type-related-general-changes.patch +++ b/SOURCES/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From a525db3951dc68c469d1f51bdc69ab6e75e72c37 Mon Sep 17 00:00:00 2001 +From 0208f38671b9de4036c0d56142a7f22e5091bae0 Mon Sep 17 
00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -19,6 +19,10 @@ Rebase notes (7.0.0): - Remove downstream changes leftovers in hw/rtc/mc146818rtc.c - Remove unnecessary change in hw/usb/hcd-uhci.c +Rebase notes (7.1.0 rc0): +- Moved adding rhel_old_machine_deprecation variable from s390x to general machine types commit +- Moved adding hw_compat_rhel_8_6 struct from x86_64 to general machine types commit + Merged patches (6.1.0): - f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4 - 1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 @@ -35,28 +39,35 @@ Merged patches (6.2.0): Merged patches (7.0.0): - ef5afcc86d Fix virtio-net-pci* "vectors" compat - 168f0d56e3 compat: Update hw_compat_rhel_8_5 with 6.2.0 RC2 changes + +Merged patches (7.1.0 rc0): +- 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/acpi/piix4.c chunk) +- 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (only hw/core/machine.c and include/hw/boards.h chunk) + +Merged patches (7.2.0 rc0): +- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) --- - hw/acpi/piix4.c | 6 +- + hw/acpi/piix4.c | 2 +- hw/arm/virt.c | 2 +- - hw/core/machine.c | 186 +++++++++++++++++++++++++++++++++++ + hw/core/machine.c | 222 +++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + hw/net/rtl8139.c | 4 +- - hw/smbios/smbios.c | 46 ++++++++- + hw/smbios/smbios.c | 46 +++++++- hw/timer/i8254_common.c | 2 +- - hw/usb/hcd-xhci-pci.c | 59 ++++++++--- + hw/usb/hcd-xhci-pci.c | 59 +++++++--- hw/usb/hcd-xhci-pci.h | 1 + - include/hw/boards.h | 21 ++++ + include/hw/boards.h | 31 +++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + - 14 files changed, 316 insertions(+), 25 deletions(-) + 14 files changed, 360 insertions(+), 23 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 
fe5625d07a..28544e78c3 100644 +index 0a81f1ad93..dbfb362a8f 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -287,7 +287,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) +@@ -248,7 +248,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, @@ -65,22 +76,11 @@ index fe5625d07a..28544e78c3 100644 .post_load = vmstate_acpi_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), -@@ -653,8 +653,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) - - static Property piix4_pm_properties[] = { - DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), -- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), -- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), -+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), -+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), - DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), - DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, - use_acpi_hotplug_bridge, true), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d2e5ecd234..6a84031fd7 100644 +index b871350856..d633300fdc 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1596,7 +1596,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1619,7 +1619,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, @@ -90,14 +90,50 @@ index d2e5ecd234..6a84031fd7 100644 smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, diff --git a/hw/core/machine.c b/hw/core/machine.c -index 1e23fdc14b..ea430d844e 100644 +index 8d34caa31d..9edec1ca05 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -37,6 +37,192 @@ - #include "hw/virtio/virtio.h" +@@ -40,6 +40,228 @@ #include "hw/virtio/virtio-pci.h" + #include "qom/object_interfaces.h" +/* ++ * RHEL only: machine types for previous major releases are deprecated ++ */ ++const char *rhel_old_machine_deprecation = ++ "machine types for previous major releases are deprecated"; ++ ++/* ++ * Mostly the same as hw_compat_7_0 ++ */ ++GlobalProperty hw_compat_rhel_9_1[] = { ++ /* hw_compat_rhel_9_1 from hw_compat_7_0 */ ++ { "arm-gicv3-common", "force-8-bit-prio", "on" }, ++ /* hw_compat_rhel_9_1 from hw_compat_7_0 */ ++ { "nvme-ns", "eui64-default", "on"}, ++}; ++const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); ++ ++/* ++ * Mostly the same as hw_compat_6_2 ++ */ ++GlobalProperty hw_compat_rhel_9_0[] = { ++ /* hw_compat_rhel_9_0 from hw_compat_6_2 */ ++ { "PIIX4_PM", "x-not-migrate-acpi-index", "on"}, ++}; ++const size_t hw_compat_rhel_9_0_len = G_N_ELEMENTS(hw_compat_rhel_9_0); ++ ++GlobalProperty hw_compat_rhel_8_6[] = { ++ /* hw_compat_rhel_8_6 bz 2065589 */ ++ /* ++ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so ++ * we need do disable it downstream on the latest hw_compat_rhel_8. 
++ */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++}; ++const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); ++ ++/* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ +GlobalProperty hw_compat_rhel_8_5[] = { @@ -283,14 +319,14 @@ index 1e23fdc14b..ea430d844e 100644 +}; +const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); + - GlobalProperty hw_compat_6_2[] = { - { "PIIX4_PM", "x-not-migrate-acpi-index", "on"}, + GlobalProperty hw_compat_7_1[] = { + { "virtio-device", "queue_reset", "false" }, }; diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 46abbc5653..505467059b 100644 +index 2a5437d803..0db2c2b2a1 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c -@@ -88,7 +88,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) +@@ -89,7 +89,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) } static Property vga_isa_properties[] = { @@ -300,10 +336,10 @@ index 46abbc5653..505467059b 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index b72c03d0a6..c797e98312 100644 +index 0ad0ed1603..0985ff67d2 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, +@@ -187,6 +187,8 @@ static void pc_init1(MachineState *machine, smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -313,10 +349,10 @@ index b72c03d0a6..c797e98312 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 1780f79bc1..b695f88c45 100644 +index a496bd6e74..ea582254e3 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) +@@ -201,6 +201,8 @@ static void pc_q35_init(MachineState *machine) smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -326,10 +362,10 @@ index 1780f79bc1..b695f88c45 100644 } diff --git 
a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 6b65823b4b..75dacabc43 100644 +index 700b1b66b6..13693aeb4f 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c -@@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) +@@ -3178,7 +3178,7 @@ static int rtl8139_pre_save(void *opaque) static const VMStateDescription vmstate_rtl8139 = { .name = "rtl8139", @@ -338,7 +374,7 @@ index 6b65823b4b..75dacabc43 100644 .minimum_version_id = 3, .post_load = rtl8139_post_load, .pre_save = rtl8139_pre_save, -@@ -3260,7 +3260,9 @@ static const VMStateDescription vmstate_rtl8139 = { +@@ -3259,7 +3259,9 @@ static const VMStateDescription vmstate_rtl8139 = { VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), @@ -349,7 +385,7 @@ index 6b65823b4b..75dacabc43 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 60349ee402..0edcc98434 100644 +index b4243de735..c5ad69237e 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -57,6 +57,9 @@ static bool smbios_legacy = true; @@ -362,7 +398,7 @@ index 60349ee402..0edcc98434 100644 uint8_t *smbios_tables; size_t smbios_tables_len; -@@ -639,7 +642,7 @@ static void smbios_build_type_1_table(void) +@@ -669,7 +672,7 @@ static void smbios_build_type_1_table(void) static void smbios_build_type_2_table(void) { @@ -371,7 +407,7 @@ index 60349ee402..0edcc98434 100644 SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -914,7 +917,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) +@@ -977,7 +980,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) void smbios_set_defaults(const char *manufacturer, const char *product, const char *version, bool legacy_mode, @@ -383,7 +419,7 @@ index 60349ee402..0edcc98434 100644 { smbios_have_defaults = true; smbios_legacy = legacy_mode; -@@ 
-935,11 +941,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, +@@ -998,11 +1004,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, g_free(smbios_entries); } @@ -444,7 +480,7 @@ index 050875b497..32935da46c 100644 vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c -index e934b1a5b1..e18b05e528 100644 +index 643d4643e4..529bad9366 100644 --- a/hw/usb/hcd-xhci-pci.c +++ b/hw/usb/hcd-xhci-pci.c @@ -104,6 +104,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) @@ -555,13 +591,22 @@ index c193f79443..086a1feb1e 100644 #endif diff --git a/include/hw/boards.h b/include/hw/boards.h -index c92ac8815c..c90a19b4d1 100644 +index 90f1dd3aeb..2209d4e416 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -449,4 +449,25 @@ extern const size_t hw_compat_2_2_len; +@@ -454,4 +454,35 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; ++extern GlobalProperty hw_compat_rhel_9_1[]; ++extern const size_t hw_compat_rhel_9_1_len; ++ ++extern GlobalProperty hw_compat_rhel_9_0[]; ++extern const size_t hw_compat_rhel_9_0_len; ++ ++extern GlobalProperty hw_compat_rhel_8_6[]; ++extern const size_t hw_compat_rhel_8_6_len; ++ +extern GlobalProperty hw_compat_rhel_8_5[]; +extern const size_t hw_compat_rhel_8_5_len; + @@ -583,12 +628,13 @@ index c92ac8815c..c90a19b4d1 100644 +extern GlobalProperty hw_compat_rhel_7_6[]; +extern const size_t hw_compat_rhel_7_6_len; + ++extern const char *rhel_old_machine_deprecation; #endif diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 4b7ad77a44..9acff96a86 100644 +index 7f3259a630..d24b3ccd32 100644 --- a/include/hw/firmware/smbios.h +++ b/include/hw/firmware/smbios.h -@@ -272,7 +272,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); +@@ -294,7 +294,10 @@ void 
smbios_entry_add(QemuOpts *opts, Error **errp); void smbios_set_cpuid(uint32_t version, uint32_t features); void smbios_set_defaults(const char *manufacturer, const char *product, const char *version, bool legacy_mode, @@ -601,10 +647,10 @@ index 4b7ad77a44..9acff96a86 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 1a27de9c8b..91331059d9 100644 +index c95333514e..3754eaa97d 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -113,6 +113,9 @@ struct PCMachineClass { +@@ -112,6 +112,9 @@ struct PCMachineClass { bool smbios_defaults; bool smbios_legacy_mode; bool smbios_uuid_encoded; diff --git a/SOURCES/0007-Add-aarch64-machine-types.patch b/SOURCES/0007-Add-aarch64-machine-types.patch index 3c44b11..06611e7 100644 --- a/SOURCES/0007-Add-aarch64-machine-types.patch +++ b/SOURCES/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 697aaa43e3c0f20fc312f06be6c1093f1ba907e1 Mon Sep 17 00:00:00 2001 +From 8501581c99760ed8a800d0c98eeb17a4bf450366 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -17,6 +17,15 @@ Rebase notes (7.0.0): - Added dtb-kaslr-seed option - Set no_tcg_lpa2 to true +Rebase notes (7.1.0 rc0): +- replace dtb_kaslr_seed by dtb_randomness + +Rebase notes (7.1.0 rc3): +- Updated dtb_randomness comment + +Rebase notes (7.2.0 rc0): +- Disabled cortex-a35 + Merged patches (6.2.0): - 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type - f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type @@ -29,13 +38,31 @@ Merged patches (7.0.0): - a1d1b6eeb6 hw/arm/virt: Expose the 'RAS' option - 47f8fe1b82 hw/arm/virt: Add 9.0 machine type and remove 8.5 one - ed2346788f hw/arm/virt: Check no_tcg_its and minor style changes + +Merged patches (7.0.0): +- f79b31bdef hw/arm/virt: Remove the dtb-kaslr-seed machine option +- b6fca85f4a hw/arm/virt: Fix missing 
initialization in instance/class_init() + +Merged patches (7.1.0 rc0): +- ac97dd4f9f RHEL-only: AArch64: Drop unsupported CPU types +- e9c0a70664 target/arm: deprecate named CPU models + +Merged patches (7.2.0 rc0): +- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) --- - hw/arm/virt.c | 234 +++++++++++++++++++++++++++++++++++++++++- - include/hw/arm/virt.h | 8 ++ - 2 files changed, 241 insertions(+), 1 deletion(-) + hw/arm/virt.c | 237 ++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 8 ++ + target/arm/cpu-qom.h | 1 + + target/arm/cpu.c | 5 + + target/arm/cpu.h | 2 + + target/arm/cpu64.c | 16 ++- + target/arm/cpu_tcg.c | 12 +- + target/arm/helper.c | 2 + + tests/qtest/arm-cpu-features.c | 6 + + 9 files changed, 277 insertions(+), 12 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 6a84031fd7..e06862d22a 100644 +index d633300fdc..dfcab40a73 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -80,6 +80,7 @@ @@ -96,7 +123,27 @@ index 6a84031fd7..e06862d22a 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -2250,6 +2292,7 @@ static void machvirt_init(MachineState *machine) +@@ -197,15 +239,19 @@ static const int a15irqmap[] = { + }; + + static const char *valid_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + ARM_CPU_TYPE_NAME("cortex-a7"), + ARM_CPU_TYPE_NAME("cortex-a15"), + ARM_CPU_TYPE_NAME("cortex-a35"), + ARM_CPU_TYPE_NAME("cortex-a53"), ++#endif /* disabled for RHEL */ + ARM_CPU_TYPE_NAME("cortex-a57"), ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + ARM_CPU_TYPE_NAME("cortex-a72"), + ARM_CPU_TYPE_NAME("cortex-a76"), + ARM_CPU_TYPE_NAME("a64fx"), + ARM_CPU_TYPE_NAME("neoverse-n1"), ++#endif /* disabled for RHEL */ + ARM_CPU_TYPE_NAME("host"), + ARM_CPU_TYPE_NAME("max"), + }; +@@ -2290,6 +2336,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -104,7 +151,7 @@ index 
6a84031fd7..e06862d22a 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2277,6 +2320,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2317,6 +2364,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -112,7 +159,23 @@ index 6a84031fd7..e06862d22a 100644 static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2402,6 +2446,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) +@@ -2346,6 +2394,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) + vms->its = value; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_dtb_randomness(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2359,6 +2408,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) + + vms->dtb_randomness = value; + } ++#endif /* disabled for RHEL */ + + static char *virt_get_oem_id(Object *obj, Error **errp) + { +@@ -2442,6 +2492,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) vms->ras = value; } @@ -120,7 +183,7 @@ index 6a84031fd7..e06862d22a 100644 static bool virt_get_mte(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2415,6 +2460,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2455,6 +2506,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -128,7 +191,7 @@ index 6a84031fd7..e06862d22a 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -2818,6 +2864,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2886,6 +2938,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return fixed_ipa ? 
0 : requested_pa_size; } @@ -136,7 +199,7 @@ index 6a84031fd7..e06862d22a 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -3206,3 +3253,188 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3294,3 +3347,185 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -171,6 +234,7 @@ index 6a84031fd7..e06862d22a 100644 + hc->unplug_request = virt_machine_device_unplug_request_cb; + hc->unplug = virt_machine_device_unplug_cb; + mc->nvdimm_supported = true; ++ mc->smp_props.clusters_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; + mc->default_ram_id = "mach-virt.ram"; @@ -233,13 +297,6 @@ index 6a84031fd7..e06862d22a 100644 + "Override the default value of field OEM Table ID " + "in ACPI table header." + "The string may be up to 8 bytes in size"); -+ -+ object_class_property_add_bool(oc, "dtb-kaslr-seed", -+ virt_get_dtb_kaslr_seed, -+ virt_set_dtb_kaslr_seed); -+ object_class_property_set_description(oc, "dtb-kaslr-seed", -+ "Set off to disable passing of kaslr-seed " -+ "dtb node to guest"); +} + +static void rhel_virt_instance_init(Object *obj) @@ -258,6 +315,8 @@ index 6a84031fd7..e06862d22a 100644 + vms->gic_version = VIRT_GIC_VERSION_NOSEL; + + vms->highmem_ecam = !vmc->no_highmem_ecam; ++ vms->highmem_mmio = true; ++ vms->highmem_redists = true; + + if (vmc->no_its) { + vms->its = false; @@ -284,8 +343,8 @@ index 6a84031fd7..e06862d22a 100644 + /* MTE is disabled by default and non-configurable for RHEL */ + vms->mte = false; + -+ /* Supply a kaslr-seed by default */ -+ vms->dtb_kaslr_seed = true; ++ /* Supply kaslr-seed and rng-seed by default, non-configurable for RHEL */ ++ vms->dtb_randomness = true; + + vms->irqmap = a15irqmap; + @@ -320,16 +379,17 @@ index 6a84031fd7..e06862d22a 100644 + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + + 
compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; +} +DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 7e76ee2619..9b1efe8f0e 100644 +index 6ec479ca2b..22b54ec510 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -179,9 +179,17 @@ struct VirtMachineState { +@@ -180,9 +180,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -347,6 +407,269 @@ index 7e76ee2619..9b1efe8f0e 100644 void virt_acpi_setup(VirtMachineState *vms); bool virt_is_acpi_enabled(VirtMachineState *vms); +diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h +index 64c44cef2d..82e97249bc 100644 +--- a/target/arm/cpu-qom.h ++++ b/target/arm/cpu-qom.h +@@ -35,6 +35,7 @@ typedef struct ARMCPUInfo { + const char *name; + void (*initfn)(Object *obj); + void (*class_init)(ObjectClass *oc, void *data); ++ const char *deprecation_note; + } ARMCPUInfo; + + void arm_cpu_register(const ARMCPUInfo *info); +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 38d066c294..a845814bfb 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2250,8 +2250,13 @@ static void arm_cpu_instance_init(Object *obj) + static void cpu_register_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + acc->info = data; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void arm_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 9aeed3c848..f9f504d89e 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -34,6 +34,8 @@ + #define KVM_HAVE_MCE_INJECTION 1 + #endif + ++#define RHEL_CPU_DEPRECATION "use 'host' / 
'max'" ++ + #define EXCP_UDEF 1 /* undefined instruction */ + #define EXCP_SWI 2 /* software interrupt */ + #define EXCP_PREFETCH_ABORT 3 +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 3d74f134f5..4b330a52b5 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -36,6 +36,7 @@ + #include "hw/qdev-properties.h" + #include "internals.h" + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void aarch64_a35_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -115,6 +116,7 @@ static void aarch64_a35_initfn(Object *obj) + /* These values are the same with A53/A57/A72. */ + define_cortex_a72_a57_a53_cp_reginfo(cpu); + } ++#endif /* disabled for RHEL */ + + void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) + { +@@ -735,6 +737,7 @@ static void aarch64_a57_initfn(Object *obj) + define_cortex_a72_a57_a53_cp_reginfo(cpu); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void aarch64_a53_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -1033,6 +1036,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) + /* From D5.1 AArch64 PMU register summary */ + cpu->isar.reset_pmcr_el0 = 0x410c3000; + } ++#endif /* disabled for RHEL */ + + static void aarch64_host_initfn(Object *obj) + { +@@ -1240,13 +1244,18 @@ static void aarch64_max_initfn(Object *obj) + } + + static const ARMCPUInfo aarch64_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-a35", .initfn = aarch64_a35_initfn }, +- { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, ++#endif /* disabled for RHEL */ ++ { .name = "cortex-a57", .initfn = aarch64_a57_initfn, ++ .deprecation_note = RHEL_CPU_DEPRECATION }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, + { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, + { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, + { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, + { .name = "neoverse-n1", .initfn = 
aarch64_neoverse_n1_initfn }, ++#endif /* disabled for RHEL */ + { .name = "max", .initfn = aarch64_max_initfn }, + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + { .name = "host", .initfn = aarch64_host_initfn }, +@@ -1318,8 +1327,13 @@ static void aarch64_cpu_instance_init(Object *obj) + static void cpu_register_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + acc->info = data; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void aarch64_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c +index a528ff9a3d..053f70e399 100644 +--- a/target/arm/cpu_tcg.c ++++ b/target/arm/cpu_tcg.c +@@ -148,10 +148,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) + } + #endif /* !CONFIG_USER_ONLY */ + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* CPU models. These are not needed for the AArch64 linux-user build. 
*/ + #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) + static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + { +@@ -505,7 +505,6 @@ static void cortex_a9_initfn(Object *obj) + cpu->isar.reset_pmcr_el0 = 0x41093000; + define_arm_cp_regs(cpu, cortexa9_cp_reginfo); + } +-#endif /* disabled for RHEL */ + + #ifndef CONFIG_USER_ONLY + static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) +@@ -530,7 +529,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + }; + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_a7_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -579,7 +577,6 @@ static void cortex_a7_initfn(Object *obj) + cpu->isar.reset_pmcr_el0 = 0x41072000; + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ + } +-#endif /* disabled for RHEL */ + + static void cortex_a15_initfn(Object *obj) + { +@@ -628,7 +625,6 @@ static void cortex_a15_initfn(Object *obj) + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_m0_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -1070,7 +1066,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) + + cc->gdb_core_xml_file = "arm-m-profile.xml"; + } +-#endif /* disabled for RHEL */ + + #ifndef TARGET_AARCH64 + /* +@@ -1138,7 +1133,6 @@ static void arm_max_initfn(Object *obj) + #endif /* !TARGET_AARCH64 */ + + static const ARMCPUInfo arm_tcg_cpus[] = { +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "arm926", .initfn = arm926_initfn }, + { .name = "arm946", .initfn = arm946_initfn }, + { .name = "arm1026", .initfn = arm1026_initfn }, +@@ -1154,9 +1148,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "cortex-a7", .initfn = cortex_a7_initfn }, + { 
.name = "cortex-a8", .initfn = cortex_a8_initfn }, + { .name = "cortex-a9", .initfn = cortex_a9_initfn }, +-#endif /* disabled for RHEL */ + { .name = "cortex-a15", .initfn = cortex_a15_initfn }, +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-m0", .initfn = cortex_m0_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m3", .initfn = cortex_m3_initfn, +@@ -1187,7 +1179,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, +-#endif /* disabled for RHEL */ + #ifndef TARGET_AARCH64 + { .name = "max", .initfn = arm_max_initfn }, + #endif +@@ -1215,3 +1206,4 @@ static void arm_tcg_cpu_register_types(void) + type_init(arm_tcg_cpu_register_types) + + #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ ++#endif /* disabled for RHEL */ +diff --git a/target/arm/helper.c b/target/arm/helper.c +index d8c8223ec3..ad9d235773 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -8476,6 +8476,7 @@ void arm_cpu_list(void) + static void arm_cpu_add_definition(gpointer data, gpointer user_data) + { + ObjectClass *oc = data; ++ CPUClass *cc = CPU_CLASS(oc); + CpuDefinitionInfoList **cpu_list = user_data; + CpuDefinitionInfo *info; + const char *typename; +@@ -8485,6 +8486,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) + info->name = g_strndup(typename, + strlen(typename) - strlen("-" TYPE_ARM_CPU)); + info->q_typename = g_strdup(typename); ++ info->deprecated = !!cc->deprecation_note; + + QAPI_LIST_PREPEND(*cpu_list, info); + } +diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c +index 5a14527386..a3579fc303 100644 +--- a/tests/qtest/arm-cpu-features.c ++++ b/tests/qtest/arm-cpu-features.c +@@ -440,8 +440,10 @@ static void test_query_cpu_model_expansion(const void *data) + assert_error(qts, "host", "The CPU type 'host' requires 
KVM", NULL); + + /* Test expected feature presence/absence for some cpu types */ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_has_feature_enabled(qts, "cortex-a15", "pmu"); + assert_has_not_feature(qts, "cortex-a15", "aarch64"); ++#endif /* disabled for RHEL */ + + /* Enabling and disabling pmu should always work. */ + assert_has_feature_enabled(qts, "max", "pmu"); +@@ -458,6 +460,7 @@ static void test_query_cpu_model_expansion(const void *data) + assert_has_feature_enabled(qts, "cortex-a57", "pmu"); + assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_has_feature_enabled(qts, "a64fx", "pmu"); + assert_has_feature_enabled(qts, "a64fx", "aarch64"); + /* +@@ -470,6 +473,7 @@ static void test_query_cpu_model_expansion(const void *data) + "{ 'sve384': true }"); + assert_error(qts, "a64fx", "cannot enable sve640", + "{ 'sve640': true }"); ++#endif /* disabled for RHEL */ + + sve_tests_default(qts, "max"); + pauth_tests_default(qts, "max"); +@@ -505,9 +509,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) + QDict *resp; + char *error; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_error(qts, "cortex-a15", + "We cannot guarantee the CPU type 'cortex-a15' works " + "with KVM on this host", NULL); ++#endif /* disabled for RHEL */ + + assert_has_feature_enabled(qts, "host", "aarch64"); + -- 2.31.1 diff --git a/SOURCES/0008-Add-ppc64-machine-types.patch b/SOURCES/0008-Add-ppc64-machine-types.patch index 860e803..a3cb0a3 100644 --- a/SOURCES/0008-Add-ppc64-machine-types.patch +++ b/SOURCES/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From f61b3d7dc000886e23943457ee9baf1d4cae43b4 Mon Sep 17 00:00:00 2001 +From 2c523f1b6c9470e1cd517ba99e414cde02727e16 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -19,21 +19,25 @@ Merged patches (6.1.0): - 0215eb3356 Remove RHEL 7.3.0 machine 
types (only ppc64 changes) - af69d1ca6e Remove RHEL 7.4.0 machine types (only ppc64 changes) - 8f7a74ab78 Remove RHEL 7.5.0 machine types (only ppc64 changes) + +Merged patches (7.1.0 rc0): +- baa6790171 target/ppc/cpu-models: Fix ppc_cpu_aliases list for RHEL --- hw/ppc/spapr.c | 243 ++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_cpu_core.c | 13 +++ include/hw/ppc/spapr.h | 4 + target/ppc/compat.c | 13 ++- + target/ppc/cpu-models.c | 1 + target/ppc/cpu.h | 1 + target/ppc/kvm.c | 27 +++++ target/ppc/kvm_ppc.h | 13 +++ - 7 files changed, 313 insertions(+), 1 deletion(-) + 8 files changed, 314 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index a4372ba189..5fdf8b506d 100644 +index 66b414d2e9..499eb49253 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -1622,6 +1622,9 @@ static void spapr_machine_reset(MachineState *machine) +@@ -1633,6 +1633,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); @@ -43,7 +47,7 @@ index a4372ba189..5fdf8b506d 100644 first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3317,6 +3320,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3347,6 +3350,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -64,7 +68,7 @@ index a4372ba189..5fdf8b506d 100644 static void spapr_instance_init(Object *obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3395,6 +3412,12 @@ static void spapr_instance_init(Object *obj) +@@ -3425,6 +3442,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, "host-serial", "Host serial number to advertise in guest device tree"); @@ -77,7 +81,7 @@ index a4372ba189..5fdf8b506d 100644 } static void spapr_machine_finalizefn(Object *obj) -@@ -4652,6 
+4675,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4682,6 +4705,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vmc->client_architecture_support = spapr_vof_client_architecture_support; vmc->quiesce = spapr_vof_quiesce; vmc->setprop = spapr_vof_setprop; @@ -85,15 +89,15 @@ index a4372ba189..5fdf8b506d 100644 } static const TypeInfo spapr_machine_info = { -@@ -4703,6 +4727,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4733,6 +4757,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-7.0 + * pseries-7.2 */ -@@ -4830,6 +4855,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4882,6 +4907,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -101,7 +105,7 @@ index a4372ba189..5fdf8b506d 100644 /* * pseries-4.0 -@@ -4849,6 +4875,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4901,6 +4927,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; return true; } @@ -110,7 +114,7 @@ index a4372ba189..5fdf8b506d 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5176,6 +5204,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5228,6 +5256,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -371,7 +375,7 @@ index fcb5dfe792..ab8fb5bf62 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index f5c33dcc86..4a68e0a901 100644 +index 04a95669ab..d5f4cf5e03 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -154,6 +154,7 @@ 
struct SpaprMachineClass { @@ -382,7 +386,7 @@ index f5c33dcc86..4a68e0a901 100644 bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, -@@ -241,6 +242,9 @@ struct SpaprMachineState { +@@ -256,6 +257,9 @@ struct SpaprMachineState { /* Set by -boot */ char *boot_device; @@ -417,11 +421,23 @@ index 7949a24f5a..f207a9ba01 100644 { const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); +diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c +index cd3ff700ac..1cb49c8087 100644 +--- a/target/ppc/cpu-models.c ++++ b/target/ppc/cpu-models.c +@@ -746,6 +746,7 @@ + /* PowerPC CPU aliases */ + + PowerPCCPUAlias ppc_cpu_aliases[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "405", "405d4" }, + { "405cr", "405crc" }, + { "405gp", "405gpd" }, diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 047b24ba50..79c5ac50b9 100644 +index 81d4263a07..508fbed90b 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1462,6 +1462,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1467,6 +1467,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -430,10 +446,10 @@ index 047b24ba50..79c5ac50b9 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index dc93b99189..154888cce5 100644 +index 7c25348b7b..83671c955f 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c -@@ -90,6 +90,7 @@ static int cap_ppc_nested_kvm_hv; +@@ -89,6 +89,7 @@ static int cap_ppc_nested_kvm_hv; static int cap_large_decr; static int cap_fwnmi; static int cap_rpt_invalidate; @@ -441,7 +457,7 @@ index dc93b99189..154888cce5 100644 static uint32_t debug_inst_opcode; -@@ -137,6 +138,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) +@@ -136,6 +137,7 @@ 
int kvm_arch_init(MachineState *ms, KVMState *s) cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); kvmppc_get_cpu_characteristics(s); cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); @@ -449,7 +465,7 @@ index dc93b99189..154888cce5 100644 cap_large_decr = kvmppc_get_dec_bits(); cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); /* -@@ -2563,6 +2565,16 @@ int kvmppc_has_cap_rpt_invalidate(void) +@@ -2570,6 +2572,16 @@ int kvmppc_has_cap_rpt_invalidate(void) return cap_rpt_invalidate; } @@ -466,9 +482,9 @@ index dc93b99189..154888cce5 100644 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); -@@ -2959,3 +2971,18 @@ bool kvm_arch_cpu_check_are_resettable(void) +@@ -2970,3 +2982,18 @@ bool kvm_arch_cpu_check_are_resettable(void) + void kvm_arch_accel_class_init(ObjectClass *oc) { - return true; } + +void kvmppc_svm_allow(Error **errp) diff --git a/SOURCES/0009-Add-s390x-machine-types.patch b/SOURCES/0009-Add-s390x-machine-types.patch index 2d8b554..5860009 100644 --- a/SOURCES/0009-Add-s390x-machine-types.patch +++ b/SOURCES/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 680f343e58a50a99d17bc7dedd3ee90980912023 Mon Sep 17 00:00:00 2001 +From 1973257ed781a93943f27f1518933e8c09c50f88 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -7,6 +7,10 @@ Adding changes to add RHEL machine types for s390x architecture. 
Signed-off-by: Miroslav Rezanina -- + +Rebase changes (7.1.0 rc0): +- Moved adding rhel_old_machine_deprecation variable to general machine types commit + Merged patches (6.1.0): - 64a9a5c971 hw/s390x: Remove the RHEL7-only machine type - 395516d62b redhat: s390x: add rhel-8.5.0 compat machine @@ -18,62 +22,51 @@ Merged patches (7.0.0): - e6ff4de4f7 redhat: Add s390x machine type compatibility handling for the rebase to v6.2 - 4b0efa7e21 redhat: Add rhel8.6.0 and rhel9.0.0 machine types for s390x - dcc64971bf RHEL: mark old machine types as deprecated (partialy) + +Merged patches (7.1.0 rc0): +- 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (only hw/s390x/s390-virtio-ccw.c chunk) +- c8ad21ca31 redhat: Update s390x machine type compatibility for rebase to QEMU 7.0.0 +- 5bcf8d874c target/s390x: deprecate CPUs older than z14 + +Merged patches (7.2.0 rc0): +- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) --- - hw/core/machine.c | 6 +++ - hw/s390x/s390-virtio-ccw.c | 104 ++++++++++++++++++++++++++++++++++++- - include/hw/boards.h | 2 + - 3 files changed, 111 insertions(+), 1 deletion(-) + hw/s390x/s390-virtio-ccw.c | 108 +++++++++++++++++++++++++++++++ + target/s390x/cpu_models.c | 11 ++++ + target/s390x/cpu_models.h | 2 + + target/s390x/cpu_models_sysemu.c | 2 + + 4 files changed, 123 insertions(+) -diff --git a/hw/core/machine.c b/hw/core/machine.c -index ea430d844e..77202a3570 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -37,6 +37,12 @@ - #include "hw/virtio/virtio.h" - #include "hw/virtio/virtio-pci.h" - -+/* -+ * RHEL only: machine types for previous major releases are deprecated -+ */ -+const char *rhel_old_machine_deprecation = -+ "machine types for previous major releases are deprecated"; -+ - /* - * Mostly the same as hw_compat_6_0 and hw_compat_6_1 - */ diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 90480e7cf9..ec4176a1e0 100644 +index 2e64ffab45..8d5221fbb1 100644 --- 
a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -767,7 +767,7 @@ bool css_migration_enabled(void) - { \ - MachineClass *mc = MACHINE_CLASS(oc); \ - ccw_machine_##suffix##_class_options(mc); \ -- mc->desc = "VirtIO-ccw based S390 machine v" verstr; \ -+ mc->desc = "VirtIO-ccw based S390 machine " verstr; \ - if (latest) { \ - mc->alias = "s390-ccw-virtio"; \ - mc->is_default = true; \ -@@ -791,6 +791,7 @@ bool css_migration_enabled(void) +@@ -823,6 +823,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_7_0_instance_options(MachineState *machine) + static void ccw_machine_7_2_instance_options(MachineState *machine) { } -@@ -1115,6 +1116,107 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1186,6 +1187,113 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); +#endif + ++ +static void ccw_machine_rhel900_instance_options(MachineState *machine) +{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; ++ ++ s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); +} + +static void ccw_machine_rhel900_class_options(MachineClass *mc) +{ ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); +} +DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); + @@ -170,17 +163,84 @@ index 90480e7cf9..ec4176a1e0 100644 static void ccw_machine_register_types(void) { -diff --git a/include/hw/boards.h b/include/hw/boards.h -index c90a19b4d1..bf59275f18 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -470,4 +470,6 @@ extern const size_t hw_compat_rhel_8_0_len; - extern GlobalProperty hw_compat_rhel_7_6[]; - extern const size_t hw_compat_rhel_7_6_len; - -+extern const char 
*rhel_old_machine_deprecation; +diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c +index c3a4f80633..739770dc15 100644 +--- a/target/s390x/cpu_models.c ++++ b/target/s390x/cpu_models.c +@@ -45,6 +45,9 @@ + * of a following release have been a superset of the previous release. With + * generation 15 one base feature and one optional feature have been deprecated. + */ ++ ++#define RHEL_CPU_DEPRECATION "use at least 'z14', or 'host' / 'qemu' / 'max'" + - #endif + static S390CPUDef s390_cpu_defs[] = { + CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), + CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), +@@ -854,22 +857,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) + static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) + { + S390CPUClass *xcc = S390_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + /* all base models are migration safe */ + xcc->cpu_def = (const S390CPUDef *) data; + xcc->is_migration_safe = true; + xcc->is_static = true; + xcc->desc = xcc->cpu_def->desc; ++ if (xcc->cpu_def->gen < 14) { ++ cc->deprecation_note = RHEL_CPU_DEPRECATION; ++ } + } + + static void s390_cpu_model_class_init(ObjectClass *oc, void *data) + { + S390CPUClass *xcc = S390_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + /* model that can change between QEMU versions */ + xcc->cpu_def = (const S390CPUDef *) data; + xcc->is_migration_safe = true; + xcc->desc = xcc->cpu_def->desc; ++ if (xcc->cpu_def->gen < 14) { ++ cc->deprecation_note = RHEL_CPU_DEPRECATION; ++ } + } + + static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) +diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h +index fb1adc8b21..d76745afa9 100644 +--- a/target/s390x/cpu_models.h ++++ b/target/s390x/cpu_models.h +@@ -38,6 +38,8 @@ struct S390CPUDef { + S390FeatBitmap full_feat; + /* used to init full_feat from generated data */ + S390FeatInit full_init; ++ /* if 
deprecated, provides a suggestion */ ++ const char *deprecation_note; + }; + + /* CPU model based on a CPU definition */ +diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c +index d086b1c39c..1b9cc66405 100644 +--- a/target/s390x/cpu_models_sysemu.c ++++ b/target/s390x/cpu_models_sysemu.c +@@ -60,6 +60,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) + CpuDefinitionInfo *info; + char *name = g_strdup(object_class_get_name(klass)); + S390CPUClass *scc = S390_CPU_CLASS(klass); ++ CPUClass *cc = CPU_CLASS(klass); + + /* strip off the -s390x-cpu */ + g_strrstr(name, "-" TYPE_S390_CPU)[0] = 0; +@@ -69,6 +70,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) + info->migration_safe = scc->is_migration_safe; + info->q_static = scc->is_static; + info->q_typename = g_strdup(object_class_get_name(klass)); ++ info->deprecated = !!cc->deprecation_note; + /* check for unavailable features */ + if (cpu_list_data->model) { + Object *obj; -- 2.31.1 diff --git a/SOURCES/0010-Add-x86_64-machine-types.patch b/SOURCES/0010-Add-x86_64-machine-types.patch index 7c48967..181342a 100644 --- a/SOURCES/0010-Add-x86_64-machine-types.patch +++ b/SOURCES/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 427a575ca57966bc72e1ebf218081da530d435d7 Mon Sep 17 00:00:00 2001 +From 0935624ccdddc286d6eeeb0c1b70d78983c21aa2 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -31,45 +31,32 @@ Merged patches (7.0.0): - 6110d865e5 x86: Add q35 RHEL 9.0.0 machine type - dcc64971bf RHEL: mark old machine types as deprecated (partialy) - 6b396f182b RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 + +Merged patches (7.1.0 rc0): +- 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/i386/pc.c chunk) +- 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (x86_64 specific changes) +- 35b5c8554f target/i386: deprecate CPUs older 
than x86_64-v2 ABI + +Merged patches (7.2.0 rc0): +- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) --- - hw/core/machine.c | 10 ++ - hw/i386/pc.c | 135 +++++++++++++++++++++- - hw/i386/pc_piix.c | 79 ++++++++++++- - hw/i386/pc_q35.c | 227 ++++++++++++++++++++++++++++++++++++- + hw/i386/pc.c | 147 ++++++++++++++++++++++- + hw/i386/pc_piix.c | 86 +++++++++++++- + hw/i386/pc_q35.c | 234 ++++++++++++++++++++++++++++++++++++- hw/s390x/s390-virtio-ccw.c | 1 + - include/hw/boards.h | 5 + - include/hw/i386/pc.h | 24 ++++ + include/hw/boards.h | 2 + + include/hw/i386/pc.h | 27 +++++ + target/i386/cpu.c | 21 ++++ target/i386/kvm/kvm-cpu.c | 1 + target/i386/kvm/kvm.c | 4 + tests/qtest/pvpanic-test.c | 5 +- - 10 files changed, 484 insertions(+), 7 deletions(-) + 10 files changed, 521 insertions(+), 7 deletions(-) -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 77202a3570..28989b6e7b 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -43,6 +43,16 @@ - const char *rhel_old_machine_deprecation = - "machine types for previous major releases are deprecated"; - -+GlobalProperty hw_compat_rhel_8_6[] = { -+ /* hw_compat_rhel_8_6 bz 2065589 */ -+ /* -+ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so -+ * we need do disable it downstream on the latest hw_compat_rhel_8. -+ */ -+ { "vhost-vsock-device", "seqpacket", "off" }, -+}; -+const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); -+ - /* - * Mostly the same as hw_compat_6_0 and hw_compat_6_1 - */ diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index fd55fc725c..263d882af6 100644 +index 546b703cb4..c7b1350e64 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -375,6 +375,137 @@ GlobalProperty pc_compat_1_4[] = { +@@ -393,6 +393,149 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -78,6 +65,12 @@ index fd55fc725c..263d882af6 100644 + * machine type. 
+ */ +GlobalProperty pc_rhel_compat[] = { ++ /* we don't support s3/s4 suspend */ ++ { "PIIX4_PM", "disable_s3", "1" }, ++ { "PIIX4_PM", "disable_s4", "1" }, ++ { "ICH9-LPC", "disable_s3", "1" }, ++ { "ICH9-LPC", "disable_s4", "1" }, ++ + { TYPE_X86_CPU, "host-phys-bits", "on" }, + { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, + { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, @@ -89,6 +82,12 @@ index fd55fc725c..263d882af6 100644 +}; +const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_0_compat[] = { ++ /* pc_rhel_9_0_compat from pc_compat_6_2 */ ++ { "virtio-mem", "unplugged-inaccessible", "off" }, ++}; ++const size_t pc_rhel_9_0_compat_len = G_N_ELEMENTS(pc_rhel_9_0_compat); ++ +GlobalProperty pc_rhel_8_5_compat[] = { + /* pc_rhel_8_5_compat from pc_compat_6_0 */ + { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, @@ -207,7 +206,7 @@ index fd55fc725c..263d882af6 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1738,6 +1869,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1907,6 +2050,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->pvh_enabled = true; pcmc->kvmclock_create_always = true; assert(!mc->get_hotplug_handler); @@ -215,7 +214,7 @@ index fd55fc725c..263d882af6 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1748,7 +1880,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1917,7 +2061,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->block_default_type = IF_IDE; @@ -226,10 +225,10 @@ index fd55fc725c..263d882af6 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index c797e98312..0cacc0d623 100644 +index 
0985ff67d2..173a1fd10b 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -50,6 +50,7 @@ +@@ -53,6 +53,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -237,7 +236,7 @@ index c797e98312..0cacc0d623 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -174,8 +175,8 @@ static void pc_init1(MachineState *machine, +@@ -184,8 +185,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -248,7 +247,7 @@ index c797e98312..0cacc0d623 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -314,6 +315,7 @@ static void pc_init1(MachineState *machine, +@@ -334,6 +335,7 @@ static void pc_init1(MachineState *machine, * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). */ @@ -256,7 +255,7 @@ index c797e98312..0cacc0d623 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -967,3 +969,76 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -896,3 +898,83 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -305,6 +304,13 @@ index c797e98312..0cacc0d623 100644 + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ pcmc->legacy_no_rng_seed = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_1, ++ hw_compat_rhel_9_1_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_9_0, ++ hw_compat_rhel_9_0_len); ++ compat_props_add(m->compat_props, pc_rhel_9_0_compat, ++ pc_rhel_9_0_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_6, + hw_compat_rhel_8_6_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_5, @@ -334,10 +340,10 @@ index c797e98312..0cacc0d623 100644 +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", 
pc_init_rhel760, + pc_machine_rhel760_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index b695f88c45..157160e069 100644 +index ea582254e3..97c3630021 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) +@@ -198,8 +198,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -348,7 +354,7 @@ index b695f88c45..157160e069 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -342,6 +342,7 @@ static void pc_q35_init(MachineState *machine) +@@ -352,6 +352,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -356,7 +362,7 @@ index b695f88c45..157160e069 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -631,3 +632,225 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -666,3 +667,232 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -397,6 +403,13 @@ index b695f88c45..157160e069 100644 + m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.0.0"; ++ pcmc->legacy_no_rng_seed = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_1, ++ hw_compat_rhel_9_1_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_9_0, ++ hw_compat_rhel_9_0_len); ++ compat_props_add(m->compat_props, pc_rhel_9_0_compat, ++ pc_rhel_9_0_compat_len); +} + +DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900, @@ -583,10 +596,10 @@ index b695f88c45..157160e069 100644 +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index ec4176a1e0..465a2a09d2 100644 +index 
8d5221fbb1..ba640e3d9e 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -1136,6 +1136,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) +@@ -1213,6 +1213,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) static void ccw_machine_rhel860_class_options(MachineClass *mc) { ccw_machine_rhel900_class_options(mc); @@ -595,7 +608,7 @@ index ec4176a1e0..465a2a09d2 100644 /* All RHEL machines for prior major releases are deprecated */ mc->deprecation_reason = rhel_old_machine_deprecation; diff --git a/include/hw/boards.h b/include/hw/boards.h -index bf59275f18..d1555665df 100644 +index 2209d4e416..fd75f551b1 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -266,6 +266,8 @@ struct MachineClass { @@ -607,27 +620,20 @@ index bf59275f18..d1555665df 100644 bool ignore_boot_device_suffixes; bool smbus_no_migration_support; bool nvdimm_supported; -@@ -449,6 +451,9 @@ extern const size_t hw_compat_2_2_len; - extern GlobalProperty hw_compat_2_1[]; - extern const size_t hw_compat_2_1_len; - -+extern GlobalProperty hw_compat_rhel_8_6[]; -+extern const size_t hw_compat_rhel_8_6_len; -+ - extern GlobalProperty hw_compat_rhel_8_5[]; - extern const size_t hw_compat_rhel_8_5_len; - diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 91331059d9..419a6ec24b 100644 +index 3754eaa97d..4266fe2fdb 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -289,6 +289,30 @@ extern const size_t pc_compat_1_5_len; +@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_9_0_compat[]; ++extern const size_t pc_rhel_9_0_compat_len; ++ +extern GlobalProperty pc_rhel_8_5_compat[]; +extern const size_t pc_rhel_8_5_compat_len; + @@ -649,11 +655,165 @@ index 91331059d9..419a6ec24b 100644 +extern 
GlobalProperty pc_rhel_7_6_compat[]; +extern const size_t pc_rhel_7_6_compat_len; + - /* Helper for setting model-id for CPU models that changed model-id - * depending on QEMU versions up to QEMU 2.4. + #define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \ + static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ + { \ +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 22b681ca37..f7c526cbe6 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1832,9 +1832,13 @@ static const CPUCaches epyc_milan_cache_info = { + * PT in VMX operation */ + ++#define RHEL_CPU_DEPRECATION \ ++ "use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max'" ++ + static const X86CPUDefinition builtin_x86_defs[] = { + { + .name = "qemu64", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 15, +@@ -1855,6 +1859,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "phenom", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 16, +@@ -1887,6 +1892,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "core2duo", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -1929,6 +1935,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "kvm64", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 0xd, + .vendor = CPUID_VENDOR_INTEL, + .family = 15, +@@ -1970,6 +1977,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "qemu32", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 4, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -1984,6 +1992,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "kvm32", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_INTEL, + .family = 15, +@@ -2014,6 +2023,7 @@ static const X86CPUDefinition 
builtin_x86_defs[] = { + }, + { + .name = "coreduo", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2047,6 +2057,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "486", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 1, + .vendor = CPUID_VENDOR_INTEL, + .family = 4, +@@ -2059,6 +2070,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 1, + .vendor = CPUID_VENDOR_INTEL, + .family = 5, +@@ -2071,6 +2083,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium2", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 2, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2083,6 +2096,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium3", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 3, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2095,6 +2109,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "athlon", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 2, + .vendor = CPUID_VENDOR_AMD, + .family = 6, +@@ -2110,6 +2125,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "n270", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2135,6 +2151,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Conroe", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2175,6 +2192,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Penryn", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -3762,6 +3780,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G1", ++ .deprecation_note = 
RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 15, +@@ -3782,6 +3801,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G2", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 15, +@@ -3804,6 +3824,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G3", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 16, diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c -index 5eb955ce9a..74c1396a93 100644 +index 7237378a7d..7b8a3d5af0 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -137,6 +137,7 @@ static PropValue kvm_default_props[] = { @@ -665,10 +825,10 @@ index 5eb955ce9a..74c1396a93 100644 }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index 9cf8e03669..6d1e009443 100644 +index a213209379..81526a1575 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -3488,6 +3488,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3707,6 +3707,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -676,7 +836,7 @@ index 9cf8e03669..6d1e009443 100644 kvm_msr_buf_reset(cpu); -@@ -3822,6 +3823,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -4062,6 +4063,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -687,7 +847,7 @@ index 9cf8e03669..6d1e009443 100644 case MSR_KVM_ASYNC_PF_INT: env->async_pf_int_msr = msrs[i].data; diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c -index 6dcad2db49..580c2c43d2 100644 +index bc7b7dfc39..96e6dee3a1 100644 --- a/tests/qtest/pvpanic-test.c +++ b/tests/qtest/pvpanic-test.c @@ -17,7 +17,7 @@ static void test_panic_nopause(void) diff --git a/SOURCES/0011-Enable-make-check.patch b/SOURCES/0011-Enable-make-check.patch index 
832b38d..d0be8e6 100644 --- a/SOURCES/0011-Enable-make-check.patch +++ b/SOURCES/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 5e419e5e0a721bdbbfa6d9b82c8be5c5b3d26a01 Mon Sep 17 00:00:00 2001 +From badfb1290c8eea8a2e1769b2392c7899d5077698 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -24,22 +24,88 @@ Rebase changes (7.0.0): - Remove unnecessary changes in iotest 051 - Remove changes in bios-tables-test.c and prom-env-test.c qtests +Rebase changes (7.1.0 rc0): +- Disable bcm2835-dma-test (added upstream) + Merged patches (6.1.0): - 2f129df7d3 redhat: Enable the 'test-block-iothread' test again + +Merged patches (7.1.0 rc0): +- 64d736640e RHEL-only: tests/avocado: Switch aarch64 tests from a53 to a57 --- .distro/qemu-kvm.spec.template | 5 ++--- + tests/avocado/replay_kernel.py | 2 +- + tests/avocado/reverse_debugging.py | 2 +- + tests/avocado/tcg_plugins.py | 6 +++--- tests/qtest/fuzz-e1000e-test.c | 2 +- tests/qtest/fuzz-virtio-scsi-test.c | 2 +- tests/qtest/intel-hda-test.c | 2 +- tests/qtest/libqos/meson.build | 2 +- tests/qtest/lpc-ich9-test.c | 2 +- - tests/qtest/meson.build | 4 ---- + tests/qtest/meson.build | 7 +------ tests/qtest/usb-hcd-xhci-test.c | 4 ++++ tests/qtest/virtio-net-failover.c | 1 + - 9 files changed, 12 insertions(+), 12 deletions(-) + 12 files changed, 18 insertions(+), 19 deletions(-) +diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py +index 00a26e4a0c..fe5ecf238a 100644 +--- a/tests/avocado/replay_kernel.py ++++ b/tests/avocado/replay_kernel.py +@@ -147,7 +147,7 @@ def test_aarch64_virt(self): + """ + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' + '/linux/releases/29/Everything/aarch64/os/images/pxeboot' +diff --git a/tests/avocado/reverse_debugging.py 
b/tests/avocado/reverse_debugging.py +index d2921e70c3..66d185ed42 100644 +--- a/tests/avocado/reverse_debugging.py ++++ b/tests/avocado/reverse_debugging.py +@@ -198,7 +198,7 @@ def test_aarch64_virt(self): + """ + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' + '/linux/releases/29/Everything/aarch64/os/images/pxeboot' +diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py +index 642d2e49e3..93b3afd823 100644 +--- a/tests/avocado/tcg_plugins.py ++++ b/tests/avocado/tcg_plugins.py +@@ -68,7 +68,7 @@ def test_aarch64_virt_insn(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +@@ -94,7 +94,7 @@ def test_aarch64_virt_insn_icount(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +@@ -120,7 +120,7 @@ def test_aarch64_virt_mem_icount(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c -index 66229e6096..947fba73b7 100644 +index 5052883fb6..b5286f4b12 100644 --- a/tests/qtest/fuzz-e1000e-test.c +++ b/tests/qtest/fuzz-e1000e-test.c @@ -17,7 +17,7 @@ static void test_lp1879531_eth_get_rss_ex_dst_addr(void) @@ -52,20 +118,20 @@ index 66229e6096..947fba73b7 100644 qtest_outl(s, 0xcf8, 
0x80001010); qtest_outl(s, 0xcfc, 0xe1020000); diff --git a/tests/qtest/fuzz-virtio-scsi-test.c b/tests/qtest/fuzz-virtio-scsi-test.c -index aaf6d10e18..43727d62ac 100644 +index e37b48b2cc..88647da054 100644 --- a/tests/qtest/fuzz-virtio-scsi-test.c +++ b/tests/qtest/fuzz-virtio-scsi-test.c @@ -19,7 +19,7 @@ static void test_mmio_oob_from_memory_region_cache(void) { QTestState *s; -- s = qtest_init("-M pc-q35-5.2 -display none -m 512M " -+ s = qtest_init("-M pc-q35-rhel8.4.0 -display none -m 512M " +- s = qtest_init("-M pc-q35-5.2 -m 512M " ++ s = qtest_init("-M pc-q35-rhel8.4.0 -m 512M " "-device virtio-scsi,num_queues=8,addr=03.0 "); qtest_outl(s, 0xcf8, 0x80001811); diff --git a/tests/qtest/intel-hda-test.c b/tests/qtest/intel-hda-test.c -index a58c98e4d1..c8387e39ce 100644 +index d4a8db6fd6..1a796ec15a 100644 --- a/tests/qtest/intel-hda-test.c +++ b/tests/qtest/intel-hda-test.c @@ -38,7 +38,7 @@ static void test_issue542_ich6(void) @@ -78,20 +144,20 @@ index a58c98e4d1..c8387e39ce 100644 qtest_outl(s, 0xcf8, 0x80000804); diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build -index e988d15791..46f7dcb81a 100644 +index 32f028872c..1e78a1a055 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build -@@ -41,7 +41,7 @@ libqos_srcs = files('../libqtest.c', +@@ -43,7 +43,7 @@ libqos_srcs = files( 'virtio-rng.c', 'virtio-scsi.c', 'virtio-serial.c', - 'virtio-iommu.c', +# 'virtio-iommu.c', + 'virtio-gpio.c', + 'generic-pcihost.c', - # qgraph machines: - 'aarch64-xlnx-zcu102-machine.c', diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c -index fe0bef9980..7a9d51579b 100644 +index 8ac95b89f7..cd2102555c 100644 --- a/tests/qtest/lpc-ich9-test.c +++ b/tests/qtest/lpc-ich9-test.c @@ -15,7 +15,7 @@ static void test_lp1878642_pci_bus_get_irq_level_assert(void) @@ -104,18 +170,18 @@ index fe0bef9980..7a9d51579b 100644 qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ diff --git a/tests/qtest/meson.build 
b/tests/qtest/meson.build -index d25f82bb5a..67cd32def1 100644 +index c07a5b1a5f..9df3f9f8b9 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build -@@ -73,7 +73,6 @@ qtests_i386 = \ +@@ -82,7 +82,6 @@ qtests_i386 = \ config_all_devices.has_key('CONFIG_Q35') and \ config_all_devices.has_key('CONFIG_VIRTIO_PCI') and \ slirp.found() ? ['virtio-net-failover'] : []) + \ - (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ qtests_pci + \ + qtests_cxl + \ ['fdc-test', - 'ide-test', -@@ -86,7 +85,6 @@ qtests_i386 = \ +@@ -96,7 +95,6 @@ qtests_i386 = \ 'drive_del-test', 'tco-test', 'cpu-plug-test', @@ -123,7 +189,7 @@ index d25f82bb5a..67cd32def1 100644 'vmgenid-test', 'migration-test', 'test-x86-cpuid-compat', -@@ -216,7 +214,6 @@ qtests_arm = \ +@@ -209,15 +207,13 @@ qtests_arm = \ # TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional qtests_aarch64 = \ @@ -131,7 +197,16 @@ index d25f82bb5a..67cd32def1 100644 (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-test'] : []) + \ (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) + \ (config_all_devices.has_key('CONFIG_XLNX_ZYNQMP_ARM') ? ['xlnx-can-test', 'fuzz-xlnx-dp-test'] : []) + \ -@@ -231,7 +228,6 @@ qtests_s390x = \ + ['arm-cpu-features', + 'numa-test', + 'boot-serial-test', +- 'migration-test', +- 'bcm2835-dma-test'] ++ 'migration-test'] + + qtests_s390x = \ + (slirp.found() ? ['pxe-test', 'test-netfilter'] : []) + \ +@@ -225,7 +221,6 @@ qtests_s390x = \ (config_host.has_key('CONFIG_POSIX') ? 
['test-filter-redirector'] : []) + \ ['boot-serial-test', 'drive_del-test', @@ -170,7 +245,7 @@ index 10ef9d2a91..3855873050 100644 qtest_start("-device nec-usb-xhci,id=xhci" diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c -index 78811f1c92..44de8af00c 100644 +index 4a809590bf..1bf3fa641c 100644 --- a/tests/qtest/virtio-net-failover.c +++ b/tests/qtest/virtio-net-failover.c @@ -25,6 +25,7 @@ diff --git a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index c9e42b2..477a75d 100644 --- a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From c358fd4c224a9c3f64b4a8fff34cc6b1dc201fa0 Mon Sep 17 00:00:00 2001 +From 0804844e4755377be6d2ebad578794ad9f4f3f31 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -32,20 +32,20 @@ Signed-off-by: Bandan Das 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 67a183f17b..1e20f9fd59 100644 +index 939dcc3d4a..acbc6673ce 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -45,6 +45,9 @@ - - #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" +@@ -48,6 +48,9 @@ + /* Protected by BQL */ + static KVMRouteChange vfio_route_change; +/* RHEL only: Set once for the first assigned dev */ +static uint16_t device_limit; + static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); - -@@ -2810,9 +2813,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + static void vfio_msi_disable_common(VFIOPCIDevice *vdev); +@@ -2854,9 +2857,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) ssize_t len; struct stat st; int groupid; @@ -74,10 +74,10 @@ index 67a183f17b..1e20f9fd59 100644 + return; + } + - if 
(!vdev->vbasedev.sysfsdev) { + if (!vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3249,6 +3273,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3293,6 +3317,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -88,10 +88,10 @@ index 67a183f17b..1e20f9fd59 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 64777516d1..e0fe6ca97e 100644 +index 7c236a52f4..7b7d036a8f 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h -@@ -139,6 +139,7 @@ struct VFIOPCIDevice { +@@ -140,6 +140,7 @@ struct VFIOPCIDevice { EventNotifier err_notifier; EventNotifier req_notifier; int (*resetfn)(struct VFIOPCIDevice *); diff --git a/SOURCES/0013-Add-support-statement-to-help-output.patch b/SOURCES/0013-Add-support-statement-to-help-output.patch index 4826ea4..022f194 100644 --- a/SOURCES/0013-Add-support-statement-to-help-output.patch +++ b/SOURCES/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From ba0c7a5f6b9a1f75666db6b3b795ddf03695dc26 Mon Sep 17 00:00:00 2001 +From 283a0e258dc2f3b83c58e6f948bafe430cd2c1d5 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -21,10 +21,10 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index 6f646531a0..9d5dab43d2 100644 +index 5115221efe..17188df528 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c -@@ -831,9 +831,17 @@ static void version(void) +@@ -834,9 +834,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -42,7 +42,7 @@ index 6f646531a0..9d5dab43d2 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", 
g_get_prgname()); -@@ -859,6 +867,7 @@ static void help(int exitcode) +@@ -862,6 +870,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); diff --git a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch new file mode 100644 index 0000000..e39555b --- /dev/null +++ b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -0,0 +1,61 @@ +From d8ded821aa698b3b03bd9089fbd6c2b33da87b9e Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 8 Jul 2020 08:35:50 +0200 +Subject: Use qemu-kvm in documentation instead of qemu-system- + +Patchwork-id: 62380 +O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 +Bugzilla: 1140620 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi + +From: Miroslav Rezanina + +We change the name and location of qemu-kvm binaries. Update documentation +to reflect this change. Only architectures available in RHEL are updated. + +Signed-off-by: Miroslav Rezanina +--- + docs/defs.rst.inc | 4 ++-- + qemu-options.hx | 10 +++++----- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc +index 52d6454b93..d74dbdeca9 100644 +--- a/docs/defs.rst.inc ++++ b/docs/defs.rst.inc +@@ -9,7 +9,7 @@ + but the manpages will end up misrendered with following normal text + incorrectly in boldface. + +-.. |qemu_system| replace:: qemu-system-x86_64 +-.. |qemu_system_x86| replace:: qemu-system-x86_64 ++.. |qemu_system| replace:: qemu-kvm ++.. |qemu_system_x86| replace:: qemu-kvm + .. |I2C| replace:: I\ :sup:`2`\ C + .. 
|I2S| replace:: I\ :sup:`2`\ S +diff --git a/qemu-options.hx b/qemu-options.hx +index 7f99d15b23..ea02ca3a45 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -3300,11 +3300,11 @@ SRST + + :: + +- qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ +- -numa node,memdev=mem \ +- -chardev socket,id=chr0,path=/path/to/socket \ +- -netdev type=vhost-user,id=net0,chardev=chr0 \ +- -device virtio-net-pci,netdev=net0 ++ qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 + + ``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]`` + Establish a vhost-vdpa netdev. +-- +2.31.1 + diff --git a/SOURCES/0014-globally-limit-the-maximum-number-of-CPUs.patch b/SOURCES/0014-globally-limit-the-maximum-number-of-CPUs.patch deleted file mode 100644 index 6764a84..0000000 --- a/SOURCES/0014-globally-limit-the-maximum-number-of-CPUs.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 9ebfd2f6cfa8e79c92e58fd169f90cc768fb865a Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Tue, 21 Jan 2014 10:46:52 +0100 -Subject: globally limit the maximum number of CPUs - -We now globally limit the number of VCPUs. -Especially, there is no way one can specify more than -max_cpus VCPUs for a VM. - -This allows us the restore the ppc max_cpus limitation to the upstream -default and minimize the ppc hack in kvm-all.c. 
- -Signed-off-by: David Hildenbrand -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo Cesar Lemes de Paula ---- - accel/kvm/kvm-all.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 5f1377ca04..fdf0e4d429 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -2430,6 +2430,18 @@ static int kvm_init(MachineState *ms) - soft_vcpus_limit = kvm_recommended_vcpus(s); - hard_vcpus_limit = kvm_max_vcpus(s); - -+#ifdef HOST_PPC64 -+ /* -+ * On POWER, the kernel advertises a soft limit based on the -+ * number of CPU threads on the host. We want to allow exceeding -+ * this for testing purposes, so we don't want to set hard limit -+ * to soft limit as on x86. -+ */ -+#else -+ /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ -+ hard_vcpus_limit = soft_vcpus_limit; -+#endif -+ - while (nc->name) { - if (nc->num > soft_vcpus_limit) { - warn_report("Number of %s cpus requested (%d) exceeds " --- -2.31.1 - diff --git a/SOURCES/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/SOURCES/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch new file mode 100644 index 0000000..2bedb0b --- /dev/null +++ b/SOURCES/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -0,0 +1,60 @@ +From 9c6acadb444c9300d7c18b6939ce4f96484aeacc Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Wed, 6 Feb 2019 03:58:56 +0000 +Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts + +RH-Author: David Gibson +Message-id: <20190206035856.19058-1-dgibson@redhat.com> +Patchwork-id: 84246 +O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts +Bugzilla: 1653590 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Serhii Popovych +RH-Acked-by: Thomas Huth + +Most current POWER guests require 64kiB page support, so that's the default +for the cap-hpt-max-pagesize option in qemu which limits available guest 
+page sizes. We warn if the value is set smaller than that, but don't +outright fail upstream, because we need to allow for the possibility of +guest (and/or host) kernels configured for 4kiB page sizes. + +Downstream, however, we simply don't support 4kiB pagesize configured +kernels in guest or host, so we can have qemu simply error out in this +situation. + +Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified + it failed immediately with a qemu error + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr_caps.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c +index b4283055c1..59b88aadff 100644 +--- a/hw/ppc/spapr_caps.c ++++ b/hw/ppc/spapr_caps.c +@@ -329,12 +329,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) + { ++#if 0 /* disabled for RHEL */ + if (val < 12) { + error_setg(errp, "Require at least 4kiB hpt-max-page-size"); + return; + } else if (val < 16) { + warn_report("Many guests require at least 64kiB hpt-max-page-size"); + } ++#else /* Only page sizes >=64kiB supported for RHEL */ ++ if (val < 16) { ++ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); ++ return; ++ } ++#endif + + spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); + } +-- +2.31.1 + diff --git a/SOURCES/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/SOURCES/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch deleted file mode 100644 index 722484d..0000000 --- a/SOURCES/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 4b6c8cdc52fdf94d4098d278defb3833dce1d189 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 8 Jul 2020 08:35:50 +0200 -Subject: Use qemu-kvm in documentation instead of qemu-system- - -Patchwork-id: 62380 -O-Subject: [RHEV-7.1 
qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 -Bugzilla: 1140620 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Markus Armbruster -RH-Acked-by: Stefan Hajnoczi - -From: Miroslav Rezanina - -We change the name and location of qemu-kvm binaries. Update documentation -to reflect this change. Only architectures available in RHEL are updated. - -Signed-off-by: Miroslav Rezanina ---- - docs/defs.rst.inc | 4 ++-- - qemu-options.hx | 10 +++++----- - 2 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc -index 52d6454b93..d74dbdeca9 100644 ---- a/docs/defs.rst.inc -+++ b/docs/defs.rst.inc -@@ -9,7 +9,7 @@ - but the manpages will end up misrendered with following normal text - incorrectly in boldface. - --.. |qemu_system| replace:: qemu-system-x86_64 --.. |qemu_system_x86| replace:: qemu-system-x86_64 -+.. |qemu_system| replace:: qemu-kvm -+.. |qemu_system_x86| replace:: qemu-kvm - .. |I2C| replace:: I\ :sup:`2`\ C - .. |I2S| replace:: I\ :sup:`2`\ S -diff --git a/qemu-options.hx b/qemu-options.hx -index 34e9b32a5c..924f61ab6d 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -3233,11 +3233,11 @@ SRST - - :: - -- qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -- -numa node,memdev=mem \ -- -chardev socket,id=chr0,path=/path/to/socket \ -- -netdev type=vhost-user,id=net0,chardev=chr0 \ -- -device virtio-net-pci,netdev=net0 -+ qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -+ -numa node,memdev=mem \ -+ -chardev socket,id=chr0,path=/path/to/socket \ -+ -netdev type=vhost-user,id=net0,chardev=chr0 \ -+ -device virtio-net-pci,netdev=net0 - - ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` - Establish a vhost-vdpa netdev. 
--- -2.31.1 - diff --git a/SOURCES/0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/SOURCES/0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch new file mode 100644 index 0000000..cee5476 --- /dev/null +++ b/SOURCES/0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -0,0 +1,77 @@ +From 02fde2a0cbd679ebd4104fe5522572c31ec23abd Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 20 Aug 2021 18:25:12 +0200 +Subject: qcow2: Deprecation warning when opening v2 images rw +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 37: qcow2: Deprecation warning when opening v2 images rw +RH-Commit: [1/1] f450d0ae32d35063b28c72c4f2d2ebb9e6d8db3e (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 1951814 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Reitz +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé + +qcow2 v3 has been around for a long time (since QEMU 1.1/RHEL 7), so +there is no real reason any more to use it. People still using it might +do so unintentionally. Warn about it and suggest upgrading during the +RHEL 9 timeframe so that the code can possibly be disabled in RHEL 10. + +The warning is restricted to read-write mode and the system emulator. +The primary motivation for not having it in qemu-img is that 'qemu-img +amend' for upgrades would warn otherwise. It also avoids having to make +too many changes to the test suite. + +bdrv_uses_whitelist() is used as a proxy for deciding whether we are +running in a tool or the system emulator. This is not entirely clean, +but it's what is available and the same function qcow2_do_open() already +uses it this way for another warning. 
+ +Signed-off-by: Kevin Wolf + +patch_name: kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +present_in_specfile: true +location_in_specfile: 116 +--- +Rebase notes (6.1.0): +- Replace bs->read_only with bdrv_is_read_only +--- + block/qcow2.c | 6 ++++++ + tests/qemu-iotests/common.filter | 1 + + 2 files changed, 7 insertions(+) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 4d6666d3ff..d2ba263e9d 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -1336,6 +1336,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, + ret = -ENOTSUP; + goto fail; + } ++ if (header.version < 3 && !bdrv_is_read_only(bs) && bdrv_uses_whitelist()) { ++ warn_report_once("qcow2 v2 images are deprecated and may not be " ++ "supported in future versions. Please consider " ++ "upgrading the image with 'qemu-img amend " ++ "-o compat=v3'."); ++ } + + s->qcow_version = header.version; + +diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter +index cc9f1a5891..6a13757177 100644 +--- a/tests/qemu-iotests/common.filter ++++ b/tests/qemu-iotests/common.filter +@@ -83,6 +83,7 @@ _filter_qemu() + { + gsed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \ + -e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \ ++ -e "/qcow2 v2 images are deprecated/d" \ + -e $'s#\r##' # QEMU monitor uses \r\n line endings + } + +-- +2.31.1 + diff --git a/SOURCES/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/SOURCES/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch deleted file mode 100644 index 9f08024..0000000 --- a/SOURCES/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ /dev/null @@ -1,66 +0,0 @@ -From b72e04cb7e417d9e1c973223747ab3a27abda8b4 Mon Sep 17 00:00:00 2001 -From: Fam Zheng -Date: Wed, 14 Jun 2017 15:37:01 +0200 -Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] - -RH-Author: Fam Zheng -Message-id: 
<20170614153701.14757-1-famz@redhat.com> -Patchwork-id: 75613 -O-Subject: [RHV-7.4 qemu-kvm-rhev PATCH v3] virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] -Bugzilla: 1378816 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -We need a fix for RHEL 7.4 and 7.3.z, but unfortunately upstream isn't -ready. If it were, the changes will be too invasive. To have an idea: - -https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05400.html - -is an incomplete attempt to fix part of the issue, and the remaining -work unfortunately involve even more complex changes. - -As a band-aid, this partially reverts the effect of ef8875b -(virtio-scsi: Remove op blocker for dataplane, since v2.7). We cannot -simply revert that commit as a whole because we already shipped it in -qemu-kvm-rhev 7.3, since when, block jobs has been possible. We should -only block what has been broken. Also, faithfully reverting the above -commit means adding back the removed op blocker, but that is not enough, -because it still crashes when inserting media into an initially empty -scsi-cd. - -All in all, scsi-cd on virtio-scsi-dataplane has basically been unusable -unless the scsi-cd never enters an empty state, so, disable it -altogether. Otherwise it would be much more difficult to avoid -crashing. - -Signed-off-by: Fam Zheng -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. de Paula ---- - hw/scsi/virtio-scsi.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 34a968ecfb..7f6da33a8a 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -896,6 +896,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, - AioContext *old_context; - int ret; - -+ /* XXX: Remove this check once block backend is capable of handling -+ * AioContext change upon eject/insert. 
-+ * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if -+ * data plane is not used, both cases are safe for scsi-cd. */ -+ if (s->ctx && s->ctx != qemu_get_aio_context() && -+ object_dynamic_cast(OBJECT(dev), "scsi-cd")) { -+ error_setg(errp, "scsi-cd is not supported by data plane"); -+ return; -+ } - if (s->ctx && !s->dataplane_fenced) { - if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - return; --- -2.31.1 - diff --git a/SOURCES/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/SOURCES/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch deleted file mode 100644 index 2bc687c..0000000 --- a/SOURCES/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 64a06662cdea0ff62efb122be4eab506b2a842d9 Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Wed, 6 Feb 2019 03:58:56 +0000 -Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts - -RH-Author: David Gibson -Message-id: <20190206035856.19058-1-dgibson@redhat.com> -Patchwork-id: 84246 -O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts -Bugzilla: 1653590 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Serhii Popovych -RH-Acked-by: Thomas Huth - -Most current POWER guests require 64kiB page support, so that's the default -for the cap-hpt-max-pagesize option in qemu which limits available guest -page sizes. We warn if the value is set smaller than that, but don't -outright fail upstream, because we need to allow for the possibility of -guest (and/or host) kernels configured for 4kiB page sizes. - -Downstream, however, we simply don't support 4kiB pagesize configured -kernels in guest or host, so we can have qemu simply error out in this -situation. 
- -Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified - it failed immediately with a qemu error - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr_caps.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 655ab856a0..6aa7f93df9 100644 ---- a/hw/ppc/spapr_caps.c -+++ b/hw/ppc/spapr_caps.c -@@ -329,12 +329,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, - static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, - uint8_t val, Error **errp) - { -+#if 0 /* disabled for RHEL */ - if (val < 12) { - error_setg(errp, "Require at least 4kiB hpt-max-page-size"); - return; - } else if (val < 16) { - warn_report("Many guests require at least 64kiB hpt-max-page-size"); - } -+#else /* Only page sizes >=64kiB supported for RHEL */ -+ if (val < 16) { -+ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); -+ return; -+ } -+#endif - - spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); - } --- -2.31.1 - diff --git a/SOURCES/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch b/SOURCES/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch new file mode 100644 index 0000000..001880b --- /dev/null +++ b/SOURCES/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch @@ -0,0 +1,26 @@ +From 21ed34787b9492c2cfe3d8fc12a32748bcf02307 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 9 Nov 2022 07:08:32 -0500 +Subject: Addd 7.2 compat bits for RHEL 9.1 machine type + +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 9edec1ca05..3d851d34da 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -54,6 +54,8 @@ GlobalProperty hw_compat_rhel_9_1[] = { + { "arm-gicv3-common", "force-8-bit-prio", "on" }, + /* hw_compat_rhel_9_1 from hw_compat_7_0 */ + { "nvme-ns", "eui64-default", "on"}, 
++ /* hw_compat_rhel_9_1 from hw_compat_7_1 */ ++ { "virtio-device", "queue_reset", "false" }, + }; + const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); + +-- +2.31.1 + diff --git a/SOURCES/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/SOURCES/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch deleted file mode 100644 index d7401d5..0000000 --- a/SOURCES/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 54f9157a918e1404f2f17ce89a9c8b9088c1bc06 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 20 Aug 2021 18:25:12 +0200 -Subject: qcow2: Deprecation warning when opening v2 images rw -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -RH-MergeRequest: 37: qcow2: Deprecation warning when opening v2 images rw -RH-Commit: [1/1] f450d0ae32d35063b28c72c4f2d2ebb9e6d8db3e (kmwolf/centos-qemu-kvm) -RH-Bugzilla: 1951814 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Reitz -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé - -qcow2 v3 has been around for a long time (since QEMU 1.1/RHEL 7), so -there is no real reason any more to use it. People still using it might -do so unintentionally. Warn about it and suggest upgrading during the -RHEL 9 timeframe so that the code can possibly be disabled in RHEL 10. - -The warning is restricted to read-write mode and the system emulator. -The primary motivation for not having it in qemu-img is that 'qemu-img -amend' for upgrades would warn otherwise. It also avoids having to make -too many changes to the test suite. - -bdrv_uses_whitelist() is used as a proxy for deciding whether we are -running in a tool or the system emulator. This is not entirely clean, -but it's what is available and the same function qcow2_do_open() already -uses it this way for another warning. 
- -Signed-off-by: Kevin Wolf - -patch_name: kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -present_in_specfile: true -location_in_specfile: 116 ---- -Rebase notes (6.1.0): -- Replace bs->read_only with bdrv_is_read_only ---- - block/qcow2.c | 6 ++++++ - tests/qemu-iotests/common.filter | 1 + - 2 files changed, 7 insertions(+) - -diff --git a/block/qcow2.c b/block/qcow2.c -index b5c47931ef..a795e457ac 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -1337,6 +1337,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, - ret = -ENOTSUP; - goto fail; - } -+ if (header.version < 3 && !bdrv_is_read_only(bs) && bdrv_uses_whitelist()) { -+ warn_report_once("qcow2 v2 images are deprecated and may not be " -+ "supported in future versions. Please consider " -+ "upgrading the image with 'qemu-img amend " -+ "-o compat=v3'."); -+ } - - s->qcow_version = header.version; - -diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter -index cc9f1a5891..6a13757177 100644 ---- a/tests/qemu-iotests/common.filter -+++ b/tests/qemu-iotests/common.filter -@@ -83,6 +83,7 @@ _filter_qemu() - { - gsed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \ - -e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \ -+ -e "/qcow2 v2 images are deprecated/d" \ - -e $'s#\r##' # QEMU monitor uses \r\n line endings - } - --- -2.31.1 - diff --git a/SOURCES/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch b/SOURCES/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch deleted file mode 100644 index 285cd6b..0000000 --- a/SOURCES/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch +++ /dev/null @@ -1,135 +0,0 @@ -From 1d6439527aa6ccabb58208c94417778ccc19de39 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 9 Feb 2022 04:16:25 -0500 -Subject: WRB: Introduce RHEL 9.0.0 hw compat structure - -General compatibility structure for post RHEL 9.0.0 rebase. 
- -Signed-off-by: Miroslav Rezanina ---- - hw/core/machine.c | 9 +++++++++ - hw/i386/pc.c | 6 ++++++ - hw/i386/pc_piix.c | 4 ++++ - hw/i386/pc_q35.c | 4 ++++ - hw/s390x/s390-virtio-ccw.c | 2 ++ - include/hw/boards.h | 3 +++ - include/hw/i386/pc.h | 3 +++ - 7 files changed, 31 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 28989b6e7b..dffc3ef4ab 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -53,6 +53,15 @@ GlobalProperty hw_compat_rhel_8_6[] = { - }; - const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); - -+/* -+ * Mostly the same as hw_compat_6_2 -+ */ -+GlobalProperty hw_compat_rhel_9_0[] = { -+ /* hw_compat_rhel_9_0 from hw_compat_6_2 */ -+ { "PIIX4_PM", "x-not-migrate-acpi-index", "on"}, -+}; -+const size_t hw_compat_rhel_9_0_len = G_N_ELEMENTS(hw_compat_rhel_9_0); -+ - /* - * Mostly the same as hw_compat_6_0 and hw_compat_6_1 - */ -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 263d882af6..0886cfe3fe 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -391,6 +391,12 @@ GlobalProperty pc_rhel_compat[] = { - }; - const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); - -+GlobalProperty pc_rhel_9_0_compat[] = { -+ /* pc_rhel_9_0_compat from pc_compat_6_2 */ -+ { "virtio-mem", "unplugged-inaccessible", "off" }, -+}; -+const size_t pc_rhel_9_0_compat_len = G_N_ELEMENTS(pc_rhel_9_0_compat); -+ - GlobalProperty pc_rhel_8_5_compat[] = { - /* pc_rhel_8_5_compat from pc_compat_6_0 */ - { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 0cacc0d623..dc987fe93b 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1014,6 +1014,10 @@ static void pc_machine_rhel760_options(MachineClass *m) - pcmc->kvmclock_create_always = false; - /* From pc_i440fx_5_1_machine_options() */ - pcmc->pci_root_uid = 1; -+ compat_props_add(m->compat_props, hw_compat_rhel_9_0, -+ hw_compat_rhel_9_0_len); -+ compat_props_add(m->compat_props, pc_rhel_9_0_compat, -+ 
pc_rhel_9_0_compat_len); - compat_props_add(m->compat_props, hw_compat_rhel_8_6, - hw_compat_rhel_8_6_len); - compat_props_add(m->compat_props, hw_compat_rhel_8_5, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 157160e069..52c253c570 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -669,6 +669,10 @@ static void pc_q35_machine_rhel900_options(MachineClass *m) - m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; - pcmc->smbios_stream_product = "RHEL"; - pcmc->smbios_stream_version = "9.0.0"; -+ compat_props_add(m->compat_props, hw_compat_rhel_9_0, -+ hw_compat_rhel_9_0_len); -+ compat_props_add(m->compat_props, pc_rhel_9_0_compat, -+ pc_rhel_9_0_compat_len); - } - - DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900, -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 465a2a09d2..08e0f6a79b 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1118,12 +1118,14 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) - DEFINE_CCW_MACHINE(2_4, "2.4", false); - #endif - -+ - static void ccw_machine_rhel900_instance_options(MachineState *machine) - { - } - - static void ccw_machine_rhel900_class_options(MachineClass *mc) - { -+ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); - } - DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); - -diff --git a/include/hw/boards.h b/include/hw/boards.h -index d1555665df..635e45dd71 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -451,6 +451,9 @@ extern const size_t hw_compat_2_2_len; - extern GlobalProperty hw_compat_2_1[]; - extern const size_t hw_compat_2_1_len; - -+extern GlobalProperty hw_compat_rhel_9_0[]; -+extern const size_t hw_compat_rhel_9_0_len; -+ - extern GlobalProperty hw_compat_rhel_8_6[]; - extern const size_t hw_compat_rhel_8_6_len; - -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 419a6ec24b..a492c420b5 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h 
-@@ -292,6 +292,9 @@ extern const size_t pc_compat_1_4_len; - extern GlobalProperty pc_rhel_compat[]; - extern const size_t pc_rhel_compat_len; - -+extern GlobalProperty pc_rhel_9_0_compat[]; -+extern const size_t pc_rhel_9_0_compat_len; -+ - extern GlobalProperty pc_rhel_8_5_compat[]; - extern const size_t pc_rhel_8_5_compat_len; - --- -2.31.1 - diff --git a/SOURCES/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch b/SOURCES/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch new file mode 100644 index 0000000..2642b30 --- /dev/null +++ b/SOURCES/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch @@ -0,0 +1,47 @@ +From 27c188c6a4cbd908269cf06affd24025708ecb5c Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 17 Nov 2022 16:47:16 +0100 +Subject: redhat: Update s390x machine type compatibility for QEMU 7.2.0 update + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2143585 +Upstream Status: n/a (rhel-only) + +Add the compatibility handling for the rebase from QEMU 7.1 to 7.2, +i.e. the settings from ccw_machine_7_1_class_options() and +ccw_machine_7_1_instance_options() to the rhel9.1.0 machine type +(earlier settings have been added by previous rebases already). 
+ +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index ba640e3d9e..97e868ada0 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1195,12 +1195,21 @@ static void ccw_machine_rhel900_instance_options(MachineState *machine) + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; + + s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE); + } + + static void ccw_machine_rhel900_class_options(MachineClass *mc) + { ++ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, ++ }; ++ ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); ++ s390mc->max_threads = S390_MAX_CPUS; + } + DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); + +-- +2.31.1 + diff --git a/SOURCES/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch b/SOURCES/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch deleted file mode 100644 index d3b91d0..0000000 --- a/SOURCES/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch +++ /dev/null @@ -1,38 +0,0 @@ -From c8ad21ca31892f8798cf82508c2b2c61bf3b9895 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Mon, 4 Apr 2022 12:15:50 +0200 -Subject: redhat: Update s390x machine type compatibility for rebase to QEMU - 7.0.0 - -RH-Author: Thomas Huth -RH-MergeRequest: 143: Update machine type compatibility for QEMU 7.0.0 update [s390x] -RH-Commit: [23/23] 0ecf97d7bdddc50565b5779c64744b353f715cbd -RH-Bugzilla: 2064782 -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - 
-No s390x-specific machine class property updates required this time, -only an update to the default qemu cpu model. - -Signed-off-by: Thomas Huth ---- - hw/s390x/s390-virtio-ccw.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 08e0f6a79b..4a491d4988 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1121,6 +1121,9 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); - - static void ccw_machine_rhel900_instance_options(MachineState *machine) - { -+ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; -+ -+ s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); - } - - static void ccw_machine_rhel900_class_options(MachineClass *mc) --- -2.31.1 - diff --git a/SOURCES/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch b/SOURCES/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch new file mode 100644 index 0000000..cb69b93 --- /dev/null +++ b/SOURCES/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch @@ -0,0 +1,43 @@ +From c1a21266d8bed27f1ef1f705818fde5f9350b73f Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Wed, 23 Nov 2022 14:15:37 +0100 +Subject: redhat: aarch64: add rhel9.2.0 virt machine type + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2131982 +Upstream: RHEL only + +Signed-off-by: Cornelia Huck +--- + hw/arm/virt.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index dfcab40a73..0a94f31dd1 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3518,14 +3518,21 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + ++static void rhel920_virt_options(MachineClass *mc) ++{ ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) ++ + static void rhel900_virt_options(MachineClass *mc) + { + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + ++ rhel920_virt_options(mc); ++ + compat_props_add(mc->compat_props, arm_rhel_compat, 
arm_rhel_compat_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; + } +-DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) ++DEFINE_RHEL_MACHINE(9, 0, 0) +-- +2.31.1 + diff --git a/SOURCES/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch b/SOURCES/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch deleted file mode 100644 index f9535a8..0000000 --- a/SOURCES/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 38b89dc24551258b630f09d1c654b6c72b265c79 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 14 Apr 2022 14:58:43 +0100 -Subject: pc: Move s3/s4 suspend disabling to compat - -RH-Author: Dr. David Alan Gilbert -RH-MergeRequest: 155: 7.0 machine type fixes (x86) -RH-Commit: [26/26] 7d666032d5f5dab1444ebba085f92f2de4e86699 -RH-Bugzilla: 2064771 - -Our downstream patches currently have tweaks in the C code to disable -s3/s4; Thomas pointed out we can just set the property. - -Signed-off-by: Dr. 
David Alan Gilbert ---- - hw/acpi/ich9.c | 4 ++-- - hw/acpi/piix4.c | 4 ++-- - hw/i386/pc.c | 6 ++++++ - 3 files changed, 10 insertions(+), 4 deletions(-) - -diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index de1e401cdf..bd9bbade70 100644 ---- a/hw/acpi/ich9.c -+++ b/hw/acpi/ich9.c -@@ -435,8 +435,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) - static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; - pm->acpi_memory_hotplug.is_enabled = true; - pm->cpu_hotplug_legacy = true; -- pm->disable_s3 = 1; -- pm->disable_s4 = 1; -+ pm->disable_s3 = 0; -+ pm->disable_s4 = 0; - pm->s4_val = 2; - pm->use_acpi_hotplug_bridge = true; - pm->keep_pci_slot_hpc = true; -diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 28544e78c3..2fb2b43248 100644 ---- a/hw/acpi/piix4.c -+++ b/hw/acpi/piix4.c -@@ -653,8 +653,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) - - static Property piix4_pm_properties[] = { - DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), -- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), -- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), -+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), -+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), - DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), - DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, - use_acpi_hotplug_bridge, true), -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 0886cfe3fe..f98f842f80 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -380,6 +380,12 @@ const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); - * machine type. 
- */ - GlobalProperty pc_rhel_compat[] = { -+ /* we don't support s3/s4 suspend */ -+ { "PIIX4_PM", "disable_s3", "1" }, -+ { "PIIX4_PM", "disable_s4", "1" }, -+ { "ICH9-LPC", "disable_s3", "1" }, -+ { "ICH9-LPC", "disable_s4", "1" }, -+ - { TYPE_X86_CPU, "host-phys-bits", "on" }, - { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, - { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, --- -2.31.1 - diff --git a/SOURCES/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch b/SOURCES/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch new file mode 100644 index 0000000..144bd92 --- /dev/null +++ b/SOURCES/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch @@ -0,0 +1,62 @@ +From a932b8d4296066be01613ada84241b501488f99f Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 17 Nov 2022 17:03:24 +0100 +Subject: redhat: Add new rhel-9.2.0 s390x machine type + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2136473 +Upstream Status: n/a (rhel-only) + +RHEL 9.2 will be an EUS release - we want to have a new machine +type here to make sure that we have a spot where we can wire up +fixes later. 
+ +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 97e868ada0..aa142a1a4e 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1190,10 +1190,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); + #endif + + ++static void ccw_machine_rhel920_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel920_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); ++ + static void ccw_machine_rhel900_instance_options(MachineState *machine) + { + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; + ++ ccw_machine_rhel920_instance_options(machine); ++ + s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); + s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE); + } +@@ -1206,12 +1217,14 @@ static void ccw_machine_rhel900_class_options(MachineClass *mc) + { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, + }; + ++ ccw_machine_rhel920_class_options(mc); ++ + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); + s390mc->max_threads = S390_MAX_CPUS; + } +-DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); ++DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", false); + + static void ccw_machine_rhel860_instance_options(MachineState *machine) + { +-- +2.31.1 + diff --git a/SOURCES/0022-x86-rhel-9.2.0-machine-type.patch b/SOURCES/0022-x86-rhel-9.2.0-machine-type.patch new file mode 100644 index 0000000..8502b91 --- /dev/null +++ b/SOURCES/0022-x86-rhel-9.2.0-machine-type.patch @@ -0,0 +1,75 @@ +From f33ca8aed4744238230f1f2cc47df77aa4c9e0ac Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Thu, 17 Nov 2022 12:36:30 +0000 +Subject: x86: rhel 9.2.0 machine type + +Add a 9.2.0 x86 machine type, and fix up the compatibility +for 9.0.0 and older. + +pc_compat_7_1 and pc_compat_7_0 are both empty upstream so there's +nothing to do there. + +Signed-off-by: Dr. David Alan Gilbert +--- + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 21 ++++++++++++++++++++- + 2 files changed, 21 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 173a1fd10b..fc06877344 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -944,6 +944,7 @@ static void pc_machine_rhel760_options(MachineClass *m) + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; + pcmc->legacy_no_rng_seed = true; ++ pcmc->enforce_amd_1tb_hole = false; + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 97c3630021..52cfe3bf45 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -692,6 +692,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + } + ++static void pc_q35_init_rhel920(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel920_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL"; ++ pcmc->smbios_stream_version = "9.2.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, ++ pc_q35_machine_rhel920_options); ++ + static void pc_q35_init_rhel900(MachineState *machine) + { + pc_q35_init(machine); +@@ -700,11 +717,13 @@ static void pc_q35_init_rhel900(MachineState *machine) + static void pc_q35_machine_rhel900_options(MachineClass *m) + { + PCMachineClass *pcmc = 
PC_MACHINE_CLASS(m); +- pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel920_options(m); + m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.0.0"; + pcmc->legacy_no_rng_seed = true; ++ pcmc->enforce_amd_1tb_hole = false; + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, +-- +2.31.1 + diff --git a/SOURCES/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch b/SOURCES/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch deleted file mode 100644 index 30c28f7..0000000 --- a/SOURCES/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch +++ /dev/null @@ -1,87 +0,0 @@ -From ac346634c5731407baa9de709dbd4d5cc6f45301 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Mon, 11 Jul 2022 18:11:12 -0300 -Subject: [PATCH 02/11] Add dirty-sync-missed-zero-copy migration stat -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 111: zero-copy-send fixes & improvements -RH-Commit: [2/6] 115035fd0a4e4b9439c91fb0f5d1a2f9244ba369 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 2107466 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. David Alan Gilbert - -Signed-off-by: Leonardo Bras -Acked-by: Markus Armbruster -Acked-by: Peter Xu -Reviewed-by: Daniel P. Berrangé -Message-Id: <20220711211112.18951-3-leobras@redhat.com> -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit cf20c897338067ab4b70a4596fdccaf90c7e29a1) -Signed-off-by: Leonardo Bras ---- - migration/migration.c | 2 ++ - monitor/hmp-cmds.c | 5 +++++ - qapi/migration.json | 7 ++++++- - 3 files changed, 13 insertions(+), 1 deletion(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 8fb3eae910..3a3a7a4a50 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1017,6 +1017,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->normal_bytes = ram_counters.normal * page_size; - info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = ram_counters.dirty_sync_count; -+ info->ram->dirty_sync_missed_zero_copy = -+ ram_counters.dirty_sync_missed_zero_copy; - info->ram->postcopy_requests = ram_counters.postcopy_requests; - info->ram->page_size = page_size; - info->ram->multifd_bytes = ram_counters.multifd_bytes; -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index 634968498b..9cec01de38 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -305,6 +305,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) - monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n", - info->ram->postcopy_bytes >> 10); - } -+ if (info->ram->dirty_sync_missed_zero_copy) { -+ monitor_printf(mon, -+ "Zero-copy-send fallbacks happened: %" PRIu64 " times\n", -+ info->ram->dirty_sync_missed_zero_copy); -+ } - } - - if (info->has_disk) { -diff --git a/qapi/migration.json b/qapi/migration.json -index 5105790cd0..9b38b3c21c 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -55,6 +55,10 @@ - # @postcopy-bytes: The number of bytes sent during the post-copy phase - # (since 7.0). - # -+# @dirty-sync-missed-zero-copy: Number of times dirty RAM synchronization could -+# not avoid copying dirty pages. This is between -+# 0 and @dirty-sync-count * @multifd-channels. 
-+# (since 7.1) - # Since: 0.14 - ## - { 'struct': 'MigrationStats', -@@ -65,7 +69,8 @@ - 'postcopy-requests' : 'int', 'page-size' : 'int', - 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64', - 'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64', -- 'postcopy-bytes' : 'uint64' } } -+ 'postcopy-bytes' : 'uint64', -+ 'dirty-sync-missed-zero-copy' : 'uint64' } } - - ## - # @XBZRLECacheStats: --- -2.31.1 - diff --git a/SOURCES/kvm-Enable-virtio-iommu-pci-on-aarch64.patch b/SOURCES/kvm-Enable-virtio-iommu-pci-on-aarch64.patch deleted file mode 100644 index 3aafd3c..0000000 --- a/SOURCES/kvm-Enable-virtio-iommu-pci-on-aarch64.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 85781b8745fa1581a66f64011d61a4f0c4e103dc Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Fri, 6 May 2022 17:03:11 +0200 -Subject: [PATCH 3/5] Enable virtio-iommu-pci on aarch64 - -RH-Author: Eric Auger -RH-MergeRequest: 83: Enable virtio-iommu-pci on aarch64 -RH-Commit: [1/1] 23e5c0832e52c66adf5fd6daccdc3edddc7ecb8b (eauger1/centos-qemu-kvm) -RH-Bugzilla: 1477099 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477099 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45128798 -Upstream Status: RHEL-only -Tested: With virtio-net-pci and virtio-block-pci - -let's enable the virtio-iommu-pci device on aarch64 by -turning CONFIG_VIRTIO_IOMMU on. 
- -Signed-off-by: Eric Auger ---- - configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -index 187938573f..1618d31b89 100644 ---- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -23,6 +23,7 @@ CONFIG_VFIO_PCI=y - CONFIG_VIRTIO_MMIO=y - CONFIG_VIRTIO_PCI=y - CONFIG_VIRTIO_MEM=y -+CONFIG_VIRTIO_IOMMU=y - CONFIG_XIO3130=y - CONFIG_NVDIMM=y - CONFIG_ACPI_APEI=y --- -2.31.1 - diff --git a/SOURCES/kvm-Enable-virtio-iommu-pci-on-x86_64.patch b/SOURCES/kvm-Enable-virtio-iommu-pci-on-x86_64.patch deleted file mode 100644 index 2eb24df..0000000 --- a/SOURCES/kvm-Enable-virtio-iommu-pci-on-x86_64.patch +++ /dev/null @@ -1,41 +0,0 @@ -From c531a39171201f8a1d063e6af752e5d629c1b4bf Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 9 Jun 2022 11:35:18 +0200 -Subject: [PATCH 4/6] Enable virtio-iommu-pci on x86_64 - -RH-Author: Eric Auger -RH-MergeRequest: 100: Enable virtio-iommu-pci on x86_64 -RH-Commit: [1/1] a164af477efc7cb9d3d76a0e644f198f7c9fb2b5 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2094252 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: MST -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094252 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871185 -Upstream Status: RHEL-only -Tested: With virtio-net-pci and virtio-block-pci - -let's enable the virtio-iommu-pci device on x86_64 by -turning CONFIG_VIRTIO_IOMMU on. 
- -Signed-off-by: Eric Auger ---- - configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -index d0c9e66641..3850b9de72 100644 ---- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -90,6 +90,7 @@ CONFIG_VHOST_USER_BLK=y - CONFIG_VIRTIO_MEM=y - CONFIG_VIRTIO_PCI=y - CONFIG_VIRTIO_VGA=y -+CONFIG_VIRTIO_IOMMU=y - CONFIG_VMMOUSE=y - CONFIG_VMPORT=y - CONFIG_VTD=y --- -2.31.1 - diff --git a/SOURCES/kvm-Introduce-event-loop-base-abstract-class.patch b/SOURCES/kvm-Introduce-event-loop-base-abstract-class.patch deleted file mode 100644 index 9f987ea..0000000 --- a/SOURCES/kvm-Introduce-event-loop-base-abstract-class.patch +++ /dev/null @@ -1,503 +0,0 @@ -From 1163da281c178359dd7e1cf1ced5c98caa600f8e Mon Sep 17 00:00:00 2001 -From: Nicolas Saenz Julienne -Date: Mon, 25 Apr 2022 09:57:21 +0200 -Subject: [PATCH 01/16] Introduce event-loop-base abstract class - -RH-Author: Nicolas Saenz Julienne -RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size -RH-Commit: [1/3] 5817205d8f56cc4aa98bd5963ecac54a59bad990 -RH-Bugzilla: 2031024 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -Introduce the 'event-loop-base' abstract class, it'll hold the -properties common to all event loops and provide the necessary hooks for -their creation and maintenance. Then have iothread inherit from it. - -EventLoopBaseClass is defined as user creatable and provides a hook for -its children to attach themselves to the user creatable class 'complete' -function. It also provides an update_params() callback to propagate -property changes onto its children. - -The new 'event-loop-base' class will live in the root directory. 
It is -built on its own using the 'link_whole' option (there are no direct -function dependencies between the class and its children, it all happens -trough 'constructor' magic). And also imposes new compilation -dependencies: - - qom <- event-loop-base <- blockdev (iothread.c) - -And in subsequent patches: - - qom <- event-loop-base <- qemuutil (util/main-loop.c) - -All this forced some amount of reordering in meson.build: - - - Moved qom build definition before qemuutil. Doing it the other way - around (i.e. moving qemuutil after qom) isn't possible as a lot of - core libraries that live in between the two depend on it. - - - Process the 'hw' subdir earlier, as it introduces files into the - 'qom' source set. - -No functional changes intended. - -Signed-off-by: Nicolas Saenz Julienne -Reviewed-by: Stefan Hajnoczi -Acked-by: Markus Armbruster -Message-id: 20220425075723.20019-2-nsaenzju@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 7d5983e3c8c40b1d0668faba31d79905c4fadd7d) ---- - event-loop-base.c | 104 +++++++++++++++++++++++++++++++ - include/sysemu/event-loop-base.h | 36 +++++++++++ - include/sysemu/iothread.h | 6 +- - iothread.c | 65 ++++++------------- - meson.build | 23 ++++--- - qapi/qom.json | 22 +++++-- - 6 files changed, 192 insertions(+), 64 deletions(-) - create mode 100644 event-loop-base.c - create mode 100644 include/sysemu/event-loop-base.h - -diff --git a/event-loop-base.c b/event-loop-base.c -new file mode 100644 -index 0000000000..a924c73a7c ---- /dev/null -+++ b/event-loop-base.c -@@ -0,0 +1,104 @@ -+/* -+ * QEMU event-loop base -+ * -+ * Copyright (C) 2022 Red Hat Inc -+ * -+ * Authors: -+ * Stefan Hajnoczi -+ * Nicolas Saenz Julienne -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. 
-+ */ -+ -+#include "qemu/osdep.h" -+#include "qom/object_interfaces.h" -+#include "qapi/error.h" -+#include "sysemu/event-loop-base.h" -+ -+typedef struct { -+ const char *name; -+ ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */ -+} EventLoopBaseParamInfo; -+ -+static EventLoopBaseParamInfo aio_max_batch_info = { -+ "aio-max-batch", offsetof(EventLoopBase, aio_max_batch), -+}; -+ -+static void event_loop_base_get_param(Object *obj, Visitor *v, -+ const char *name, void *opaque, Error **errp) -+{ -+ EventLoopBase *event_loop_base = EVENT_LOOP_BASE(obj); -+ EventLoopBaseParamInfo *info = opaque; -+ int64_t *field = (void *)event_loop_base + info->offset; -+ -+ visit_type_int64(v, name, field, errp); -+} -+ -+static void event_loop_base_set_param(Object *obj, Visitor *v, -+ const char *name, void *opaque, Error **errp) -+{ -+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(obj); -+ EventLoopBase *base = EVENT_LOOP_BASE(obj); -+ EventLoopBaseParamInfo *info = opaque; -+ int64_t *field = (void *)base + info->offset; -+ int64_t value; -+ -+ if (!visit_type_int64(v, name, &value, errp)) { -+ return; -+ } -+ -+ if (value < 0) { -+ error_setg(errp, "%s value must be in range [0, %" PRId64 "]", -+ info->name, INT64_MAX); -+ return; -+ } -+ -+ *field = value; -+ -+ if (bc->update_params) { -+ bc->update_params(base, errp); -+ } -+ -+ return; -+} -+ -+static void event_loop_base_complete(UserCreatable *uc, Error **errp) -+{ -+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc); -+ EventLoopBase *base = EVENT_LOOP_BASE(uc); -+ -+ if (bc->init) { -+ bc->init(base, errp); -+ } -+} -+ -+static void event_loop_base_class_init(ObjectClass *klass, void *class_data) -+{ -+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); -+ ucc->complete = event_loop_base_complete; -+ -+ object_class_property_add(klass, "aio-max-batch", "int", -+ event_loop_base_get_param, -+ event_loop_base_set_param, -+ NULL, &aio_max_batch_info); -+} -+ -+static const TypeInfo 
event_loop_base_info = { -+ .name = TYPE_EVENT_LOOP_BASE, -+ .parent = TYPE_OBJECT, -+ .instance_size = sizeof(EventLoopBase), -+ .class_size = sizeof(EventLoopBaseClass), -+ .class_init = event_loop_base_class_init, -+ .abstract = true, -+ .interfaces = (InterfaceInfo[]) { -+ { TYPE_USER_CREATABLE }, -+ { } -+ } -+}; -+ -+static void register_types(void) -+{ -+ type_register_static(&event_loop_base_info); -+} -+type_init(register_types); -diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h -new file mode 100644 -index 0000000000..8e77d8b69f ---- /dev/null -+++ b/include/sysemu/event-loop-base.h -@@ -0,0 +1,36 @@ -+/* -+ * QEMU event-loop backend -+ * -+ * Copyright (C) 2022 Red Hat Inc -+ * -+ * Authors: -+ * Nicolas Saenz Julienne -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+#ifndef QEMU_EVENT_LOOP_BASE_H -+#define QEMU_EVENT_LOOP_BASE_H -+ -+#include "qom/object.h" -+#include "block/aio.h" -+#include "qemu/typedefs.h" -+ -+#define TYPE_EVENT_LOOP_BASE "event-loop-base" -+OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass, -+ EVENT_LOOP_BASE) -+ -+struct EventLoopBaseClass { -+ ObjectClass parent_class; -+ -+ void (*init)(EventLoopBase *base, Error **errp); -+ void (*update_params)(EventLoopBase *base, Error **errp); -+}; -+ -+struct EventLoopBase { -+ Object parent; -+ -+ /* AioContext AIO engine parameters */ -+ int64_t aio_max_batch; -+}; -+#endif -diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h -index 7f714bd136..8f8601d6ab 100644 ---- a/include/sysemu/iothread.h -+++ b/include/sysemu/iothread.h -@@ -17,11 +17,12 @@ - #include "block/aio.h" - #include "qemu/thread.h" - #include "qom/object.h" -+#include "sysemu/event-loop-base.h" - - #define TYPE_IOTHREAD "iothread" - - struct IOThread { -- Object parent_obj; -+ EventLoopBase parent_obj; - - QemuThread thread; - AioContext *ctx; -@@ -37,9 +38,6 @@ struct 
IOThread { - int64_t poll_max_ns; - int64_t poll_grow; - int64_t poll_shrink; -- -- /* AioContext AIO engine parameters */ -- int64_t aio_max_batch; - }; - typedef struct IOThread IOThread; - -diff --git a/iothread.c b/iothread.c -index 0f98af0f2a..8fa2f3bfb8 100644 ---- a/iothread.c -+++ b/iothread.c -@@ -17,6 +17,7 @@ - #include "qemu/module.h" - #include "block/aio.h" - #include "block/block.h" -+#include "sysemu/event-loop-base.h" - #include "sysemu/iothread.h" - #include "qapi/error.h" - #include "qapi/qapi-commands-misc.h" -@@ -152,10 +153,15 @@ static void iothread_init_gcontext(IOThread *iothread) - iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE); - } - --static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) -+static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp) - { -+ IOThread *iothread = IOTHREAD(base); - ERRP_GUARD(); - -+ if (!iothread->ctx) { -+ return; -+ } -+ - aio_context_set_poll_params(iothread->ctx, - iothread->poll_max_ns, - iothread->poll_grow, -@@ -166,14 +172,15 @@ static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) - } - - aio_context_set_aio_params(iothread->ctx, -- iothread->aio_max_batch, -+ iothread->parent_obj.aio_max_batch, - errp); - } - --static void iothread_complete(UserCreatable *obj, Error **errp) -+ -+static void iothread_init(EventLoopBase *base, Error **errp) - { - Error *local_error = NULL; -- IOThread *iothread = IOTHREAD(obj); -+ IOThread *iothread = IOTHREAD(base); - char *thread_name; - - iothread->stopping = false; -@@ -189,7 +196,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp) - */ - iothread_init_gcontext(iothread); - -- iothread_set_aio_context_params(iothread, &local_error); -+ iothread_set_aio_context_params(base, &local_error); - if (local_error) { - error_propagate(errp, local_error); - aio_context_unref(iothread->ctx); -@@ -201,7 +208,7 @@ static void iothread_complete(UserCreatable *obj, Error 
**errp) - * to inherit. - */ - thread_name = g_strdup_printf("IO %s", -- object_get_canonical_path_component(OBJECT(obj))); -+ object_get_canonical_path_component(OBJECT(base))); - qemu_thread_create(&iothread->thread, thread_name, iothread_run, - iothread, QEMU_THREAD_JOINABLE); - g_free(thread_name); -@@ -226,9 +233,6 @@ static IOThreadParamInfo poll_grow_info = { - static IOThreadParamInfo poll_shrink_info = { - "poll-shrink", offsetof(IOThread, poll_shrink), - }; --static IOThreadParamInfo aio_max_batch_info = { -- "aio-max-batch", offsetof(IOThread, aio_max_batch), --}; - - static void iothread_get_param(Object *obj, Visitor *v, - const char *name, IOThreadParamInfo *info, Error **errp) -@@ -288,35 +292,12 @@ static void iothread_set_poll_param(Object *obj, Visitor *v, - } - } - --static void iothread_get_aio_param(Object *obj, Visitor *v, -- const char *name, void *opaque, Error **errp) --{ -- IOThreadParamInfo *info = opaque; -- -- iothread_get_param(obj, v, name, info, errp); --} -- --static void iothread_set_aio_param(Object *obj, Visitor *v, -- const char *name, void *opaque, Error **errp) --{ -- IOThread *iothread = IOTHREAD(obj); -- IOThreadParamInfo *info = opaque; -- -- if (!iothread_set_param(obj, v, name, info, errp)) { -- return; -- } -- -- if (iothread->ctx) { -- aio_context_set_aio_params(iothread->ctx, -- iothread->aio_max_batch, -- errp); -- } --} -- - static void iothread_class_init(ObjectClass *klass, void *class_data) - { -- UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); -- ucc->complete = iothread_complete; -+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(klass); -+ -+ bc->init = iothread_init; -+ bc->update_params = iothread_set_aio_context_params; - - object_class_property_add(klass, "poll-max-ns", "int", - iothread_get_poll_param, -@@ -330,23 +311,15 @@ static void iothread_class_init(ObjectClass *klass, void *class_data) - iothread_get_poll_param, - iothread_set_poll_param, - NULL, &poll_shrink_info); -- 
object_class_property_add(klass, "aio-max-batch", "int", -- iothread_get_aio_param, -- iothread_set_aio_param, -- NULL, &aio_max_batch_info); - } - - static const TypeInfo iothread_info = { - .name = TYPE_IOTHREAD, -- .parent = TYPE_OBJECT, -+ .parent = TYPE_EVENT_LOOP_BASE, - .class_init = iothread_class_init, - .instance_size = sizeof(IOThread), - .instance_init = iothread_instance_init, - .instance_finalize = iothread_instance_finalize, -- .interfaces = (InterfaceInfo[]) { -- {TYPE_USER_CREATABLE}, -- {} -- }, - }; - - static void iothread_register_types(void) -@@ -383,7 +356,7 @@ static int query_one_iothread(Object *object, void *opaque) - info->poll_max_ns = iothread->poll_max_ns; - info->poll_grow = iothread->poll_grow; - info->poll_shrink = iothread->poll_shrink; -- info->aio_max_batch = iothread->aio_max_batch; -+ info->aio_max_batch = iothread->parent_obj.aio_max_batch; - - QAPI_LIST_APPEND(*tail, info); - return 0; -diff --git a/meson.build b/meson.build -index 6f7e430f0f..b9c919a55e 100644 ---- a/meson.build -+++ b/meson.build -@@ -2804,6 +2804,7 @@ subdir('qom') - subdir('authz') - subdir('crypto') - subdir('ui') -+subdir('hw') - - - if enable_modules -@@ -2811,6 +2812,18 @@ if enable_modules - modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO') - endif - -+qom_ss = qom_ss.apply(config_host, strict: false) -+libqom = static_library('qom', qom_ss.sources() + genh, -+ dependencies: [qom_ss.dependencies()], -+ name_suffix: 'fa') -+qom = declare_dependency(link_whole: libqom) -+ -+event_loop_base = files('event-loop-base.c') -+event_loop_base = static_library('event-loop-base', sources: event_loop_base + genh, -+ build_by_default: true) -+event_loop_base = declare_dependency(link_whole: event_loop_base, -+ dependencies: [qom]) -+ - stub_ss = stub_ss.apply(config_all, strict: false) - - util_ss.add_all(trace_ss) -@@ -2897,7 +2910,6 @@ subdir('monitor') - subdir('net') - subdir('replay') - subdir('semihosting') 
--subdir('hw') - subdir('tcg') - subdir('fpu') - subdir('accel') -@@ -3022,13 +3034,6 @@ qemu_syms = custom_target('qemu.syms', output: 'qemu.syms', - capture: true, - command: [undefsym, nm, '@INPUT@']) - --qom_ss = qom_ss.apply(config_host, strict: false) --libqom = static_library('qom', qom_ss.sources() + genh, -- dependencies: [qom_ss.dependencies()], -- name_suffix: 'fa') -- --qom = declare_dependency(link_whole: libqom) -- - authz_ss = authz_ss.apply(config_host, strict: false) - libauthz = static_library('authz', authz_ss.sources() + genh, - dependencies: [authz_ss.dependencies()], -@@ -3081,7 +3086,7 @@ libblockdev = static_library('blockdev', blockdev_ss.sources() + genh, - build_by_default: false) - - blockdev = declare_dependency(link_whole: [libblockdev], -- dependencies: [block]) -+ dependencies: [block, event_loop_base]) - - qmp_ss = qmp_ss.apply(config_host, strict: false) - libqmp = static_library('qmp', qmp_ss.sources() + genh, -diff --git a/qapi/qom.json b/qapi/qom.json -index eeb5395ff3..a2439533c5 100644 ---- a/qapi/qom.json -+++ b/qapi/qom.json -@@ -499,6 +499,20 @@ - '*repeat': 'bool', - '*grab-toggle': 'GrabToggleKeys' } } - -+## -+# @EventLoopBaseProperties: -+# -+# Common properties for event loops -+# -+# @aio-max-batch: maximum number of requests in a batch for the AIO engine, -+# 0 means that the engine will use its default. -+# (default: 0) -+# -+# Since: 7.1 -+## -+{ 'struct': 'EventLoopBaseProperties', -+ 'data': { '*aio-max-batch': 'int' } } -+ - ## - # @IothreadProperties: - # -@@ -516,17 +530,15 @@ - # algorithm detects it is spending too long polling without - # encountering events. 0 selects a default behaviour (default: 0) - # --# @aio-max-batch: maximum number of requests in a batch for the AIO engine, --# 0 means that the engine will use its default --# (default:0, since 6.1) -+# The @aio-max-batch option is available since 6.1. 
- # - # Since: 2.0 - ## - { 'struct': 'IothreadProperties', -+ 'base': 'EventLoopBaseProperties', - 'data': { '*poll-max-ns': 'int', - '*poll-grow': 'int', -- '*poll-shrink': 'int', -- '*aio-max-batch': 'int' } } -+ '*poll-shrink': 'int' } } - - ## - # @MemoryBackendProperties: --- -2.31.1 - diff --git a/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch b/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch new file mode 100644 index 0000000..b7aba7e --- /dev/null +++ b/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch @@ -0,0 +1,82 @@ +From 6aebc271d95f9c59cb63c923b6ce35f16fce10e4 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 16 Jan 2023 07:17:23 -0500 +Subject: [PATCH 30/31] KVM: keep track of running ioctls + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 138: accel: introduce accelerator blocker API +RH-Bugzilla: 1979276 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/3] f566f81dda26ae733008f32261ecd1253ec1796d (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276 + +commit a27dd2de68f37ba96fe164a42121daa5f0750afc +Author: Emanuele Giuseppe Esposito +Date: Fri Nov 11 10:47:57 2022 -0500 + + KVM: keep track of running ioctls + + Using the new accel-blocker API, mark where ioctls are being called + in KVM. Next, we will implement the critical section that will take + care of performing memslots modifications atomically, therefore + preventing any new ioctl from running and allowing the running ones + to finish. 
+ + Signed-off-by: David Hildenbrand + Signed-off-by: Emanuele Giuseppe Esposito + Message-Id: <20221111154758.1372674-3-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + accel/kvm/kvm-all.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index f99b0becd8..ff660fd469 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms) + assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size()); + + s->sigmask_len = 8; ++ accel_blocker_init(); + + #ifdef KVM_CAP_SET_GUEST_DEBUG + QTAILQ_INIT(&s->kvm_sw_breakpoints); +@@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...) + va_end(ap); + + trace_kvm_vm_ioctl(type, arg); ++ accel_ioctl_begin(); + ret = ioctl(s->vmfd, type, arg); ++ accel_ioctl_end(); + if (ret == -1) { + ret = -errno; + } +@@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...) + va_end(ap); + + trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg); ++ accel_cpu_ioctl_begin(cpu); + ret = ioctl(cpu->kvm_fd, type, arg); ++ accel_cpu_ioctl_end(cpu); + if (ret == -1) { + ret = -errno; + } +@@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...) 
+ va_end(ap); + + trace_kvm_device_ioctl(fd, type, arg); ++ accel_ioctl_begin(); + ret = ioctl(fd, type, arg); ++ accel_ioctl_end(); + if (ret == -1) { + ret = -errno; + } +-- +2.31.1 + diff --git a/SOURCES/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch b/SOURCES/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch deleted file mode 100644 index c7b8898..0000000 --- a/SOURCES/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch +++ /dev/null @@ -1,420 +0,0 @@ -From cda3fcf14f2883fea633e25256f6c14a71271adf Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:31 -0300 -Subject: [PATCH 08/18] QIOChannel: Add flags on io_writev and introduce - io_flush callback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [2/11] 06acfb6b0cb2c25733c2eb198011f7623b5a7024 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -Add flags to io_writev and introduce io_flush as optional callback to -QIOChannelClass, allowing the implementation of zero copy writes by -subclasses. - -How to use them: -- Write data using qio_channel_writev*(...,QIO_CHANNEL_WRITE_FLAG_ZERO_COPY), -- Wait write completion with qio_channel_flush(). - -Notes: -As some zero copy write implementations work asynchronously, it's -recommended to keep the write buffer untouched until the return of -qio_channel_flush(), to avoid the risk of sending an updated buffer -instead of the buffer state during write. - -As io_flush callback is optional, if a subclass does not implement it, then: -- io_flush will return 0 without changing anything. - -Also, some functions like qio_channel_writev_full_all() were adapted to -receive a flag parameter. 
That allows shared code between zero copy and -non-zero copy writev, and also an easier implementation on new flags. - -Signed-off-by: Leonardo Bras -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Peter Xu -Reviewed-by: Juan Quintela -Message-Id: <20220513062836.965425-3-leobras@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit b88651cb4d4fa416fdbb6afaf5b26ec8c035eaad) -Signed-off-by: Leonardo Bras ---- - chardev/char-io.c | 2 +- - hw/remote/mpqemu-link.c | 2 +- - include/io/channel.h | 38 +++++++++++++++++++++- - io/channel-buffer.c | 1 + - io/channel-command.c | 1 + - io/channel-file.c | 1 + - io/channel-socket.c | 2 ++ - io/channel-tls.c | 1 + - io/channel-websock.c | 1 + - io/channel.c | 49 +++++++++++++++++++++++------ - migration/rdma.c | 1 + - scsi/pr-manager-helper.c | 2 +- - tests/unit/test-io-channel-socket.c | 1 + - 13 files changed, 88 insertions(+), 14 deletions(-) - -diff --git a/chardev/char-io.c b/chardev/char-io.c -index 8ced184160..4451128cba 100644 ---- a/chardev/char-io.c -+++ b/chardev/char-io.c -@@ -122,7 +122,7 @@ int io_channel_send_full(QIOChannel *ioc, - - ret = qio_channel_writev_full( - ioc, &iov, 1, -- fds, nfds, NULL); -+ fds, nfds, 0, NULL); - if (ret == QIO_CHANNEL_ERR_BLOCK) { - if (offset) { - return offset; -diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c -index 7e841820e5..e8f556bd27 100644 ---- a/hw/remote/mpqemu-link.c -+++ b/hw/remote/mpqemu-link.c -@@ -69,7 +69,7 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) - } - - if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send), -- fds, nfds, errp)) { -+ fds, nfds, 0, errp)) { - ret = true; - } else { - trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds); -diff --git a/include/io/channel.h b/include/io/channel.h -index 88988979f8..c680ee7480 100644 ---- a/include/io/channel.h -+++ b/include/io/channel.h -@@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, - - #define QIO_CHANNEL_ERR_BLOCK 
-2 - -+#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 -+ - typedef enum QIOChannelFeature QIOChannelFeature; - - enum QIOChannelFeature { - QIO_CHANNEL_FEATURE_FD_PASS, - QIO_CHANNEL_FEATURE_SHUTDOWN, - QIO_CHANNEL_FEATURE_LISTEN, -+ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, - }; - - -@@ -104,6 +107,7 @@ struct QIOChannelClass { - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp); - ssize_t (*io_readv)(QIOChannel *ioc, - const struct iovec *iov, -@@ -136,6 +140,8 @@ struct QIOChannelClass { - IOHandler *io_read, - IOHandler *io_write, - void *opaque); -+ int (*io_flush)(QIOChannel *ioc, -+ Error **errp); - }; - - /* General I/O handling functions */ -@@ -228,6 +234,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, - * @niov: the length of the @iov array - * @fds: an array of file handles to send - * @nfds: number of file handles in @fds -+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) - * @errp: pointer to a NULL-initialized error object - * - * Write data to the IO channel, reading it from the -@@ -260,6 +267,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp); - - /** -@@ -837,6 +845,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc, - * @niov: the length of the @iov array - * @fds: an array of file handles to send - * @nfds: number of file handles in @fds -+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) - * @errp: pointer to a NULL-initialized error object - * - * -@@ -846,6 +855,14 @@ int qio_channel_readv_full_all(QIOChannel *ioc, - * to be written, yielding from the current coroutine - * if required. - * -+ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags, -+ * instead of waiting for all requested data to be written, -+ * this function will wait until it's all queued for writing. -+ * In this case, if the buffer gets changed between queueing and -+ * sending, the updated buffer will be sent. 
If this is not a -+ * desired behavior, it's suggested to call qio_channel_flush() -+ * before reusing the buffer. -+ * - * Returns: 0 if all bytes were written, or -1 on error - */ - -@@ -853,6 +870,25 @@ int qio_channel_writev_full_all(QIOChannel *ioc, - const struct iovec *iov, - size_t niov, - int *fds, size_t nfds, -- Error **errp); -+ int flags, Error **errp); -+ -+/** -+ * qio_channel_flush: -+ * @ioc: the channel object -+ * @errp: pointer to a NULL-initialized error object -+ * -+ * Will block until every packet queued with -+ * qio_channel_writev_full() + QIO_CHANNEL_WRITE_FLAG_ZERO_COPY -+ * is sent, or return in case of any error. -+ * -+ * If not implemented, acts as a no-op, and returns 0. -+ * -+ * Returns -1 if any error is found, -+ * 1 if every send failed to use zero copy. -+ * 0 otherwise. -+ */ -+ -+int qio_channel_flush(QIOChannel *ioc, -+ Error **errp); - - #endif /* QIO_CHANNEL_H */ -diff --git a/io/channel-buffer.c b/io/channel-buffer.c -index baa4e2b089..bf52011be2 100644 ---- a/io/channel-buffer.c -+++ b/io/channel-buffer.c -@@ -81,6 +81,7 @@ static ssize_t qio_channel_buffer_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); -diff --git a/io/channel-command.c b/io/channel-command.c -index 338da73ade..54560464ae 100644 ---- a/io/channel-command.c -+++ b/io/channel-command.c -@@ -258,6 +258,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); -diff --git a/io/channel-file.c b/io/channel-file.c -index d7cf6d278f..ef6807a6be 100644 ---- a/io/channel-file.c -+++ b/io/channel-file.c -@@ -114,6 +114,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); -diff --git 
a/io/channel-socket.c b/io/channel-socket.c -index 7a8d9f69c9..a1be2197ca 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -525,6 +525,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); -@@ -620,6 +621,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); -diff --git a/io/channel-tls.c b/io/channel-tls.c -index 2ae1b92fc0..4ce890a538 100644 ---- a/io/channel-tls.c -+++ b/io/channel-tls.c -@@ -301,6 +301,7 @@ static ssize_t qio_channel_tls_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); -diff --git a/io/channel-websock.c b/io/channel-websock.c -index 55145a6a8c..9619906ac3 100644 ---- a/io/channel-websock.c -+++ b/io/channel-websock.c -@@ -1127,6 +1127,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); -diff --git a/io/channel.c b/io/channel.c -index e8b019dc36..0640941ac5 100644 ---- a/io/channel.c -+++ b/io/channel.c -@@ -72,18 +72,32 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); - -- if ((fds || nfds) && -- !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { -+ if (fds || nfds) { -+ if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { -+ error_setg_errno(errp, EINVAL, -+ "Channel does not support file descriptor passing"); -+ return -1; -+ } -+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { -+ error_setg_errno(errp, EINVAL, -+ "Zero Copy does not support file descriptor passing"); 
-+ return -1; -+ } -+ } -+ -+ if ((flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) && -+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { - error_setg_errno(errp, EINVAL, -- "Channel does not support file descriptor passing"); -+ "Requested Zero Copy feature is not available"); - return -1; - } - -- return klass->io_writev(ioc, iov, niov, fds, nfds, errp); -+ return klass->io_writev(ioc, iov, niov, fds, nfds, flags, errp); - } - - -@@ -217,14 +231,14 @@ int qio_channel_writev_all(QIOChannel *ioc, - size_t niov, - Error **errp) - { -- return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp); -+ return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, 0, errp); - } - - int qio_channel_writev_full_all(QIOChannel *ioc, - const struct iovec *iov, - size_t niov, - int *fds, size_t nfds, -- Error **errp) -+ int flags, Error **errp) - { - int ret = -1; - struct iovec *local_iov = g_new(struct iovec, niov); -@@ -237,8 +251,10 @@ int qio_channel_writev_full_all(QIOChannel *ioc, - - while (nlocal_iov > 0) { - ssize_t len; -- len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds, -- errp); -+ -+ len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, -+ nfds, flags, errp); -+ - if (len == QIO_CHANNEL_ERR_BLOCK) { - if (qemu_in_coroutine()) { - qio_channel_yield(ioc, G_IO_OUT); -@@ -277,7 +293,7 @@ ssize_t qio_channel_writev(QIOChannel *ioc, - size_t niov, - Error **errp) - { -- return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp); -+ return qio_channel_writev_full(ioc, iov, niov, NULL, 0, 0, errp); - } - - -@@ -297,7 +313,7 @@ ssize_t qio_channel_write(QIOChannel *ioc, - Error **errp) - { - struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; -- return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp); -+ return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, 0, errp); - } - - -@@ -473,6 +489,19 @@ off_t qio_channel_io_seek(QIOChannel *ioc, - return klass->io_seek(ioc, offset, whence, errp); - } - -+int 
qio_channel_flush(QIOChannel *ioc, -+ Error **errp) -+{ -+ QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); -+ -+ if (!klass->io_flush || -+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { -+ return 0; -+ } -+ -+ return klass->io_flush(ioc, errp); -+} -+ - - static void qio_channel_restart_read(void *opaque) - { -diff --git a/migration/rdma.c b/migration/rdma.c -index ef1e65ec36..672d1958a9 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -2840,6 +2840,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); -diff --git a/scsi/pr-manager-helper.c b/scsi/pr-manager-helper.c -index 451c7631b7..3be52a98d5 100644 ---- a/scsi/pr-manager-helper.c -+++ b/scsi/pr-manager-helper.c -@@ -77,7 +77,7 @@ static int pr_manager_helper_write(PRManagerHelper *pr_mgr, - iov.iov_base = (void *)buf; - iov.iov_len = sz; - n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1, -- nfds ? &fd : NULL, nfds, errp); -+ nfds ? 
&fd : NULL, nfds, 0, errp); - - if (n_written <= 0) { - assert(n_written != QIO_CHANNEL_ERR_BLOCK); -diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c -index c49eec1f03..6713886d02 100644 ---- a/tests/unit/test-io-channel-socket.c -+++ b/tests/unit/test-io-channel-socket.c -@@ -444,6 +444,7 @@ static void test_io_channel_unix_fd_pass(void) - G_N_ELEMENTS(iosend), - fdsend, - G_N_ELEMENTS(fdsend), -+ 0, - &error_abort); - - qio_channel_readv_full(dst, --- -2.35.3 - diff --git a/SOURCES/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch b/SOURCES/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch deleted file mode 100644 index 0fd4b6c..0000000 --- a/SOURCES/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch +++ /dev/null @@ -1,56 +0,0 @@ -From cb6dc39a5e5d2d981b4b1e983042b3fbb529d5d1 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Thu, 4 Aug 2022 04:10:43 -0300 -Subject: [PATCH 06/11] QIOChannelSocket: Add support for MSG_ZEROCOPY + IPV6 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 111: zero-copy-send fixes & improvements -RH-Commit: [6/6] 2eb1aba8ebf267a6f67cfba2e489dc88619c7fd4 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 2107466 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. David Alan Gilbert - -For using MSG_ZEROCOPY, there are two steps: -1 - io_writev() the packet, which enqueues the packet for sending, and -2 - io_flush(), which gets confirmation that all packets got correctly sent - -Currently, if MSG_ZEROCOPY is used to send packets over IPV6, no error will -be reported in (1), but it will fail in the first time (2) happens. - -This happens because (2) currently checks for cmsg_level & cmsg_type -associated with IPV4 only, before reporting any error. 
- -Add checks for cmsg_level & cmsg_type associated with IPV6, and thus enable -support for MSG_ZEROCOPY + IPV6 - -Fixes: 2bc58ffc29 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") -Signed-off-by: Leonardo Bras -Signed-off-by: Daniel P. Berrangé -(cherry picked from commit 5258a7e2c0677d16e9e1d06845f60171adf0b290) -Signed-off-by: Leonardo Bras ---- - io/channel-socket.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index eb7baa2184..efd5f60808 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -747,8 +747,8 @@ static int qio_channel_socket_flush(QIOChannel *ioc, - } - - cm = CMSG_FIRSTHDR(&msg); -- if (cm->cmsg_level != SOL_IP && -- cm->cmsg_type != IP_RECVERR) { -+ if (cm->cmsg_level != SOL_IP && cm->cmsg_type != IP_RECVERR && -+ cm->cmsg_level != SOL_IPV6 && cm->cmsg_type != IPV6_RECVERR) { - error_setg_errno(errp, EPROTOTYPE, - "Wrong cmsg in errqueue"); - return -1; --- -2.31.1 - diff --git a/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch b/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch deleted file mode 100644 index b382a59..0000000 --- a/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 678981c6bb7c964e1591f6f8aba49e9602f64852 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Mon, 11 Jul 2022 18:11:11 -0300 -Subject: [PATCH 01/11] QIOChannelSocket: Fix zero-copy flush returning code 1 - when nothing sent -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 111: zero-copy-send fixes & improvements -RH-Commit: [1/6] cebc887cb61de1572d8ae3232cde45e80c339404 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 2107466 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. 
David Alan Gilbert - -If flush is called when no buffer was sent with MSG_ZEROCOPY, it currently -returns 1. This return code should be used only when Linux fails to use -MSG_ZEROCOPY on a lot of sendmsg(). - -Fix this by returning early from flush if no sendmsg(...,MSG_ZEROCOPY) -was attempted. - -Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") -Signed-off-by: Leonardo Bras -Reviewed-by: Daniel P. Berrangé -Acked-by: Daniel P. Berrangé -Reviewed-by: Juan Quintela -Reviewed-by: Peter Xu -Message-Id: <20220711211112.18951-2-leobras@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 927f93e099c4f9184e60a1bc61624ac2d04d0223) -Signed-off-by: Leonardo Bras ---- - io/channel-socket.c | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index 8ae8b212cf..eb7baa2184 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -717,12 +717,18 @@ static int qio_channel_socket_flush(QIOChannel *ioc, - struct cmsghdr *cm; - char control[CMSG_SPACE(sizeof(*serr))]; - int received; -- int ret = 1; -+ int ret; -+ -+ if (sioc->zero_copy_queued == sioc->zero_copy_sent) { -+ return 0; -+ } - - msg.msg_control = control; - msg.msg_controllen = sizeof(control); - memset(control, 0, sizeof(control)); - -+ ret = 1; -+ - while (sioc->zero_copy_sent < sioc->zero_copy_queued) { - received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE); - if (received < 0) { --- -2.31.1 - diff --git a/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch b/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch deleted file mode 100644 index 9d134e6..0000000 --- a/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch +++ /dev/null @@ -1,58 +0,0 @@ -From e70f01749addd7d0b7aa7fa4fdedb664f98e6b9b Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Mon, 20 Jun 2022 02:39:43 -0300 -Subject: [PATCH 16/18] 
QIOChannelSocket: Fix zero-copy send so socket flush - works -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [10/11] a2dfac987e24026b1a78e90b86234ca206b6401f (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -Somewhere between v6 and v7 the of the zero-copy-send patchset a crucial -part of the flushing mechanism got missing: incrementing zero_copy_queued. - -Without that, the flushing interface becomes a no-op, and there is no -guarantee the buffer is really sent. - -This can go as bad as causing a corruption in RAM during migration. - -Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") -Reported-by: 徐闯 -Signed-off-by: Leonardo Bras -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 4f5a09714c983a3471fd12e3c7f3196e95c650c1) -Signed-off-by: Leonardo Bras ---- - io/channel-socket.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index 7490e5943d..8ae8b212cf 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -612,6 +612,11 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - "Unable to write to socket"); - return -1; - } -+ -+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { -+ sioc->zero_copy_queued++; -+ } -+ - return ret; - } - #else /* WIN32 */ --- -2.35.3 - diff --git a/SOURCES/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch b/SOURCES/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch deleted file mode 100644 index 89aa806..0000000 --- a/SOURCES/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch +++ /dev/null @@ -1,249 +0,0 @@ -From 4aeba0365d30dabe2e70dc172683f0878a4a9621 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:32 -0300 -Subject: [PATCH 09/18] QIOChannelSocket: Implement io_writev zero copy flag & - io_flush for CONFIG_LINUX -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [3/11] 9afeac1f5ac7675624660a0281726c09c8321180 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -For CONFIG_LINUX, implement the new zero copy flag and the optional callback -io_flush on QIOChannelSocket, but enables it only when MSG_ZEROCOPY -feature is available in the host kernel, which is checked on -qio_channel_socket_connect_sync() - -qio_channel_socket_flush() was implemented by counting how many times -sendmsg(...,MSG_ZEROCOPY) was successfully called, and then reading the -socket's error queue, in order to find how many of them finished sending. 
-Flush will loop until those counters are the same, or until some error occurs. - -Notes on using writev() with QIO_CHANNEL_WRITE_FLAG_ZERO_COPY: -1: Buffer -- As MSG_ZEROCOPY tells the kernel to use the same user buffer to avoid copying, -some caution is necessary to avoid overwriting any buffer before it's sent. -If something like this happen, a newer version of the buffer may be sent instead. -- If this is a problem, it's recommended to call qio_channel_flush() before freeing -or re-using the buffer. - -2: Locked memory -- When using MSG_ZERCOCOPY, the buffer memory will be locked after queued, and -unlocked after it's sent. -- Depending on the size of each buffer, and how often it's sent, it may require -a larger amount of locked memory than usually available to non-root user. -- If the required amount of locked memory is not available, writev_zero_copy -will return an error, which can abort an operation like migration, -- Because of this, when an user code wants to add zero copy as a feature, it -requires a mechanism to disable it, so it can still be accessible to less -privileged users. - -Signed-off-by: Leonardo Bras -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Juan Quintela -Message-Id: <20220513062836.965425-4-leobras@redhat.com> -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 2bc58ffc2926a4efdd03edfb5909861fefc68c3d) -Signed-off-by: Leonardo Bras ---- - include/io/channel-socket.h | 2 + - io/channel-socket.c | 116 ++++++++++++++++++++++++++++++++++-- - 2 files changed, 114 insertions(+), 4 deletions(-) - -diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h -index e747e63514..513c428fe4 100644 ---- a/include/io/channel-socket.h -+++ b/include/io/channel-socket.h -@@ -47,6 +47,8 @@ struct QIOChannelSocket { - socklen_t localAddrLen; - struct sockaddr_storage remoteAddr; - socklen_t remoteAddrLen; -+ ssize_t zero_copy_queued; -+ ssize_t zero_copy_sent; - }; - - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index a1be2197ca..fbd2214d20 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -26,6 +26,14 @@ - #include "io/channel-watch.h" - #include "trace.h" - #include "qapi/clone-visitor.h" -+#ifdef CONFIG_LINUX -+#include -+#include -+ -+#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY)) -+#define QEMU_MSG_ZEROCOPY -+#endif -+#endif - - #define SOCKET_MAX_FDS 16 - -@@ -55,6 +63,8 @@ qio_channel_socket_new(void) - - sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)); - sioc->fd = -1; -+ sioc->zero_copy_queued = 0; -+ sioc->zero_copy_sent = 0; - - ioc = QIO_CHANNEL(sioc); - qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); -@@ -154,6 +164,16 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, - return -1; - } - -+#ifdef QEMU_MSG_ZEROCOPY -+ int ret, v = 1; -+ ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v)); -+ if (ret == 0) { -+ /* Zero copy available on host */ -+ qio_channel_set_feature(QIO_CHANNEL(ioc), -+ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY); -+ } -+#endif -+ - return 0; - } - -@@ -534,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)]; - size_t fdsize = sizeof(int) * nfds; - struct cmsghdr *cmsg; -+ int sflags = 0; - - 
memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)); - -@@ -558,15 +579,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - memcpy(CMSG_DATA(cmsg), fds, fdsize); - } - -+#ifdef QEMU_MSG_ZEROCOPY -+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { -+ sflags = MSG_ZEROCOPY; -+ } -+#endif -+ - retry: -- ret = sendmsg(sioc->fd, &msg, 0); -+ ret = sendmsg(sioc->fd, &msg, sflags); - if (ret <= 0) { -- if (errno == EAGAIN) { -+ switch (errno) { -+ case EAGAIN: - return QIO_CHANNEL_ERR_BLOCK; -- } -- if (errno == EINTR) { -+ case EINTR: - goto retry; -+#ifdef QEMU_MSG_ZEROCOPY -+ case ENOBUFS: -+ if (sflags & MSG_ZEROCOPY) { -+ error_setg_errno(errp, errno, -+ "Process can't lock enough memory for using MSG_ZEROCOPY"); -+ return -1; -+ } -+ break; -+#endif - } -+ - error_setg_errno(errp, errno, - "Unable to write to socket"); - return -1; -@@ -660,6 +697,74 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - } - #endif /* WIN32 */ - -+ -+#ifdef QEMU_MSG_ZEROCOPY -+static int qio_channel_socket_flush(QIOChannel *ioc, -+ Error **errp) -+{ -+ QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); -+ struct msghdr msg = {}; -+ struct sock_extended_err *serr; -+ struct cmsghdr *cm; -+ char control[CMSG_SPACE(sizeof(*serr))]; -+ int received; -+ int ret = 1; -+ -+ msg.msg_control = control; -+ msg.msg_controllen = sizeof(control); -+ memset(control, 0, sizeof(control)); -+ -+ while (sioc->zero_copy_sent < sioc->zero_copy_queued) { -+ received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE); -+ if (received < 0) { -+ switch (errno) { -+ case EAGAIN: -+ /* Nothing on errqueue, wait until something is available */ -+ qio_channel_wait(ioc, G_IO_ERR); -+ continue; -+ case EINTR: -+ continue; -+ default: -+ error_setg_errno(errp, errno, -+ "Unable to read errqueue"); -+ return -1; -+ } -+ } -+ -+ cm = CMSG_FIRSTHDR(&msg); -+ if (cm->cmsg_level != SOL_IP && -+ cm->cmsg_type != IP_RECVERR) { -+ error_setg_errno(errp, EPROTOTYPE, -+ "Wrong cmsg in errqueue"); -+ 
return -1; -+ } -+ -+ serr = (void *) CMSG_DATA(cm); -+ if (serr->ee_errno != SO_EE_ORIGIN_NONE) { -+ error_setg_errno(errp, serr->ee_errno, -+ "Error on socket"); -+ return -1; -+ } -+ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { -+ error_setg_errno(errp, serr->ee_origin, -+ "Error not from zero copy"); -+ return -1; -+ } -+ -+ /* No errors, count successfully finished sendmsg()*/ -+ sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1; -+ -+ /* If any sendmsg() succeeded using zero copy, return 0 at the end */ -+ if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) { -+ ret = 0; -+ } -+ } -+ -+ return ret; -+} -+ -+#endif /* QEMU_MSG_ZEROCOPY */ -+ - static int - qio_channel_socket_set_blocking(QIOChannel *ioc, - bool enabled, -@@ -790,6 +895,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass, - ioc_klass->io_set_delay = qio_channel_socket_set_delay; - ioc_klass->io_create_watch = qio_channel_socket_create_watch; - ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler; -+#ifdef QEMU_MSG_ZEROCOPY -+ ioc_klass->io_flush = qio_channel_socket_flush; -+#endif - } - - static const TypeInfo qio_channel_socket_info = { --- -2.35.3 - diff --git a/SOURCES/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch b/SOURCES/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch deleted file mode 100644 index 6fc0c76..0000000 --- a/SOURCES/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 60bf942a58db12c821f2a6a49e2e0b04b99bec30 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Mon, 20 Jun 2022 02:39:42 -0300 -Subject: [PATCH 15/18] QIOChannelSocket: Introduce assert and reduce ifdefs to - improve readability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [9/11] eaa02d68301852ccc98bdacc7387d8d03be1cb05 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 
1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -During implementation of MSG_ZEROCOPY feature, a lot of #ifdefs were -introduced, particularly at qio_channel_socket_writev(). - -Rewrite some of those changes so it's easier to read. - -Also, introduce an assert to help detect incorrect zero-copy usage is when -it's disabled on build. - -Signed-off-by: Leonardo Bras -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Juan Quintela -Reviewed-by: Peter Xu -Signed-off-by: Juan Quintela -Signed-off-by: Dr. David Alan Gilbert - dgilbert: Fixed up thinko'd g_assert_unreachable->g_assert_not_reached -(cherry picked from commit 803ca43e4c7fcf32f9f68c118301ccd0c83ece3f) -Signed-off-by: Leonardo Bras ---- - io/channel-socket.c | 14 +++++++++----- - 1 file changed, 9 insertions(+), 5 deletions(-) - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index fbd2214d20..7490e5943d 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -579,11 +579,17 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - memcpy(CMSG_DATA(cmsg), fds, fdsize); - } - --#ifdef QEMU_MSG_ZEROCOPY - if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { -+#ifdef QEMU_MSG_ZEROCOPY - sflags = MSG_ZEROCOPY; -- } -+#else -+ /* -+ * We expect QIOChannel class entry point to have -+ * blocked this code path already -+ */ -+ g_assert_not_reached(); - #endif -+ } - - retry: - ret = sendmsg(sioc->fd, &msg, sflags); -@@ -593,15 +599,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - return QIO_CHANNEL_ERR_BLOCK; - case EINTR: - goto retry; --#ifdef QEMU_MSG_ZEROCOPY - case ENOBUFS: -- if (sflags & MSG_ZEROCOPY) { -+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { - error_setg_errno(errp, errno, - "Process can't lock enough memory for using MSG_ZEROCOPY"); - return -1; - } - break; --#endif - } - - error_setg_errno(errp, errno, --- -2.35.3 - diff --git a/SOURCES/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch 
b/SOURCES/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch deleted file mode 100644 index 8a0aeb0..0000000 --- a/SOURCES/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch +++ /dev/null @@ -1,237 +0,0 @@ -From 055edf068196622a3e1868c9e4c991d410272a6d Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Wed, 15 Jun 2022 15:28:27 +0200 -Subject: [PATCH 03/18] RHEL-only: AArch64: Drop unsupported CPU types -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrangé -RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models -RH-Commit: [3/6] 21f54c86dc87e5e75a64459b5a385686bc09640c (berrange/centos-src-qemu) -RH-Bugzilla: 2060839 -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 -Upstream Status: RHEL only - -We only need to support AArch64 cpu types and we only need three -types: - 1) A base type to use with TCG, i.e. a cpu type with only base - features. 'cortex-a57' serves this role and is currently used - by libguestfs. - 2) The 'max' type, which is for both KVM and TCG and is good for - tests that just specify 'max' but run under both. 'max' with - TCG also provides the VM with all the CPU features TCG - supports, which is good for VMs that need features not - provided by the basic cortex-a57. - 3) The host type which is used with KVM. 
- -Signed-off-by: Andrew Jones ---- - hw/arm/virt.c | 4 ++++ - target/arm/cpu64.c | 6 ++++++ - target/arm/cpu_tcg.c | 12 ++---------- - tests/qtest/arm-cpu-features.c | 6 ++++++ - 4 files changed, 18 insertions(+), 10 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 95d012d6eb..74119976d3 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -239,12 +239,16 @@ static const int a15irqmap[] = { - }; - - static const char *valid_cpus[] = { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - ARM_CPU_TYPE_NAME("cortex-a7"), - ARM_CPU_TYPE_NAME("cortex-a15"), - ARM_CPU_TYPE_NAME("cortex-a53"), -+#endif /* disabled for RHEL */ - ARM_CPU_TYPE_NAME("cortex-a57"), -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - ARM_CPU_TYPE_NAME("cortex-a72"), - ARM_CPU_TYPE_NAME("a64fx"), -+#endif /* disabled for RHEL */ - ARM_CPU_TYPE_NAME("host"), - ARM_CPU_TYPE_NAME("max"), - }; -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index eb44c05822..e80b831073 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -146,6 +146,7 @@ static void aarch64_a57_initfn(Object *obj) - define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void aarch64_a53_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -249,6 +250,7 @@ static void aarch64_a72_initfn(Object *obj) - cpu->gic_vprebits = 5; - define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); - } -+#endif /* disabled for RHEL */ - - void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) - { -@@ -923,6 +925,7 @@ static void aarch64_max_initfn(Object *obj) - qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void aarch64_a64fx_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -969,12 +972,15 @@ static void aarch64_a64fx_initfn(Object *obj) - - /* TODO: Add A64FX specific HPC extension registers */ - } -+#endif /* disabled for RHEL */ - - static const 
ARMCPUInfo aarch64_cpus[] = { - { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, - { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, - { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, -+#endif /* disabled for RHEL */ - { .name = "max", .initfn = aarch64_max_initfn }, - #if defined(CONFIG_KVM) || defined(CONFIG_HVF) - { .name = "host", .initfn = aarch64_host_initfn }, -diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index 3826fa5122..74727fc92c 100644 ---- a/target/arm/cpu_tcg.c -+++ b/target/arm/cpu_tcg.c -@@ -19,10 +19,10 @@ - #include "hw/boards.h" - #endif - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - /* CPU models. These are not needed for the AArch64 linux-user build. */ - #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) - static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) - { -@@ -376,7 +376,6 @@ static void cortex_a9_initfn(Object *obj) - cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. 
*/ - define_arm_cp_regs(cpu, cortexa9_cp_reginfo); - } --#endif /* disabled for RHEL */ - - #ifndef CONFIG_USER_ONLY - static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -402,7 +401,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { - REGINFO_SENTINEL - }; - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_a7_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -448,7 +446,6 @@ static void cortex_a7_initfn(Object *obj) - cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ - } --#endif /* disabled for RHEL */ - - static void cortex_a15_initfn(Object *obj) - { -@@ -492,7 +489,6 @@ static void cortex_a15_initfn(Object *obj) - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); - } - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_m0_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -933,7 +929,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) - - cc->gdb_core_xml_file = "arm-m-profile.xml"; - } --#endif /* disabled for RHEL */ - - #ifndef TARGET_AARCH64 - /* -@@ -1013,7 +1008,6 @@ static void arm_max_initfn(Object *obj) - #endif /* !TARGET_AARCH64 */ - - static const ARMCPUInfo arm_tcg_cpus[] = { --#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "arm926", .initfn = arm926_initfn }, - { .name = "arm946", .initfn = arm946_initfn }, - { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1029,9 +1023,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "cortex-a7", .initfn = cortex_a7_initfn }, - { .name = "cortex-a8", .initfn = cortex_a8_initfn }, - { .name = "cortex-a9", .initfn = cortex_a9_initfn }, --#endif /* disabled for RHEL */ - { .name = "cortex-a15", .initfn = cortex_a15_initfn }, --#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-m0", .initfn = cortex_m0_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m3", .initfn = 
cortex_m3_initfn, -@@ -1062,7 +1054,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, - { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, - { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, --#endif /* disabled for RHEL */ - #ifndef TARGET_AARCH64 - { .name = "max", .initfn = arm_max_initfn }, - #endif -@@ -1090,3 +1081,4 @@ static void arm_tcg_cpu_register_types(void) - type_init(arm_tcg_cpu_register_types) - - #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ -+#endif /* disabled for RHEL */ -diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c -index f76652143a..fe2a0a070d 100644 ---- a/tests/qtest/arm-cpu-features.c -+++ b/tests/qtest/arm-cpu-features.c -@@ -440,8 +440,10 @@ static void test_query_cpu_model_expansion(const void *data) - assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); - - /* Test expected feature presence/absence for some cpu types */ -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_has_feature_enabled(qts, "cortex-a15", "pmu"); - assert_has_not_feature(qts, "cortex-a15", "aarch64"); -+#endif /* disabled for RHEL */ - - /* Enabling and disabling pmu should always work. 
*/ - assert_has_feature_enabled(qts, "max", "pmu"); -@@ -458,6 +460,7 @@ static void test_query_cpu_model_expansion(const void *data) - assert_has_feature_enabled(qts, "cortex-a57", "pmu"); - assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_has_feature_enabled(qts, "a64fx", "pmu"); - assert_has_feature_enabled(qts, "a64fx", "aarch64"); - /* -@@ -470,6 +473,7 @@ static void test_query_cpu_model_expansion(const void *data) - "{ 'sve384': true }"); - assert_error(qts, "a64fx", "cannot enable sve640", - "{ 'sve640': true }"); -+#endif /* disabled for RHEL */ - - sve_tests_default(qts, "max"); - pauth_tests_default(qts, "max"); -@@ -505,9 +509,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) - QDict *resp; - char *error; - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_error(qts, "cortex-a15", - "We cannot guarantee the CPU type 'cortex-a15' works " - "with KVM on this host", NULL); -+#endif /* disabled for RHEL */ - - assert_has_feature_enabled(qts, "host", "aarch64"); - --- -2.35.3 - diff --git a/SOURCES/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch b/SOURCES/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch deleted file mode 100644 index a1cc4c7..0000000 --- a/SOURCES/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch +++ /dev/null @@ -1,95 +0,0 @@ -From d710394f68eb0b6116dd8ac76f619c192e0d5972 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Wed, 15 Jun 2022 15:28:27 +0200 -Subject: [PATCH 02/18] RHEL-only: tests/avocado: Switch aarch64 tests from a53 - to a57 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. 
Berrangé -RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models -RH-Commit: [2/6] e85ef69b42c411a6997e4da10ba05176368769b3 (berrange/centos-src-qemu) -RH-Bugzilla: 2060839 -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 -Upstream Status: RHEL only - -We plan to remove the cortex-a53 from the supported cpu types. Switch -all avocado tests that use it to the cortex-a57, which will work the -same and we intend to keep. We don't want to try and upstream this -change since the better upstream change would be to switch from the -a53 to 'max', but the upstream tests also need to use later guest -kernels to use 'max' (see qemu upstream commit 0942820408dc -("hw/arm/virt: Disable LPA2 for -machine virt-6.2") - -Signed-off-by: Andrew Jones ---- - tests/avocado/replay_kernel.py | 2 +- - tests/avocado/reverse_debugging.py | 2 +- - tests/avocado/tcg_plugins.py | 6 +++--- - 3 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py -index 0b2b0dc692..3a7b5f0748 100644 ---- a/tests/avocado/replay_kernel.py -+++ b/tests/avocado/replay_kernel.py -@@ -147,7 +147,7 @@ def test_aarch64_virt(self): - """ - :avocado: tags=arch:aarch64 - :avocado: tags=machine:virt -- :avocado: tags=cpu:cortex-a53 -+ :avocado: tags=cpu:cortex-a57 - """ - kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' - '/linux/releases/29/Everything/aarch64/os/images/pxeboot' -diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py -index d2921e70c3..66d185ed42 100644 ---- a/tests/avocado/reverse_debugging.py -+++ b/tests/avocado/reverse_debugging.py -@@ -198,7 +198,7 @@ def test_aarch64_virt(self): - """ - :avocado: tags=arch:aarch64 - :avocado: tags=machine:virt -- :avocado: tags=cpu:cortex-a53 -+ :avocado: tags=cpu:cortex-a57 - """ - kernel_url = 
('https://archives.fedoraproject.org/pub/archive/fedora' - '/linux/releases/29/Everything/aarch64/os/images/pxeboot' -diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py -index 642d2e49e3..93b3afd823 100644 ---- a/tests/avocado/tcg_plugins.py -+++ b/tests/avocado/tcg_plugins.py -@@ -68,7 +68,7 @@ def test_aarch64_virt_insn(self): - :avocado: tags=accel:tcg - :avocado: tags=arch:aarch64 - :avocado: tags=machine:virt -- :avocado: tags=cpu:cortex-a53 -+ :avocado: tags=cpu:cortex-a57 - """ - kernel_path = self._grab_aarch64_kernel() - kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + -@@ -94,7 +94,7 @@ def test_aarch64_virt_insn_icount(self): - :avocado: tags=accel:tcg - :avocado: tags=arch:aarch64 - :avocado: tags=machine:virt -- :avocado: tags=cpu:cortex-a53 -+ :avocado: tags=cpu:cortex-a57 - """ - kernel_path = self._grab_aarch64_kernel() - kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + -@@ -120,7 +120,7 @@ def test_aarch64_virt_mem_icount(self): - :avocado: tags=accel:tcg - :avocado: tags=arch:aarch64 - :avocado: tags=machine:virt -- :avocado: tags=cpu:cortex-a53 -+ :avocado: tags=cpu:cortex-a57 - """ - kernel_path = self._grab_aarch64_kernel() - kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + --- -2.35.3 - diff --git a/SOURCES/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch b/SOURCES/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch deleted file mode 100644 index 7740d0b..0000000 --- a/SOURCES/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 5ab8613582fd56b847fe75750acb5b7255900b35 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Thu, 9 Jun 2022 11:55:15 +0200 -Subject: [PATCH 15/16] Revert "globally limit the maximum number of CPUs" - -RH-Author: Vitaly Kuznetsov -RH-MergeRequest: 99: Revert "globally limit the maximum number of CPUs" -RH-Commit: [1/1] 13100d4a2209b2190a3654c1f9cf4ebade1e8d24 (vkuznets/qemu-kvm-c9s) -RH-Bugzilla: 2094270 
-RH-Acked-by: Andrew Jones -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094270 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871149 -Upstream Status: RHEL-only -Tested: with upstream kernel - -Downstream QEMU carries a patch that sets the hard limit of possible vCPUs -to the value that the KVM code of the kernel recommends as soft limit. -Upstream KVM code has been changed recently to not use an arbitrary soft -limit anymore, but to cap the value on the amount of available physical -CPUs of the host. This defeats the purpose of the downstream change in -QEMU completely. Drop the downstream-only patch to allow CPU overcommit. - -This reverts commit 6669f6fa677d43144f39d6ad59725b7ba622f1c2. - -Signed-off-by: Vitaly Kuznetsov ---- - accel/kvm/kvm-all.c | 12 ------------ - 1 file changed, 12 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index fdf0e4d429..5f1377ca04 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -2430,18 +2430,6 @@ static int kvm_init(MachineState *ms) - soft_vcpus_limit = kvm_recommended_vcpus(s); - hard_vcpus_limit = kvm_max_vcpus(s); - --#ifdef HOST_PPC64 -- /* -- * On POWER, the kernel advertises a soft limit based on the -- * number of CPU threads on the host. We want to allow exceeding -- * this for testing purposes, so we don't want to set hard limit -- * to soft limit as on x86. 
-- */ --#else -- /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ -- hard_vcpus_limit = soft_vcpus_limit; --#endif -- - while (nc->name) { - if (nc->num > soft_vcpus_limit) { - warn_report("Number of %s cpus requested (%d) exceeds " --- -2.31.1 - diff --git a/SOURCES/kvm-Revert-migration-Simplify-unqueue_page.patch b/SOURCES/kvm-Revert-migration-Simplify-unqueue_page.patch deleted file mode 100644 index f5c97f6..0000000 --- a/SOURCES/kvm-Revert-migration-Simplify-unqueue_page.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 5ea59b17866add54e5ae8c76d3cb472c67e1fa91 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 2 Aug 2022 08:19:49 +0200 -Subject: [PATCH 32/32] Revert "migration: Simplify unqueue_page()" - -RH-Author: Thomas Huth -RH-MergeRequest: 112: Fix postcopy migration on s390x -RH-Commit: [2/2] 3913c9ed3f27f4b66245913da29d0c46db0c6567 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2099934 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Peter Xu - -This reverts commit cfd66f30fb0f735df06ff4220e5000290a43dad3. - -The simplification of unqueue_page() introduced a bug that sometimes -breaks migration on s390x hosts. - -The problem is not fully understood yet, but since we are already in -the freeze for QEMU 7.1 and we need something working there, let's -revert this patch for the upcoming release. The optimization can be -redone later again in a proper way if necessary. - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2099934 -Signed-off-by: Thomas Huth -Message-Id: <20220802061949.331576-1-thuth@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 777f53c75983dd10756f5dbfc8af50fe11da81c1) -Conflicts: - migration/trace-events - (trivial contextual conflict) -Signed-off-by: Thomas Huth ---- - migration/ram.c | 37 ++++++++++++++++++++++++++----------- - migration/trace-events | 3 ++- - 2 files changed, 28 insertions(+), 12 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index fb6db54642..ee40e4a718 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1548,7 +1548,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) - { - struct RAMSrcPageRequest *entry; - RAMBlock *block = NULL; -- size_t page_size; - - if (!postcopy_has_request(rs)) { - return NULL; -@@ -1565,13 +1564,10 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) - entry = QSIMPLEQ_FIRST(&rs->src_page_requests); - block = entry->rb; - *offset = entry->offset; -- page_size = qemu_ram_pagesize(block); -- /* Each page request should only be multiple page size of the ramblock */ -- assert((entry->len % page_size) == 0); - -- if (entry->len > page_size) { -- entry->len -= page_size; -- entry->offset += page_size; -+ if (entry->len > TARGET_PAGE_SIZE) { -+ entry->len -= TARGET_PAGE_SIZE; -+ entry->offset += TARGET_PAGE_SIZE; - } else { - memory_region_unref(block->mr); - QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); -@@ -1579,9 +1575,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) - migration_consume_urgent_request(); - } - -- trace_unqueue_page(block->idstr, *offset, -- test_bit((*offset >> TARGET_PAGE_BITS), block->bmap)); -- - return block; - } - -@@ -1956,8 +1949,30 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) - { - RAMBlock *block; - ram_addr_t offset; -+ bool dirty; -+ -+ do { -+ block = unqueue_page(rs, &offset); -+ /* -+ * We're sending this page, and since it's postcopy nothing else -+ * will dirty it, and we must make sure it doesn't get sent again -+ * even if this queue request was received 
after the background -+ * search already sent it. -+ */ -+ if (block) { -+ unsigned long page; -+ -+ page = offset >> TARGET_PAGE_BITS; -+ dirty = test_bit(page, block->bmap); -+ if (!dirty) { -+ trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset, -+ page); -+ } else { -+ trace_get_queued_page(block->idstr, (uint64_t)offset, page); -+ } -+ } - -- block = unqueue_page(rs, &offset); -+ } while (block && !dirty); - - if (!block) { - /* -diff --git a/migration/trace-events b/migration/trace-events -index 1aec580e92..09d61ed1f4 100644 ---- a/migration/trace-events -+++ b/migration/trace-events -@@ -85,6 +85,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)" - qemu_file_fclose(void) "" - - # ram.c -+get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" -+get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" - migration_bitmap_sync_start(void) "" - migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 - migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx" -@@ -110,7 +112,6 @@ ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRI - ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64 - ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" - ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" --unqueue_page(char *block, uint64_t offset, bool dirty) "ramblock '%s' offset 0x%"PRIx64" dirty %d" - - # multifd.c - multifd_new_send_channel_async(uint8_t id) "channel %u" --- -2.31.1 - diff --git a/SOURCES/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch 
b/SOURCES/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch new file mode 100644 index 0000000..752aa08 --- /dev/null +++ b/SOURCES/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch @@ -0,0 +1,140 @@ +From 0c19fb7c4a22a30830152b224b2e66963f829a7a Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Thu, 19 Jan 2023 18:24:24 +0100 +Subject: [PATCH 19/20] Revert "vhost-user: Introduce nested event loop in + vhost_user_read()" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 146: Fix vhost-user with dpdk +RH-Bugzilla: 2155173 +RH-Acked-by: Cindy Lu +RH-Acked-by: Greg Kurz (RH) +RH-Acked-by: Eugenio Pérez +RH-Commit: [2/2] 9b67041f92f29f70b7ccb41d8087801e4e4e38af (lvivier/qemu-kvm-centos) + +This reverts commit a7f523c7d114d445c5d83aecdba3efc038e5a692. + +The nested event loop is broken by design. It's only user was removed. +Drop the code as well so that nobody ever tries to use it again. + +I had to fix a couple of trivial conflicts around return values because +of 025faa872bcf ("vhost-user: stick to -errno error return convention"). + +Signed-off-by: Greg Kurz +Message-Id: <20230119172424.478268-3-groug@kaod.org> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Maxime Coquelin +(cherry picked from commit 4382138f642f69fdbc79ebf4e93d84be8061191f) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-user.c | 65 ++++-------------------------------------- + 1 file changed, 5 insertions(+), 60 deletions(-) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index 0ac00eb901..7cb49c50f9 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -305,19 +305,8 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) + return 0; + } + +-struct vhost_user_read_cb_data { +- struct vhost_dev *dev; +- VhostUserMsg *msg; +- GMainLoop *loop; +- int ret; +-}; +- +-static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, +- gpointer opaque) ++static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) + { +- struct vhost_user_read_cb_data *data = opaque; +- struct vhost_dev *dev = data->dev; +- VhostUserMsg *msg = data->msg; + struct vhost_user *u = dev->opaque; + CharBackend *chr = u->user->chr; + uint8_t *p = (uint8_t *) msg; +@@ -325,8 +314,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, + + r = vhost_user_read_header(dev, msg); + if (r < 0) { +- data->ret = r; +- goto end; ++ return r; + } + + /* validate message size is sane */ +@@ -334,8 +322,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, + error_report("Failed to read msg header." + " Size %d exceeds the maximum %zu.", msg->hdr.size, + VHOST_USER_PAYLOAD_SIZE); +- data->ret = -EPROTO; +- goto end; ++ return -EPROTO; + } + + if (msg->hdr.size) { +@@ -346,53 +333,11 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, + int saved_errno = errno; + error_report("Failed to read msg payload." + " Read %d instead of %d.", r, msg->hdr.size); +- data->ret = r < 0 ? -saved_errno : -EIO; +- goto end; ++ return r < 0 ? 
-saved_errno : -EIO; + } + } + +-end: +- g_main_loop_quit(data->loop); +- return G_SOURCE_REMOVE; +-} +- +-static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) +-{ +- struct vhost_user *u = dev->opaque; +- CharBackend *chr = u->user->chr; +- GMainContext *prev_ctxt = chr->chr->gcontext; +- GMainContext *ctxt = g_main_context_new(); +- GMainLoop *loop = g_main_loop_new(ctxt, FALSE); +- struct vhost_user_read_cb_data data = { +- .dev = dev, +- .loop = loop, +- .msg = msg, +- .ret = 0 +- }; +- +- /* +- * We want to be able to monitor the slave channel fd while waiting +- * for chr I/O. This requires an event loop, but we can't nest the +- * one to which chr is currently attached : its fd handlers might not +- * be prepared for re-entrancy. So we create a new one and switch chr +- * to use it. +- */ +- qemu_chr_be_update_read_handlers(chr->chr, ctxt); +- qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); +- +- g_main_loop_run(loop); +- +- /* +- * Restore the previous event loop context. This also destroys/recreates +- * event sources : this guarantees that all pending events in the original +- * context that have been processed by the nested loop are purged. 
+- */ +- qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); +- +- g_main_loop_unref(loop); +- g_main_context_unref(ctxt); +- +- return data.ret; ++ return 0; + } + + static int process_message_reply(struct vhost_dev *dev, +-- +2.31.1 + diff --git a/SOURCES/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch b/SOURCES/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch new file mode 100644 index 0000000..8e7b906 --- /dev/null +++ b/SOURCES/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch @@ -0,0 +1,143 @@ +From 9fb47ad317ad8cdda9960190d499ad6c3a9817f0 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Thu, 19 Jan 2023 18:24:23 +0100 +Subject: [PATCH 18/20] Revert "vhost-user: Monitor slave channel in + vhost_user_read()" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 146: Fix vhost-user with dpdk +RH-Bugzilla: 2155173 +RH-Acked-by: Cindy Lu +RH-Acked-by: Greg Kurz (RH) +RH-Acked-by: Eugenio Pérez +RH-Commit: [1/2] c583a7f121ca9c93c9a2ad17bf0ccf5c1241dc99 (lvivier/qemu-kvm-centos) + +This reverts commit db8a3772e300c1a656331a92da0785d81667dc81. + +Motivation : this is breaking vhost-user with DPDK as reported in [0]. + +Received unexpected msg type. Expected 22 received 40 +Fail to update device iotlb +Received unexpected msg type. Expected 40 received 22 +Received unexpected msg type. Expected 22 received 11 +Fail to update device iotlb +Received unexpected msg type. Expected 11 received 22 +vhost VQ 1 ring restore failed: -71: Protocol error (71) +Received unexpected msg type. Expected 22 received 11 +Fail to update device iotlb +Received unexpected msg type. 
Expected 11 received 22 +vhost VQ 0 ring restore failed: -71: Protocol error (71) +unable to start vhost net: 71: falling back on userspace virtio + +The failing sequence that leads to the first error is : +- QEMU sends a VHOST_USER_GET_STATUS (40) request to DPDK on the master + socket +- QEMU starts a nested event loop in order to wait for the + VHOST_USER_GET_STATUS response and to be able to process messages from + the slave channel +- DPDK sends a couple of legitimate IOTLB miss messages on the slave + channel +- QEMU processes each IOTLB request and sends VHOST_USER_IOTLB_MSG (22) + updates on the master socket +- QEMU assumes to receive a response for the latest VHOST_USER_IOTLB_MSG + but it gets the response for the VHOST_USER_GET_STATUS instead + +The subsequent errors have the same root cause : the nested event loop +breaks the order by design. It lures QEMU to expect responses to the +latest message sent on the master socket to arrive first. + +Since this was only needed for DAX enablement which is still not merged +upstream, just drop the code for now. A working solution will have to +be merged later on. Likely protect the master socket with a mutex +and service the slave channel with a separate thread, as discussed with +Maxime in the mail thread below. + +[0] https://lore.kernel.org/qemu-devel/43145ede-89dc-280e-b953-6a2b436de395@redhat.com/ + +Reported-by: Yanghang Liu +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2155173 +Signed-off-by: Greg Kurz +Message-Id: <20230119172424.478268-2-groug@kaod.org> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Stefan Hajnoczi +Acked-by: Maxime Coquelin +(cherry picked from commit f340a59d5a852d75ae34555723694c7e8eafbd0c) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-user.c | 35 +++-------------------------------- + 1 file changed, 3 insertions(+), 32 deletions(-) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index 8f635844af..0ac00eb901 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -356,35 +356,6 @@ end: + return G_SOURCE_REMOVE; + } + +-static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, +- gpointer opaque); +- +-/* +- * This updates the read handler to use a new event loop context. +- * Event sources are removed from the previous context : this ensures +- * that events detected in the previous context are purged. They will +- * be re-detected and processed in the new context. +- */ +-static void slave_update_read_handler(struct vhost_dev *dev, +- GMainContext *ctxt) +-{ +- struct vhost_user *u = dev->opaque; +- +- if (!u->slave_ioc) { +- return; +- } +- +- if (u->slave_src) { +- g_source_destroy(u->slave_src); +- g_source_unref(u->slave_src); +- } +- +- u->slave_src = qio_channel_add_watch_source(u->slave_ioc, +- G_IO_IN | G_IO_HUP, +- slave_read, dev, NULL, +- ctxt); +-} +- + static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) + { + struct vhost_user *u = dev->opaque; +@@ -406,7 +377,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) + * be prepared for re-entrancy. So we create a new one and switch chr + * to use it. + */ +- slave_update_read_handler(dev, ctxt); + qemu_chr_be_update_read_handlers(chr->chr, ctxt); + qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); + +@@ -418,7 +388,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) + * context that have been processed by the nested loop are purged. 
+ */ + qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); +- slave_update_read_handler(dev, NULL); + + g_main_loop_unref(loop); + g_main_context_unref(ctxt); +@@ -1802,7 +1771,9 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) + return -ECONNREFUSED; + } + u->slave_ioc = ioc; +- slave_update_read_handler(dev, NULL); ++ u->slave_src = qio_channel_add_watch_source(u->slave_ioc, ++ G_IO_IN | G_IO_HUP, ++ slave_read, dev, NULL, NULL); + + if (reply_supported) { + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; +-- +2.31.1 + diff --git a/SOURCES/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch b/SOURCES/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch deleted file mode 100644 index e8eb35d..0000000 --- a/SOURCES/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 733acef2caea0758edd74fb634b095ce09bf5914 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Mon, 9 May 2022 03:46:23 -0400 -Subject: [PATCH 15/16] Revert "virtio-scsi: Reject scsi-cd if data plane - enabled [RHEL only]" - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 91: Revert "virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only]" -RH-Commit: [1/1] 1af55d792bc9166e5c86272afe8093c76ab41bb4 (eesposit/qemu-kvm) -RH-Bugzilla: 1995710 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi - -This reverts commit 4e17b1126e. - -Over time AioContext usage and coverage has increased, and now block -backend is capable of handling AioContext change upon eject and insert. -Therefore the above downstream-only commit is not necessary anymore, -and can be safely reverted. 
- -X-downstream-only: true - -Signed-off-by: Emanuele Giuseppe Esposito ---- - hw/scsi/virtio-scsi.c | 9 --------- - 1 file changed, 9 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 2450c9438c..db54d104be 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -937,15 +937,6 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, - AioContext *old_context; - int ret; - -- /* XXX: Remove this check once block backend is capable of handling -- * AioContext change upon eject/insert. -- * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if -- * data plane is not used, both cases are safe for scsi-cd. */ -- if (s->ctx && s->ctx != qemu_get_aio_context() && -- object_dynamic_cast(OBJECT(dev), "scsi-cd")) { -- error_setg(errp, "scsi-cd is not supported by data plane"); -- return; -- } - if (s->ctx && !s->dataplane_fenced) { - if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - return; --- -2.31.1 - diff --git a/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch b/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch new file mode 100644 index 0000000..29a8ac5 --- /dev/null +++ b/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch @@ -0,0 +1,348 @@ +From ae2077fd5d351a68c313c64f07fb225dff694a8f Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 16 Jan 2023 07:16:41 -0500 +Subject: [PATCH 29/31] accel: introduce accelerator blocker API +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 138: accel: introduce accelerator blocker API +RH-Bugzilla: 1979276 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/3] 56b07cd7db516c5066e6d66b4695064fdf73abbf (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276 + +commit 
bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1 +Author: Emanuele Giuseppe Esposito +Date: Fri Nov 11 10:47:56 2022 -0500 + + accel: introduce accelerator blocker API + + This API allows the accelerators to prevent vcpus from issuing + new ioctls while execting a critical section marked with the + accel_ioctl_inhibit_begin/end functions. + + Note that all functions submitting ioctls must mark where the + ioctl is being called with accel_{cpu_}ioctl_begin/end(). + + This API requires the caller to always hold the BQL. + API documentation is in sysemu/accel-blocker.h + + Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt + (to minimize cache line bouncing) to keep avoid that new ioctls + run when the critical section starts, and a QemuEvent to wait + that all running ioctls finish. + + Signed-off-by: Emanuele Giuseppe Esposito + Reviewed-by: Philippe Mathieu-Daudé + Message-Id: <20221111154758.1372674-2-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Conflicts: + util/meson.build: "interval-tree.c" does not exist + +Signed-off-by: Emanuele Giuseppe Esposito +--- + accel/accel-blocker.c | 154 +++++++++++++++++++++++++++++++++ + accel/meson.build | 2 +- + hw/core/cpu-common.c | 2 + + include/hw/core/cpu.h | 3 + + include/sysemu/accel-blocker.h | 56 ++++++++++++ + util/meson.build | 2 +- + 6 files changed, 217 insertions(+), 2 deletions(-) + create mode 100644 accel/accel-blocker.c + create mode 100644 include/sysemu/accel-blocker.h + +diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c +new file mode 100644 +index 0000000000..1e7f423462 +--- /dev/null ++++ b/accel/accel-blocker.c +@@ -0,0 +1,154 @@ ++/* ++ * Lock to inhibit accelerator ioctls ++ * ++ * Copyright (c) 2022 Red Hat Inc. 
++ * ++ * Author: Emanuele Giuseppe Esposito ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/thread.h" ++#include "qemu/main-loop.h" ++#include "hw/core/cpu.h" ++#include "sysemu/accel-blocker.h" ++ ++static QemuLockCnt accel_in_ioctl_lock; ++static QemuEvent accel_in_ioctl_event; ++ ++void accel_blocker_init(void) ++{ ++ qemu_lockcnt_init(&accel_in_ioctl_lock); ++ qemu_event_init(&accel_in_ioctl_event, false); ++} ++ ++void accel_ioctl_begin(void) ++{ ++ if (likely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ /* block if lock is taken in kvm_ioctl_inhibit_begin() */ ++ qemu_lockcnt_inc(&accel_in_ioctl_lock); ++} ++ ++void accel_ioctl_end(void) ++{ ++ if (likely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ qemu_lockcnt_dec(&accel_in_ioctl_lock); ++ /* change event to SET. 
If event was BUSY, wake up all waiters */ ++ qemu_event_set(&accel_in_ioctl_event); ++} ++ ++void accel_cpu_ioctl_begin(CPUState *cpu) ++{ ++ if (unlikely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ /* block if lock is taken in kvm_ioctl_inhibit_begin() */ ++ qemu_lockcnt_inc(&cpu->in_ioctl_lock); ++} ++ ++void accel_cpu_ioctl_end(CPUState *cpu) ++{ ++ if (unlikely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ qemu_lockcnt_dec(&cpu->in_ioctl_lock); ++ /* change event to SET. If event was BUSY, wake up all waiters */ ++ qemu_event_set(&accel_in_ioctl_event); ++} ++ ++static bool accel_has_to_wait(void) ++{ ++ CPUState *cpu; ++ bool needs_to_wait = false; ++ ++ CPU_FOREACH(cpu) { ++ if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) { ++ /* exit the ioctl, if vcpu is running it */ ++ qemu_cpu_kick(cpu); ++ needs_to_wait = true; ++ } ++ } ++ ++ return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock); ++} ++ ++void accel_ioctl_inhibit_begin(void) ++{ ++ CPUState *cpu; ++ ++ /* ++ * We allow to inhibit only when holding the BQL, so we can identify ++ * when an inhibitor wants to issue an ioctl easily. ++ */ ++ g_assert(qemu_mutex_iothread_locked()); ++ ++ /* Block further invocations of the ioctls outside the BQL. */ ++ CPU_FOREACH(cpu) { ++ qemu_lockcnt_lock(&cpu->in_ioctl_lock); ++ } ++ qemu_lockcnt_lock(&accel_in_ioctl_lock); ++ ++ /* Keep waiting until there are running ioctls */ ++ while (true) { ++ ++ /* Reset event to FREE. */ ++ qemu_event_reset(&accel_in_ioctl_event); ++ ++ if (accel_has_to_wait()) { ++ /* ++ * If event is still FREE, and there are ioctls still in progress, ++ * wait. ++ * ++ * If an ioctl finishes before qemu_event_wait(), it will change ++ * the event state to SET. This will prevent qemu_event_wait() from ++ * blocking, but it's not a problem because if other ioctls are ++ * still running the loop will iterate once more and reset the event ++ * status to FREE so that it can wait properly. 
++ * ++ * If an ioctls finishes while qemu_event_wait() is blocking, then ++ * it will be waken up, but also here the while loop makes sure ++ * to re-enter the wait if there are other running ioctls. ++ */ ++ qemu_event_wait(&accel_in_ioctl_event); ++ } else { ++ /* No ioctl is running */ ++ return; ++ } ++ } ++} ++ ++void accel_ioctl_inhibit_end(void) ++{ ++ CPUState *cpu; ++ ++ qemu_lockcnt_unlock(&accel_in_ioctl_lock); ++ CPU_FOREACH(cpu) { ++ qemu_lockcnt_unlock(&cpu->in_ioctl_lock); ++ } ++} ++ +diff --git a/accel/meson.build b/accel/meson.build +index 259c35c4c8..061332610f 100644 +--- a/accel/meson.build ++++ b/accel/meson.build +@@ -1,4 +1,4 @@ +-specific_ss.add(files('accel-common.c')) ++specific_ss.add(files('accel-common.c', 'accel-blocker.c')) + softmmu_ss.add(files('accel-softmmu.c')) + user_ss.add(files('accel-user.c')) + +diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c +index f9fdd46b9d..8d6a4b1b65 100644 +--- a/hw/core/cpu-common.c ++++ b/hw/core/cpu-common.c +@@ -237,6 +237,7 @@ static void cpu_common_initfn(Object *obj) + cpu->nr_threads = 1; + + qemu_mutex_init(&cpu->work_mutex); ++ qemu_lockcnt_init(&cpu->in_ioctl_lock); + QSIMPLEQ_INIT(&cpu->work_list); + QTAILQ_INIT(&cpu->breakpoints); + QTAILQ_INIT(&cpu->watchpoints); +@@ -248,6 +249,7 @@ static void cpu_common_finalize(Object *obj) + { + CPUState *cpu = CPU(obj); + ++ qemu_lockcnt_destroy(&cpu->in_ioctl_lock); + qemu_mutex_destroy(&cpu->work_mutex); + } + +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index 8830546121..2417597236 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -398,6 +398,9 @@ struct CPUState { + uint32_t kvm_fetch_index; + uint64_t dirty_pages; + ++ /* Use by accel-block: CPU is executing an ioctl() */ ++ QemuLockCnt in_ioctl_lock; ++ + /* Used for events with 'vcpu' and *without* the 'disabled' properties */ + DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); + DECLARE_BITMAP(trace_dstate, 
CPU_TRACE_DSTATE_MAX_EVENTS); +diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h +new file mode 100644 +index 0000000000..72020529ef +--- /dev/null ++++ b/include/sysemu/accel-blocker.h +@@ -0,0 +1,56 @@ ++/* ++ * Accelerator blocking API, to prevent new ioctls from starting and wait the ++ * running ones finish. ++ * This mechanism differs from pause/resume_all_vcpus() in that it does not ++ * release the BQL. ++ * ++ * Copyright (c) 2022 Red Hat Inc. ++ * ++ * Author: Emanuele Giuseppe Esposito ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++#ifndef ACCEL_BLOCKER_H ++#define ACCEL_BLOCKER_H ++ ++#include "qemu/osdep.h" ++#include "sysemu/cpus.h" ++ ++extern void accel_blocker_init(void); ++ ++/* ++ * accel_{cpu_}ioctl_begin/end: ++ * Mark when ioctl is about to run or just finished. ++ * ++ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is ++ * called, preventing new ioctls to run. They will continue only after ++ * accel_ioctl_inibith_end(). ++ */ ++extern void accel_ioctl_begin(void); ++extern void accel_ioctl_end(void); ++extern void accel_cpu_ioctl_begin(CPUState *cpu); ++extern void accel_cpu_ioctl_end(CPUState *cpu); ++ ++/* ++ * accel_ioctl_inhibit_begin: start critical section ++ * ++ * This function makes sure that: ++ * 1) incoming accel_{cpu_}ioctl_begin() calls block ++ * 2) wait that all ioctls that were already running reach ++ * accel_{cpu_}ioctl_end(), kicking vcpus if necessary. ++ * ++ * This allows the caller to access shared data or perform operations without ++ * worrying of concurrent vcpus accesses. ++ */ ++extern void accel_ioctl_inhibit_begin(void); ++ ++/* ++ * accel_ioctl_inhibit_end: end critical section started by ++ * accel_ioctl_inhibit_begin() ++ * ++ * This function allows blocked accel_{cpu_}ioctl_begin() to continue. 
++ */ ++extern void accel_ioctl_inhibit_end(void); ++ ++#endif /* ACCEL_BLOCKER_H */ +diff --git a/util/meson.build b/util/meson.build +index 25b9b61f98..85a5504c4d 100644 +--- a/util/meson.build ++++ b/util/meson.build +@@ -57,6 +57,7 @@ util_ss.add(files('guest-random.c')) + util_ss.add(files('yank.c')) + util_ss.add(files('int128.c')) + util_ss.add(files('memalign.c')) ++util_ss.add(files('lockcnt.c')) + + if have_user + util_ss.add(files('selfmap.c')) +@@ -71,7 +72,6 @@ endif + if have_block or have_ga + util_ss.add(files('aiocb.c', 'async.c')) + util_ss.add(files('base64.c')) +- util_ss.add(files('lockcnt.c')) + util_ss.add(files('main-loop.c')) + util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c')) + util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND']))) +-- +2.31.1 + diff --git a/SOURCES/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch b/SOURCES/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch new file mode 100644 index 0000000..0680a26 --- /dev/null +++ b/SOURCES/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch @@ -0,0 +1,58 @@ +From ab68e13b7628f2348d41a4518a92508542af712f Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 3 Feb 2023 18:15:10 +0100 +Subject: [PATCH 05/20] accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page + +RH-Author: Eric Auger +RH-MergeRequest: 144: accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page +RH-Bugzilla: 2165280 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Gavin Shan +RH-Acked-by: Shaoqin Huang +RH-Commit: [1/1] 5b0863c34ba06c01c4e343d1ecd72402779c7de3 (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/2165280 +Upstream: yes +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=50530041 +Test: 'kvm unit test ./run_tests.sh -g debug' does not SIGSEV anymore + +After commit 4e4fa6c12d ("accel/tcg: Complete cpu initialization +before registration"), it looks the CPUJumpCache pointer can 
be NULL. +This causes a SIGSEV when running debug-wp-migration kvm unit test. + +At the first place it should be clarified why this TCG code is called +with KVM acceleration. This may hide another bug. + +Fixes: 4e4fa6c12d ("accel/tcg: Complete cpu initialization before registration") +Signed-off-by: Eric Auger +Message-Id: <20230203171510.2867451-1-eric.auger@redhat.com> +Signed-off-by: Richard Henderson +(cherry picked from commit 99ab4d500af638ba3ebb20e8aa89d72201b70860) +Signed-off-by: Eric Auger +--- + accel/tcg/cputlb.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c +index 6f1c00682b..4244b0e4e3 100644 +--- a/accel/tcg/cputlb.c ++++ b/accel/tcg/cputlb.c +@@ -100,9 +100,14 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, + + static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) + { +- int i, i0 = tb_jmp_cache_hash_page(page_addr); + CPUJumpCache *jc = cpu->tb_jmp_cache; ++ int i, i0; + ++ if (unlikely(!jc)) { ++ return; ++ } ++ ++ i0 = tb_jmp_cache_hash_page(page_addr); + for (i = 0; i < TB_JMP_PAGE_SIZE; i++) { + qatomic_set(&jc->array[i0 + i].tb, NULL); + } +-- +2.31.1 + diff --git a/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch b/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch new file mode 100644 index 0000000..ee7e7f9 --- /dev/null +++ b/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch @@ -0,0 +1,50 @@ +From e9a9c0b023ae0dcbb14543b74063cca931d8230f Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 08/12] aio-wait: switch to smp_mb__after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [5/9] a90c96d148fdbec340a45dc6cedf3660d8be2aab (eesposit/qemu-kvm) + +Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit b532526a07ef3b903ead2e055fe6cc87b41057a3 +Author: Paolo Bonzini +Date: Fri Mar 3 11:03:52 2023 +0100 + + aio-wait: switch to smp_mb__after_rmw() + + The barrier comes after an atomic increment, so it is enough to use + smp_mb__after_rmw(); this avoids a double barrier on x86 systems. + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + include/block/aio-wait.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h +index dd9a7f6461..da13357bb8 100644 +--- a/include/block/aio-wait.h ++++ b/include/block/aio-wait.h +@@ -85,7 +85,7 @@ extern AioWait global_aio_wait; + /* Increment wait_->num_waiters before evaluating cond. */ \ + qatomic_inc(&wait_->num_waiters); \ + /* Paired with smp_mb in aio_wait_kick(). */ \ +- smp_mb(); \ ++ smp_mb__after_rmw(); \ + if (ctx_ && in_aio_context_home_thread(ctx_)) { \ + while ((cond)) { \ + aio_poll(ctx_, true); \ +-- +2.39.1 + diff --git a/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch b/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch new file mode 100644 index 0000000..0e4a48d --- /dev/null +++ b/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch @@ -0,0 +1,66 @@ +From 3d823dda6832b76fd3d776131008107b0b0f7166 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 12/12] async: clarify usage of barriers in the polling case + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [9/9] b4ea298d75a75bb61e07a27d1296e0095fbc2bbf (eesposit/qemu-kvm) + +Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit 6229438cca037d42f44a96d38feb15cb102a444f +Author: Paolo Bonzini +Date: Mon Mar 6 10:43:52 2023 +0100 + + async: clarify usage of barriers in the polling case + + Explain that aio_context_notifier_poll() relies on + aio_notify_accept() to catch all the memory writes that were + done before ctx->notified was set to true. + + Reviewed-by: Richard Henderson + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/async.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/util/async.c b/util/async.c +index 37d3e6036d..e0846baf93 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -472,8 +472,9 @@ void aio_notify_accept(AioContext *ctx) + qatomic_set(&ctx->notified, false); + + /* +- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb +- * in aio_notify. ++ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the ++ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs ++ * with smp_wmb() in aio_notify. + */ + smp_mb(); + } +@@ -496,6 +497,11 @@ static bool aio_context_notifier_poll(void *opaque) + EventNotifier *e = opaque; + AioContext *ctx = container_of(e, AioContext, notifier); + ++ /* ++ * No need for load-acquire because we just want to kick the ++ * event loop. aio_notify_accept() takes care of synchronizing ++ * the event loop with the producers. 
++ */ + return qatomic_read(&ctx->notified); + } + +-- +2.39.1 + diff --git a/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch b/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch new file mode 100644 index 0000000..cb92dc9 --- /dev/null +++ b/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch @@ -0,0 +1,111 @@ +From 29bcf843d796ffc2a0906dea947e4cdfe9f7ec60 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 11/12] async: update documentation of the memory barriers + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [8/9] 5ca20e4c8983e0bc1ecee66bead3472777abe4d1 (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit 8dd48650b43dfde4ebea34191ac267e474bcc29e +Author: Paolo Bonzini +Date: Mon Mar 6 10:15:06 2023 +0100 + + async: update documentation of the memory barriers + + Ever since commit 8c6b0356b539 ("util/async: make bh_aio_poll() O(1)", + 2020-02-22), synchronization between qemu_bh_schedule() and aio_bh_poll() + is happening when the bottom half is enqueued in the bh_list; not + when the flags are set. Update the documentation to match. + + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/async.c | 33 +++++++++++++++++++-------------- + 1 file changed, 19 insertions(+), 14 deletions(-) + +diff --git a/util/async.c b/util/async.c +index 63434ddae4..37d3e6036d 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -73,14 +73,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags) + unsigned old_flags; + + /* +- * The memory barrier implicit in qatomic_fetch_or makes sure that: +- * 1. 
idle & any writes needed by the callback are done before the +- * locations are read in the aio_bh_poll. +- * 2. ctx is loaded before the callback has a chance to execute and bh +- * could be freed. ++ * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that ++ * insertion starts after BH_PENDING is set. + */ + old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags); ++ + if (!(old_flags & BH_PENDING)) { ++ /* ++ * At this point the bottom half becomes visible to aio_bh_poll(). ++ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in ++ * aio_bh_poll(), ensuring that: ++ * 1. any writes needed by the callback are visible from the callback ++ * after aio_bh_dequeue() returns bh. ++ * 2. ctx is loaded before the callback has a chance to execute and bh ++ * could be freed. ++ */ + QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next); + } + +@@ -106,11 +113,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags) + QSLIST_REMOVE_HEAD(head, next); + + /* +- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory +- * barrier ensures that the callback sees all writes done by the scheduling +- * thread. It also ensures that the scheduling thread sees the cleared +- * flag before bh->cb has run, and thus will call aio_notify again if +- * necessary. ++ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that ++ * the removal finishes before BH_PENDING is reset. + */ + *flags = qatomic_fetch_and(&bh->flags, + ~(BH_PENDING | BH_SCHEDULED | BH_IDLE)); +@@ -157,6 +161,7 @@ int aio_bh_poll(AioContext *ctx) + BHListSlice *s; + int ret = 0; + ++ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */ + QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list); + QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next); + +@@ -446,15 +451,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx) + void aio_notify(AioContext *ctx) + { + /* +- * Write e.g. bh->flags before writing ctx->notified. 
Pairs with smp_mb in +- * aio_notify_accept. ++ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with ++ * smp_mb() in aio_notify_accept(). + */ + smp_wmb(); + qatomic_set(&ctx->notified, true); + + /* +- * Write ctx->notified before reading ctx->notify_me. Pairs +- * with smp_mb in aio_ctx_prepare or aio_poll. ++ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me. ++ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll. + */ + smp_mb(); + if (qatomic_read(&ctx->notify_me)) { +-- +2.39.1 + diff --git a/SOURCES/kvm-block-Call-drain-callbacks-only-once.patch b/SOURCES/kvm-block-Call-drain-callbacks-only-once.patch new file mode 100644 index 0000000..04f1dda --- /dev/null +++ b/SOURCES/kvm-block-Call-drain-callbacks-only-once.patch @@ -0,0 +1,250 @@ +From 9bb9cafd736057fd2a8ebfa6f5769668f125fbe6 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:06 +0100 +Subject: [PATCH 24/31] block: Call drain callbacks only once + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [12/16] ea9a433dc01d1b8539a2d4ea12887f2a3ce830ea (sgarzarella/qemu-kvm-c-9-s) + +We only need to call both the BlockDriver's callback and the parent +callbacks when going from undrained to drained or vice versa. A second +drain section doesn't make a difference for the driver or the parent, +they weren't supposed to send new requests before and after the second +drain. + +One thing that gets in the way is the 'ignore_bds_parents' parameter in +bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that +bdrv_drain_all_begin() increases bs->quiesce_counter, but does not +quiesce the parent through BdrvChildClass callbacks. 
If an additional +drain section is started now, bs->quiesce_counter will be non-zero, but +we would still need to quiesce the parent through BdrvChildClass in +order to keep things consistent (and unquiesce it on the matching +bdrv_drained_end(), even though the counter would not reach 0 yet as +long as the bdrv_drain_all() section is still active). + +Instead of keeping track of this, let's just get rid of the parameter. +It was introduced in commit 6cd5c9d7b2d as an optimisation so that +during bdrv_drain_all(), we wouldn't recursively drain all parents up to +the root for each node, resulting in quadratic complexity. As it happens, +calling the callbacks only once solves the same problem, so as of this +patch, we'll still have O(n) complexity and ignore_bds_parents is not +needed any more. + +This patch only ignores the 'ignore_bds_parents' parameter. It will be +removed in a separate patch. + +Signed-off-by: Kevin Wolf +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-12-kwolf@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 57e05be343f33f4e5899a8d8946a8596d68424a1) +Signed-off-by: Stefano Garzarella +--- + block.c | 25 +++++++------------------ + block/io.c | 30 ++++++++++++++++++------------ + include/block/block_int-common.h | 8 ++++---- + tests/unit/test-bdrv-drain.c | 16 ++++++++++------ + 4 files changed, 39 insertions(+), 40 deletions(-) + +diff --git a/block.c b/block.c +index e0e3b21790..5a583e260d 100644 +--- a/block.c ++++ b/block.c +@@ -2824,7 +2824,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + { + BlockDriverState *old_bs = child->bs; + int new_bs_quiesce_counter; +- int drain_saldo; + + assert(!child->frozen); + assert(old_bs != new_bs); +@@ -2834,16 +2833,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); + } + +- new_bs_quiesce_counter = (new_bs ? 
new_bs->quiesce_counter : 0); +- drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; +- + /* + * If the new child node is drained but the old one was not, flush + * all outstanding requests to the old child node. + */ +- while (drain_saldo > 0 && child->klass->drained_begin) { ++ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); ++ if (new_bs_quiesce_counter && !child->quiesced_parent) { + bdrv_parent_drained_begin_single(child, true); +- drain_saldo--; + } + + if (old_bs) { +@@ -2859,16 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + if (new_bs) { + assert_bdrv_graph_writable(new_bs); + QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); +- +- /* +- * Polling in bdrv_parent_drained_begin_single() may have led to the new +- * node's quiesce_counter having been decreased. Not a problem, we just +- * need to recognize this here and then invoke drained_end appropriately +- * more often. +- */ +- assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); +- drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; +- + if (child->klass->attach) { + child->klass->attach(child); + } +@@ -2877,10 +2863,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + /* + * If the old child node was drained but the new one is not, allow + * requests to come in only after the new node has been attached. ++ * ++ * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() ++ * polls, which could have changed the value. + */ +- while (drain_saldo < 0 && child->klass->drained_end) { ++ new_bs_quiesce_counter = (new_bs ? 
new_bs->quiesce_counter : 0); ++ if (!new_bs_quiesce_counter && child->quiesced_parent) { + bdrv_parent_drained_end_single(child); +- drain_saldo++; + } + } + +diff --git a/block/io.c b/block/io.c +index 75224480d0..87d6f22ec4 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c) + { + IO_OR_GS_CODE(); + +- assert(c->parent_quiesce_counter > 0); +- c->parent_quiesce_counter--; ++ assert(c->quiesced_parent); ++ c->quiesced_parent = false; ++ + if (c->klass->drained_end) { + c->klass->drained_end(c); + } +@@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) + { + AioContext *ctx = bdrv_child_get_parent_aio_context(c); + IO_OR_GS_CODE(); +- c->parent_quiesce_counter++; ++ ++ assert(!c->quiesced_parent); ++ c->quiesced_parent = true; ++ + if (c->klass->drained_begin) { + c->klass->drained_begin(c); + } +@@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + /* Stop things in parent-to-child order */ + if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { + aio_disable_external(bdrv_get_aio_context(bs)); +- } + +- bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); +- if (bs->drv && bs->drv->bdrv_drain_begin) { +- bs->drv->bdrv_drain_begin(bs); ++ /* TODO Remove ignore_bds_parents, we don't consider it any more */ ++ bdrv_parent_drained_begin(bs, parent, false); ++ if (bs->drv && bs->drv->bdrv_drain_begin) { ++ bs->drv->bdrv_drain_begin(bs); ++ } + } + } + +@@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, + assert(bs->quiesce_counter > 0); + + /* Re-enable things in child-to-parent order */ +- if (bs->drv && bs->drv->bdrv_drain_end) { +- bs->drv->bdrv_drain_end(bs); +- } +- bdrv_parent_drained_end(bs, parent, ignore_bds_parents); +- + old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); + if (old_quiesce_counter == 1) { ++ if (bs->drv && bs->drv->bdrv_drain_end) { ++ bs->drv->bdrv_drain_end(bs); 
++ } ++ /* TODO Remove ignore_bds_parents, we don't consider it any more */ ++ bdrv_parent_drained_end(bs, parent, false); ++ + aio_enable_external(bdrv_get_aio_context(bs)); + } + } +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 791dddfd7d..a6bc6b7fe9 100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -980,13 +980,13 @@ struct BdrvChild { + bool frozen; + + /* +- * How many times the parent of this child has been drained ++ * True if the parent of this child has been drained by this BdrvChild + * (through klass->drained_*). +- * Usually, this is equal to bs->quiesce_counter (potentially +- * reduced by bdrv_drain_all_count). It may differ while the ++ * ++ * It is generally true if bs->quiesce_counter > 0. It may differ while the + * child is entering or leaving a drained section. + */ +- int parent_quiesce_counter; ++ bool quiesced_parent; + + QLIST_ENTRY(BdrvChild) next; + QLIST_ENTRY(BdrvChild) next_parent; +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index dda08de8db..172bc6debc 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive) + + do_drain_begin(drain_type, bs); + +- g_assert_cmpint(bs->quiesce_counter, ==, 1); ++ if (drain_type == BDRV_DRAIN_ALL) { ++ g_assert_cmpint(bs->quiesce_counter, ==, 2); ++ } else { ++ g_assert_cmpint(bs->quiesce_counter, ==, 1); ++ } + g_assert_cmpint(backing->quiesce_counter, ==, !!recursive); + + do_drain_end(drain_type, bs); +@@ -348,8 +352,8 @@ static void test_nested(void) + + for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { + for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { +- int backing_quiesce = (outer != BDRV_DRAIN) + +- (inner != BDRV_DRAIN); ++ int backing_quiesce = (outer == BDRV_DRAIN_ALL) + ++ (inner == BDRV_DRAIN_ALL); + + g_assert_cmpint(bs->quiesce_counter, ==, 0); + 
g_assert_cmpint(backing->quiesce_counter, ==, 0); +@@ -359,10 +363,10 @@ static void test_nested(void) + do_drain_begin(outer, bs); + do_drain_begin(inner, bs); + +- g_assert_cmpint(bs->quiesce_counter, ==, 2); ++ g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce); + g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); +- g_assert_cmpint(s->drain_count, ==, 2); +- g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce); ++ g_assert_cmpint(s->drain_count, ==, 1); ++ g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce); + + do_drain_end(inner, bs); + do_drain_end(outer, bs); +-- +2.31.1 + diff --git a/SOURCES/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch b/SOURCES/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch new file mode 100644 index 0000000..80018cc --- /dev/null +++ b/SOURCES/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch @@ -0,0 +1,298 @@ +From 150ef3356cc6732fede7ca059168fc0565ed0b76 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:09 +0100 +Subject: [PATCH 27/31] block: Don't poll in bdrv_replace_child_noperm() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [15/16] 5fc7d6b703a2d6c1118d875056f0afbd6ba5cca9 (sgarzarella/qemu-kvm-c-9-s) + +In order to make sure that bdrv_replace_child_noperm() doesn't have to +poll any more, get rid of the bdrv_parent_drained_begin_single() call. + +This is possible now because we can require that the parent is already +drained through the child in question when the function is called and we +don't call the parent drain callbacks more than once. 
+ +The additional drain calls needed in callers cause the test case to run +its code in the drain handler too early (bdrv_attach_child() drains +now), so modify it to only enable the code after the test setup has +completed. + +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-15-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 23987471285a26397e3152a9244b652445fd36c4) +Signed-off-by: Stefano Garzarella +--- + block.c | 103 ++++++++++++++++++++++++++++++----- + block/io.c | 2 +- + include/block/block-io.h | 8 +++ + tests/unit/test-bdrv-drain.c | 10 ++++ + 4 files changed, 108 insertions(+), 15 deletions(-) + +diff --git a/block.c b/block.c +index af31a94863..65588d313a 100644 +--- a/block.c ++++ b/block.c +@@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque) + + GLOBAL_STATE_CODE(); + /* old_bs reference is transparently moved from @s to @s->child */ ++ if (!s->child->bs) { ++ /* ++ * The parents were undrained when removing old_bs from the child. New ++ * requests can't have been made, though, because the child was empty. ++ * ++ * TODO Make bdrv_replace_child_noperm() transactionable to avoid ++ * undraining the parent in the first place. Once this is done, having ++ * new_bs drained when calling bdrv_replace_child_tran() is not a ++ * requirement any more. ++ */ ++ bdrv_parent_drained_begin_single(s->child, false); ++ assert(!bdrv_parent_drained_poll_single(s->child)); ++ } ++ assert(s->child->quiesced_parent); + bdrv_replace_child_noperm(s->child, s->old_bs); + bdrv_unref(new_bs); + } +@@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = { + * + * Note: real unref of old_bs is done only on commit. + * ++ * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be ++ * kept drained until the transaction is completed. ++ * + * The function doesn't update permissions, caller is responsible for this. 
+ */ + static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, + Transaction *tran) + { + BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); ++ ++ assert(child->quiesced_parent); ++ assert(!new_bs || new_bs->quiesce_counter); ++ + *s = (BdrvReplaceChildState) { + .child = child, + .old_bs = child->bs, +@@ -2819,6 +2840,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) + return permissions[qapi_perm]; + } + ++/* ++ * Replaces the node that a BdrvChild points to without updating permissions. ++ * ++ * If @new_bs is non-NULL, the parent of @child must already be drained through ++ * @child. ++ * ++ * This function does not poll. ++ */ + static void bdrv_replace_child_noperm(BdrvChild *child, + BlockDriverState *new_bs) + { +@@ -2826,6 +2855,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + int new_bs_quiesce_counter; + + assert(!child->frozen); ++ ++ /* ++ * If we want to change the BdrvChild to point to a drained node as its new ++ * child->bs, we need to make sure that its new parent is drained, too. In ++ * other words, either child->quiesce_parent must already be true or we must ++ * be able to set it and keep the parent's quiesce_counter consistent with ++ * that, but without polling or starting new requests (this function ++ * guarantees that it doesn't poll, and starting new requests would be ++ * against the invariants of drain sections). ++ * ++ * To keep things simple, we pick the first option (child->quiesce_parent ++ * must already be true). We also generalise the rule a bit to make it ++ * easier to verify in callers and more likely to be covered in test cases: ++ * The parent must be quiesced through this child even if new_bs isn't ++ * currently drained. ++ * ++ * The only exception is for callers that always pass new_bs == NULL. 
In ++ * this case, we obviously never need to consider the case of a drained ++ * new_bs, so we can keep the callers simpler by allowing them not to drain ++ * the parent. ++ */ ++ assert(!new_bs || child->quiesced_parent); + assert(old_bs != new_bs); + GLOBAL_STATE_CODE(); + +@@ -2833,15 +2884,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); + } + +- /* +- * If the new child node is drained but the old one was not, flush +- * all outstanding requests to the old child node. +- */ +- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); +- if (new_bs_quiesce_counter && !child->quiesced_parent) { +- bdrv_parent_drained_begin_single(child, true); +- } +- + if (old_bs) { + if (child->klass->detach) { + child->klass->detach(child); +@@ -2861,11 +2903,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + } + + /* +- * If the old child node was drained but the new one is not, allow +- * requests to come in only after the new node has been attached. +- * +- * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() +- * polls, which could have changed the value. ++ * If the parent was drained through this BdrvChild previously, but new_bs ++ * is not drained, allow requests to come in only after the new node has ++ * been attached. + */ + new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); + if (!new_bs_quiesce_counter && child->quiesced_parent) { +@@ -3002,6 +3042,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, + } + + bdrv_ref(child_bs); ++ /* ++ * Let every new BdrvChild start with a drained parent. Inserting the child ++ * in the graph with bdrv_replace_child_noperm() will undrain it if ++ * @child_bs is not drained. 
++ * ++ * The child was only just created and is not yet visible in global state ++ * until bdrv_replace_child_noperm() inserts it into the graph, so nobody ++ * could have sent requests and polling is not necessary. ++ * ++ * Note that this means that the parent isn't fully drained yet, we only ++ * stop new requests from coming in. This is fine, we don't care about the ++ * old requests here, they are not for this child. If another place enters a ++ * drain section for the same parent, but wants it to be fully quiesced, it ++ * will not run most of the the code in .drained_begin() again (which is not ++ * a problem, we already did this), but it will still poll until the parent ++ * is fully quiesced, so it will not be negatively affected either. ++ */ ++ bdrv_parent_drained_begin_single(new_child, false); + bdrv_replace_child_noperm(new_child, child_bs); + + BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); +@@ -5059,12 +5117,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) + } + + if (child->bs) { ++ BlockDriverState *bs = child->bs; ++ bdrv_drained_begin(bs); + bdrv_replace_child_tran(child, NULL, tran); ++ bdrv_drained_end(bs); + } + + tran_add(tran, &bdrv_remove_child_drv, child); + } + ++static void undrain_on_clean_cb(void *opaque) ++{ ++ bdrv_drained_end(opaque); ++} ++ ++static TransactionActionDrv undrain_on_clean = { ++ .clean = undrain_on_clean_cb, ++}; ++ + static int bdrv_replace_node_noperm(BlockDriverState *from, + BlockDriverState *to, + bool auto_skip, Transaction *tran, +@@ -5074,6 +5144,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, + + GLOBAL_STATE_CODE(); + ++ bdrv_drained_begin(from); ++ bdrv_drained_begin(to); ++ tran_add(tran, &undrain_on_clean, from); ++ tran_add(tran, &undrain_on_clean, to); ++ + QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { + assert(c->bs == from); + if (!should_update_child(c, to)) { +diff --git a/block/io.c b/block/io.c +index 
5e9150d92c..ae64830eac 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) + } + } + +-static bool bdrv_parent_drained_poll_single(BdrvChild *c) ++bool bdrv_parent_drained_poll_single(BdrvChild *c) + { + if (c->klass->drained_poll) { + return c->klass->drained_poll(c); +diff --git a/include/block/block-io.h b/include/block/block-io.h +index 8f5e75756a..65e6d2569b 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); + */ + void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); + ++/** ++ * bdrv_parent_drained_poll_single: ++ * ++ * Returns true if there is any pending activity to cease before @c can be ++ * called quiesced, false otherwise. ++ */ ++bool bdrv_parent_drained_poll_single(BdrvChild *c); ++ + /** + * bdrv_parent_drained_end_single: + * +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index 172bc6debc..2686a8acee 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void) + + + typedef struct BDRVReplaceTestState { ++ bool setup_completed; + bool was_drained; + bool was_undrained; + bool has_read; +@@ -1738,6 +1739,10 @@ static void bdrv_replace_test_drain_begin(BlockDriverState *bs) + { + BDRVReplaceTestState *s = bs->opaque; + ++ if (!s->setup_completed) { ++ return; ++ } ++ + if (!s->drain_count) { + s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); + bdrv_inc_in_flight(bs); +@@ -1769,6 +1774,10 @@ static void bdrv_replace_test_drain_end(BlockDriverState *bs) + { + BDRVReplaceTestState *s = bs->opaque; + ++ if (!s->setup_completed) { ++ return; ++ } ++ + g_assert(s->drain_count > 0); + if (!--s->drain_count) { + s->was_undrained = true; +@@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int 
old_drain_count, + bdrv_ref(old_child_bs); + bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, + BDRV_CHILD_COW, &error_abort); ++ parent_s->setup_completed = true; + + for (i = 0; i < old_drain_count; i++) { + bdrv_drained_begin(old_child_bs); +-- +2.31.1 + diff --git a/SOURCES/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch b/SOURCES/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch new file mode 100644 index 0000000..e3bf1e2 --- /dev/null +++ b/SOURCES/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch @@ -0,0 +1,54 @@ +From 6af6de77dace29aa8548b3649dc9c6163740ac86 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:03 +0100 +Subject: [PATCH 21/31] block: Don't use subtree drains in + bdrv_drop_intermediate() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [9/16] 3c06fa826f262558f57d38b0155500c2e8e23a53 (sgarzarella/qemu-kvm-c-9-s) + +Instead of using a subtree drain from the top node (which also drains +child nodes of base that we're not even interested in), use a normal +drain for base, which automatically drains all of the parents, too. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-9-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 631086deefc32690ee56efed1c5b891dec31ae37) +Signed-off-by: Stefano Garzarella +--- + block.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block.c b/block.c +index cb5e96b1cf..b3449a312e 100644 +--- a/block.c ++++ b/block.c +@@ -5586,7 +5586,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + GLOBAL_STATE_CODE(); + + bdrv_ref(top); +- bdrv_subtree_drained_begin(top); ++ bdrv_drained_begin(base); + + if (!top->drv || !base->drv) { + goto exit; +@@ -5659,7 +5659,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + + ret = 0; + exit: +- bdrv_subtree_drained_end(top); ++ bdrv_drained_end(base); + bdrv_unref(top); + return ret; + } +-- +2.31.1 + diff --git a/SOURCES/kvm-block-Drain-individual-nodes-during-reopen.patch b/SOURCES/kvm-block-Drain-individual-nodes-during-reopen.patch new file mode 100644 index 0000000..24661fb --- /dev/null +++ b/SOURCES/kvm-block-Drain-individual-nodes-during-reopen.patch @@ -0,0 +1,157 @@ +From ad52cb621daad45d3c2a0e2e670d6ca2e16690bd Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:02 +0100 +Subject: [PATCH 20/31] block: Drain individual nodes during reopen + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [8/16] 5441b6f0ae9102ef40d1093e1db3084eea81e3b0 (sgarzarella/qemu-kvm-c-9-s) + +bdrv_reopen() and friends use subtree drains as a lazy way of covering +all the nodes they touch. 
Turns out that this lazy way is a lot more +complicated than just draining the nodes individually, even not +accounting for the additional complexity in the drain mechanism itself. + +Simplify the code by switching to draining the individual nodes that are +already managed in the BlockReopenQueue anyway. + +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-8-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit d22933acd2f470eeef779e4d444e848f76dcfaf8) +Signed-off-by: Stefano Garzarella +--- + block.c | 16 +++++++++------- + block/replication.c | 6 ------ + blockdev.c | 13 ------------- + 3 files changed, 9 insertions(+), 26 deletions(-) + +diff --git a/block.c b/block.c +index 46df410b07..cb5e96b1cf 100644 +--- a/block.c ++++ b/block.c +@@ -4150,7 +4150,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, + * returns a pointer to bs_queue, which is either the newly allocated + * bs_queue, or the existing bs_queue being used. + * +- * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). ++ * bs is drained here and undrained by bdrv_reopen_queue_free(). + * + * To be called with bs->aio_context locked. + */ +@@ -4172,12 +4172,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, + int flags; + QemuOpts *opts; + +- /* Make sure that the caller remembered to use a drained section. This is +- * important to avoid graph changes between the recursive queuing here and +- * bdrv_reopen_multiple(). 
*/ +- assert(bs->quiesce_counter > 0); + GLOBAL_STATE_CODE(); + ++ bdrv_drained_begin(bs); ++ + if (bs_queue == NULL) { + bs_queue = g_new0(BlockReopenQueue, 1); + QTAILQ_INIT(bs_queue); +@@ -4328,6 +4326,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) + if (bs_queue) { + BlockReopenQueueEntry *bs_entry, *next; + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { ++ AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs); ++ ++ aio_context_acquire(ctx); ++ bdrv_drained_end(bs_entry->state.bs); ++ aio_context_release(ctx); ++ + qobject_unref(bs_entry->state.explicit_options); + qobject_unref(bs_entry->state.options); + g_free(bs_entry); +@@ -4475,7 +4479,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + + GLOBAL_STATE_CODE(); + +- bdrv_subtree_drained_begin(bs); + queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); + + if (ctx != qemu_get_aio_context()) { +@@ -4486,7 +4489,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + if (ctx != qemu_get_aio_context()) { + aio_context_acquire(ctx); + } +- bdrv_subtree_drained_end(bs); + + return ret; + } +diff --git a/block/replication.c b/block/replication.c +index f1eed25e43..c62f48a874 100644 +--- a/block/replication.c ++++ b/block/replication.c +@@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, + s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs); + } + +- bdrv_subtree_drained_begin(hidden_disk->bs); +- bdrv_subtree_drained_begin(secondary_disk->bs); +- + if (s->orig_hidden_read_only) { + QDict *opts = qdict_new(); + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); +@@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, + aio_context_acquire(ctx); + } + } +- +- bdrv_subtree_drained_end(hidden_disk->bs); +- bdrv_subtree_drained_end(secondary_disk->bs); + } + + static void backup_job_cleanup(BlockDriverState *bs) +diff --git a/blockdev.c b/blockdev.c 
+index 3f1dec6242..8ffb3d9537 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3547,8 +3547,6 @@ fail: + void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + { + BlockReopenQueue *queue = NULL; +- GSList *drained = NULL; +- GSList *p; + + /* Add each one of the BDS that we want to reopen to the queue */ + for (; reopen_list != NULL; reopen_list = reopen_list->next) { +@@ -3585,9 +3583,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + ctx = bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + +- bdrv_subtree_drained_begin(bs); + queue = bdrv_reopen_queue(queue, bs, qdict, false); +- drained = g_slist_prepend(drained, bs); + + aio_context_release(ctx); + } +@@ -3598,15 +3594,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + + fail: + bdrv_reopen_queue_free(queue); +- for (p = drained; p; p = p->next) { +- BlockDriverState *bs = p->data; +- AioContext *ctx = bdrv_get_aio_context(bs); +- +- aio_context_acquire(ctx); +- bdrv_subtree_drained_end(bs); +- aio_context_release(ctx); +- } +- g_slist_free(drained); + } + + void qmp_blockdev_del(const char *node_name, Error **errp) +-- +2.31.1 + diff --git a/SOURCES/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch b/SOURCES/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch new file mode 100644 index 0000000..1ae73c7 --- /dev/null +++ b/SOURCES/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch @@ -0,0 +1,96 @@ +From 9a789d104a4a69031ad95d7fad6380ab21e82503 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:08 +0100 +Subject: [PATCH 26/31] block: Drop out of coroutine in + bdrv_do_drained_begin_quiesce() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [14/16] 
c9266663b822f703e55b6a07de98ceb56e69e924 (sgarzarella/qemu-kvm-c-9-s) + +The next patch adds a parent drain to bdrv_attach_child_common(), which +shouldn't be, but is currently called from coroutines in some cases (e.g. +.bdrv_co_create implementations generally open new nodes). Therefore, +the assertion that we're not in a coroutine doesn't hold true any more. + +We could just remove the assertion because there is nothing in the +function that should be in conflict with running in a coroutine, but +just to be on the safe side, we can reverse the caller relationship +between bdrv_do_drained_begin() and bdrv_do_drained_begin_quiesce() so +that the latter also just drops out of coroutine context and we can +still be certain in the future that any drain code doesn't run in +coroutines. + +As a nice side effect, the structure of bdrv_do_drained_begin() is now +symmetrical with bdrv_do_drained_end(). + +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-14-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 05c272ff0cf1b16cc3606f746182dd99b774f553) +Signed-off-by: Stefano Garzarella +--- + block/io.c | 25 ++++++++++++------------- + 1 file changed, 12 insertions(+), 13 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 2e9503df6a..5e9150d92c 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -346,10 +346,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + } + } + +-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) ++static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, ++ bool poll) + { + IO_OR_GS_CODE(); +- assert(!qemu_in_coroutine()); ++ ++ if (qemu_in_coroutine()) { ++ bdrv_co_yield_to_drain(bs, true, parent, poll); ++ return; ++ } + + /* Stop things in parent-to-child order */ + if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { +@@ -359,17 +364,6 @@ void 
bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) + bs->drv->bdrv_drain_begin(bs); + } + } +-} +- +-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, +- bool poll) +-{ +- if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, true, parent, poll); +- return; +- } +- +- bdrv_do_drained_begin_quiesce(bs, parent); + + /* + * Wait for drained requests to finish. +@@ -385,6 +379,11 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, + } + } + ++void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) ++{ ++ bdrv_do_drained_begin(bs, parent, false); ++} ++ + void bdrv_drained_begin(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +-- +2.31.1 + diff --git a/SOURCES/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch b/SOURCES/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch new file mode 100644 index 0000000..b73b8fe --- /dev/null +++ b/SOURCES/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch @@ -0,0 +1,67 @@ +From e790b4c20a5124239fe93e91fbc87745e5f2cea6 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:01 +0100 +Subject: [PATCH 19/31] block: Fix locking for bdrv_reopen_queue_child() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [7/16] 46bb54506c4400b9a1bf66b6bd7987ff67260003 (sgarzarella/qemu-kvm-c-9-s) + +Callers don't agree whether bdrv_reopen_queue_child() should be called +with the AioContext lock held or not. Standardise on holding the lock +(as done by QMP blockdev-reopen and the replication block driver) and +fix bdrv_reopen() to do the same. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-7-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 2e117866d7c96cc17e84cd2946fee1bf3292d814) +Signed-off-by: Stefano Garzarella +--- + block.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/block.c b/block.c +index 7999fd08c5..46df410b07 100644 +--- a/block.c ++++ b/block.c +@@ -4151,6 +4151,8 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, + * bs_queue, or the existing bs_queue being used. + * + * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). ++ * ++ * To be called with bs->aio_context locked. + */ + static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, + BlockDriverState *bs, +@@ -4309,6 +4311,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, + return bs_queue; + } + ++/* To be called with bs->aio_context locked */ + BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, + BlockDriverState *bs, + QDict *options, bool keep_old_opts) +@@ -4473,11 +4476,11 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + GLOBAL_STATE_CODE(); + + bdrv_subtree_drained_begin(bs); ++ queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); ++ + if (ctx != qemu_get_aio_context()) { + aio_context_release(ctx); + } +- +- queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); + ret = bdrv_reopen_multiple(queue, errp); + + if (ctx != qemu_get_aio_context()) { +-- +2.31.1 + diff --git a/SOURCES/kvm-block-Improve-empty-format-specific-info-dump.patch b/SOURCES/kvm-block-Improve-empty-format-specific-info-dump.patch new file mode 100644 index 0000000..5b54210 --- /dev/null +++ b/SOURCES/kvm-block-Improve-empty-format-specific-info-dump.patch @@ -0,0 +1,132 @@ +From 074c89b05dae971c7118cb769fd34e22135c8f4c Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 
18:26:53 +0200 +Subject: [PATCH 06/20] block: Improve empty format-specific info dump + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [1/12] be551e83f426e620e673302198b51368bfd324ce (hreitz/qemu-kvm-c-9-s) + +When a block driver supports obtaining format-specific information, but +that object only contains optional fields, it is possible that none of +them are present, so that dump_qobject() (called by +bdrv_image_info_specific_dump()) will not print anything. + +The callers of bdrv_image_info_specific_dump() put a header above this +information ("Format specific information:\n"), which will look strange +when there is nothing below. Modify bdrv_image_info_specific_dump() to +print this header instead of its callers, and only if there is indeed +something to be printed. + +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-2-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 3716470b24f0f63090d59bcf28ad8fe6fb7835bd) +Signed-off-by: Hanna Czenczek +--- + block/qapi.c | 41 +++++++++++++++++++++++++++++++++++++---- + include/block/qapi.h | 3 ++- + qemu-io-cmds.c | 4 ++-- + 3 files changed, 41 insertions(+), 7 deletions(-) + +diff --git a/block/qapi.c b/block/qapi.c +index cf557e3aea..51202b470a 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -777,7 +777,35 @@ static void dump_qdict(int indentation, QDict *dict) + } + } + +-void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec) ++/* ++ * Return whether dumping the given QObject with dump_qobject() would ++ * yield an empty dump, i.e. not print anything. 
++ */ ++static bool qobject_is_empty_dump(const QObject *obj) ++{ ++ switch (qobject_type(obj)) { ++ case QTYPE_QNUM: ++ case QTYPE_QSTRING: ++ case QTYPE_QBOOL: ++ return false; ++ ++ case QTYPE_QDICT: ++ return qdict_size(qobject_to(QDict, obj)) == 0; ++ ++ case QTYPE_QLIST: ++ return qlist_empty(qobject_to(QList, obj)); ++ ++ default: ++ abort(); ++ } ++} ++ ++/** ++ * Dumps the given ImageInfoSpecific object in a human-readable form, ++ * prepending an optional prefix if the dump is not empty. ++ */ ++void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, ++ const char *prefix) + { + QObject *obj, *data; + Visitor *v = qobject_output_visitor_new(&obj); +@@ -785,7 +813,12 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec) + visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort); + visit_complete(v, &obj); + data = qdict_get(qobject_to(QDict, obj), "data"); +- dump_qobject(1, data); ++ if (!qobject_is_empty_dump(data)) { ++ if (prefix) { ++ qemu_printf("%s", prefix); ++ } ++ dump_qobject(1, data); ++ } + qobject_unref(obj); + visit_free(v); + } +@@ -866,7 +899,7 @@ void bdrv_image_info_dump(ImageInfo *info) + } + + if (info->has_format_specific) { +- qemu_printf("Format specific information:\n"); +- bdrv_image_info_specific_dump(info->format_specific); ++ bdrv_image_info_specific_dump(info->format_specific, ++ "Format specific information:\n"); + } + } +diff --git a/include/block/qapi.h b/include/block/qapi.h +index 22c7807c89..c09859ea78 100644 +--- a/include/block/qapi.h ++++ b/include/block/qapi.h +@@ -40,6 +40,7 @@ void bdrv_query_image_info(BlockDriverState *bs, + Error **errp); + + void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); +-void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec); ++void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, ++ const char *prefix); + void bdrv_image_info_dump(ImageInfo *info); + #endif +diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c +index 952dc940f1..f4a374528e 100644 
+--- a/qemu-io-cmds.c ++++ b/qemu-io-cmds.c +@@ -1825,8 +1825,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv) + return -EIO; + } + if (spec_info) { +- printf("Format specific information:\n"); +- bdrv_image_info_specific_dump(spec_info); ++ bdrv_image_info_specific_dump(spec_info, ++ "Format specific information:\n"); + qapi_free_ImageInfoSpecific(spec_info); + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-block-Inline-bdrv_drain_invoke.patch b/SOURCES/kvm-block-Inline-bdrv_drain_invoke.patch new file mode 100644 index 0000000..07160dc --- /dev/null +++ b/SOURCES/kvm-block-Inline-bdrv_drain_invoke.patch @@ -0,0 +1,81 @@ +From 1808e560396872173f787f8e338e9837a4c3d626 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:00 +0100 +Subject: [PATCH 18/31] block: Inline bdrv_drain_invoke() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [6/16] 2c7473a36360eb43d94b967deb12308cb5ea0d3b (sgarzarella/qemu-kvm-c-9-s) + +bdrv_drain_invoke() has now two entirely separate cases that share no +code any more and are selected depending on a bool parameter. Each case +has only one caller. Just inline the function. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Emanuele Giuseppe Esposito +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-6-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit c7bc05f78ab31fb02fc9635f60b9bd22efc8d121) +Signed-off-by: Stefano Garzarella +--- + block/io.c | 23 ++++++----------------- + 1 file changed, 6 insertions(+), 17 deletions(-) + +diff --git a/block/io.c b/block/io.c +index f4ca62b034..a25103be6f 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -242,21 +242,6 @@ typedef struct { + bool ignore_bds_parents; + } BdrvCoDrainData; + +-/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ +-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) +-{ +- if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || +- (!begin && !bs->drv->bdrv_drain_end)) { +- return; +- } +- +- if (begin) { +- bs->drv->bdrv_drain_begin(bs); +- } else { +- bs->drv->bdrv_drain_end(bs); +- } +-} +- + /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ + bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + BdrvChild *ignore_parent, bool ignore_bds_parents) +@@ -390,7 +375,9 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + } + + bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); +- bdrv_drain_invoke(bs, true); ++ if (bs->drv && bs->drv->bdrv_drain_begin) { ++ bs->drv->bdrv_drain_begin(bs); ++ } + } + + static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, +@@ -461,7 +448,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + assert(bs->quiesce_counter > 0); + + /* Re-enable things in child-to-parent order */ +- bdrv_drain_invoke(bs, false); ++ if (bs->drv && bs->drv->bdrv_drain_end) { ++ bs->drv->bdrv_drain_end(bs); ++ } + bdrv_parent_drained_end(bs, parent, ignore_bds_parents); + + old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); +-- +2.31.1 + diff 
--git a/SOURCES/kvm-block-Remove-drained_end_counter.patch b/SOURCES/kvm-block-Remove-drained_end_counter.patch new file mode 100644 index 0000000..cfafc33 --- /dev/null +++ b/SOURCES/kvm-block-Remove-drained_end_counter.patch @@ -0,0 +1,433 @@ +From 3009e49f242ab371ffad35bb29c2c26ddfac75d4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:40:59 +0100 +Subject: [PATCH 17/31] block: Remove drained_end_counter + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [5/16] 5589e3f05dece5394a05641f7f42096e8dc62bdb (sgarzarella/qemu-kvm-c-9-s) + +drained_end_counter is unused now, nobody changes its value any more. It +can be removed. + +In cases where we had two almost identical functions that only differed +in whether the caller passes drained_end_counter, or whether they would +poll for a local drained_end_counter to reach 0, these become a single +function. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Emanuele Giuseppe Esposito +Message-Id: <20221118174110.55183-5-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 2f65df6e16dea2d6e7212fa675f4779d9281e26f) +Signed-off-by: Stefano Garzarella +--- + block.c | 5 +- + block/block-backend.c | 4 +- + block/io.c | 98 ++++++++------------------------ + blockjob.c | 2 +- + include/block/block-io.h | 24 -------- + include/block/block_int-common.h | 6 +- + 6 files changed, 30 insertions(+), 109 deletions(-) + +diff --git a/block.c b/block.c +index 16a62a329c..7999fd08c5 100644 +--- a/block.c ++++ b/block.c +@@ -1235,11 +1235,10 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child) + return bdrv_drain_poll(bs, false, NULL, false); + } + +-static void bdrv_child_cb_drained_end(BdrvChild *child, +- int *drained_end_counter) ++static void bdrv_child_cb_drained_end(BdrvChild *child) + { + BlockDriverState *bs = child->opaque; +- bdrv_drained_end_no_poll(bs, drained_end_counter); ++ bdrv_drained_end(bs); + } + + static int bdrv_child_cb_inactivate(BdrvChild *child) +diff --git a/block/block-backend.c b/block/block-backend.c +index d98a96ff37..feaf2181fa 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format, + } + static void blk_root_drained_begin(BdrvChild *child); + static bool blk_root_drained_poll(BdrvChild *child); +-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter); ++static void blk_root_drained_end(BdrvChild *child); + + static void blk_root_change_media(BdrvChild *child, bool load); + static void blk_root_resize(BdrvChild *child); +@@ -2556,7 +2556,7 @@ static bool blk_root_drained_poll(BdrvChild *child) + return busy || !!blk->in_flight; + } + +-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) ++static void 
blk_root_drained_end(BdrvChild *child) + { + BlockBackend *blk = child->opaque; + assert(blk->quiesce_counter); +diff --git a/block/io.c b/block/io.c +index c2ed4b2af9..f4ca62b034 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -58,28 +58,19 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, + } + } + +-static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c, +- int *drained_end_counter) ++void bdrv_parent_drained_end_single(BdrvChild *c) + { ++ IO_OR_GS_CODE(); ++ + assert(c->parent_quiesce_counter > 0); + c->parent_quiesce_counter--; + if (c->klass->drained_end) { +- c->klass->drained_end(c, drained_end_counter); ++ c->klass->drained_end(c); + } + } + +-void bdrv_parent_drained_end_single(BdrvChild *c) +-{ +- int drained_end_counter = 0; +- AioContext *ctx = bdrv_child_get_parent_aio_context(c); +- IO_OR_GS_CODE(); +- bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter); +- AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0); +-} +- + static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, +- bool ignore_bds_parents, +- int *drained_end_counter) ++ bool ignore_bds_parents) + { + BdrvChild *c; + +@@ -87,7 +78,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, + if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { + continue; + } +- bdrv_parent_drained_end_single_no_poll(c, drained_end_counter); ++ bdrv_parent_drained_end_single(c); + } + } + +@@ -249,12 +240,10 @@ typedef struct { + bool poll; + BdrvChild *parent; + bool ignore_bds_parents; +- int *drained_end_counter; + } BdrvCoDrainData; + + /* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ +-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, +- int *drained_end_counter) ++static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) + { + if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || + (!begin && !bs->drv->bdrv_drain_end)) { +@@ -305,8 +294,7 @@ 
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + BdrvChild *parent, bool ignore_bds_parents, + bool poll); + static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents, +- int *drained_end_counter); ++ BdrvChild *parent, bool ignore_bds_parents); + + static void bdrv_co_drain_bh_cb(void *opaque) + { +@@ -319,14 +307,12 @@ static void bdrv_co_drain_bh_cb(void *opaque) + aio_context_acquire(ctx); + bdrv_dec_in_flight(bs); + if (data->begin) { +- assert(!data->drained_end_counter); + bdrv_do_drained_begin(bs, data->recursive, data->parent, + data->ignore_bds_parents, data->poll); + } else { + assert(!data->poll); + bdrv_do_drained_end(bs, data->recursive, data->parent, +- data->ignore_bds_parents, +- data->drained_end_counter); ++ data->ignore_bds_parents); + } + aio_context_release(ctx); + } else { +@@ -342,8 +328,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + bool begin, bool recursive, + BdrvChild *parent, + bool ignore_bds_parents, +- bool poll, +- int *drained_end_counter) ++ bool poll) + { + BdrvCoDrainData data; + Coroutine *self = qemu_coroutine_self(); +@@ -363,7 +348,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + .parent = parent, + .ignore_bds_parents = ignore_bds_parents, + .poll = poll, +- .drained_end_counter = drained_end_counter, + }; + + if (bs) { +@@ -406,7 +390,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + } + + bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); +- bdrv_drain_invoke(bs, true, NULL); ++ bdrv_drain_invoke(bs, true); + } + + static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, +@@ -417,7 +401,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, +- poll, NULL); ++ poll); + return; + } + +@@ -461,38 +445,24 @@ void 
bdrv_subtree_drained_begin(BlockDriverState *bs) + + /** + * This function does not poll, nor must any of its recursively called +- * functions. The *drained_end_counter pointee will be incremented +- * once for every background operation scheduled, and decremented once +- * the operation settles. Therefore, the pointer must remain valid +- * until the pointee reaches 0. That implies that whoever sets up the +- * pointee has to poll until it is 0. +- * +- * We use atomic operations to access *drained_end_counter, because +- * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of +- * @bs may contain nodes in different AioContexts, +- * (2) bdrv_drain_all_end() uses the same counter for all nodes, +- * regardless of which AioContext they are in. ++ * functions. + */ + static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents, +- int *drained_end_counter) ++ BdrvChild *parent, bool ignore_bds_parents) + { + BdrvChild *child; + int old_quiesce_counter; + +- assert(drained_end_counter != NULL); +- + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, +- false, drained_end_counter); ++ false); + return; + } + assert(bs->quiesce_counter > 0); + + /* Re-enable things in child-to-parent order */ +- bdrv_drain_invoke(bs, false, drained_end_counter); +- bdrv_parent_drained_end(bs, parent, ignore_bds_parents, +- drained_end_counter); ++ bdrv_drain_invoke(bs, false); ++ bdrv_parent_drained_end(bs, parent, ignore_bds_parents); + + old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); + if (old_quiesce_counter == 1) { +@@ -503,32 +473,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + assert(!ignore_bds_parents); + bs->recursive_quiesce_counter--; + QLIST_FOREACH(child, &bs->children, next) { +- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents, +- drained_end_counter); ++ bdrv_do_drained_end(child->bs, true, 
child, ignore_bds_parents); + } + } + } + + void bdrv_drained_end(BlockDriverState *bs) + { +- int drained_end_counter = 0; + IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter); +- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); +-} +- +-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter) +-{ +- IO_CODE(); +- bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter); ++ bdrv_do_drained_end(bs, false, NULL, false); + } + + void bdrv_subtree_drained_end(BlockDriverState *bs) + { +- int drained_end_counter = 0; + IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter); +- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); ++ bdrv_do_drained_end(bs, true, NULL, false); + } + + void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) +@@ -543,16 +502,12 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) + + void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) + { +- int drained_end_counter = 0; + int i; + IO_OR_GS_CODE(); + + for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { +- bdrv_do_drained_end(child->bs, true, child, false, +- &drained_end_counter); ++ bdrv_do_drained_end(child->bs, true, child, false); + } +- +- BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0); + } + + void bdrv_drain(BlockDriverState *bs) +@@ -610,7 +565,7 @@ void bdrv_drain_all_begin(void) + GLOBAL_STATE_CODE(); + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL); ++ bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); + return; + } + +@@ -649,22 +604,19 @@ void bdrv_drain_all_begin(void) + + void bdrv_drain_all_end_quiesce(BlockDriverState *bs) + { +- int drained_end_counter = 0; + GLOBAL_STATE_CODE(); + + g_assert(bs->quiesce_counter > 0); + g_assert(!bs->refcnt); + + while (bs->quiesce_counter) { +- 
bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); ++ bdrv_do_drained_end(bs, false, NULL, true); + } +- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); + } + + void bdrv_drain_all_end(void) + { + BlockDriverState *bs = NULL; +- int drained_end_counter = 0; + GLOBAL_STATE_CODE(); + + /* +@@ -680,13 +632,11 @@ void bdrv_drain_all_end(void) + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); ++ bdrv_do_drained_end(bs, false, NULL, true); + aio_context_release(aio_context); + } + + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); +- AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0); +- + assert(bdrv_drain_all_count > 0); + bdrv_drain_all_count--; + } +diff --git a/blockjob.c b/blockjob.c +index f51d4e18f3..0ab721e139 100644 +--- a/blockjob.c ++++ b/blockjob.c +@@ -120,7 +120,7 @@ static bool child_job_drained_poll(BdrvChild *c) + } + } + +-static void child_job_drained_end(BdrvChild *c, int *drained_end_counter) ++static void child_job_drained_end(BdrvChild *c) + { + BlockJob *job = c->opaque; + job_resume(&job->job); +diff --git a/include/block/block-io.h b/include/block/block-io.h +index b099d7db45..054e964c9b 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, + int64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + +-/** +- * bdrv_drained_end_no_poll: +- * +- * Same as bdrv_drained_end(), but do not poll for the subgraph to +- * actually become unquiesced. Therefore, no graph changes will occur +- * with this function. +- * +- * *drained_end_counter is incremented for every background operation +- * that is scheduled, and will be decremented for every operation once +- * it settles. The caller must poll until it reaches 0. 
The counter +- * should be accessed using atomic operations only. +- */ +-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); +- +- + /* + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. +@@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); + * bdrv_parent_drained_end_single: + * + * End a quiesced section for the parent of @c. +- * +- * This polls @bs's AioContext until all scheduled sub-drained_ends +- * have settled, which may result in graph changes. + */ + void bdrv_parent_drained_end_single(BdrvChild *c); + +@@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); + * bdrv_drained_end: + * + * End a quiescent section started by bdrv_drained_begin(). +- * +- * This polls @bs's AioContext until all scheduled sub-drained_ends +- * have settled. On one hand, that may result in graph changes. On +- * the other, this requires that the caller either runs in the main +- * loop; or that all involved nodes (@bs and all of its parents) are +- * in the caller's AioContext. + */ + void bdrv_drained_end(BlockDriverState *bs); + +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 40d646d1ed..2b97576f6d 100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -939,15 +939,11 @@ struct BdrvChildClass { + * These functions must not change the graph (and therefore also must not + * call aio_poll(), which could change the graph indirectly). + * +- * If drained_end() schedules background operations, it must atomically +- * increment *drained_end_counter for each such operation and atomically +- * decrement it once the operation has settled. +- * + * Note that this can be nested. If drained_begin() was called twice, new + * I/O is allowed only after drained_end() was called twice, too. 
+ */ + void (*drained_begin)(BdrvChild *child); +- void (*drained_end)(BdrvChild *child, int *drained_end_counter); ++ void (*drained_end)(BdrvChild *child); + + /* + * Returns whether the parent has pending requests for the child. This +-- +2.31.1 + diff --git a/SOURCES/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch b/SOURCES/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch new file mode 100644 index 0000000..aa64bec --- /dev/null +++ b/SOURCES/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch @@ -0,0 +1,274 @@ +From 0dc7990533cef41e58579ee96315aca1fdc44ea1 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:07 +0100 +Subject: [PATCH 25/31] block: Remove ignore_bds_parents parameter from + drain_begin/end. + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [13/16] 1ed88d975a9569bffeb33ad847874417780ce408 (sgarzarella/qemu-kvm-c-9-s) + +ignore_bds_parents is now ignored during drain_begin and drain_end, so +we can just remove it there. It is still a valid optimisation for +drain_all in bdrv_drained_poll(), so leave it around there. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-13-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit a82a3bd135078d14f1bb4b5e50f51e77d3748270) +Signed-off-by: Stefano Garzarella +--- + block.c | 2 +- + block/io.c | 58 +++++++++++++++------------------------- + include/block/block-io.h | 3 +-- + 3 files changed, 24 insertions(+), 39 deletions(-) + +diff --git a/block.c b/block.c +index 5a583e260d..af31a94863 100644 +--- a/block.c ++++ b/block.c +@@ -1226,7 +1226,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) + static void bdrv_child_cb_drained_begin(BdrvChild *child) + { + BlockDriverState *bs = child->opaque; +- bdrv_do_drained_begin_quiesce(bs, NULL, false); ++ bdrv_do_drained_begin_quiesce(bs, NULL); + } + + static bool bdrv_child_cb_drained_poll(BdrvChild *child) +diff --git a/block/io.c b/block/io.c +index 87d6f22ec4..2e9503df6a 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -45,13 +45,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs); + static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int64_t bytes, BdrvRequestFlags flags); + +-static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, +- bool ignore_bds_parents) ++static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) + { + BdrvChild *c, *next; + + QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { +- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { ++ if (c == ignore) { + continue; + } + bdrv_parent_drained_begin_single(c, false); +@@ -70,13 +69,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c) + } + } + +-static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, +- bool ignore_bds_parents) ++static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) + { + BdrvChild *c; + + QLIST_FOREACH(c, &bs->parents, next_parent) { +- if (c 
== ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { ++ if (c == ignore) { + continue; + } + bdrv_parent_drained_end_single(c); +@@ -242,7 +240,6 @@ typedef struct { + bool begin; + bool poll; + BdrvChild *parent; +- bool ignore_bds_parents; + } BdrvCoDrainData; + + /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ +@@ -269,9 +266,8 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs, + } + + static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, +- bool ignore_bds_parents, bool poll); +-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, +- bool ignore_bds_parents); ++ bool poll); ++static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); + + static void bdrv_co_drain_bh_cb(void *opaque) + { +@@ -284,11 +280,10 @@ static void bdrv_co_drain_bh_cb(void *opaque) + aio_context_acquire(ctx); + bdrv_dec_in_flight(bs); + if (data->begin) { +- bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, +- data->poll); ++ bdrv_do_drained_begin(bs, data->parent, data->poll); + } else { + assert(!data->poll); +- bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); ++ bdrv_do_drained_end(bs, data->parent); + } + aio_context_release(ctx); + } else { +@@ -303,7 +298,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) + static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + bool begin, + BdrvChild *parent, +- bool ignore_bds_parents, + bool poll) + { + BdrvCoDrainData data; +@@ -321,7 +315,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + .done = false, + .begin = begin, + .parent = parent, +- .ignore_bds_parents = ignore_bds_parents, + .poll = poll, + }; + +@@ -353,8 +346,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + } + } + +-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, +- BdrvChild *parent, bool ignore_bds_parents) ++void bdrv_do_drained_begin_quiesce(BlockDriverState 
*bs, BdrvChild *parent) + { + IO_OR_GS_CODE(); + assert(!qemu_in_coroutine()); +@@ -362,9 +354,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + /* Stop things in parent-to-child order */ + if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { + aio_disable_external(bdrv_get_aio_context(bs)); +- +- /* TODO Remove ignore_bds_parents, we don't consider it any more */ +- bdrv_parent_drained_begin(bs, parent, false); ++ bdrv_parent_drained_begin(bs, parent); + if (bs->drv && bs->drv->bdrv_drain_begin) { + bs->drv->bdrv_drain_begin(bs); + } +@@ -372,14 +362,14 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + } + + static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, +- bool ignore_bds_parents, bool poll) ++ bool poll) + { + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); ++ bdrv_co_yield_to_drain(bs, true, parent, poll); + return; + } + +- bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); ++ bdrv_do_drained_begin_quiesce(bs, parent); + + /* + * Wait for drained requests to finish. +@@ -391,7 +381,6 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, + * nodes. + */ + if (poll) { +- assert(!ignore_bds_parents); + BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); + } + } +@@ -399,20 +388,19 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, + void bdrv_drained_begin(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +- bdrv_do_drained_begin(bs, NULL, false, true); ++ bdrv_do_drained_begin(bs, NULL, true); + } + + /** + * This function does not poll, nor must any of its recursively called + * functions. 
+ */ +-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, +- bool ignore_bds_parents) ++static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) + { + int old_quiesce_counter; + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); ++ bdrv_co_yield_to_drain(bs, false, parent, false); + return; + } + assert(bs->quiesce_counter > 0); +@@ -423,9 +411,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, + if (bs->drv && bs->drv->bdrv_drain_end) { + bs->drv->bdrv_drain_end(bs); + } +- /* TODO Remove ignore_bds_parents, we don't consider it any more */ +- bdrv_parent_drained_end(bs, parent, false); +- ++ bdrv_parent_drained_end(bs, parent); + aio_enable_external(bdrv_get_aio_context(bs)); + } + } +@@ -433,7 +419,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, + void bdrv_drained_end(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, NULL, false); ++ bdrv_do_drained_end(bs, NULL); + } + + void bdrv_drain(BlockDriverState *bs) +@@ -491,7 +477,7 @@ void bdrv_drain_all_begin(void) + GLOBAL_STATE_CODE(); + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(NULL, true, NULL, true, true); ++ bdrv_co_yield_to_drain(NULL, true, NULL, true); + return; + } + +@@ -516,7 +502,7 @@ void bdrv_drain_all_begin(void) + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_begin(bs, NULL, true, false); ++ bdrv_do_drained_begin(bs, NULL, false); + aio_context_release(aio_context); + } + +@@ -536,7 +522,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) + g_assert(!bs->refcnt); + + while (bs->quiesce_counter) { +- bdrv_do_drained_end(bs, NULL, true); ++ bdrv_do_drained_end(bs, NULL); + } + } + +@@ -558,7 +544,7 @@ void bdrv_drain_all_end(void) + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_end(bs, NULL, 
true); ++ bdrv_do_drained_end(bs, NULL); + aio_context_release(aio_context); + } + +diff --git a/include/block/block-io.h b/include/block/block-io.h +index 9c36a16a1f..8f5e75756a 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs); + * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already + * running requests to complete. + */ +-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, +- BdrvChild *parent, bool ignore_bds_parents); ++void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent); + + /** + * bdrv_drained_end: +-- +2.31.1 + diff --git a/SOURCES/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch b/SOURCES/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch new file mode 100644 index 0000000..94eba86 --- /dev/null +++ b/SOURCES/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch @@ -0,0 +1,106 @@ +From 60b66881fb972e1cdff1cd7b4c865e5e21c141b0 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:10 +0100 +Subject: [PATCH 28/31] block: Remove poll parameter from + bdrv_parent_drained_begin_single() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [16/16] fd526cc9e5bebeb256cfa56d23ec596f26caa37a (sgarzarella/qemu-kvm-c-9-s) + +All callers of bdrv_parent_drained_begin_single() pass poll=false now, +so we don't need the parameter any more. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-16-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 606ed756c1d69cba4822be8923248d2fd714f069) +Signed-off-by: Stefano Garzarella +--- + block.c | 4 ++-- + block/io.c | 8 ++------ + include/block/block-io.h | 5 ++--- + 3 files changed, 6 insertions(+), 11 deletions(-) + +diff --git a/block.c b/block.c +index 65588d313a..0d78711416 100644 +--- a/block.c ++++ b/block.c +@@ -2417,7 +2417,7 @@ static void bdrv_replace_child_abort(void *opaque) + * new_bs drained when calling bdrv_replace_child_tran() is not a + * requirement any more. + */ +- bdrv_parent_drained_begin_single(s->child, false); ++ bdrv_parent_drained_begin_single(s->child); + assert(!bdrv_parent_drained_poll_single(s->child)); + } + assert(s->child->quiesced_parent); +@@ -3059,7 +3059,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, + * a problem, we already did this), but it will still poll until the parent + * is fully quiesced, so it will not be negatively affected either. 
+ */ +- bdrv_parent_drained_begin_single(new_child, false); ++ bdrv_parent_drained_begin_single(new_child); + bdrv_replace_child_noperm(new_child, child_bs); + + BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); +diff --git a/block/io.c b/block/io.c +index ae64830eac..38e57d1f67 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) + if (c == ignore) { + continue; + } +- bdrv_parent_drained_begin_single(c, false); ++ bdrv_parent_drained_begin_single(c); + } + } + +@@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, + return busy; + } + +-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) ++void bdrv_parent_drained_begin_single(BdrvChild *c) + { +- AioContext *ctx = bdrv_child_get_parent_aio_context(c); + IO_OR_GS_CODE(); + + assert(!c->quiesced_parent); +@@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) + if (c->klass->drained_begin) { + c->klass->drained_begin(c); + } +- if (poll) { +- AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c)); +- } + } + + static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) +diff --git a/include/block/block-io.h b/include/block/block-io.h +index 65e6d2569b..92aaa7c1e9 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); + /** + * bdrv_parent_drained_begin_single: + * +- * Begin a quiesced section for the parent of @c. If @poll is true, wait for +- * any pending activity to cease. ++ * Begin a quiesced section for the parent of @c. 
+ */ +-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); ++void bdrv_parent_drained_begin_single(BdrvChild *c); + + /** + * bdrv_parent_drained_poll_single: +-- +2.31.1 + diff --git a/SOURCES/kvm-block-Remove-subtree-drains.patch b/SOURCES/kvm-block-Remove-subtree-drains.patch new file mode 100644 index 0000000..af9c0ff --- /dev/null +++ b/SOURCES/kvm-block-Remove-subtree-drains.patch @@ -0,0 +1,896 @@ +From 79063522861cb2baf921b204bcdf4c3bfb5697f4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:05 +0100 +Subject: [PATCH 23/31] block: Remove subtree drains + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [11/16] d92f5041cceeeec49a65441b22d20f692c0f1c77 (sgarzarella/qemu-kvm-c-9-s) + +Subtree drains are not used any more. Remove them. + +After this, BdrvChildClass.attach/detach() don't poll any more. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-11-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 299403aedaeb7f08d8e98aa8614b29d4e5546066) +Signed-off-by: Stefano Garzarella +--- + block.c | 20 +-- + block/io.c | 121 +++----------- + include/block/block-io.h | 18 +-- + include/block/block_int-common.h | 1 - + include/block/block_int-io.h | 12 -- + tests/unit/test-bdrv-drain.c | 261 ++----------------------------- + 6 files changed, 44 insertions(+), 389 deletions(-) + +diff --git a/block.c b/block.c +index 5330e89903..e0e3b21790 100644 +--- a/block.c ++++ b/block.c +@@ -1232,7 +1232,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child) + static bool bdrv_child_cb_drained_poll(BdrvChild *child) + { + BlockDriverState *bs = child->opaque; +- return bdrv_drain_poll(bs, false, NULL, false); ++ return bdrv_drain_poll(bs, NULL, false); + } + + static void bdrv_child_cb_drained_end(BdrvChild *child) +@@ -1482,8 +1482,6 @@ static void bdrv_child_cb_attach(BdrvChild *child) + assert(!bs->file); + bs->file = child; + } +- +- bdrv_apply_subtree_drain(child, bs); + } + + static void bdrv_child_cb_detach(BdrvChild *child) +@@ -1494,8 +1492,6 @@ static void bdrv_child_cb_detach(BdrvChild *child) + bdrv_backing_detach(child); + } + +- bdrv_unapply_subtree_drain(child, bs); +- + assert_bdrv_graph_writable(bs); + QLIST_REMOVE(child, next); + if (child == bs->backing) { +@@ -2851,9 +2847,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + } + + if (old_bs) { +- /* Detach first so that the recursive drain sections coming from @child +- * are already gone and we only end the drain sections that came from +- * elsewhere. 
*/ + if (child->klass->detach) { + child->klass->detach(child); + } +@@ -2868,17 +2861,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); + + /* +- * Detaching the old node may have led to the new node's +- * quiesce_counter having been decreased. Not a problem, we +- * just need to recognize this here and then invoke +- * drained_end appropriately more often. ++ * Polling in bdrv_parent_drained_begin_single() may have led to the new ++ * node's quiesce_counter having been decreased. Not a problem, we just ++ * need to recognize this here and then invoke drained_end appropriately ++ * more often. + */ + assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); + drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; + +- /* Attach only after starting new drained sections, so that recursive +- * drain sections coming from @child don't get an extra .drained_begin +- * callback. */ + if (child->klass->attach) { + child->klass->attach(child); + } +diff --git a/block/io.c b/block/io.c +index a25103be6f..75224480d0 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -236,17 +236,15 @@ typedef struct { + BlockDriverState *bs; + bool done; + bool begin; +- bool recursive; + bool poll; + BdrvChild *parent; + bool ignore_bds_parents; + } BdrvCoDrainData; + + /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ +-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, +- BdrvChild *ignore_parent, bool ignore_bds_parents) ++bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, ++ bool ignore_bds_parents) + { +- BdrvChild *child, *next; + IO_OR_GS_CODE(); + + if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { +@@ -257,29 +255,19 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + return true; + } + +- if (recursive) { +- assert(!ignore_bds_parents); +- QLIST_FOREACH_SAFE(child, &bs->children, next, next) { +- if 
(bdrv_drain_poll(child->bs, recursive, child, false)) { +- return true; +- } +- } +- } +- + return false; + } + +-static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, ++static bool bdrv_drain_poll_top_level(BlockDriverState *bs, + BdrvChild *ignore_parent) + { +- return bdrv_drain_poll(bs, recursive, ignore_parent, false); ++ return bdrv_drain_poll(bs, ignore_parent, false); + } + +-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents, +- bool poll); +-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents); ++static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, ++ bool ignore_bds_parents, bool poll); ++static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, ++ bool ignore_bds_parents); + + static void bdrv_co_drain_bh_cb(void *opaque) + { +@@ -292,12 +280,11 @@ static void bdrv_co_drain_bh_cb(void *opaque) + aio_context_acquire(ctx); + bdrv_dec_in_flight(bs); + if (data->begin) { +- bdrv_do_drained_begin(bs, data->recursive, data->parent, +- data->ignore_bds_parents, data->poll); ++ bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, ++ data->poll); + } else { + assert(!data->poll); +- bdrv_do_drained_end(bs, data->recursive, data->parent, +- data->ignore_bds_parents); ++ bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); + } + aio_context_release(ctx); + } else { +@@ -310,7 +297,7 @@ static void bdrv_co_drain_bh_cb(void *opaque) + } + + static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, +- bool begin, bool recursive, ++ bool begin, + BdrvChild *parent, + bool ignore_bds_parents, + bool poll) +@@ -329,7 +316,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + .bs = bs, + .done = false, + .begin = begin, +- .recursive = recursive, + .parent = parent, + .ignore_bds_parents = ignore_bds_parents, + .poll = poll, 
+@@ -380,29 +366,16 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + } + } + +-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents, +- bool poll) ++static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, ++ bool ignore_bds_parents, bool poll) + { +- BdrvChild *child, *next; +- + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, +- poll); ++ bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); + return; + } + + bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); + +- if (recursive) { +- assert(!ignore_bds_parents); +- bs->recursive_quiesce_counter++; +- QLIST_FOREACH_SAFE(child, &bs->children, next, next) { +- bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents, +- false); +- } +- } +- + /* + * Wait for drained requests to finish. + * +@@ -414,35 +387,27 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + */ + if (poll) { + assert(!ignore_bds_parents); +- BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); ++ BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); + } + } + + void bdrv_drained_begin(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +- bdrv_do_drained_begin(bs, false, NULL, false, true); +-} +- +-void bdrv_subtree_drained_begin(BlockDriverState *bs) +-{ +- IO_OR_GS_CODE(); +- bdrv_do_drained_begin(bs, true, NULL, false, true); ++ bdrv_do_drained_begin(bs, NULL, false, true); + } + + /** + * This function does not poll, nor must any of its recursively called + * functions. 
+ */ +-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents) ++static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, ++ bool ignore_bds_parents) + { +- BdrvChild *child; + int old_quiesce_counter; + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, +- false); ++ bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); + return; + } + assert(bs->quiesce_counter > 0); +@@ -457,46 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + if (old_quiesce_counter == 1) { + aio_enable_external(bdrv_get_aio_context(bs)); + } +- +- if (recursive) { +- assert(!ignore_bds_parents); +- bs->recursive_quiesce_counter--; +- QLIST_FOREACH(child, &bs->children, next) { +- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents); +- } +- } + } + + void bdrv_drained_end(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, false, NULL, false); +-} +- +-void bdrv_subtree_drained_end(BlockDriverState *bs) +-{ +- IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, true, NULL, false); +-} +- +-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) +-{ +- int i; +- IO_OR_GS_CODE(); +- +- for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { +- bdrv_do_drained_begin(child->bs, true, child, false, true); +- } +-} +- +-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) +-{ +- int i; +- IO_OR_GS_CODE(); +- +- for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { +- bdrv_do_drained_end(child->bs, true, child, false); +- } ++ bdrv_do_drained_end(bs, NULL, false); + } + + void bdrv_drain(BlockDriverState *bs) +@@ -529,7 +460,7 @@ static bool bdrv_drain_all_poll(void) + while ((bs = bdrv_next_all_states(bs))) { + AioContext *aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); +- result |= bdrv_drain_poll(bs, false, 
NULL, true); ++ result |= bdrv_drain_poll(bs, NULL, true); + aio_context_release(aio_context); + } + +@@ -554,7 +485,7 @@ void bdrv_drain_all_begin(void) + GLOBAL_STATE_CODE(); + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); ++ bdrv_co_yield_to_drain(NULL, true, NULL, true, true); + return; + } + +@@ -579,7 +510,7 @@ void bdrv_drain_all_begin(void) + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_begin(bs, false, NULL, true, false); ++ bdrv_do_drained_begin(bs, NULL, true, false); + aio_context_release(aio_context); + } + +@@ -599,7 +530,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) + g_assert(!bs->refcnt); + + while (bs->quiesce_counter) { +- bdrv_do_drained_end(bs, false, NULL, true); ++ bdrv_do_drained_end(bs, NULL, true); + } + } + +@@ -621,7 +552,7 @@ void bdrv_drain_all_end(void) + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_end(bs, false, NULL, true); ++ bdrv_do_drained_end(bs, NULL, true); + aio_context_release(aio_context); + } + +diff --git a/include/block/block-io.h b/include/block/block-io.h +index 054e964c9b..9c36a16a1f 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c); + /** + * bdrv_drain_poll: + * +- * Poll for pending requests in @bs, its parents (except for @ignore_parent), +- * and if @recursive is true its children as well (used for subtree drain). ++ * Poll for pending requests in @bs and its parents (except for @ignore_parent). + * + * If @ignore_bds_parents is true, parents that are BlockDriverStates must + * ignore the drain request because they will be drained separately (used for +@@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c); + * + * This is part of bdrv_drained_begin. 
+ */ +-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, +- BdrvChild *ignore_parent, bool ignore_bds_parents); ++bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, ++ bool ignore_bds_parents); + + /** + * bdrv_drained_begin: +@@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs); + void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + BdrvChild *parent, bool ignore_bds_parents); + +-/** +- * Like bdrv_drained_begin, but recursively begins a quiesced section for +- * exclusive access to all child nodes as well. +- */ +-void bdrv_subtree_drained_begin(BlockDriverState *bs); +- + /** + * bdrv_drained_end: + * +@@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); + */ + void bdrv_drained_end(BlockDriverState *bs); + +-/** +- * End a quiescent section started by bdrv_subtree_drained_begin(). +- */ +-void bdrv_subtree_drained_end(BlockDriverState *bs); +- + #endif /* BLOCK_IO_H */ +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 2b97576f6d..791dddfd7d 100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -1184,7 +1184,6 @@ struct BlockDriverState { + + /* Accessed with atomic ops. */ + int quiesce_counter; +- int recursive_quiesce_counter; + + unsigned int write_gen; /* Current data generation */ + +diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h +index 4b0b3e17ef..8bc061ebb8 100644 +--- a/include/block/block_int-io.h ++++ b/include/block/block_int-io.h +@@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs, + */ + void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); + +- +-/* +- * "I/O or GS" API functions. These functions can run without +- * the BQL, but only in one specific iothread/main loop. +- * +- * See include/block/block-io.h for more information about +- * the "I/O or GS" API. 
+- */ +- +-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); +-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); +- + #endif /* BLOCK_INT_IO_H */ +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index 695519ee02..dda08de8db 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -156,7 +156,6 @@ static void call_in_coroutine(void (*entry)(void)) + enum drain_type { + BDRV_DRAIN_ALL, + BDRV_DRAIN, +- BDRV_SUBTREE_DRAIN, + DRAIN_TYPE_MAX, + }; + +@@ -165,7 +164,6 @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) + switch (drain_type) { + case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; + case BDRV_DRAIN: bdrv_drained_begin(bs); break; +- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break; + default: g_assert_not_reached(); + } + } +@@ -175,7 +173,6 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) + switch (drain_type) { + case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; + case BDRV_DRAIN: bdrv_drained_end(bs); break; +- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break; + default: g_assert_not_reached(); + } + } +@@ -271,11 +268,6 @@ static void test_drv_cb_drain(void) + test_drv_cb_common(BDRV_DRAIN, false); + } + +-static void test_drv_cb_drain_subtree(void) +-{ +- test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); +-} +- + static void test_drv_cb_co_drain_all(void) + { + call_in_coroutine(test_drv_cb_drain_all); +@@ -286,11 +278,6 @@ static void test_drv_cb_co_drain(void) + call_in_coroutine(test_drv_cb_drain); + } + +-static void test_drv_cb_co_drain_subtree(void) +-{ +- call_in_coroutine(test_drv_cb_drain_subtree); +-} +- + static void test_quiesce_common(enum drain_type drain_type, bool recursive) + { + BlockBackend *blk; +@@ -332,11 +319,6 @@ static void test_quiesce_drain(void) + test_quiesce_common(BDRV_DRAIN, false); + } + +-static void 
test_quiesce_drain_subtree(void) +-{ +- test_quiesce_common(BDRV_SUBTREE_DRAIN, true); +-} +- + static void test_quiesce_co_drain_all(void) + { + call_in_coroutine(test_quiesce_drain_all); +@@ -347,11 +329,6 @@ static void test_quiesce_co_drain(void) + call_in_coroutine(test_quiesce_drain); + } + +-static void test_quiesce_co_drain_subtree(void) +-{ +- call_in_coroutine(test_quiesce_drain_subtree); +-} +- + static void test_nested(void) + { + BlockBackend *blk; +@@ -402,158 +379,6 @@ static void test_nested(void) + blk_unref(blk); + } + +-static void test_multiparent(void) +-{ +- BlockBackend *blk_a, *blk_b; +- BlockDriverState *bs_a, *bs_b, *backing; +- BDRVTestState *a_s, *b_s, *backing_s; +- +- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); +- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, +- &error_abort); +- a_s = bs_a->opaque; +- blk_insert_bs(blk_a, bs_a, &error_abort); +- +- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); +- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, +- &error_abort); +- b_s = bs_b->opaque; +- blk_insert_bs(blk_b, bs_b, &error_abort); +- +- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); +- backing_s = backing->opaque; +- bdrv_set_backing_hd(bs_a, backing, &error_abort); +- bdrv_set_backing_hd(bs_b, backing, &error_abort); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); +- g_assert_cmpint(backing->quiesce_counter, ==, 0); +- g_assert_cmpint(a_s->drain_count, ==, 0); +- g_assert_cmpint(b_s->drain_count, ==, 0); +- g_assert_cmpint(backing_s->drain_count, ==, 0); +- +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 1); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 1); +- g_assert_cmpint(backing->quiesce_counter, ==, 1); +- g_assert_cmpint(a_s->drain_count, ==, 1); +- g_assert_cmpint(b_s->drain_count, ==, 1); +- 
g_assert_cmpint(backing_s->drain_count, ==, 1); +- +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 2); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 2); +- g_assert_cmpint(backing->quiesce_counter, ==, 2); +- g_assert_cmpint(a_s->drain_count, ==, 2); +- g_assert_cmpint(b_s->drain_count, ==, 2); +- g_assert_cmpint(backing_s->drain_count, ==, 2); +- +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 1); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 1); +- g_assert_cmpint(backing->quiesce_counter, ==, 1); +- g_assert_cmpint(a_s->drain_count, ==, 1); +- g_assert_cmpint(b_s->drain_count, ==, 1); +- g_assert_cmpint(backing_s->drain_count, ==, 1); +- +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); +- g_assert_cmpint(backing->quiesce_counter, ==, 0); +- g_assert_cmpint(a_s->drain_count, ==, 0); +- g_assert_cmpint(b_s->drain_count, ==, 0); +- g_assert_cmpint(backing_s->drain_count, ==, 0); +- +- bdrv_unref(backing); +- bdrv_unref(bs_a); +- bdrv_unref(bs_b); +- blk_unref(blk_a); +- blk_unref(blk_b); +-} +- +-static void test_graph_change_drain_subtree(void) +-{ +- BlockBackend *blk_a, *blk_b; +- BlockDriverState *bs_a, *bs_b, *backing; +- BDRVTestState *a_s, *b_s, *backing_s; +- +- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); +- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, +- &error_abort); +- a_s = bs_a->opaque; +- blk_insert_bs(blk_a, bs_a, &error_abort); +- +- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); +- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, +- &error_abort); +- b_s = bs_b->opaque; +- blk_insert_bs(blk_b, bs_b, &error_abort); +- +- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); +- backing_s = backing->opaque; +- bdrv_set_backing_hd(bs_a, backing, &error_abort); +- 
+- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); +- g_assert_cmpint(backing->quiesce_counter, ==, 0); +- g_assert_cmpint(a_s->drain_count, ==, 0); +- g_assert_cmpint(b_s->drain_count, ==, 0); +- g_assert_cmpint(backing_s->drain_count, ==, 0); +- +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); +- +- bdrv_set_backing_hd(bs_b, backing, &error_abort); +- g_assert_cmpint(bs_a->quiesce_counter, ==, 5); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 5); +- g_assert_cmpint(backing->quiesce_counter, ==, 5); +- g_assert_cmpint(a_s->drain_count, ==, 5); +- g_assert_cmpint(b_s->drain_count, ==, 5); +- g_assert_cmpint(backing_s->drain_count, ==, 5); +- +- bdrv_set_backing_hd(bs_b, NULL, &error_abort); +- g_assert_cmpint(bs_a->quiesce_counter, ==, 3); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 2); +- g_assert_cmpint(backing->quiesce_counter, ==, 3); +- g_assert_cmpint(a_s->drain_count, ==, 3); +- g_assert_cmpint(b_s->drain_count, ==, 2); +- g_assert_cmpint(backing_s->drain_count, ==, 3); +- +- bdrv_set_backing_hd(bs_b, backing, &error_abort); +- g_assert_cmpint(bs_a->quiesce_counter, ==, 5); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 5); +- g_assert_cmpint(backing->quiesce_counter, ==, 5); +- g_assert_cmpint(a_s->drain_count, ==, 5); +- g_assert_cmpint(b_s->drain_count, ==, 5); +- g_assert_cmpint(backing_s->drain_count, ==, 5); +- +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); +- g_assert_cmpint(backing->quiesce_counter, ==, 0); +- g_assert_cmpint(a_s->drain_count, ==, 0); +- 
g_assert_cmpint(b_s->drain_count, ==, 0); +- g_assert_cmpint(backing_s->drain_count, ==, 0); +- +- bdrv_unref(backing); +- bdrv_unref(bs_a); +- bdrv_unref(bs_b); +- blk_unref(blk_a); +- blk_unref(blk_b); +-} +- + static void test_graph_change_drain_all(void) + { + BlockBackend *blk_a, *blk_b; +@@ -773,12 +598,6 @@ static void test_iothread_drain(void) + test_iothread_common(BDRV_DRAIN, 1); + } + +-static void test_iothread_drain_subtree(void) +-{ +- test_iothread_common(BDRV_SUBTREE_DRAIN, 0); +- test_iothread_common(BDRV_SUBTREE_DRAIN, 1); +-} +- + + typedef struct TestBlockJob { + BlockJob common; +@@ -863,7 +682,6 @@ enum test_job_result { + enum test_job_drain_node { + TEST_JOB_DRAIN_SRC, + TEST_JOB_DRAIN_SRC_CHILD, +- TEST_JOB_DRAIN_SRC_PARENT, + }; + + static void test_blockjob_common_drain_node(enum drain_type drain_type, +@@ -901,9 +719,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, + case TEST_JOB_DRAIN_SRC_CHILD: + drain_bs = src_backing; + break; +- case TEST_JOB_DRAIN_SRC_PARENT: +- drain_bs = src_overlay; +- break; + default: + g_assert_not_reached(); + } +@@ -1055,10 +870,6 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, + TEST_JOB_DRAIN_SRC); + test_blockjob_common_drain_node(drain_type, use_iothread, result, + TEST_JOB_DRAIN_SRC_CHILD); +- if (drain_type == BDRV_SUBTREE_DRAIN) { +- test_blockjob_common_drain_node(drain_type, use_iothread, result, +- TEST_JOB_DRAIN_SRC_PARENT); +- } + } + + static void test_blockjob_drain_all(void) +@@ -1071,11 +882,6 @@ static void test_blockjob_drain(void) + test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS); + } + +-static void test_blockjob_drain_subtree(void) +-{ +- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS); +-} +- + static void test_blockjob_error_drain_all(void) + { + test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN); +@@ -1088,12 +894,6 @@ static void test_blockjob_error_drain(void) + 
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE); + } + +-static void test_blockjob_error_drain_subtree(void) +-{ +- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN); +- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE); +-} +- + static void test_blockjob_iothread_drain_all(void) + { + test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS); +@@ -1104,11 +904,6 @@ static void test_blockjob_iothread_drain(void) + test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS); + } + +-static void test_blockjob_iothread_drain_subtree(void) +-{ +- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS); +-} +- + static void test_blockjob_iothread_error_drain_all(void) + { + test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN); +@@ -1121,12 +916,6 @@ static void test_blockjob_iothread_error_drain(void) + test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE); + } + +-static void test_blockjob_iothread_error_drain_subtree(void) +-{ +- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN); +- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE); +-} +- + + typedef struct BDRVTestTopState { + BdrvChild *wait_child; +@@ -1273,14 +1062,6 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, + bdrv_drain(child_bs); + bdrv_unref(child_bs); + break; +- case BDRV_SUBTREE_DRAIN: +- /* Would have to ref/unref bs here for !detach_instead_of_delete, but +- * then the whole test becomes pointless because the graph changes +- * don't occur during the drain any more. 
*/ +- assert(detach_instead_of_delete); +- bdrv_subtree_drained_begin(bs); +- bdrv_subtree_drained_end(bs); +- break; + case BDRV_DRAIN_ALL: + bdrv_drain_all_begin(); + bdrv_drain_all_end(); +@@ -1315,11 +1096,6 @@ static void test_detach_by_drain(void) + do_test_delete_by_drain(true, BDRV_DRAIN); + } + +-static void test_detach_by_drain_subtree(void) +-{ +- do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN); +-} +- + + struct detach_by_parent_data { + BlockDriverState *parent_b; +@@ -1452,7 +1228,10 @@ static void test_detach_indirect(bool by_parent_cb) + g_assert(acb != NULL); + + /* Drain and check the expected result */ +- bdrv_subtree_drained_begin(parent_b); ++ bdrv_drained_begin(parent_b); ++ bdrv_drained_begin(a); ++ bdrv_drained_begin(b); ++ bdrv_drained_begin(c); + + g_assert(detach_by_parent_data.child_c != NULL); + +@@ -1467,12 +1246,15 @@ static void test_detach_indirect(bool by_parent_cb) + g_assert(QLIST_NEXT(child_a, next) == NULL); + + g_assert_cmpint(parent_a->quiesce_counter, ==, 1); +- g_assert_cmpint(parent_b->quiesce_counter, ==, 1); ++ g_assert_cmpint(parent_b->quiesce_counter, ==, 3); + g_assert_cmpint(a->quiesce_counter, ==, 1); +- g_assert_cmpint(b->quiesce_counter, ==, 0); ++ g_assert_cmpint(b->quiesce_counter, ==, 1); + g_assert_cmpint(c->quiesce_counter, ==, 1); + +- bdrv_subtree_drained_end(parent_b); ++ bdrv_drained_end(parent_b); ++ bdrv_drained_end(a); ++ bdrv_drained_end(b); ++ bdrv_drained_end(c); + + bdrv_unref(parent_b); + blk_unref(blk); +@@ -2202,70 +1984,47 @@ int main(int argc, char **argv) + + g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); + g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); +- g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", +- test_drv_cb_drain_subtree); + + g_test_add_func("/bdrv-drain/driver-cb/co/drain_all", + test_drv_cb_co_drain_all); + g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); +- 
g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", +- test_drv_cb_co_drain_subtree); +- + + g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); + g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); +- g_test_add_func("/bdrv-drain/quiesce/drain_subtree", +- test_quiesce_drain_subtree); + + g_test_add_func("/bdrv-drain/quiesce/co/drain_all", + test_quiesce_co_drain_all); + g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); +- g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", +- test_quiesce_co_drain_subtree); + + g_test_add_func("/bdrv-drain/nested", test_nested); +- g_test_add_func("/bdrv-drain/multiparent", test_multiparent); + +- g_test_add_func("/bdrv-drain/graph-change/drain_subtree", +- test_graph_change_drain_subtree); + g_test_add_func("/bdrv-drain/graph-change/drain_all", + test_graph_change_drain_all); + + g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all); + g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain); +- g_test_add_func("/bdrv-drain/iothread/drain_subtree", +- test_iothread_drain_subtree); + + g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); + g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); +- g_test_add_func("/bdrv-drain/blockjob/drain_subtree", +- test_blockjob_drain_subtree); + + g_test_add_func("/bdrv-drain/blockjob/error/drain_all", + test_blockjob_error_drain_all); + g_test_add_func("/bdrv-drain/blockjob/error/drain", + test_blockjob_error_drain); +- g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree", +- test_blockjob_error_drain_subtree); + + g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all", + test_blockjob_iothread_drain_all); + g_test_add_func("/bdrv-drain/blockjob/iothread/drain", + test_blockjob_iothread_drain); +- g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree", +- test_blockjob_iothread_drain_subtree); + + 
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all", + test_blockjob_iothread_error_drain_all); + g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain", + test_blockjob_iothread_error_drain); +- g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree", +- test_blockjob_iothread_error_drain_subtree); + + g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); + g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); + g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); +- g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree); + g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb); + g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb); + +-- +2.31.1 + diff --git a/SOURCES/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch b/SOURCES/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch new file mode 100644 index 0000000..1529fdb --- /dev/null +++ b/SOURCES/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch @@ -0,0 +1,302 @@ +From 0e894c93cae97bb792dc483be8e295d097ebd7a1 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:40:58 +0100 +Subject: [PATCH 16/31] block: Revert .bdrv_drained_begin/end to + non-coroutine_fn + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [4/16] 86d6049e40a99604e414c2572b67f74b85868832 (sgarzarella/qemu-kvm-c-9-s) + +Polling during bdrv_drained_end() can be problematic (and in the future, +we may get cases for bdrv_drained_begin() where polling is forbidden, +and we don't care about already in-flight requests, but just want to +prevent new requests from arriving). 
+ +The .bdrv_drained_begin/end callbacks running in a coroutine is the only +reason why we have to do this polling, so make them non-coroutine +callbacks again. None of the callers actually yield any more. + +This means that bdrv_drained_end() effectively doesn't poll any more, +even if AIO_WAIT_WHILE() loops are still there (their condition is false +from the beginning). This is generally not a problem, but in +test-bdrv-drain, some additional explicit aio_poll() calls need to be +added because the test case wants to verify the final state after BHs +have executed. + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Emanuele Giuseppe Esposito +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 5e8ac21717373cbe96ef7a91e216bf5788815d63) +Signed-off-by: Stefano Garzarella +--- + block.c | 4 +-- + block/io.c | 49 +++++--------------------------- + block/qed.c | 6 ++-- + block/throttle.c | 8 +++--- + include/block/block_int-common.h | 10 ++++--- + tests/unit/test-bdrv-drain.c | 18 ++++++------ + 6 files changed, 32 insertions(+), 63 deletions(-) + +diff --git a/block.c b/block.c +index ec184150a2..16a62a329c 100644 +--- a/block.c ++++ b/block.c +@@ -1713,8 +1713,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, + assert(is_power_of_2(bs->bl.request_alignment)); + + for (i = 0; i < bs->quiesce_counter; i++) { +- if (drv->bdrv_co_drain_begin) { +- drv->bdrv_co_drain_begin(bs); ++ if (drv->bdrv_drain_begin) { ++ drv->bdrv_drain_begin(bs); + } + } + +diff --git a/block/io.c b/block/io.c +index b9424024f9..c2ed4b2af9 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -252,55 +252,20 @@ typedef struct { + int *drained_end_counter; + } BdrvCoDrainData; + +-static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) +-{ +- BdrvCoDrainData *data = opaque; +- BlockDriverState *bs = data->bs; +- +- if (data->begin) { +- 
bs->drv->bdrv_co_drain_begin(bs); +- } else { +- bs->drv->bdrv_co_drain_end(bs); +- } +- +- /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */ +- qatomic_mb_set(&data->done, true); +- if (!data->begin) { +- qatomic_dec(data->drained_end_counter); +- } +- bdrv_dec_in_flight(bs); +- +- g_free(data); +-} +- +-/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ ++/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ + static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, + int *drained_end_counter) + { +- BdrvCoDrainData *data; +- +- if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || +- (!begin && !bs->drv->bdrv_co_drain_end)) { ++ if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || ++ (!begin && !bs->drv->bdrv_drain_end)) { + return; + } + +- data = g_new(BdrvCoDrainData, 1); +- *data = (BdrvCoDrainData) { +- .bs = bs, +- .done = false, +- .begin = begin, +- .drained_end_counter = drained_end_counter, +- }; +- +- if (!begin) { +- qatomic_inc(drained_end_counter); ++ if (begin) { ++ bs->drv->bdrv_drain_begin(bs); ++ } else { ++ bs->drv->bdrv_drain_end(bs); + } +- +- /* Make sure the driver callback completes during the polling phase for +- * drain_begin. */ +- bdrv_inc_in_flight(bs); +- data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data); +- aio_co_schedule(bdrv_get_aio_context(bs), data->co); + } + + /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ +diff --git a/block/qed.c b/block/qed.c +index 013f826c44..c2691a85b1 100644 +--- a/block/qed.c ++++ b/block/qed.c +@@ -262,7 +262,7 @@ static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s) + assert(!s->allocating_write_reqs_plugged); + if (s->allocating_acb != NULL) { + /* Another allocating write came concurrently. This cannot happen +- * from bdrv_qed_co_drain_begin, but it can happen when the timer runs. ++ * from bdrv_qed_drain_begin, but it can happen when the timer runs. 
+ */ + qemu_co_mutex_unlock(&s->table_lock); + return false; +@@ -365,7 +365,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs, + } + } + +-static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) ++static void bdrv_qed_drain_begin(BlockDriverState *bs) + { + BDRVQEDState *s = bs->opaque; + +@@ -1661,7 +1661,7 @@ static BlockDriver bdrv_qed = { + .bdrv_co_check = bdrv_qed_co_check, + .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, + .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, +- .bdrv_co_drain_begin = bdrv_qed_co_drain_begin, ++ .bdrv_drain_begin = bdrv_qed_drain_begin, + }; + + static void bdrv_qed_init(void) +diff --git a/block/throttle.c b/block/throttle.c +index 131eba3ab4..88851c84f4 100644 +--- a/block/throttle.c ++++ b/block/throttle.c +@@ -214,7 +214,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state) + reopen_state->opaque = NULL; + } + +-static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) ++static void throttle_drain_begin(BlockDriverState *bs) + { + ThrottleGroupMember *tgm = bs->opaque; + if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) { +@@ -222,7 +222,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) + } + } + +-static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs) ++static void throttle_drain_end(BlockDriverState *bs) + { + ThrottleGroupMember *tgm = bs->opaque; + assert(tgm->io_limits_disabled); +@@ -261,8 +261,8 @@ static BlockDriver bdrv_throttle = { + .bdrv_reopen_commit = throttle_reopen_commit, + .bdrv_reopen_abort = throttle_reopen_abort, + +- .bdrv_co_drain_begin = throttle_co_drain_begin, +- .bdrv_co_drain_end = throttle_co_drain_end, ++ .bdrv_drain_begin = throttle_drain_begin, ++ .bdrv_drain_end = throttle_drain_end, + + .is_filter = true, + .strong_runtime_opts = throttle_strong_runtime_opts, +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 31ae91e56e..40d646d1ed 
100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -735,17 +735,19 @@ struct BlockDriver { + void (*bdrv_io_unplug)(BlockDriverState *bs); + + /** +- * bdrv_co_drain_begin is called if implemented in the beginning of a ++ * bdrv_drain_begin is called if implemented in the beginning of a + * drain operation to drain and stop any internal sources of requests in + * the driver. +- * bdrv_co_drain_end is called if implemented at the end of the drain. ++ * bdrv_drain_end is called if implemented at the end of the drain. + * + * They should be used by the driver to e.g. manage scheduled I/O + * requests, or toggle an internal state. After the end of the drain new + * requests will continue normally. ++ * ++ * Implementations of both functions must not call aio_poll(). + */ +- void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); +- void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); ++ void (*bdrv_drain_begin)(BlockDriverState *bs); ++ void (*bdrv_drain_end)(BlockDriverState *bs); + + bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); + bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)( +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index 24f34e24ad..695519ee02 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -46,7 +46,7 @@ static void coroutine_fn sleep_in_drain_begin(void *opaque) + bdrv_dec_in_flight(bs); + } + +-static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) ++static void bdrv_test_drain_begin(BlockDriverState *bs) + { + BDRVTestState *s = bs->opaque; + s->drain_count++; +@@ -57,7 +57,7 @@ static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) + } + } + +-static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) ++static void bdrv_test_drain_end(BlockDriverState *bs) + { + BDRVTestState *s = bs->opaque; + s->drain_count--; +@@ -111,8 +111,8 @@ static BlockDriver 
bdrv_test = { + .bdrv_close = bdrv_test_close, + .bdrv_co_preadv = bdrv_test_co_preadv, + +- .bdrv_co_drain_begin = bdrv_test_co_drain_begin, +- .bdrv_co_drain_end = bdrv_test_co_drain_end, ++ .bdrv_drain_begin = bdrv_test_drain_begin, ++ .bdrv_drain_end = bdrv_test_drain_end, + + .bdrv_child_perm = bdrv_default_perms, + +@@ -1703,6 +1703,7 @@ static void test_blockjob_commit_by_drained_end(void) + bdrv_drained_begin(bs_child); + g_assert(!job_has_completed); + bdrv_drained_end(bs_child); ++ aio_poll(qemu_get_aio_context(), false); + g_assert(job_has_completed); + + bdrv_unref(bs_parents[0]); +@@ -1858,6 +1859,7 @@ static void test_drop_intermediate_poll(void) + + g_assert(!job_has_completed); + ret = bdrv_drop_intermediate(chain[1], chain[0], NULL); ++ aio_poll(qemu_get_aio_context(), false); + g_assert(ret == 0); + g_assert(job_has_completed); + +@@ -1946,7 +1948,7 @@ static void coroutine_fn bdrv_replace_test_drain_co(void *opaque) + * .was_drained. + * Increment .drain_count. + */ +-static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) ++static void bdrv_replace_test_drain_begin(BlockDriverState *bs) + { + BDRVReplaceTestState *s = bs->opaque; + +@@ -1977,7 +1979,7 @@ static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) + * If .drain_count reaches 0 and the node has a backing file, issue a + * read request. 
+ */ +-static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) ++static void bdrv_replace_test_drain_end(BlockDriverState *bs) + { + BDRVReplaceTestState *s = bs->opaque; + +@@ -2002,8 +2004,8 @@ static BlockDriver bdrv_replace_test = { + .bdrv_close = bdrv_replace_test_close, + .bdrv_co_preadv = bdrv_replace_test_co_preadv, + +- .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin, +- .bdrv_co_drain_end = bdrv_replace_test_co_drain_end, ++ .bdrv_drain_begin = bdrv_replace_test_drain_begin, ++ .bdrv_drain_end = bdrv_replace_test_drain_end, + + .bdrv_child_perm = bdrv_default_perms, + }; +-- +2.31.1 + diff --git a/SOURCES/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch b/SOURCES/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch new file mode 100644 index 0000000..2d95689 --- /dev/null +++ b/SOURCES/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch @@ -0,0 +1,246 @@ +From 54e290df4bc1c9e83be7357caed6a2b1ba4f21f0 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:26:56 +0200 +Subject: [PATCH 09/20] block: Split BlockNodeInfo off of ImageInfo + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [4/12] fc8d69d549bb9a929db218b91697ee3ae95c1ff6 (hreitz/qemu-kvm-c-9-s) + +ImageInfo sometimes contains flat information, and sometimes it does +not. Split off a BlockNodeInfo struct, which only contains information +about a single node and has no link to the backing image. + +We do this so we can extend BlockNodeInfo to a BlockGraphInfo struct, +which has links to all child nodes, not just the backing node. It would +be strange to base BlockGraphInfo on ImageInfo, because then this +extended struct would have two links to the backing node (one in +BlockGraphInfo as one of all the child links, and one in ImageInfo). 
+ +Furthermore, it is quite common to ignore the backing-image field +altogether: bdrv_query_image_info() does not set it, and +bdrv_image_info_dump() does not evaluate it. That signals that we +should have different structs for describing a single node and one that +has a link to the backing image. + +Still, bdrv_query_image_info() and bdrv_image_info_dump() are not +changed too much in this patch. Follow-up patches will handle them. + +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-5-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit a2085f8909377b6df738f6c3f7ee6db4d16da8f7) +Signed-off-by: Hanna Czenczek +--- + block/qapi.c | 86 ++++++++++++++++++++++++++++++++------------ + include/block/qapi.h | 3 ++ + qapi/block-core.json | 24 +++++++++---- + 3 files changed, 85 insertions(+), 28 deletions(-) + +diff --git a/block/qapi.c b/block/qapi.c +index 51202b470a..e5022b4481 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -241,30 +241,18 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs, + } + + /** +- * bdrv_query_image_info: +- * @bs: block device to examine +- * @p_info: location to store image information +- * @errp: location to store error information +- * +- * Store "flat" image information in @p_info. +- * +- * "Flat" means it does *not* query backing image information, +- * i.e. (*pinfo)->has_backing_image will be set to false and +- * (*pinfo)->backing_image to NULL even when the image does in fact have +- * a backing image. +- * +- * @p_info will be set only on success. On error, store error in @errp. ++ * Helper function for other query info functions. Store information about @bs ++ * in @info, setting @errp on error. 
+ */ +-void bdrv_query_image_info(BlockDriverState *bs, +- ImageInfo **p_info, +- Error **errp) ++static void bdrv_do_query_node_info(BlockDriverState *bs, ++ BlockNodeInfo *info, ++ Error **errp) + { + int64_t size; + const char *backing_filename; + BlockDriverInfo bdi; + int ret; + Error *err = NULL; +- ImageInfo *info; + + aio_context_acquire(bdrv_get_aio_context(bs)); + +@@ -277,7 +265,6 @@ void bdrv_query_image_info(BlockDriverState *bs, + + bdrv_refresh_filename(bs); + +- info = g_new0(ImageInfo, 1); + info->filename = g_strdup(bs->filename); + info->format = g_strdup(bdrv_get_format_name(bs)); + info->virtual_size = size; +@@ -298,7 +285,6 @@ void bdrv_query_image_info(BlockDriverState *bs, + info->format_specific = bdrv_get_specific_info(bs, &err); + if (err) { + error_propagate(errp, err); +- qapi_free_ImageInfo(info); + goto out; + } + info->has_format_specific = info->format_specific != NULL; +@@ -339,16 +325,72 @@ void bdrv_query_image_info(BlockDriverState *bs, + break; + default: + error_propagate(errp, err); +- qapi_free_ImageInfo(info); + goto out; + } + +- *p_info = info; +- + out: + aio_context_release(bdrv_get_aio_context(bs)); + } + ++/** ++ * bdrv_query_block_node_info: ++ * @bs: block node to examine ++ * @p_info: location to store node information ++ * @errp: location to store error information ++ * ++ * Store image information about @bs in @p_info. ++ * ++ * @p_info will be set only on success. On error, store error in @errp. 
++ */ ++void bdrv_query_block_node_info(BlockDriverState *bs, ++ BlockNodeInfo **p_info, ++ Error **errp) ++{ ++ BlockNodeInfo *info; ++ ERRP_GUARD(); ++ ++ info = g_new0(BlockNodeInfo, 1); ++ bdrv_do_query_node_info(bs, info, errp); ++ if (*errp) { ++ qapi_free_BlockNodeInfo(info); ++ return; ++ } ++ ++ *p_info = info; ++} ++ ++/** ++ * bdrv_query_image_info: ++ * @bs: block node to examine ++ * @p_info: location to store image information ++ * @errp: location to store error information ++ * ++ * Store "flat" image information in @p_info. ++ * ++ * "Flat" means it does *not* query backing image information, ++ * i.e. (*pinfo)->has_backing_image will be set to false and ++ * (*pinfo)->backing_image to NULL even when the image does in fact have ++ * a backing image. ++ * ++ * @p_info will be set only on success. On error, store error in @errp. ++ */ ++void bdrv_query_image_info(BlockDriverState *bs, ++ ImageInfo **p_info, ++ Error **errp) ++{ ++ ImageInfo *info; ++ ERRP_GUARD(); ++ ++ info = g_new0(ImageInfo, 1); ++ bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp); ++ if (*errp) { ++ qapi_free_ImageInfo(info); ++ return; ++ } ++ ++ *p_info = info; ++} ++ + /* @p_info will be set only on success. 
*/ + static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, + Error **errp) +diff --git a/include/block/qapi.h b/include/block/qapi.h +index c09859ea78..c7de4e3fa9 100644 +--- a/include/block/qapi.h ++++ b/include/block/qapi.h +@@ -35,6 +35,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, + int bdrv_query_snapshot_info_list(BlockDriverState *bs, + SnapshotInfoList **p_list, + Error **errp); ++void bdrv_query_block_node_info(BlockDriverState *bs, ++ BlockNodeInfo **p_info, ++ Error **errp); + void bdrv_query_image_info(BlockDriverState *bs, + ImageInfo **p_info, + Error **errp); +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 4b9365167f..7720da0498 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -251,7 +251,7 @@ + } } + + ## +-# @ImageInfo: ++# @BlockNodeInfo: + # + # Information about a QEMU image file + # +@@ -279,22 +279,34 @@ + # + # @snapshots: list of VM snapshots + # +-# @backing-image: info of the backing image (since 1.6) +-# + # @format-specific: structure supplying additional format-specific + # information (since 1.7) + # +-# Since: 1.3 ++# Since: 8.0 + ## +-{ 'struct': 'ImageInfo', ++{ 'struct': 'BlockNodeInfo', + 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool', + '*actual-size': 'int', 'virtual-size': 'int', + '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool', + '*backing-filename': 'str', '*full-backing-filename': 'str', + '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'], +- '*backing-image': 'ImageInfo', + '*format-specific': 'ImageInfoSpecific' } } + ++## ++# @ImageInfo: ++# ++# Information about a QEMU image file, and potentially its backing image ++# ++# @backing-image: info of the backing image ++# ++# Since: 1.3 ++## ++{ 'struct': 'ImageInfo', ++ 'base': 'BlockNodeInfo', ++ 'data': { ++ '*backing-image': 'ImageInfo' ++ } } ++ + ## + # @ImageCheck: + # +-- +2.31.1 + diff --git 
a/SOURCES/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch b/SOURCES/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch new file mode 100644 index 0000000..19d52b5 --- /dev/null +++ b/SOURCES/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch @@ -0,0 +1,70 @@ +From defd6b325264d94ffb1355a8b19f9a77bd694a2f Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Mon, 7 Nov 2022 19:35:56 +0300 +Subject: [PATCH 13/31] block: drop bdrv_remove_filter_or_cow_child + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [1/16] 6339edd738c3b79f8ecb6c1e012e52b6afb1a622 (sgarzarella/qemu-kvm-c-9-s) + +Drop this simple wrapper used only in one place. We have too many graph +modifying functions even without it. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Hanna Reitz +Message-Id: <20221107163558.618889-3-vsementsov@yandex-team.ru> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit f38eaec4c3618dfc4a23e20435cefb5bf8325264) +Signed-off-by: Stefano Garzarella +--- + block.c | 15 +-------------- + 1 file changed, 1 insertion(+), 14 deletions(-) + +diff --git a/block.c b/block.c +index a18f052374..ec184150a2 100644 +--- a/block.c ++++ b/block.c +@@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, + static void bdrv_replace_child_noperm(BdrvChild *child, + BlockDriverState *new_bs); + static void bdrv_remove_child(BdrvChild *child, Transaction *tran); +-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, +- Transaction *tran); + + static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, +@@ -5073,17 +5071,6 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) + tran_add(tran, &bdrv_remove_child_drv, child); + } + +-/* +- * A function to 
remove backing-chain child of @bs if exists: cow child for +- * format nodes (always .backing) and filter child for filters (may be .file or +- * .backing) +- */ +-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, +- Transaction *tran) +-{ +- bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran); +-} +- + static int bdrv_replace_node_noperm(BlockDriverState *from, + BlockDriverState *to, + bool auto_skip, Transaction *tran, +@@ -5168,7 +5155,7 @@ static int bdrv_replace_node_common(BlockDriverState *from, + } + + if (detach_subchain) { +- bdrv_remove_filter_or_cow_child(to_cow_parent, tran); ++ bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran); + } + + found = g_hash_table_new(NULL, NULL); +-- +2.31.1 + diff --git a/SOURCES/kvm-block-file-Add-file-specific-image-info.patch b/SOURCES/kvm-block-file-Add-file-specific-image-info.patch new file mode 100644 index 0000000..a81b6b0 --- /dev/null +++ b/SOURCES/kvm-block-file-Add-file-specific-image-info.patch @@ -0,0 +1,145 @@ +From 4af86458d6bea2a6e15fd57d4d4bbe88e35f7e72 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:26:54 +0200 +Subject: [PATCH 07/20] block/file: Add file-specific image info + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [2/12] d8cc351d6c16c41b2000e41dc555f13093a9edce (hreitz/qemu-kvm-c-9-s) + +Add some (optional) information that the file driver can provide for +image files, namely the extent size hint. 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-3-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 7f36a50ab4e7d39369cac67be4ba9d6ee4081dc0) +Signed-off-by: Hanna Czenczek +--- + block/file-posix.c | 30 ++++++++++++++++++++++++++++++ + qapi/block-core.json | 26 ++++++++++++++++++++++++-- + 2 files changed, 54 insertions(+), 2 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index b9647c5ffc..df3da79aed 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -3095,6 +3095,34 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) + return 0; + } + ++static ImageInfoSpecific *raw_get_specific_info(BlockDriverState *bs, ++ Error **errp) ++{ ++ ImageInfoSpecificFile *file_info = g_new0(ImageInfoSpecificFile, 1); ++ ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1); ++ ++ *spec_info = (ImageInfoSpecific){ ++ .type = IMAGE_INFO_SPECIFIC_KIND_FILE, ++ .u.file.data = file_info, ++ }; ++ ++#ifdef FS_IOC_FSGETXATTR ++ { ++ BDRVRawState *s = bs->opaque; ++ struct fsxattr attr; ++ int ret; ++ ++ ret = ioctl(s->fd, FS_IOC_FSGETXATTR, &attr); ++ if (!ret && attr.fsx_extsize != 0) { ++ file_info->has_extent_size_hint = true; ++ file_info->extent_size_hint = attr.fsx_extsize; ++ } ++ } ++#endif ++ ++ return spec_info; ++} ++ + static BlockStatsSpecificFile get_blockstats_specific_file(BlockDriverState *bs) + { + BDRVRawState *s = bs->opaque; +@@ -3328,6 +3356,7 @@ BlockDriver bdrv_file = { + .bdrv_co_truncate = raw_co_truncate, + .bdrv_getlength = raw_getlength, + .bdrv_get_info = raw_get_info, ++ .bdrv_get_specific_info = raw_get_specific_info, + .bdrv_get_allocated_file_size + = raw_get_allocated_file_size, + .bdrv_get_specific_stats = raw_get_specific_stats, +@@ -3700,6 +3729,7 @@ static BlockDriver bdrv_host_device = { + .bdrv_co_truncate = raw_co_truncate, + .bdrv_getlength = raw_getlength, + .bdrv_get_info = raw_get_info, ++ .bdrv_get_specific_info = 
raw_get_specific_info, + .bdrv_get_allocated_file_size + = raw_get_allocated_file_size, + .bdrv_get_specific_stats = hdev_get_specific_stats, +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 95ac4fa634..f5d822cbd6 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -139,16 +139,29 @@ + '*encryption-format': 'RbdImageEncryptionFormat' + } } + ++## ++# @ImageInfoSpecificFile: ++# ++# @extent-size-hint: Extent size hint (if available) ++# ++# Since: 8.0 ++## ++{ 'struct': 'ImageInfoSpecificFile', ++ 'data': { ++ '*extent-size-hint': 'size' ++ } } ++ + ## + # @ImageInfoSpecificKind: + # + # @luks: Since 2.7 + # @rbd: Since 6.1 ++# @file: Since 8.0 + # + # Since: 1.7 + ## + { 'enum': 'ImageInfoSpecificKind', +- 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd' ] } ++ 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd', 'file' ] } + + ## + # @ImageInfoSpecificQCow2Wrapper: +@@ -185,6 +198,14 @@ + { 'struct': 'ImageInfoSpecificRbdWrapper', + 'data': { 'data': 'ImageInfoSpecificRbd' } } + ++## ++# @ImageInfoSpecificFileWrapper: ++# ++# Since: 8.0 ++## ++{ 'struct': 'ImageInfoSpecificFileWrapper', ++ 'data': { 'data': 'ImageInfoSpecificFile' } } ++ + ## + # @ImageInfoSpecific: + # +@@ -199,7 +220,8 @@ + 'qcow2': 'ImageInfoSpecificQCow2Wrapper', + 'vmdk': 'ImageInfoSpecificVmdkWrapper', + 'luks': 'ImageInfoSpecificLUKSWrapper', +- 'rbd': 'ImageInfoSpecificRbdWrapper' ++ 'rbd': 'ImageInfoSpecificRbdWrapper', ++ 'file': 'ImageInfoSpecificFileWrapper' + } } + + ## +-- +2.31.1 + diff --git a/SOURCES/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch b/SOURCES/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch new file mode 100644 index 0000000..62979ef --- /dev/null +++ b/SOURCES/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch @@ -0,0 +1,206 @@ +From c8c282c2e1d74cfc5de6527f7e20dfc3e76b67ac Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:27:00 +0200 +Subject: [PATCH 13/20] block/qapi: Add indentation to 
bdrv_node_info_dump() + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [8/12] d3a697e81ab9828457198075e5815a592363c725 (hreitz/qemu-kvm-c-9-s) + +In order to let qemu-img info present a block graph, add a parameter to +bdrv_node_info_dump() and bdrv_image_info_specific_dump() so that the +information of nodes below the root level can be given an indentation. + +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-9-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 76c9e9750d1bd580e8ed4465f6be3a986434e7c3) +Signed-off-by: Hanna Czenczek +--- + block/monitor/block-hmp-cmds.c | 2 +- + block/qapi.c | 47 +++++++++++++++++++--------------- + include/block/qapi.h | 5 ++-- + qemu-img.c | 2 +- + qemu-io-cmds.c | 3 ++- + 5 files changed, 34 insertions(+), 25 deletions(-) + +diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c +index aa37faa601..72824d4e2e 100644 +--- a/block/monitor/block-hmp-cmds.c ++++ b/block/monitor/block-hmp-cmds.c +@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, + monitor_printf(mon, "\nImages:\n"); + image_info = inserted->image; + while (1) { +- bdrv_node_info_dump(qapi_ImageInfo_base(image_info)); ++ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0); + if (image_info->has_backing_image) { + image_info = image_info->backing_image; + } else { +diff --git a/block/qapi.c b/block/qapi.c +index f208c21ccf..3e35603f0c 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -915,7 +915,8 @@ static bool qobject_is_empty_dump(const QObject *obj) + * prepending an optional prefix if the dump is not empty. 
+ */ + void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, +- const char *prefix) ++ const char *prefix, ++ int indentation) + { + QObject *obj, *data; + Visitor *v = qobject_output_visitor_new(&obj); +@@ -925,48 +926,51 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, + data = qdict_get(qobject_to(QDict, obj), "data"); + if (!qobject_is_empty_dump(data)) { + if (prefix) { +- qemu_printf("%s", prefix); ++ qemu_printf("%*s%s", indentation * 4, "", prefix); + } +- dump_qobject(1, data); ++ dump_qobject(indentation + 1, data); + } + qobject_unref(obj); + visit_free(v); + } + +-void bdrv_node_info_dump(BlockNodeInfo *info) ++void bdrv_node_info_dump(BlockNodeInfo *info, int indentation) + { + char *size_buf, *dsize_buf; ++ g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, ""); ++ + if (!info->has_actual_size) { + dsize_buf = g_strdup("unavailable"); + } else { + dsize_buf = size_to_str(info->actual_size); + } + size_buf = size_to_str(info->virtual_size); +- qemu_printf("image: %s\n" +- "file format: %s\n" +- "virtual size: %s (%" PRId64 " bytes)\n" +- "disk size: %s\n", +- info->filename, info->format, size_buf, +- info->virtual_size, +- dsize_buf); ++ qemu_printf("%simage: %s\n" ++ "%sfile format: %s\n" ++ "%svirtual size: %s (%" PRId64 " bytes)\n" ++ "%sdisk size: %s\n", ++ ind_s, info->filename, ++ ind_s, info->format, ++ ind_s, size_buf, info->virtual_size, ++ ind_s, dsize_buf); + g_free(size_buf); + g_free(dsize_buf); + + if (info->has_encrypted && info->encrypted) { +- qemu_printf("encrypted: yes\n"); ++ qemu_printf("%sencrypted: yes\n", ind_s); + } + + if (info->has_cluster_size) { +- qemu_printf("cluster_size: %" PRId64 "\n", +- info->cluster_size); ++ qemu_printf("%scluster_size: %" PRId64 "\n", ++ ind_s, info->cluster_size); + } + + if (info->has_dirty_flag && info->dirty_flag) { +- qemu_printf("cleanly shut down: no\n"); ++ qemu_printf("%scleanly shut down: no\n", ind_s); + } + + if (info->has_backing_filename) 
{ +- qemu_printf("backing file: %s", info->backing_filename); ++ qemu_printf("%sbacking file: %s", ind_s, info->backing_filename); + if (!info->has_full_backing_filename) { + qemu_printf(" (cannot determine actual path)"); + } else if (strcmp(info->backing_filename, +@@ -975,15 +979,16 @@ void bdrv_node_info_dump(BlockNodeInfo *info) + } + qemu_printf("\n"); + if (info->has_backing_filename_format) { +- qemu_printf("backing file format: %s\n", +- info->backing_filename_format); ++ qemu_printf("%sbacking file format: %s\n", ++ ind_s, info->backing_filename_format); + } + } + + if (info->has_snapshots) { + SnapshotInfoList *elem; + +- qemu_printf("Snapshot list:\n"); ++ qemu_printf("%sSnapshot list:\n", ind_s); ++ qemu_printf("%s", ind_s); + bdrv_snapshot_dump(NULL); + qemu_printf("\n"); + +@@ -1003,6 +1008,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info) + + pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id); + pstrcpy(sn.name, sizeof(sn.name), elem->value->name); ++ qemu_printf("%s", ind_s); + bdrv_snapshot_dump(&sn); + qemu_printf("\n"); + } +@@ -1010,6 +1016,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info) + + if (info->has_format_specific) { + bdrv_image_info_specific_dump(info->format_specific, +- "Format specific information:\n"); ++ "Format specific information:\n", ++ indentation); + } + } +diff --git a/include/block/qapi.h b/include/block/qapi.h +index 196436020e..38855f2ae9 100644 +--- a/include/block/qapi.h ++++ b/include/block/qapi.h +@@ -49,6 +49,7 @@ void bdrv_query_block_graph_info(BlockDriverState *bs, + + void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); + void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, +- const char *prefix); +-void bdrv_node_info_dump(BlockNodeInfo *info); ++ const char *prefix, ++ int indentation); ++void bdrv_node_info_dump(BlockNodeInfo *info, int indentation); + #endif +diff --git a/qemu-img.c b/qemu-img.c +index 3b2ca3bbcb..30b4ea58bb 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -2859,7 +2859,7 @@ 
static void dump_human_image_info_list(BlockNodeInfoList *list) + } + delim = true; + +- bdrv_node_info_dump(elem->value); ++ bdrv_node_info_dump(elem->value, 0); + } + } + +diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c +index f4a374528e..fdcb89211b 100644 +--- a/qemu-io-cmds.c ++++ b/qemu-io-cmds.c +@@ -1826,7 +1826,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv) + } + if (spec_info) { + bdrv_image_info_specific_dump(spec_info, +- "Format specific information:\n"); ++ "Format specific information:\n", ++ 0); + qapi_free_ImageInfoSpecific(spec_info); + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-block-qapi-Introduce-BlockGraphInfo.patch b/SOURCES/kvm-block-qapi-Introduce-BlockGraphInfo.patch new file mode 100644 index 0000000..e9a1622 --- /dev/null +++ b/SOURCES/kvm-block-qapi-Introduce-BlockGraphInfo.patch @@ -0,0 +1,155 @@ +From 0044e3848b02ef6edba5961d1f4b6297d137d207 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:26:59 +0200 +Subject: [PATCH 12/20] block/qapi: Introduce BlockGraphInfo + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [7/12] de47bac372cd552b812c774a2f35f95923af74ff (hreitz/qemu-kvm-c-9-s) + +Introduce a new QAPI type BlockGraphInfo and an associated +bdrv_query_block_graph_info() function that recursively gathers +BlockNodeInfo objects through a block graph. + +A follow-up patch is going to make "qemu-img info" use this to print +information about all nodes that are (usually implicitly) opened for a +given image file. 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-8-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 6cab33997b91eb86e82a6a2ae58a24f835249d4a) +Signed-off-by: Hanna Czenczek +--- + block/qapi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ + include/block/qapi.h | 3 +++ + qapi/block-core.json | 35 ++++++++++++++++++++++++++++++++ + 3 files changed, 86 insertions(+) + +diff --git a/block/qapi.c b/block/qapi.c +index 5d0a8d2ce3..f208c21ccf 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -411,6 +411,54 @@ fail: + qapi_free_ImageInfo(info); + } + ++/** ++ * bdrv_query_block_graph_info: ++ * @bs: root node to start from ++ * @p_info: location to store image information ++ * @errp: location to store error information ++ * ++ * Store image information about the graph starting from @bs in @p_info. ++ * ++ * @p_info will be set only on success. On error, store error in @errp. ++ */ ++void bdrv_query_block_graph_info(BlockDriverState *bs, ++ BlockGraphInfo **p_info, ++ Error **errp) ++{ ++ BlockGraphInfo *info; ++ BlockChildInfoList **children_list_tail; ++ BdrvChild *c; ++ ERRP_GUARD(); ++ ++ info = g_new0(BlockGraphInfo, 1); ++ bdrv_do_query_node_info(bs, qapi_BlockGraphInfo_base(info), errp); ++ if (*errp) { ++ goto fail; ++ } ++ ++ children_list_tail = &info->children; ++ ++ QLIST_FOREACH(c, &bs->children, next) { ++ BlockChildInfo *c_info; ++ ++ c_info = g_new0(BlockChildInfo, 1); ++ QAPI_LIST_APPEND(children_list_tail, c_info); ++ ++ c_info->name = g_strdup(c->name); ++ bdrv_query_block_graph_info(c->bs, &c_info->info, errp); ++ if (*errp) { ++ goto fail; ++ } ++ } ++ ++ *p_info = info; ++ return; ++ ++fail: ++ assert(*errp != NULL); ++ qapi_free_BlockGraphInfo(info); ++} ++ + /* @p_info will be set only on success. 
*/ + static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, + Error **errp) +diff --git a/include/block/qapi.h b/include/block/qapi.h +index 2174bf8fa2..196436020e 100644 +--- a/include/block/qapi.h ++++ b/include/block/qapi.h +@@ -43,6 +43,9 @@ void bdrv_query_image_info(BlockDriverState *bs, + bool flat, + bool skip_implicit_filters, + Error **errp); ++void bdrv_query_block_graph_info(BlockDriverState *bs, ++ BlockGraphInfo **p_info, ++ Error **errp); + + void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); + void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 4cf2deeb6c..d703e0fb16 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -307,6 +307,41 @@ + '*backing-image': 'ImageInfo' + } } + ++## ++# @BlockChildInfo: ++# ++# Information about all nodes in the block graph starting at some node, ++# annotated with information about that node in relation to its parent. ++# ++# @name: Child name of the root node in the BlockGraphInfo struct, in its role ++# as the child of some undescribed parent node ++# ++# @info: Block graph information starting at this node ++# ++# Since: 8.0 ++## ++{ 'struct': 'BlockChildInfo', ++ 'data': { ++ 'name': 'str', ++ 'info': 'BlockGraphInfo' ++ } } ++ ++## ++# @BlockGraphInfo: ++# ++# Information about all nodes in a block (sub)graph in the form of BlockNodeInfo ++# data. ++# The base BlockNodeInfo struct contains the information for the (sub)graph's ++# root node. 
++# ++# @children: Array of links to this node's child nodes' information ++# ++# Since: 8.0 ++## ++{ 'struct': 'BlockGraphInfo', ++ 'base': 'BlockNodeInfo', ++ 'data': { 'children': ['BlockChildInfo'] } } ++ + ## + # @ImageCheck: + # +-- +2.31.1 + diff --git a/SOURCES/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch b/SOURCES/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch new file mode 100644 index 0000000..e5c012a --- /dev/null +++ b/SOURCES/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch @@ -0,0 +1,197 @@ +From ae2c3df00d673d436fe4d8ec9103a3b76d7e6233 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:26:58 +0200 +Subject: [PATCH 11/20] block/qapi: Let bdrv_query_image_info() recurse + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [6/12] 451a83fd682cd6dd6026c22974d18c2f12ee06e3 (hreitz/qemu-kvm-c-9-s) + +There is no real reason why bdrv_query_image_info() should generally not +recurse. The ImageInfo struct has a pointer to the backing image, so it +should generally be filled, unless the caller explicitly opts out. + +This moves the recursing code from bdrv_block_device_info() into +bdrv_query_image_info(). + +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-7-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 5d8813593f3f673fc96eed199beb35690cc46f58) + +Conflicts: + block/qapi.c: Conflicts with + 54fde4ff0621c22b15cbaaa3c74301cc0dbd1c9e ("qapi block: Elide + redundant has_FOO in generated C"), which dropped + `has_backing_image`. Without that commit (and 44ea9d9be before it), + we still need to set `has_backing_image` in + `bdrv_query_image_info()`. 
+ +Signed-off-by: Hanna Czenczek +--- + block/qapi.c | 94 +++++++++++++++++++++++++++----------------- + include/block/qapi.h | 2 + + 2 files changed, 59 insertions(+), 37 deletions(-) + +diff --git a/block/qapi.c b/block/qapi.c +index ad88bf9b38..5d0a8d2ce3 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -47,8 +47,10 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, + Error **errp) + { + ImageInfo **p_image_info; ++ ImageInfo *backing_info; + BlockDriverState *bs0, *backing; + BlockDeviceInfo *info; ++ ERRP_GUARD(); + + if (!bs->drv) { + error_setg(errp, "Block device %s is ejected", bs->node_name); +@@ -149,38 +151,21 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, + bs0 = bs; + p_image_info = &info->image; + info->backing_file_depth = 0; +- while (1) { +- Error *local_err = NULL; +- bdrv_query_image_info(bs0, p_image_info, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- qapi_free_BlockDeviceInfo(info); +- return NULL; +- } +- +- /* stop gathering data for flat output */ +- if (flat) { +- break; +- } + +- if (bs0->drv && bdrv_filter_or_cow_child(bs0)) { +- /* +- * Put any filtered child here (for backwards compatibility to when +- * we put bs0->backing here, which might be any filtered child). 
+- */ +- info->backing_file_depth++; +- bs0 = bdrv_filter_or_cow_bs(bs0); +- (*p_image_info)->has_backing_image = true; +- p_image_info = &((*p_image_info)->backing_image); +- } else { +- break; +- } ++ /* ++ * Skip automatically inserted nodes that the user isn't aware of for ++ * query-block (blk != NULL), but not for query-named-block-nodes ++ */ ++ bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp); ++ if (*errp) { ++ qapi_free_BlockDeviceInfo(info); ++ return NULL; ++ } + +- /* Skip automatically inserted nodes that the user isn't aware of for +- * query-block (blk != NULL), but not for query-named-block-nodes */ +- if (blk) { +- bs0 = bdrv_skip_implicit_filters(bs0); +- } ++ backing_info = info->image->backing_image; ++ while (backing_info) { ++ info->backing_file_depth++; ++ backing_info = backing_info->backing_image; + } + + return info; +@@ -363,19 +348,28 @@ void bdrv_query_block_node_info(BlockDriverState *bs, + * bdrv_query_image_info: + * @bs: block node to examine + * @p_info: location to store image information ++ * @flat: skip backing node information ++ * @skip_implicit_filters: skip implicit filters in the backing chain + * @errp: location to store error information + * +- * Store "flat" image information in @p_info. ++ * Store image information in @p_info, potentially recursively covering the ++ * backing chain. + * +- * "Flat" means it does *not* query backing image information, +- * i.e. (*pinfo)->has_backing_image will be set to false and +- * (*pinfo)->backing_image to NULL even when the image does in fact have +- * a backing image. ++ * If @flat is true, do not query backing image information, i.e. ++ * (*p_info)->has_backing_image will be set to false and ++ * (*p_info)->backing_image to NULL even when the image does in fact have a ++ * backing image. ++ * ++ * If @skip_implicit_filters is true, implicit filter nodes in the backing chain ++ * will be skipped when querying backing image information. 
++ * (@skip_implicit_filters is ignored when @flat is true.) + * + * @p_info will be set only on success. On error, store error in @errp. + */ + void bdrv_query_image_info(BlockDriverState *bs, + ImageInfo **p_info, ++ bool flat, ++ bool skip_implicit_filters, + Error **errp) + { + ImageInfo *info; +@@ -384,11 +378,37 @@ void bdrv_query_image_info(BlockDriverState *bs, + info = g_new0(ImageInfo, 1); + bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp); + if (*errp) { +- qapi_free_ImageInfo(info); +- return; ++ goto fail; ++ } ++ ++ if (!flat) { ++ BlockDriverState *backing; ++ ++ /* ++ * Use any filtered child here (for backwards compatibility to when ++ * we always took bs->backing, which might be any filtered child). ++ */ ++ backing = bdrv_filter_or_cow_bs(bs); ++ if (skip_implicit_filters) { ++ backing = bdrv_skip_implicit_filters(backing); ++ } ++ ++ if (backing) { ++ bdrv_query_image_info(backing, &info->backing_image, false, ++ skip_implicit_filters, errp); ++ if (*errp) { ++ goto fail; ++ } ++ info->has_backing_image = true; ++ } + } + + *p_info = info; ++ return; ++ ++fail: ++ assert(*errp); ++ qapi_free_ImageInfo(info); + } + + /* @p_info will be set only on success. 
*/ +diff --git a/include/block/qapi.h b/include/block/qapi.h +index 22198dcd0c..2174bf8fa2 100644 +--- a/include/block/qapi.h ++++ b/include/block/qapi.h +@@ -40,6 +40,8 @@ void bdrv_query_block_node_info(BlockDriverState *bs, + Error **errp); + void bdrv_query_image_info(BlockDriverState *bs, + ImageInfo **p_info, ++ bool flat, ++ bool skip_implicit_filters, + Error **errp); + + void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); +-- +2.31.1 + diff --git a/SOURCES/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch b/SOURCES/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch new file mode 100644 index 0000000..8d5a20a --- /dev/null +++ b/SOURCES/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch @@ -0,0 +1,99 @@ +From b952c8f1da6f8597736c0e040565830139369359 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 14 Feb 2023 18:16:21 +0100 +Subject: [PATCH] block: temporarily hold the new AioContext of bs_top in + bdrv_append() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 153: block: temporarily hold the new AioContext of bs_top in bdrv_append() +RH-Bugzilla: 2168209 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Commit: [1/1] 5b190426d996e8c9f7a781bd97aee8d25756dbd3 (sgarzarella/qemu-kvm-c-9-s) + +bdrv_append() is called with bs_top AioContext held, but +bdrv_attach_child_noperm() could change the AioContext of bs_top. + +bdrv_replace_node_noperm() calls bdrv_drained_begin() starting from +commit 2398747128 ("block: Don't poll in bdrv_replace_child_noperm()"). +bdrv_drained_begin() can call BDRV_POLL_WHILE that assumes the new lock +is taken, so let's temporarily hold the new AioContext to prevent QEMU +from failing in BDRV_POLL_WHILE when it tries to release the wrong +AioContext. 
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2168209 +Reported-by: Aihua Liang +Signed-off-by: Stefano Garzarella +Message-Id: <20230214171621.11574-1-sgarzare@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 60d90bf43c169b9d1dbcb17ed794b7b02c6862b1) +Signed-off-by: Stefano Garzarella +--- + block.c | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +diff --git a/block.c b/block.c +index 0d78711416..9e1dcb9e47 100644 +--- a/block.c ++++ b/block.c +@@ -5275,6 +5275,8 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp) + * child. + * + * This function does not create any image files. ++ * ++ * The caller must hold the AioContext lock for @bs_top. + */ + int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp) +@@ -5282,11 +5284,14 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + int ret; + BdrvChild *child; + Transaction *tran = tran_new(); ++ AioContext *old_context, *new_context = NULL; + + GLOBAL_STATE_CODE(); + + assert(!bs_new->backing); + ++ old_context = bdrv_get_aio_context(bs_top); ++ + child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", + &child_of_bds, bdrv_backing_role(bs_new), + tran, errp); +@@ -5295,6 +5300,19 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + goto out; + } + ++ /* ++ * bdrv_attach_child_noperm could change the AioContext of bs_top. ++ * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily ++ * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE ++ * that assumes the new lock is taken. 
++ */ ++ new_context = bdrv_get_aio_context(bs_top); ++ ++ if (old_context != new_context) { ++ aio_context_release(old_context); ++ aio_context_acquire(new_context); ++ } ++ + ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); + if (ret < 0) { + goto out; +@@ -5306,6 +5324,11 @@ out: + + bdrv_refresh_limits(bs_top, NULL, NULL); + ++ if (new_context && old_context != new_context) { ++ aio_context_release(new_context); ++ aio_context_acquire(old_context); ++ } ++ + return ret; + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-block-vmdk-Change-extent-info-type.patch b/SOURCES/kvm-block-vmdk-Change-extent-info-type.patch new file mode 100644 index 0000000..6b8f6a7 --- /dev/null +++ b/SOURCES/kvm-block-vmdk-Change-extent-info-type.patch @@ -0,0 +1,140 @@ +From d8caed018afb0f60f449e971398d2a8d6c2992e7 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:26:55 +0200 +Subject: [PATCH 08/20] block/vmdk: Change extent info type + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [3/12] efe50a2797c679ce6bb5faa423047461a34e6792 (hreitz/qemu-kvm-c-9-s) + +VMDK's implementation of .bdrv_get_specific_info() returns information +about its extent files, ostensibly in the form of ImageInfo objects. +However, it does not get this information through +bdrv_query_image_info(), but fills only a select few fields with custom +information that does not always match the fields' purposes. + +For example, @format, which is supposed to be a block driver name, is +filled with the extent type, e.g. SPARSE or FLAT. + +In ImageInfo, @compressed shows whether the data that can be seen in the +image is stored in compressed form or not. For example, a compressed +qcow2 image will store compressed data in its data file, but when +accessing the qcow2 node, you will see normal data. 
This is not how +VMDK uses the @compressed field for its extent files: Instead, it +signifies whether accessing the extent file will yield compressed data +(which the VMDK driver then (de-)compresses). + +Create a new structure to represent the extent information. This allows +us to clarify the fields' meanings, and it clearly shows that these are +not complete ImageInfo objects. (That is, if a user wants an extent +file's ImageInfo object, they will need to query it separately, and will +not get it from ImageInfoSpecificVmdk.extents.) + +Note that this removes the last use of ['ImageInfo'] (i.e. an array of +ImageInfo objects), so the QAPI generator will no longer generate +ImageInfoList by default. However, we use it in qemu-img.c, so we need +to create a dummy object to force the generate to create that type, +similarly to DummyForceArrays in machine.json (introduced in commit +9f08c8ec73878122ad4b061ed334f0437afaaa32 ("qapi: Lazy creation of array +types")). + +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-4-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 456e75171a85c19a5bfa202eefcbdc4ef1692f05) +Signed-off-by: Hanna Czenczek +--- + block/vmdk.c | 8 ++++---- + qapi/block-core.json | 38 +++++++++++++++++++++++++++++++++++++- + 2 files changed, 41 insertions(+), 5 deletions(-) + +diff --git a/block/vmdk.c b/block/vmdk.c +index 26376352b9..4435b9880b 100644 +--- a/block/vmdk.c ++++ b/block/vmdk.c +@@ -2901,12 +2901,12 @@ static int vmdk_has_zero_init(BlockDriverState *bs) + return 1; + } + +-static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent) ++static VmdkExtentInfo *vmdk_get_extent_info(VmdkExtent *extent) + { +- ImageInfo *info = g_new0(ImageInfo, 1); ++ VmdkExtentInfo *info = g_new0(VmdkExtentInfo, 1); + + bdrv_refresh_filename(extent->file->bs); +- *info = (ImageInfo){ ++ *info = (VmdkExtentInfo){ + .filename = g_strdup(extent->file->bs->filename), + .format = g_strdup(extent->type), + 
.virtual_size = extent->sectors * BDRV_SECTOR_SIZE, +@@ -2985,7 +2985,7 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs, + int i; + BDRVVmdkState *s = bs->opaque; + ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1); +- ImageInfoList **tail; ++ VmdkExtentInfoList **tail; + + *spec_info = (ImageInfoSpecific){ + .type = IMAGE_INFO_SPECIFIC_KIND_VMDK, +diff --git a/qapi/block-core.json b/qapi/block-core.json +index f5d822cbd6..4b9365167f 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -124,7 +124,33 @@ + 'create-type': 'str', + 'cid': 'int', + 'parent-cid': 'int', +- 'extents': ['ImageInfo'] ++ 'extents': ['VmdkExtentInfo'] ++ } } ++ ++## ++# @VmdkExtentInfo: ++# ++# Information about a VMDK extent file ++# ++# @filename: Name of the extent file ++# ++# @format: Extent type (e.g. FLAT or SPARSE) ++# ++# @virtual-size: Number of bytes covered by this extent ++# ++# @cluster-size: Cluster size in bytes (for non-flat extents) ++# ++# @compressed: Whether this extent contains compressed data ++# ++# Since: 8.0 ++## ++{ 'struct': 'VmdkExtentInfo', ++ 'data': { ++ 'filename': 'str', ++ 'format': 'str', ++ 'virtual-size': 'int', ++ '*cluster-size': 'int', ++ '*compressed': 'bool' + } } + + ## +@@ -5754,3 +5780,13 @@ + 'data': { 'device': 'str', '*id': 'str', '*name': 'str'}, + 'returns': 'SnapshotInfo', + 'allow-preconfig': true } ++ ++## ++# @DummyBlockCoreForceArrays: ++# ++# Not used by QMP; hack to let us use ImageInfoList internally ++# ++# Since: 8.0 ++## ++{ 'struct': 'DummyBlockCoreForceArrays', ++ 'data': { 'unused-image-info': ['ImageInfo'] } } +-- +2.31.1 + diff --git a/SOURCES/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch b/SOURCES/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch deleted file mode 100644 index a948e57..0000000 --- a/SOURCES/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 3a0e9bb88e82cc76ca5efc0595ce94b5dc34749e 
Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Mon, 25 Apr 2022 13:42:46 +0800 -Subject: [PATCH 1/2] configs/devices/aarch64-softmmu: Enable CONFIG_VIRTIO_MEM - -RH-Author: Gavin Shan -RH-MergeRequest: 80: Enable virtio-mem for aarch64 -RH-Commit: [1/1] 1afbd08da6d7c860da8d617a0a932d3660514878 (gwshan/qemu-rhel-9) -RH-Bugzilla: 2044162 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Eric Auger -RH-Acked-by: David Hildenbrand - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2044162 - -This enables virtio-mem device on aarch64 since all needed commits -are ready. - - b1b87327a9 hw/arm/virt: Support for virtio-mem-pci - 1263615efe virtio-mem: Correct default THP size for ARM64 - -Signed-off-by: Gavin Shan ---- - configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -index 5f6ee1de5b..187938573f 100644 ---- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -22,6 +22,7 @@ CONFIG_VFIO=y - CONFIG_VFIO_PCI=y - CONFIG_VIRTIO_MMIO=y - CONFIG_VIRTIO_PCI=y -+CONFIG_VIRTIO_MEM=y - CONFIG_XIO3130=y - CONFIG_NVDIMM=y - CONFIG_ACPI_APEI=y --- -2.35.1 - diff --git a/SOURCES/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch b/SOURCES/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch deleted file mode 100644 index c1f3683..0000000 --- a/SOURCES/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch +++ /dev/null @@ -1,101 +0,0 @@ -From e3cb8849862a9f0dd20f2913d540336a037d43c7 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 10 May 2022 17:10:19 +0200 -Subject: [PATCH 07/16] coroutine: Rename qemu_coroutine_inc/dec_pool_size() - -RH-Author: Kevin Wolf -RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size -RH-Commit: [1/2] 6389b11f70225f221784c270d9b90c1ea43ca8fb (kmwolf/centos-qemu-kvm) 
-RH-Bugzilla: 2079938 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella - -It's true that these functions currently affect the batch size in which -coroutines are reused (i.e. moved from the global release pool to the -allocation pool of a specific thread), but this is a bug and will be -fixed in a separate patch. - -In fact, the comment in the header file already just promises that it -influences the pool size, so reflect this in the name of the functions. -As a nice side effect, the shorter function name makes some line -wrapping unnecessary. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Message-Id: <20220510151020.105528-2-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 98e3ab35054b946f7c2aba5408822532b0920b53) -Signed-off-by: Kevin Wolf ---- - hw/block/virtio-blk.c | 6 ++---- - include/qemu/coroutine.h | 6 +++--- - util/qemu-coroutine.c | 4 ++-- - 3 files changed, 7 insertions(+), 9 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 540c38f829..6a1cc41877 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1215,8 +1215,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - for (i = 0; i < conf->num_queues; i++) { - virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); - } -- qemu_coroutine_increase_pool_batch_size(conf->num_queues * conf->queue_size -- / 2); -+ qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); - virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); - if (err != NULL) { - error_propagate(errp, err); -@@ -1253,8 +1252,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) - for (i = 0; i < conf->num_queues; i++) { - virtio_del_queue(vdev, i); - } -- qemu_coroutine_decrease_pool_batch_size(conf->num_queues * conf->queue_size -- / 2); -+ qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); - 
qemu_del_vm_change_state_handler(s->change); - blockdev_mark_auto_del(s->blk); - virtio_cleanup(vdev); -diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h -index c828a95ee0..5b621d1295 100644 ---- a/include/qemu/coroutine.h -+++ b/include/qemu/coroutine.h -@@ -334,12 +334,12 @@ void coroutine_fn yield_until_fd_readable(int fd); - /** - * Increase coroutine pool size - */ --void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size); -+void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size); - - /** -- * Devcrease coroutine pool size -+ * Decrease coroutine pool size - */ --void qemu_coroutine_decrease_pool_batch_size(unsigned int additional_pool_size); -+void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size); - - #include "qemu/lockable.h" - -diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c -index c03b2422ff..faca0ca97c 100644 ---- a/util/qemu-coroutine.c -+++ b/util/qemu-coroutine.c -@@ -205,12 +205,12 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) - return co->ctx; - } - --void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size) -+void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) - { - qatomic_add(&pool_batch_size, additional_pool_size); - } - --void qemu_coroutine_decrease_pool_batch_size(unsigned int removing_pool_size) -+void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) - { - qatomic_sub(&pool_batch_size, removing_pool_size); - } --- -2.31.1 - diff --git a/SOURCES/kvm-coroutine-Revert-to-constant-batch-size.patch b/SOURCES/kvm-coroutine-Revert-to-constant-batch-size.patch deleted file mode 100644 index 2973510..0000000 --- a/SOURCES/kvm-coroutine-Revert-to-constant-batch-size.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 345107bfd5537b51f34aaeb97d6161858bb6feee Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 10 May 2022 17:10:20 +0200 -Subject: [PATCH 08/16] coroutine: Revert to constant batch size - 
-RH-Author: Kevin Wolf -RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size -RH-Commit: [2/2] 8a8a39af873854cdc8333d1a70f3479a97c3ec7a (kmwolf/centos-qemu-kvm) -RH-Bugzilla: 2079938 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella - -Commit 4c41c69e changed the way the coroutine pool is sized because for -virtio-blk devices with a large queue size and heavy I/O, it was just -too small and caused coroutines to be deleted and reallocated soon -afterwards. The change made the size dynamic based on the number of -queues and the queue size of virtio-blk devices. - -There are two important numbers here: Slightly simplified, when a -coroutine terminates, it is generally stored in the global release pool -up to a certain pool size, and if the pool is full, it is freed. -Conversely, when allocating a new coroutine, the coroutines in the -release pool are reused if the pool already has reached a certain -minimum size (the batch size), otherwise we allocate new coroutines. - -The problem after commit 4c41c69e is that it not only increases the -maximum pool size (which is the intended effect), but also the batch -size for reusing coroutines (which is a bug). It means that in cases -with many devices and/or a large queue size (which defaults to the -number of vcpus for virtio-blk-pci), many thousand coroutines could be -sitting in the release pool without being reused. - -This is not only a waste of memory and allocations, but it actually -makes the QEMU process likely to hit the vm.max_map_count limit on Linux -because each coroutine requires two mappings (its stack and the guard -page for the stack), causing it to abort() in qemu_alloc_stack() because -when the limit is hit, mprotect() starts to fail with ENOMEM. 
- -In order to fix the problem, change the batch size back to 64 to avoid -uselessly accumulating coroutines in the release pool, but keep the -dynamic maximum pool size so that coroutines aren't freed too early -in heavy I/O scenarios. - -Note that this fix doesn't strictly make it impossible to hit the limit, -but this would only happen if most of the coroutines are actually in use -at the same time, not just sitting in a pool. This is the same behaviour -as we already had before commit 4c41c69e. Fully preventing this would -require allowing qemu_coroutine_create() to return an error, but it -doesn't seem to be a scenario that people hit in practice. - -Cc: qemu-stable@nongnu.org -Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938 -Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b -Signed-off-by: Kevin Wolf -Message-Id: <20220510151020.105528-3-kwolf@redhat.com> -Tested-by: Hiroki Narukawa -Signed-off-by: Kevin Wolf -(cherry picked from commit 9ec7a59b5aad4b736871c378d30f5ef5ec51cb52) - -Conflicts: - util/qemu-coroutine.c - -Trivial merge conflict because we don't have commit ac387a08 downstream. - -Signed-off-by: Kevin Wolf ---- - util/qemu-coroutine.c | 22 ++++++++++++++-------- - 1 file changed, 14 insertions(+), 8 deletions(-) - -diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c -index faca0ca97c..804f672e0a 100644 ---- a/util/qemu-coroutine.c -+++ b/util/qemu-coroutine.c -@@ -20,14 +20,20 @@ - #include "qemu/coroutine_int.h" - #include "block/aio.h" - --/** Initial batch size is 64, and is increased on demand */ -+/** -+ * The minimal batch size is always 64, coroutines from the release_pool are -+ * reused as soon as there are 64 coroutines in it. The maximum pool size starts -+ * with 64 and is increased on demand so that coroutines are not deleted even if -+ * they are not immediately reused. 
-+ */ - enum { -- POOL_INITIAL_BATCH_SIZE = 64, -+ POOL_MIN_BATCH_SIZE = 64, -+ POOL_INITIAL_MAX_SIZE = 64, - }; - - /** Free list to speed up creation */ - static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); --static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE; -+static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; - static unsigned int release_pool_size; - static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); - static __thread unsigned int alloc_pool_size; -@@ -51,7 +57,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) - if (CONFIG_COROUTINE_POOL) { - co = QSLIST_FIRST(&alloc_pool); - if (!co) { -- if (release_pool_size > qatomic_read(&pool_batch_size)) { -+ if (release_pool_size > POOL_MIN_BATCH_SIZE) { - /* Slow path; a good place to register the destructor, too. */ - if (!coroutine_pool_cleanup_notifier.notify) { - coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; -@@ -88,12 +94,12 @@ static void coroutine_delete(Coroutine *co) - co->caller = NULL; - - if (CONFIG_COROUTINE_POOL) { -- if (release_pool_size < qatomic_read(&pool_batch_size) * 2) { -+ if (release_pool_size < qatomic_read(&pool_max_size) * 2) { - QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); - qatomic_inc(&release_pool_size); - return; - } -- if (alloc_pool_size < qatomic_read(&pool_batch_size)) { -+ if (alloc_pool_size < qatomic_read(&pool_max_size)) { - QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); - alloc_pool_size++; - return; -@@ -207,10 +213,10 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) - - void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) - { -- qatomic_add(&pool_batch_size, additional_pool_size); -+ qatomic_add(&pool_max_size, additional_pool_size); - } - - void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) - { -- qatomic_sub(&pool_batch_size, removing_pool_size); -+ qatomic_sub(&pool_max_size, 
removing_pool_size); - } --- -2.31.1 - diff --git a/SOURCES/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/SOURCES/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch deleted file mode 100644 index 963cf04..0000000 --- a/SOURCES/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch +++ /dev/null @@ -1,132 +0,0 @@ -From ffbd90e5f4eba620c7cd631b04f0ed31beb22ffa Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 12:07:56 +0100 -Subject: [PATCH 1/6] coroutine-ucontext: use QEMU_DEFINE_STATIC_CO_TLS() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables -RH-Commit: [1/3] a9782fe8e919c4bd317b7e8744c7ff57d898add3 (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 1952483 -RH-Acked-by: Hanna Reitz -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin Wolf - -Thread-Local Storage variables cannot be used directly from coroutine -code because the compiler may optimize TLS variable accesses across -qemu_coroutine_yield() calls. When the coroutine is re-entered from -another thread the TLS variables from the old thread must no longer be -used. - -Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. 
- -Signed-off-by: Stefan Hajnoczi -Message-Id: <20220307153853.602859-2-stefanha@redhat.com> -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Kevin Wolf -(cherry picked from commit 34145a307d849d0b6734d0222a7aa0bb9eef7407) -Signed-off-by: Stefan Hajnoczi ---- - util/coroutine-ucontext.c | 38 ++++++++++++++++++++++++-------------- - 1 file changed, 24 insertions(+), 14 deletions(-) - -diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c -index 904b375192..127d5a13c8 100644 ---- a/util/coroutine-ucontext.c -+++ b/util/coroutine-ucontext.c -@@ -25,6 +25,7 @@ - #include "qemu/osdep.h" - #include - #include "qemu/coroutine_int.h" -+#include "qemu/coroutine-tls.h" - - #ifdef CONFIG_VALGRIND_H - #include -@@ -66,8 +67,8 @@ typedef struct { - /** - * Per-thread coroutine bookkeeping - */ --static __thread CoroutineUContext leader; --static __thread Coroutine *current; -+QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); -+QEMU_DEFINE_STATIC_CO_TLS(CoroutineUContext, leader); - - /* - * va_args to makecontext() must be type 'int', so passing -@@ -97,14 +98,15 @@ static inline __attribute__((always_inline)) - void finish_switch_fiber(void *fake_stack_save) - { - #ifdef CONFIG_ASAN -+ CoroutineUContext *leaderp = get_ptr_leader(); - const void *bottom_old; - size_t size_old; - - __sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old); - -- if (!leader.stack) { -- leader.stack = (void *)bottom_old; -- leader.stack_size = size_old; -+ if (!leaderp->stack) { -+ leaderp->stack = (void *)bottom_old; -+ leaderp->stack_size = size_old; - } - #endif - #ifdef CONFIG_TSAN -@@ -161,8 +163,10 @@ static void coroutine_trampoline(int i0, int i1) - - /* Initialize longjmp environment and switch back the caller */ - if (!sigsetjmp(self->env, 0)) { -- start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, leader.stack, -- leader.stack_size); -+ CoroutineUContext *leaderp = get_ptr_leader(); -+ -+ start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, 
-+ leaderp->stack, leaderp->stack_size); - start_switch_fiber_tsan(&fake_stack_save, self, true); /* true=caller */ - siglongjmp(*(sigjmp_buf *)co->entry_arg, 1); - } -@@ -297,7 +301,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, - int ret; - void *fake_stack_save = NULL; - -- current = to_; -+ set_current(to_); - - ret = sigsetjmp(from->env, 0); - if (ret == 0) { -@@ -315,18 +319,24 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, - - Coroutine *qemu_coroutine_self(void) - { -- if (!current) { -- current = &leader.base; -+ Coroutine *self = get_current(); -+ CoroutineUContext *leaderp = get_ptr_leader(); -+ -+ if (!self) { -+ self = &leaderp->base; -+ set_current(self); - } - #ifdef CONFIG_TSAN -- if (!leader.tsan_co_fiber) { -- leader.tsan_co_fiber = __tsan_get_current_fiber(); -+ if (!leaderp->tsan_co_fiber) { -+ leaderp->tsan_co_fiber = __tsan_get_current_fiber(); - } - #endif -- return current; -+ return self; - } - - bool qemu_in_coroutine(void) - { -- return current && current->caller; -+ Coroutine *self = get_current(); -+ -+ return self && self->caller; - } --- -2.31.1 - diff --git a/SOURCES/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/SOURCES/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch deleted file mode 100644 index 9d0f811..0000000 --- a/SOURCES/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch +++ /dev/null @@ -1,139 +0,0 @@ -From 9c2e55d25fec6ffb21e344513b7dbeed7e21f641 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 12:08:04 +0100 -Subject: [PATCH 2/6] coroutine: use QEMU_DEFINE_STATIC_CO_TLS() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables -RH-Commit: [2/3] 68a8847e406e2eace6ddc31b0c5676a60600d606 (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 1952483 -RH-Acked-by: Hanna Reitz -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin 
Wolf - -Thread-Local Storage variables cannot be used directly from coroutine -code because the compiler may optimize TLS variable accesses across -qemu_coroutine_yield() calls. When the coroutine is re-entered from -another thread the TLS variables from the old thread must no longer be -used. - -Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. -The alloc_pool QSLIST needs a typedef so the return value of -get_ptr_alloc_pool() can be stored in a local variable. - -One example of why this code is necessary: a coroutine that yields -before calling qemu_coroutine_create() to create another coroutine is -affected by the TLS issue. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20220307153853.602859-3-stefanha@redhat.com> -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Kevin Wolf -(cherry picked from commit ac387a08a9c9f6b36757da912f0339c25f421f90) - -Conflicts: -- Context conflicts due to commit 5411171c3ef4 ("coroutine: Revert to - constant batch size"). - -Signed-off-by: Stefan Hajnoczi ---- - util/qemu-coroutine.c | 41 ++++++++++++++++++++++++----------------- - 1 file changed, 24 insertions(+), 17 deletions(-) - -diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c -index 804f672e0a..4a8bd63ef0 100644 ---- a/util/qemu-coroutine.c -+++ b/util/qemu-coroutine.c -@@ -18,6 +18,7 @@ - #include "qemu/atomic.h" - #include "qemu/coroutine.h" - #include "qemu/coroutine_int.h" -+#include "qemu/coroutine-tls.h" - #include "block/aio.h" - - /** -@@ -35,17 +36,20 @@ enum { - static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); - static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; - static unsigned int release_pool_size; --static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); --static __thread unsigned int alloc_pool_size; --static __thread Notifier coroutine_pool_cleanup_notifier; -+ -+typedef QSLIST_HEAD(, Coroutine) CoroutineQSList; -+QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, 
alloc_pool); -+QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size); -+QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier); - - static void coroutine_pool_cleanup(Notifier *n, void *value) - { - Coroutine *co; - Coroutine *tmp; -+ CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); - -- QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) { -- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); -+ QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) { -+ QSLIST_REMOVE_HEAD(alloc_pool, pool_next); - qemu_coroutine_delete(co); - } - } -@@ -55,27 +59,30 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) - Coroutine *co = NULL; - - if (CONFIG_COROUTINE_POOL) { -- co = QSLIST_FIRST(&alloc_pool); -+ CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); -+ -+ co = QSLIST_FIRST(alloc_pool); - if (!co) { - if (release_pool_size > POOL_MIN_BATCH_SIZE) { - /* Slow path; a good place to register the destructor, too. */ -- if (!coroutine_pool_cleanup_notifier.notify) { -- coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; -- qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier); -+ Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier(); -+ if (!notifier->notify) { -+ notifier->notify = coroutine_pool_cleanup; -+ qemu_thread_atexit_add(notifier); - } - - /* This is not exact; there could be a little skew between - * release_pool_size and the actual size of release_pool. But - * it is just a heuristic, it does not need to be perfect. 
- */ -- alloc_pool_size = qatomic_xchg(&release_pool_size, 0); -- QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool); -- co = QSLIST_FIRST(&alloc_pool); -+ set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0)); -+ QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool); -+ co = QSLIST_FIRST(alloc_pool); - } - } - if (co) { -- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); -- alloc_pool_size--; -+ QSLIST_REMOVE_HEAD(alloc_pool, pool_next); -+ set_alloc_pool_size(get_alloc_pool_size() - 1); - } - } - -@@ -99,9 +106,9 @@ static void coroutine_delete(Coroutine *co) - qatomic_inc(&release_pool_size); - return; - } -- if (alloc_pool_size < qatomic_read(&pool_max_size)) { -- QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); -- alloc_pool_size++; -+ if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) { -+ QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next); -+ set_alloc_pool_size(get_alloc_pool_size() + 1); - return; - } - } --- -2.31.1 - diff --git a/SOURCES/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/SOURCES/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch deleted file mode 100644 index 1665319..0000000 --- a/SOURCES/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 336581e6e9ace3f1ddd24ad0a258db9785f9b0ed Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 12:08:12 +0100 -Subject: [PATCH 3/6] coroutine-win32: use QEMU_DEFINE_STATIC_CO_TLS() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables -RH-Commit: [3/3] 55b35dfdae1bc7d6f614ac9f81a92f5c6431f713 (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 1952483 -RH-Acked-by: Hanna Reitz -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin Wolf - -Thread-Local Storage variables cannot be used directly from coroutine -code because the compiler may optimize TLS variable accesses across 
-qemu_coroutine_yield() calls. When the coroutine is re-entered from -another thread the TLS variables from the old thread must no longer be -used. - -Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. - -I think coroutine-win32.c could get away with __thread because the -variables are only used in situations where either the stale value is -correct (current) or outside coroutine context (loading leader when -current is NULL). Due to the difficulty of being sure that this is -really safe in all scenarios it seems worth converting it anyway. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20220307153853.602859-4-stefanha@redhat.com> -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Kevin Wolf -(cherry picked from commit c1fe694357a328c807ae3cc6961c19e923448fcc) -Signed-off-by: Stefan Hajnoczi ---- - util/coroutine-win32.c | 18 +++++++++++++----- - 1 file changed, 13 insertions(+), 5 deletions(-) - -diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c -index de6bd4fd3e..c02a62c896 100644 ---- a/util/coroutine-win32.c -+++ b/util/coroutine-win32.c -@@ -25,6 +25,7 @@ - #include "qemu/osdep.h" - #include "qemu-common.h" - #include "qemu/coroutine_int.h" -+#include "qemu/coroutine-tls.h" - - typedef struct - { -@@ -34,8 +35,8 @@ typedef struct - CoroutineAction action; - } CoroutineWin32; - --static __thread CoroutineWin32 leader; --static __thread Coroutine *current; -+QEMU_DEFINE_STATIC_CO_TLS(CoroutineWin32, leader); -+QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); - - /* This function is marked noinline to prevent GCC from inlining it - * into coroutine_trampoline(). 
If we allow it to do that then it -@@ -52,7 +53,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, - CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_); - CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_); - -- current = to_; -+ set_current(to_); - - to->action = action; - SwitchToFiber(to->fiber); -@@ -89,14 +90,21 @@ void qemu_coroutine_delete(Coroutine *co_) - - Coroutine *qemu_coroutine_self(void) - { -+ Coroutine *current = get_current(); -+ - if (!current) { -- current = &leader.base; -- leader.fiber = ConvertThreadToFiber(NULL); -+ CoroutineWin32 *leader = get_ptr_leader(); -+ -+ current = &leader->base; -+ set_current(current); -+ leader->fiber = ConvertThreadToFiber(NULL); - } - return current; - } - - bool qemu_in_coroutine(void) - { -+ Coroutine *current = get_current(); -+ - return current && current->caller; - } --- -2.31.1 - diff --git a/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch b/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch new file mode 100644 index 0000000..1a3c139 --- /dev/null +++ b/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch @@ -0,0 +1,127 @@ +From b886411a682b56bfe674f0a35d40c67c8e9dc87a Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 21 Feb 2023 16:22:17 -0500 +Subject: [PATCH 02/12] dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel() + race + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread +RH-Bugzilla: 2155748 +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf +RH-Acked-by: Laszlo Ersek +RH-Commit: [2/3] eeeea43c25d8f4fa84591b05547fb77e4058abff (stefanha/centos-stream-qemu-kvm) + +dma_blk_cb() only takes the AioContext lock around ->io_func(). That +means the rest of dma_blk_cb() is not protected. In particular, the +DMAAIOCB field accesses happen outside the lock. 
+ +There is a race when the main loop thread holds the AioContext lock and +invokes scsi_device_purge_requests() -> bdrv_aio_cancel() -> +dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb +field determines how cancellation proceeds. If dma_aio_cancel() sees +dbs->acb == NULL while dma_blk_cb() is still running, the request can be +completed twice (-ECANCELED and the actual return value). + +The following assertion can occur with virtio-scsi when an IOThread is +used: + + ../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed. + +Fix the race by holding the AioContext across dma_blk_cb(). Now +dma_aio_cancel() under the AioContext lock will not see +inconsistent/intermediate states. + +Cc: Paolo Bonzini +Reviewed-by: Eric Blake +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230221212218.1378734-3-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit abfcd2760b3e70727bbc0792221b8b98a733dc32) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/scsi-disk.c | 4 +--- + softmmu/dma-helpers.c | 12 +++++++----- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 5327f93f4c..b12d8b0816 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -354,13 +354,12 @@ done: + scsi_req_unref(&r->req); + } + ++/* Called with AioContext lock held */ + static void scsi_dma_complete(void *opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -370,7 +369,6 @@ static void scsi_dma_complete(void *opaque, int ret) + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_dma_complete_noio(r, ret); +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + } + + static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) +diff 
--git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c +index 7820fec54c..2463964805 100644 +--- a/softmmu/dma-helpers.c ++++ b/softmmu/dma-helpers.c +@@ -113,17 +113,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret) + static void dma_blk_cb(void *opaque, int ret) + { + DMAAIOCB *dbs = (DMAAIOCB *)opaque; ++ AioContext *ctx = dbs->ctx; + dma_addr_t cur_addr, cur_len; + void *mem; + + trace_dma_blk_cb(dbs, ret); + ++ aio_context_acquire(ctx); + dbs->acb = NULL; + dbs->offset += dbs->iov.size; + + if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { + dma_complete(dbs, ret); +- return; ++ goto out; + } + dma_blk_unmap(dbs); + +@@ -164,9 +166,9 @@ static void dma_blk_cb(void *opaque, int ret) + + if (dbs->iov.size == 0) { + trace_dma_map_wait(dbs); +- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs); ++ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); + cpu_register_map_client(dbs->bh); +- return; ++ goto out; + } + + if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { +@@ -174,11 +176,11 @@ static void dma_blk_cb(void *opaque, int ret) + QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align)); + } + +- aio_context_acquire(dbs->ctx); + dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, + dma_blk_cb, dbs, dbs->io_func_opaque); +- aio_context_release(dbs->ctx); + assert(dbs->acb); ++out: ++ aio_context_release(ctx); + } + + static void dma_aio_cancel(BlockAIOCB *acb) +-- +2.39.1 + diff --git a/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch b/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch new file mode 100644 index 0000000..dd77648 --- /dev/null +++ b/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch @@ -0,0 +1,61 @@ +From 67bbeb056f75adc6c964468d876531ab68366fe0 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 07/12] edu: add smp_mb__after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck 
+RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [4/9] 2ad6fd6cb33fde39d2d017d94c0dde2152ad70c4 (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit 2482aeea4195ad84cf3d4e5b15b28ec5b420ed5a +Author: Paolo Bonzini +Date: Thu Mar 2 11:16:13 2023 +0100 + + edu: add smp_mb__after_rmw() + + Ensure ordering between clearing the COMPUTING flag and checking + IRQFACT, and between setting the IRQFACT flag and checking + COMPUTING. This ensures that no wakeups are lost. + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + hw/misc/edu.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/misc/edu.c b/hw/misc/edu.c +index e935c418d4..a1f8bc77e7 100644 +--- a/hw/misc/edu.c ++++ b/hw/misc/edu.c +@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val, + case 0x20: + if (val & EDU_STATUS_IRQFACT) { + qatomic_or(&edu->status, EDU_STATUS_IRQFACT); ++ /* Order check of the COMPUTING flag after setting IRQFACT. */ ++ smp_mb__after_rmw(); + } else { + qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT); + } +@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque) + qemu_mutex_unlock(&edu->thr_mutex); + qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING); + ++ /* Clear COMPUTING flag before checking IRQFACT. 
*/ ++ smp_mb__after_rmw(); ++ + if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) { + qemu_mutex_lock_iothread(); + edu_raise_irq(edu, FACT_IRQ); +-- +2.39.1 + diff --git a/SOURCES/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch b/SOURCES/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch deleted file mode 100644 index 2795dcd..0000000 --- a/SOURCES/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch +++ /dev/null @@ -1,179 +0,0 @@ -From 8a12049e97149056f61f7748d9869606d282d16e Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 11 May 2022 18:01:35 +0800 -Subject: [PATCH 06/16] hw/acpi/aml-build: Use existing CPU topology to build - PPTT table - -RH-Author: Gavin Shan -RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology -RH-Commit: [6/6] 53fa376531c204cf706cc1a7a0499019756106cb (gwshan/qemu-rhel-9) -RH-Bugzilla: 2041823 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 - -When the PPTT table is built, the CPU topology is re-calculated, but -it's unecessary because the CPU topology has been populated in -virt_possible_cpu_arch_ids() on arm/virt machine. - -This reworks build_pptt() to avoid by reusing the existing IDs in -ms->possible_cpus. Currently, the only user of build_pptt() is -arm/virt machine. - -Signed-off-by: Gavin Shan -Tested-by: Yanan Wang -Reviewed-by: Yanan Wang -Acked-by: Igor Mammedov -Acked-by: Michael S. 
Tsirkin -Message-id: 20220503140304.855514-7-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit ae9141d4a3265553503bf07d3574b40f84615a34) -Signed-off-by: Gavin Shan ---- - hw/acpi/aml-build.c | 111 +++++++++++++++++++------------------------- - 1 file changed, 48 insertions(+), 63 deletions(-) - -diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c -index 4086879ebf..e6bfac95c7 100644 ---- a/hw/acpi/aml-build.c -+++ b/hw/acpi/aml-build.c -@@ -2002,86 +2002,71 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, - const char *oem_id, const char *oem_table_id) - { - MachineClass *mc = MACHINE_GET_CLASS(ms); -- GQueue *list = g_queue_new(); -- guint pptt_start = table_data->len; -- guint parent_offset; -- guint length, i; -- int uid = 0; -- int socket; -+ CPUArchIdList *cpus = ms->possible_cpus; -+ int64_t socket_id = -1, cluster_id = -1, core_id = -1; -+ uint32_t socket_offset = 0, cluster_offset = 0, core_offset = 0; -+ uint32_t pptt_start = table_data->len; -+ int n; - AcpiTable table = { .sig = "PPTT", .rev = 2, - .oem_id = oem_id, .oem_table_id = oem_table_id }; - - acpi_table_begin(&table, table_data); - -- for (socket = 0; socket < ms->smp.sockets; socket++) { -- g_queue_push_tail(list, -- GUINT_TO_POINTER(table_data->len - pptt_start)); -- build_processor_hierarchy_node( -- table_data, -- /* -- * Physical package - represents the boundary -- * of a physical package -- */ -- (1 << 0), -- 0, socket, NULL, 0); -- } -- -- if (mc->smp_props.clusters_supported) { -- length = g_queue_get_length(list); -- for (i = 0; i < length; i++) { -- int cluster; -- -- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); -- for (cluster = 0; cluster < ms->smp.clusters; cluster++) { -- g_queue_push_tail(list, -- GUINT_TO_POINTER(table_data->len - pptt_start)); -- build_processor_hierarchy_node( -- table_data, -- (0 << 0), /* not a physical package */ -- parent_offset, cluster, NULL, 0); -- } -+ /* -+ * This works with the 
assumption that cpus[n].props.*_id has been -+ * sorted from top to down levels in mc->possible_cpu_arch_ids(). -+ * Otherwise, the unexpected and duplicated containers will be -+ * created. -+ */ -+ for (n = 0; n < cpus->len; n++) { -+ if (cpus->cpus[n].props.socket_id != socket_id) { -+ assert(cpus->cpus[n].props.socket_id > socket_id); -+ socket_id = cpus->cpus[n].props.socket_id; -+ cluster_id = -1; -+ core_id = -1; -+ socket_offset = table_data->len - pptt_start; -+ build_processor_hierarchy_node(table_data, -+ (1 << 0), /* Physical package */ -+ 0, socket_id, NULL, 0); - } -- } - -- length = g_queue_get_length(list); -- for (i = 0; i < length; i++) { -- int core; -- -- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); -- for (core = 0; core < ms->smp.cores; core++) { -- if (ms->smp.threads > 1) { -- g_queue_push_tail(list, -- GUINT_TO_POINTER(table_data->len - pptt_start)); -- build_processor_hierarchy_node( -- table_data, -- (0 << 0), /* not a physical package */ -- parent_offset, core, NULL, 0); -- } else { -- build_processor_hierarchy_node( -- table_data, -- (1 << 1) | /* ACPI Processor ID valid */ -- (1 << 3), /* Node is a Leaf */ -- parent_offset, uid++, NULL, 0); -+ if (mc->smp_props.clusters_supported) { -+ if (cpus->cpus[n].props.cluster_id != cluster_id) { -+ assert(cpus->cpus[n].props.cluster_id > cluster_id); -+ cluster_id = cpus->cpus[n].props.cluster_id; -+ core_id = -1; -+ cluster_offset = table_data->len - pptt_start; -+ build_processor_hierarchy_node(table_data, -+ (0 << 0), /* Not a physical package */ -+ socket_offset, cluster_id, NULL, 0); - } -+ } else { -+ cluster_offset = socket_offset; - } -- } - -- length = g_queue_get_length(list); -- for (i = 0; i < length; i++) { -- int thread; -+ if (ms->smp.threads == 1) { -+ build_processor_hierarchy_node(table_data, -+ (1 << 1) | /* ACPI Processor ID valid */ -+ (1 << 3), /* Node is a Leaf */ -+ cluster_offset, n, NULL, 0); -+ } else { -+ if (cpus->cpus[n].props.core_id != core_id) { -+ 
assert(cpus->cpus[n].props.core_id > core_id); -+ core_id = cpus->cpus[n].props.core_id; -+ core_offset = table_data->len - pptt_start; -+ build_processor_hierarchy_node(table_data, -+ (0 << 0), /* Not a physical package */ -+ cluster_offset, core_id, NULL, 0); -+ } - -- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); -- for (thread = 0; thread < ms->smp.threads; thread++) { -- build_processor_hierarchy_node( -- table_data, -+ build_processor_hierarchy_node(table_data, - (1 << 1) | /* ACPI Processor ID valid */ - (1 << 2) | /* Processor is a Thread */ - (1 << 3), /* Node is a Leaf */ -- parent_offset, uid++, NULL, 0); -+ core_offset, n, NULL, 0); - } - } - -- g_queue_free(list); - acpi_table_end(linker, &table); - } - --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Add-compact-highmem-property.patch b/SOURCES/kvm-hw-arm-virt-Add-compact-highmem-property.patch new file mode 100644 index 0000000..bc65e2f --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Add-compact-highmem-property.patch @@ -0,0 +1,169 @@ +From 4ab2aff624908e49b099f00609875f4d03e9e1ec Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 6/8] hw/arm/virt: Add 'compact-highmem' property + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/8] 781506f3445493f05b511547370b6d88ef092457 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 + +After the improvement to high memory region address assignment is +applied, the memory layout can be changed, introducing possible +migration breakage. For example, VIRT_HIGH_PCIE_MMIO memory region +is disabled or enabled when the optimization is applied or not, with +the following configuration. 
The configuration is only achievable by +modifying the source code until more properties are added to allow +users selectively disable those high memory regions. + + pa_bits = 40; + vms->highmem_redists = false; + vms->highmem_ecam = false; + vms->highmem_mmio = true; + + # qemu-system-aarch64 -accel kvm -cpu host \ + -machine virt-7.2,compact-highmem={on, off} \ + -m 4G,maxmem=511G -monitor stdio + + Region compact-highmem=off compact-highmem=on + ---------------------------------------------------------------- + MEM [1GB 512GB] [1GB 512GB] + HIGH_GIC_REDISTS2 [512GB 512GB+64MB] [disabled] + HIGH_PCIE_ECAM [512GB+256MB 512GB+512MB] [disabled] + HIGH_PCIE_MMIO [disabled] [512GB 1TB] + +In order to keep backwards compatibility, we need to disable the +optimization on machine, which is virt-7.1 or earlier than it. It +means the optimization is enabled by default from virt-7.2. Besides, +'compact-highmem' property is added so that the optimization can be +explicitly enabled or disabled on all machine types by users. + +Signed-off-by: Gavin Shan +Reviewed-by: Eric Auger +Reviewed-by: Cornelia Huck +Reviewed-by: Marc Zyngier +Tested-by: Zhenyu Zhang +Message-id: 20221029224307.138822-7-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit f40408a9fe5d1db70a75a33d2b26c8af8a5d57b0) +Signed-off-by: Gavin Shan +Conflicts: + hw/arm/virt.c + Comment out the handlers of property 'compact-highmem' since + the property isn't exposed. +--- + docs/system/arm/virt.rst | 4 ++++ + hw/arm/virt.c | 34 ++++++++++++++++++++++++++++++++++ + include/hw/arm/virt.h | 1 + + 3 files changed, 39 insertions(+) + +diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst +index 20442ea2c1..4454706392 100644 +--- a/docs/system/arm/virt.rst ++++ b/docs/system/arm/virt.rst +@@ -94,6 +94,10 @@ highmem + address space above 32 bits. The default is ``on`` for machine types + later than ``virt-2.12``.
+ ++compact-highmem ++ Set ``on``/``off`` to enable/disable the compact layout for high memory regions. ++ The default is ``on`` for machine types later than ``virt-7.2``. ++ + gic-version + Specify the version of the Generic Interrupt Controller (GIC) to provide. + Valid values are: +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 6896e0ca0f..6087511ae9 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -216,6 +216,12 @@ static const MemMapEntry base_memmap[] = { + * Note the extended_memmap is sized so that it eventually also includes the + * base_memmap entries (VIRT_HIGH_GIC_REDIST2 index is greater than the last + * index of base_memmap). ++ * ++ * The memory map for these Highmem IO Regions can be in legacy or compact ++ * layout, depending on 'compact-highmem' property. With legacy layout, the ++ * PA space for one specific region is always reserved, even if the region ++ * has been disabled or doesn't fit into the PA space. However, the PA space ++ * for the region won't be reserved in these circumstances with compact layout. 
+ */ + static MemMapEntry extended_memmap[] = { + /* Additional 64 MB redist region (can contain up to 512 redistributors) */ +@@ -2400,6 +2406,22 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) + vms->highmem = value; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ ++static bool virt_get_compact_highmem(Object *obj, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ return vms->highmem_compact; ++} ++ ++static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ vms->highmem_compact = value; ++} ++#endif /* disabled for RHEL */ ++ + static bool virt_get_its(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -3023,6 +3045,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + "Set on/off to enable/disable using " + "physical address space above 32 bits"); + ++ object_class_property_add_bool(oc, "compact-highmem", ++ virt_get_compact_highmem, ++ virt_set_compact_highmem); ++ object_class_property_set_description(oc, "compact-highmem", ++ "Set on/off to enable/disable compact " ++ "layout for high memory regions"); ++ + object_class_property_add_str(oc, "gic-version", virt_get_gic_version, + virt_set_gic_version); + object_class_property_set_description(oc, "gic-version", +@@ -3107,6 +3136,7 @@ static void virt_instance_init(Object *obj) + + /* High memory is enabled by default */ + vms->highmem = true; ++ vms->highmem_compact = !vmc->no_highmem_compact; + vms->gic_version = VIRT_GIC_VERSION_NOSEL; + + vms->highmem_ecam = !vmc->no_highmem_ecam; +@@ -3176,8 +3206,12 @@ DEFINE_VIRT_MACHINE_AS_LATEST(7, 2) + + static void virt_machine_7_1_options(MachineClass *mc) + { ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ + virt_machine_7_2_options(mc); + compat_props_add(mc->compat_props, hw_compat_7_1, hw_compat_7_1_len); ++ /* Compact layout for high memory regions was introduced with 
7.2 */ ++ vmc->no_highmem_compact = true; + } + DEFINE_VIRT_MACHINE(7, 1) + +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 15bd291311..85e7d61868 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -125,6 +125,7 @@ struct VirtMachineClass { + bool no_pmu; + bool claim_edge_triggered_timers; + bool smbios_old_sys_ver; ++ bool no_highmem_compact; + bool no_highmem_ecam; + bool no_ged; /* Machines < 4.2 have no support for ACPI GED device */ + bool kvm_no_adjvtime; +-- +2.31.1 + diff --git a/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch b/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch new file mode 100644 index 0000000..df691a7 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch @@ -0,0 +1,179 @@ +From 30e86a7c4fbcdc95b74bcb2a15745cb221783091 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 7/8] hw/arm/virt: Add properties to disable high memory + regions + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/8] 16f8762393b447a590b31c9e4d8d3c58c6bc9fa8 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 + +The 3 high memory regions are usually enabled by default, but they may +be not used. For example, VIRT_HIGH_GIC_REDIST2 isn't needed by GICv2. +This leads to waste in the PA space. + +Add properties ("highmem-redists", "highmem-ecam", "highmem-mmio") to +allow users selectively disable them if needed. After that, the high +memory region for GICv3 or GICv4 redistributor can be disabled by user, +the number of maximal supported CPUs needs to be calculated based on +'vms->highmem_redists'. 
The follow-up error message is also improved +to indicate if the high memory region for GICv3 and GICv4 has been +enabled or not. + +Suggested-by: Marc Zyngier +Signed-off-by: Gavin Shan +Reviewed-by: Marc Zyngier +Reviewed-by: Cornelia Huck +Reviewed-by: Eric Auger +Message-id: 20221029224307.138822-8-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 6a48c64eec355ab1aff694eb4522d07a8e461368) +Signed-off-by: Gavin Shan +Conflicts: + hw/arm/virt.c + Comment out the handlers of the property 'highmem-redists', + 'highmem-ecam' and 'highmem-mmio' since they aren't exposed. +--- + docs/system/arm/virt.rst | 13 +++++++ + hw/arm/virt.c | 75 ++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 86 insertions(+), 2 deletions(-) + +diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst +index 4454706392..188a4f211f 100644 +--- a/docs/system/arm/virt.rst ++++ b/docs/system/arm/virt.rst +@@ -98,6 +98,19 @@ compact-highmem + Set ``on``/``off`` to enable/disable the compact layout for high memory regions. + The default is ``on`` for machine types later than ``virt-7.2``. + ++highmem-redists ++ Set ``on``/``off`` to enable/disable the high memory region for GICv3 or ++ GICv4 redistributor. The default is ``on``. Setting this to ``off`` will ++ limit the maximum number of CPUs when GICv3 or GICv4 is used. ++ ++highmem-ecam ++ Set ``on``/``off`` to enable/disable the high memory region for PCI ECAM. ++ The default is ``on`` for machine types later than ``virt-3.0``. ++ ++highmem-mmio ++ Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO. ++ The default is ``on``. ++ + gic-version + Specify the version of the Generic Interrupt Controller (GIC) to provide. 
+ Valid values are: +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 6087511ae9..304fa0d6e7 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2142,14 +2142,20 @@ static void machvirt_init(MachineState *machine) + if (vms->gic_version == VIRT_GIC_VERSION_2) { + virt_max_cpus = GIC_NCPU; + } else { +- virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST) + +- virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); ++ virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST); ++ if (vms->highmem_redists) { ++ virt_max_cpus += virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); ++ } + } + + if (max_cpus > virt_max_cpus) { + error_report("Number of SMP CPUs requested (%d) exceeds max CPUs " + "supported by machine 'mach-virt' (%d)", + max_cpus, virt_max_cpus); ++ if (vms->gic_version != VIRT_GIC_VERSION_2 && !vms->highmem_redists) { ++ error_printf("Try 'highmem-redists=on' for more CPUs\n"); ++ } ++ + exit(1); + } + +@@ -2420,6 +2426,49 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) + + vms->highmem_compact = value; + } ++ ++static bool virt_get_highmem_redists(Object *obj, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ return vms->highmem_redists; ++} ++ ++static void virt_set_highmem_redists(Object *obj, bool value, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ vms->highmem_redists = value; ++} ++ ++static bool virt_get_highmem_ecam(Object *obj, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ return vms->highmem_ecam; ++} ++ ++static void virt_set_highmem_ecam(Object *obj, bool value, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ vms->highmem_ecam = value; ++} ++ ++static bool virt_get_highmem_mmio(Object *obj, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ return vms->highmem_mmio; ++} ++ ++static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ 
vms->highmem_mmio = value; ++} ++ + #endif /* disabled for RHEL */ + + static bool virt_get_its(Object *obj, Error **errp) +@@ -3052,6 +3101,28 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + "Set on/off to enable/disable compact " + "layout for high memory regions"); + ++ object_class_property_add_bool(oc, "highmem-redists", ++ virt_get_highmem_redists, ++ virt_set_highmem_redists); ++ object_class_property_set_description(oc, "highmem-redists", ++ "Set on/off to enable/disable high " ++ "memory region for GICv3 or GICv4 " ++ "redistributor"); ++ ++ object_class_property_add_bool(oc, "highmem-ecam", ++ virt_get_highmem_ecam, ++ virt_set_highmem_ecam); ++ object_class_property_set_description(oc, "highmem-ecam", ++ "Set on/off to enable/disable high " ++ "memory region for PCI ECAM"); ++ ++ object_class_property_add_bool(oc, "highmem-mmio", ++ virt_get_highmem_mmio, ++ virt_set_highmem_mmio); ++ object_class_property_set_description(oc, "highmem-mmio", ++ "Set on/off to enable/disable high " ++ "memory region for PCI MMIO"); ++ + object_class_property_add_str(oc, "gic-version", virt_get_gic_version, + virt_set_gic_version); + object_class_property_set_description(oc, "gic-version", +-- +2.31.1 + diff --git a/SOURCES/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch b/SOURCES/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch deleted file mode 100644 index 240aead..0000000 --- a/SOURCES/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 3b05d3464945295112b5d02d142422f524a52054 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 11 May 2022 18:01:35 +0800 -Subject: [PATCH 03/16] hw/arm/virt: Consider SMP configuration in CPU topology - -RH-Author: Gavin Shan -RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology -RH-Commit: [3/6] 7125b41f038c2b1cb33377d0ef1222f1ea42b648 (gwshan/qemu-rhel-9) -RH-Bugzilla: 2041823 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia 
Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 - -Currently, the SMP configuration isn't considered when the CPU -topology is populated. In this case, it's impossible to provide -the default CPU-to-NUMA mapping or association based on the socket -ID of the given CPU. - -This takes account of SMP configuration when the CPU topology -is populated. The die ID for the given CPU isn't assigned since -it's not supported on arm/virt machine. Besides, the used SMP -configuration in qtest/numa-test/aarch64_numa_cpu() is corrcted -to avoid testing failure - -Signed-off-by: Gavin Shan -Reviewed-by: Yanan Wang -Acked-by: Igor Mammedov -Message-id: 20220503140304.855514-4-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit c9ec4cb5e4936f980889e717524e73896b0200ed) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 8be12e121d..a87c8d396a 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2553,6 +2553,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) - int n; - unsigned int max_cpus = ms->smp.max_cpus; - VirtMachineState *vms = VIRT_MACHINE(ms); -+ MachineClass *mc = MACHINE_GET_CLASS(vms); - - if (ms->possible_cpus) { - assert(ms->possible_cpus->len == max_cpus); -@@ -2566,8 +2567,20 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) - ms->possible_cpus->cpus[n].type = ms->cpu_type; - ms->possible_cpus->cpus[n].arch_id = - virt_cpu_mp_affinity(vms, n); -+ -+ assert(!mc->smp_props.dies_supported); -+ ms->possible_cpus->cpus[n].props.has_socket_id = true; -+ ms->possible_cpus->cpus[n].props.socket_id = -+ n / (ms->smp.clusters * ms->smp.cores * ms->smp.threads); -+ ms->possible_cpus->cpus[n].props.has_cluster_id = true; -+ ms->possible_cpus->cpus[n].props.cluster_id = -+ (n / (ms->smp.cores * ms->smp.threads)) % ms->smp.clusters; -+ 
ms->possible_cpus->cpus[n].props.has_core_id = true; -+ ms->possible_cpus->cpus[n].props.core_id = -+ (n / ms->smp.threads) % ms->smp.cores; - ms->possible_cpus->cpus[n].props.has_thread_id = true; -- ms->possible_cpus->cpus[n].props.thread_id = n; -+ ms->possible_cpus->cpus[n].props.thread_id = -+ n % ms->smp.threads; - } - return ms->possible_cpus; - } --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch b/SOURCES/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch new file mode 100644 index 0000000..6b20bb8 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch @@ -0,0 +1,51 @@ +From 969ea1ff46b52c5fe6d87f2eeb1625871a2dfb2a Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 8/8] hw/arm/virt: Enable compat high memory region address + assignment for 9.2.0 machine + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/8] beda1791c0c35dce5c669efd47685302b8468032 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 +Upstream: RHEL only + +The compact high memory region address assignment is enabled for 9.2.0, +but it's kept as disabled for 9.0.0, to keep the backwards compatibility +on 9.0.0. Note that these newly added properties ('compact-highmem', +'highmem-redists', 'highmem-ecam', and 'highmem-mmio') in the upstream +aren't exposed for the downstream. 
+ +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 304fa0d6e7..e41c0b462c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3581,6 +3581,7 @@ static void rhel_virt_instance_init(Object *obj) + + /* High memory is enabled by default */ + vms->highmem = true; ++ vms->highmem_compact = !vmc->no_highmem_compact; + vms->gic_version = VIRT_GIC_VERSION_NOSEL; + + vms->highmem_ecam = !vmc->no_highmem_ecam; +@@ -3659,5 +3660,7 @@ static void rhel900_virt_options(MachineClass *mc) + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; ++ /* Compact layout for high memory regions was introduced with 9.2.0 */ ++ vmc->no_highmem_compact = true; + } + DEFINE_RHEL_MACHINE(9, 0, 0) +-- +2.31.1 + diff --git a/SOURCES/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch b/SOURCES/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch deleted file mode 100644 index 6b60b70..0000000 --- a/SOURCES/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 14e49ad3b98f01c1ad6fe456469d40a96a43dc3c Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 11 May 2022 18:01:35 +0800 -Subject: [PATCH 05/16] hw/arm/virt: Fix CPU's default NUMA node ID - -RH-Author: Gavin Shan -RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology -RH-Commit: [5/6] 5336f62bc0c53c0417db1d71ef89544907bc28c0 (gwshan/qemu-rhel-9) -RH-Bugzilla: 2041823 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 - -When CPU-to-NUMA association isn't explicitly provided by users, -the default one is given by mc->get_default_cpu_node_id(). However, -the CPU topology isn't fully considered in the default association -and this causes CPU topology broken warnings on booting Linux guest. 
- -For example, the following warning messages are observed when the -Linux guest is booted with the following command lines. - -/home/gavin/sandbox/qemu.main/build/qemu-system-aarch64 \ --accel kvm -machine virt,gic-version=host \ --cpu host \ --smp 6,sockets=2,cores=3,threads=1 \ --m 1024M,slots=16,maxmem=64G \ --object memory-backend-ram,id=mem0,size=128M \ --object memory-backend-ram,id=mem1,size=128M \ --object memory-backend-ram,id=mem2,size=128M \ --object memory-backend-ram,id=mem3,size=128M \ --object memory-backend-ram,id=mem4,size=128M \ --object memory-backend-ram,id=mem4,size=384M \ --numa node,nodeid=0,memdev=mem0 \ --numa node,nodeid=1,memdev=mem1 \ --numa node,nodeid=2,memdev=mem2 \ --numa node,nodeid=3,memdev=mem3 \ --numa node,nodeid=4,memdev=mem4 \ --numa node,nodeid=5,memdev=mem5 -: -alternatives: patching kernel code -BUG: arch topology borken -the CLS domain not a subset of the MC domain - -BUG: arch topology borken -the DIE domain not a subset of the NODE domain - -With current implementation of mc->get_default_cpu_node_id(), -CPU#0 to CPU#5 are associated with NODE#0 to NODE#5 separately. -That's incorrect because CPU#0/1/2 should be associated with same -NUMA node because they're seated in same socket. - -This fixes the issue by considering the socket ID when the default -CPU-to-NUMA association is provided in virt_possible_cpu_arch_ids(). -With this applied, no more CPU topology broken warnings are seen -from the Linux guest. The 6 CPUs are associated with NODE#0/1, but -there are no CPUs associated with NODE#2/3/4/5. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Igor Mammedov -Reviewed-by: Yanan Wang -Message-id: 20220503140304.855514-6-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 4c18bc192386dfbca530e7f550e0992df657818a) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index a87c8d396a..95d012d6eb 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2545,7 +2545,9 @@ virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) - - static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx) - { -- return idx % ms->numa_state->num_nodes; -+ int64_t socket_id = ms->possible_cpus->cpus[idx].props.socket_id; -+ -+ return socket_id % ms->numa_state->num_nodes; - } - - static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch b/SOURCES/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch deleted file mode 100644 index 78b9ee0..0000000 --- a/SOURCES/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch +++ /dev/null @@ -1,56 +0,0 @@ -From e25c40735d2f022c07481b548d20476222006657 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 4 May 2022 11:11:54 +0200 -Subject: [PATCH 2/5] hw/arm/virt: Fix missing initialization in - instance/class_init() - -RH-Author: Eric Auger -RH-MergeRequest: 82: hw/arm/virt: Remove the dtb-kaslr-seed machine option -RH-Commit: [2/2] 22cbbfc30cf57a09b8acfb25d8a4dff2754c630c (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2046029 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2046029 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45133161 -Upstream Status: RHEL-only -Tested: Boot RHEL guest and check migration from 8.6 to 9.1 - (with custom additions) - -During the 7.0 rebase, the 
initialization of highmem_mmio and -highmem_redists was forgotten in rhel_virt_instance_init(). -Fix it to match virt_instance_init() code. - -Also mc->smp_props.clusters_supported was missing in -rhel_machine_class_init(). - -Signed-off-by: Eric Auger ---- - hw/arm/virt.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index bde4f77994..8be12e121d 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3286,6 +3286,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - hc->unplug_request = virt_machine_device_unplug_request_cb; - hc->unplug = virt_machine_device_unplug_cb; - mc->nvdimm_supported = true; -+ mc->smp_props.clusters_supported = true; - mc->auto_enable_numa_with_memhp = true; - mc->auto_enable_numa_with_memdev = true; - mc->default_ram_id = "mach-virt.ram"; -@@ -3366,6 +3367,8 @@ static void rhel_virt_instance_init(Object *obj) - vms->gic_version = VIRT_GIC_VERSION_NOSEL; - - vms->highmem_ecam = !vmc->no_highmem_ecam; -+ vms->highmem_mmio = true; -+ vms->highmem_redists = true; - - if (vmc->no_its) { - vms->its = false; --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch b/SOURCES/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch new file mode 100644 index 0000000..9dcdf61 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch @@ -0,0 +1,112 @@ +From 1c7fad3776a14ca35b24dc2fdb262d4ddf40d6eb Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 5/8] hw/arm/virt: Improve high memory region address + assignment + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/8] 4d77fa78b5258a1bd8d30405cec5ba3311d42f92 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 + +There are three 
high memory regions, which are VIRT_HIGH_REDIST2, +VIRT_HIGH_PCIE_ECAM and VIRT_HIGH_PCIE_MMIO. Their base addresses +are floating on highest RAM address. However, they can be disabled +in several cases. + +(1) One specific high memory region is likely to be disabled by + code by toggling vms->highmem_{redists, ecam, mmio}. + +(2) VIRT_HIGH_PCIE_ECAM region is disabled on machine, which is + 'virt-2.12' or ealier than it. + +(3) VIRT_HIGH_PCIE_ECAM region is disabled when firmware is loaded + on 32-bits system. + +(4) One specific high memory region is disabled when it breaks the + PA space limit. + +The current implementation of virt_set_{memmap, high_memmap}() isn't +optimized because the high memory region's PA space is always reserved, +regardless of whatever the actual state in the corresponding +vms->highmem_{redists, ecam, mmio} flag. In the code, 'base' and +'vms->highest_gpa' are always increased for case (1), (2) and (3). +It's unnecessary since the assigned PA space for the disabled high +memory region won't be used afterwards. + +Improve the address assignment for those three high memory region by +skipping the address assignment for one specific high memory region if +it has been disabled in case (1), (2) and (3). The memory layout may +be changed after the improvement is applied, which leads to potential +migration breakage. So 'vms->highmem_compact' is added to control if +the improvement should be applied. For now, 'vms->highmem_compact' is +set to false, meaning that we don't have memory layout change until it +becomes configurable through property 'compact-highmem' in next patch. 
+ +Signed-off-by: Gavin Shan +Reviewed-by: Eric Auger +Reviewed-by: Cornelia Huck +Reviewed-by: Marc Zyngier +Tested-by: Zhenyu Zhang +Message-id: 20221029224307.138822-6-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 4a4ff9edc6a8fdc76082af5b41b059217138c09b) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 15 ++++++++++----- + include/hw/arm/virt.h | 1 + + 2 files changed, 11 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 6e3b9fc060..6896e0ca0f 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1768,18 +1768,23 @@ static void virt_set_high_memmap(VirtMachineState *vms, + vms->memmap[i].size = region_size; + + /* +- * Check each device to see if they fit in the PA space, +- * moving highest_gpa as we go. ++ * Check each device to see if it fits in the PA space, ++ * moving highest_gpa as we go. For compatibility, move ++ * highest_gpa for disabled fitting devices as well, if ++ * the compact layout has been disabled. + * + * For each device that doesn't fit, disable it. 
+ */ + fits = (region_base + region_size) <= BIT_ULL(pa_bits); +- if (fits) { +- vms->highest_gpa = region_base + region_size - 1; ++ *region_enabled &= fits; ++ if (vms->highmem_compact && !*region_enabled) { ++ continue; + } + +- *region_enabled &= fits; + base = region_base + region_size; ++ if (fits) { ++ vms->highest_gpa = base - 1; ++ } + } + } + +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 22b54ec510..15bd291311 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -144,6 +144,7 @@ struct VirtMachineState { + PFlashCFI01 *flash[2]; + bool secure; + bool highmem; ++ bool highmem_compact; + bool highmem_ecam; + bool highmem_mmio; + bool highmem_redists; +-- +2.31.1 + diff --git a/SOURCES/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch b/SOURCES/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch new file mode 100644 index 0000000..ea9cb1f --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch @@ -0,0 +1,82 @@ +From 305a369fd18f29914bf96cc181add532d435d8ed Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 3/8] hw/arm/virt: Introduce variable region_base in + virt_set_high_memmap() + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/8] 15de90df217d680ccc858b679898b3993e1c050a + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 + +This introduces variable 'region_base' for the base address of the +specific high memory region. It's the preparatory work to optimize +high memory region address assignment. + +No functional change intended. 
+ +Signed-off-by: Gavin Shan +Reviewed-by: Eric Auger +Reviewed-by: Cornelia Huck +Reviewed-by: Marc Zyngier +Tested-by: Zhenyu Zhang +Message-id: 20221029224307.138822-4-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit fa245799b9407fc7b561da185b3d889df5e16a88) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index ca098d40b8..ddcf7ee2f8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1739,15 +1739,15 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) + static void virt_set_high_memmap(VirtMachineState *vms, + hwaddr base, int pa_bits) + { +- hwaddr region_size; ++ hwaddr region_base, region_size; + bool fits; + int i; + + for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { ++ region_base = ROUND_UP(base, extended_memmap[i].size); + region_size = extended_memmap[i].size; + +- base = ROUND_UP(base, region_size); +- vms->memmap[i].base = base; ++ vms->memmap[i].base = region_base; + vms->memmap[i].size = region_size; + + /* +@@ -1756,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms, + * + * For each device that doesn't fit, disable it. 
+ */ +- fits = (base + region_size) <= BIT_ULL(pa_bits); ++ fits = (region_base + region_size) <= BIT_ULL(pa_bits); + if (fits) { +- vms->highest_gpa = base + region_size - 1; ++ vms->highest_gpa = region_base + region_size - 1; + } + + switch (i) { +@@ -1773,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, + break; + } + +- base += region_size; ++ base = region_base + region_size; + } + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch b/SOURCES/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch new file mode 100644 index 0000000..659faeb --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch @@ -0,0 +1,95 @@ +From a2ddd68c8365ec602db6b2a9cf83bb441ca701cc Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 4/8] hw/arm/virt: Introduce virt_get_high_memmap_enabled() + helper + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/8] 65524de2fc106600bbaff641caa8c4f2f8027114 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 + +This introduces virt_get_high_memmap_enabled() helper, which returns +the pointer to vms->highmem_{redists, ecam, mmio}. The pointer will +be used in the subsequent patches. + +No functional change intended. 
+ +Signed-off-by: Gavin Shan +Reviewed-by: Eric Auger +Reviewed-by: Cornelia Huck +Reviewed-by: Marc Zyngier +Tested-by: Zhenyu Zhang +Message-id: 20221029224307.138822-5-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit a5cb1350b19a5c2a58ab4edddf609ed429c13085) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 32 +++++++++++++++++++------------- + 1 file changed, 19 insertions(+), 13 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index ddcf7ee2f8..6e3b9fc060 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1736,14 +1736,31 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) + return arm_cpu_mp_affinity(idx, clustersz); + } + ++static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms, ++ int index) ++{ ++ bool *enabled_array[] = { ++ &vms->highmem_redists, ++ &vms->highmem_ecam, ++ &vms->highmem_mmio, ++ }; ++ ++ assert(ARRAY_SIZE(extended_memmap) - VIRT_LOWMEMMAP_LAST == ++ ARRAY_SIZE(enabled_array)); ++ assert(index - VIRT_LOWMEMMAP_LAST < ARRAY_SIZE(enabled_array)); ++ ++ return enabled_array[index - VIRT_LOWMEMMAP_LAST]; ++} ++ + static void virt_set_high_memmap(VirtMachineState *vms, + hwaddr base, int pa_bits) + { + hwaddr region_base, region_size; +- bool fits; ++ bool *region_enabled, fits; + int i; + + for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { ++ region_enabled = virt_get_high_memmap_enabled(vms, i); + region_base = ROUND_UP(base, extended_memmap[i].size); + region_size = extended_memmap[i].size; + +@@ -1761,18 +1778,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, + vms->highest_gpa = region_base + region_size - 1; + } + +- switch (i) { +- case VIRT_HIGH_GIC_REDIST2: +- vms->highmem_redists &= fits; +- break; +- case VIRT_HIGH_PCIE_ECAM: +- vms->highmem_ecam &= fits; +- break; +- case VIRT_HIGH_PCIE_MMIO: +- vms->highmem_mmio &= fits; +- break; +- } +- ++ *region_enabled &= fits; + base = region_base + region_size; + } + } +-- +2.31.1 + 
diff --git a/SOURCES/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch b/SOURCES/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch new file mode 100644 index 0000000..f55c06a --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch @@ -0,0 +1,130 @@ +From 5dff87c5ea60054709021025c9513ec259433ce2 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 1/8] hw/arm/virt: Introduce virt_set_high_memmap() helper + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/8] 5f6ba5af7a2c21d8473c58e088ee99b11336c673 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 + +This introduces virt_set_high_memmap() helper. The logic of high +memory region address assignment is moved to the helper. The intention +is to make the subsequent optimization for high memory region address +assignment easier. + +No functional change intended. 
+ +Signed-off-by: Gavin Shan +Reviewed-by: Eric Auger +Reviewed-by: Cornelia Huck +Reviewed-by: Marc Zyngier +Tested-by: Zhenyu Zhang +Message-id: 20221029224307.138822-2-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 4af6b6edece5ef273d29972d53547f823d2bc1c0) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 74 ++++++++++++++++++++++++++++----------------------- + 1 file changed, 41 insertions(+), 33 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index bf18838b87..bea5f54720 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1736,6 +1736,46 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) + return arm_cpu_mp_affinity(idx, clustersz); + } + ++static void virt_set_high_memmap(VirtMachineState *vms, ++ hwaddr base, int pa_bits) ++{ ++ int i; ++ ++ for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { ++ hwaddr size = extended_memmap[i].size; ++ bool fits; ++ ++ base = ROUND_UP(base, size); ++ vms->memmap[i].base = base; ++ vms->memmap[i].size = size; ++ ++ /* ++ * Check each device to see if they fit in the PA space, ++ * moving highest_gpa as we go. ++ * ++ * For each device that doesn't fit, disable it. 
++ */ ++ fits = (base + size) <= BIT_ULL(pa_bits); ++ if (fits) { ++ vms->highest_gpa = base + size - 1; ++ } ++ ++ switch (i) { ++ case VIRT_HIGH_GIC_REDIST2: ++ vms->highmem_redists &= fits; ++ break; ++ case VIRT_HIGH_PCIE_ECAM: ++ vms->highmem_ecam &= fits; ++ break; ++ case VIRT_HIGH_PCIE_MMIO: ++ vms->highmem_mmio &= fits; ++ break; ++ } ++ ++ base += size; ++ } ++} ++ + static void virt_set_memmap(VirtMachineState *vms, int pa_bits) + { + MachineState *ms = MACHINE(vms); +@@ -1791,39 +1831,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) + /* We know for sure that at least the memory fits in the PA space */ + vms->highest_gpa = memtop - 1; + +- for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { +- hwaddr size = extended_memmap[i].size; +- bool fits; +- +- base = ROUND_UP(base, size); +- vms->memmap[i].base = base; +- vms->memmap[i].size = size; +- +- /* +- * Check each device to see if they fit in the PA space, +- * moving highest_gpa as we go. +- * +- * For each device that doesn't fit, disable it. 
+- */ +- fits = (base + size) <= BIT_ULL(pa_bits); +- if (fits) { +- vms->highest_gpa = base + size - 1; +- } +- +- switch (i) { +- case VIRT_HIGH_GIC_REDIST2: +- vms->highmem_redists &= fits; +- break; +- case VIRT_HIGH_PCIE_ECAM: +- vms->highmem_ecam &= fits; +- break; +- case VIRT_HIGH_PCIE_MMIO: +- vms->highmem_mmio &= fits; +- break; +- } +- +- base += size; +- } ++ virt_set_high_memmap(vms, base, pa_bits); + + if (device_memory_size > 0) { + ms->device_memory = g_malloc0(sizeof(*ms->device_memory)); +-- +2.31.1 + diff --git a/SOURCES/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch b/SOURCES/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch deleted file mode 100644 index 10af6c0..0000000 --- a/SOURCES/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 69f771c3dc641431f3e98497cbd3832edb69284f Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 3 May 2022 08:56:52 +0200 -Subject: [PATCH 1/5] hw/arm/virt: Remove the dtb-kaslr-seed machine option - -RH-Author: Eric Auger -RH-MergeRequest: 82: hw/arm/virt: Remove the dtb-kaslr-seed machine option -RH-Commit: [1/2] a89dcd7f22e04ae39de99795d3f34cdd0b831bc0 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2046029 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2046029 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45133161 -Upstream Status: RHEL-only -Tested: Boot RHEL guest and check the option is not available - -In RHEL we do not want to expose the dtb-kaslr-seed virt machine -option. Indeed the default 'on' value matches our need as -random data in the DTB does not cause any boot failure and we -want to support KASLR for the guest. 
- -Signed-off-by: Eric Auger - ---- ---- - hw/arm/virt.c | 11 +++-------- - 1 file changed, 3 insertions(+), 8 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index e06862d22a..bde4f77994 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2350,6 +2350,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) - vms->its = value; - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static bool virt_get_dtb_kaslr_seed(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2363,6 +2364,7 @@ static void virt_set_dtb_kaslr_seed(Object *obj, bool value, Error **errp) - - vms->dtb_kaslr_seed = value; - } -+#endif /* disabled for RHEL */ - - static char *virt_get_oem_id(Object *obj, Error **errp) - { -@@ -3346,13 +3348,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "Override the default value of field OEM Table ID " - "in ACPI table header." - "The string may be up to 8 bytes in size"); -- -- object_class_property_add_bool(oc, "dtb-kaslr-seed", -- virt_get_dtb_kaslr_seed, -- virt_set_dtb_kaslr_seed); -- object_class_property_set_description(oc, "dtb-kaslr-seed", -- "Set off to disable passing of kaslr-seed " -- "dtb node to guest"); - } - - static void rhel_virt_instance_init(Object *obj) -@@ -3397,7 +3392,7 @@ static void rhel_virt_instance_init(Object *obj) - /* MTE is disabled by default and non-configurable for RHEL */ - vms->mte = false; - -- /* Supply a kaslr-seed by default */ -+ /* Supply a kaslr-seed by default and non-configurable for RHEL */ - vms->dtb_kaslr_seed = true; - - vms->irqmap = a15irqmap; --- -2.31.1 - diff --git a/SOURCES/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch b/SOURCES/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch new file mode 100644 index 0000000..27bc6bb --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch @@ -0,0 +1,83 @@ +From bd5b7edbf8f4425f4b4e0d49a00cbdd48d9c6f48 Mon Sep 17 
00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 2/8] hw/arm/virt: Rename variable size to region_size in + virt_set_high_memmap() + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/8] 1cadf1b00686cceb45821a58fdcb509bc5da335d + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 + +This renames variable 'size' to 'region_size' in virt_set_high_memmap(). +Its counterpart ('region_base') will be introduced in next patch. + +No functional change intended. + +Signed-off-by: Gavin Shan +Reviewed-by: Eric Auger +Reviewed-by: Cornelia Huck +Reviewed-by: Marc Zyngier +Tested-by: Zhenyu Zhang +Message-id: 20221029224307.138822-3-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 370bea9d1c78796eec235ed6cb4310f489931a62) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index bea5f54720..ca098d40b8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1739,15 +1739,16 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) + static void virt_set_high_memmap(VirtMachineState *vms, + hwaddr base, int pa_bits) + { ++ hwaddr region_size; ++ bool fits; + int i; + + for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { +- hwaddr size = extended_memmap[i].size; +- bool fits; ++ region_size = extended_memmap[i].size; + +- base = ROUND_UP(base, size); ++ base = ROUND_UP(base, region_size); + vms->memmap[i].base = base; +- vms->memmap[i].size = size; ++ vms->memmap[i].size = region_size; + + /* + * Check each device to see if they fit in the PA space, +@@ -1755,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms, + * + * For each device that doesn't fit, disable it. 
+ */ +- fits = (base + size) <= BIT_ULL(pa_bits); ++ fits = (base + region_size) <= BIT_ULL(pa_bits); + if (fits) { +- vms->highest_gpa = base + size - 1; ++ vms->highest_gpa = base + region_size - 1; + } + + switch (i) { +@@ -1772,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, + break; + } + +- base += size; ++ base += region_size; + } + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch b/SOURCES/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch deleted file mode 100644 index 1bdad27..0000000 --- a/SOURCES/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 6ee4a8718dcce2d6da43ee200534b75baf1d7bbe Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Thu, 18 Nov 2021 12:57:32 +0100 -Subject: [PATCH 16/17] hw/block/fdc: Prevent end-of-track overrun - (CVE-2021-3507) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -RH-MergeRequest: 107: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) -RH-Commit: [1/2] 9ffc5290348884d20b894fa79f4d0c8089247f8b (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1951522 -RH-Acked-by: Hanna Reitz -RH-Acked-by: Miroslav Rezanina - -Per the 82078 datasheet, if the end-of-track (EOT byte in -the FIFO) is more than the number of sectors per side, the -command is terminated unsuccessfully: - -* 5.2.5 DATA TRANSFER TERMINATION - - The 82078 supports terminal count explicitly through - the TC pin and implicitly through the underrun/over- - run and end-of-track (EOT) functions. For full sector - transfers, the EOT parameter can define the last - sector to be transferred in a single or multisector - transfer. If the last sector to be transferred is a par- - tial sector, the host can stop transferring the data in - mid-sector, and the 82078 will continue to complete - the sector as if a hardware TC was received. 
The - only difference between these implicit functions and - TC is that they return "abnormal termination" result - status. Such status indications can be ignored if they - were expected. - -* 6.1.3 READ TRACK - - This command terminates when the EOT specified - number of sectors have been read. If the 82078 - does not find an I D Address Mark on the diskette - after the second· occurrence of a pulse on the - INDX# pin, then it sets the IC code in Status Regis- - ter 0 to "01" (Abnormal termination), sets the MA bit - in Status Register 1 to "1", and terminates the com- - mand. - -* 6.1.6 VERIFY - - Refer to Table 6-6 and Table 6-7 for information - concerning the values of MT and EC versus SC and - EOT value. - -* Table 6·6. Result Phase Table - -* Table 6-7. Verify Command Result Phase Table - -Fix by aborting the transfer when EOT > # Sectors Per Side. - -Cc: qemu-stable@nongnu.org -Cc: Hervé Poussineau -Fixes: baca51faff0 ("floppy driver: disk geometry auto detect") -Reported-by: Alexander Bulekov -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/339 -Signed-off-by: Philippe Mathieu-Daudé -Message-Id: <20211118115733.4038610-2-philmd@redhat.com> -Reviewed-by: Hanna Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit defac5e2fbddf8423a354ff0454283a2115e1367) -Signed-off-by: Jon Maloy ---- - hw/block/fdc.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index ca1776121f..6481ec0cfb 100644 ---- a/hw/block/fdc.c -+++ b/hw/block/fdc.c -@@ -1532,6 +1532,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction) - int tmp; - fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 
7 : fdctrl->fifo[5]); - tmp = (fdctrl->fifo[6] - ks + 1); -+ if (tmp < 0) { -+ FLOPPY_DPRINTF("invalid EOT: %d\n", tmp); -+ fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00); -+ fdctrl->fifo[3] = kt; -+ fdctrl->fifo[4] = kh; -+ fdctrl->fifo[5] = ks; -+ return; -+ } - if (fdctrl->fifo[0] & 0x80) - tmp += fdctrl->fifo[6]; - fdctrl->data_len *= tmp; --- -2.31.1 - diff --git a/SOURCES/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch b/SOURCES/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch new file mode 100644 index 0000000..b452281 --- /dev/null +++ b/SOURCES/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch @@ -0,0 +1,59 @@ +From 8b0c5c6d356fd6cce9092727e20097b70e07bba9 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Thu, 23 Feb 2023 13:57:47 +0100 +Subject: [PATCH] hw/smbios: fix field corruption in type 4 table + +RH-Author: Julia Suvorova +RH-MergeRequest: 156: hw/smbios: fix field corruption in type 4 table +RH-Bugzilla: 2169904 +RH-Acked-by: Igor Mammedov +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [1/1] ee6d9bb6dfa0fb2625915947072cb91a0926c4ec + +Since table type 4 of SMBIOS version 2.6 is shorter than 3.0, the +strings which follow immediately after the struct fields have been +overwritten by unconditional filling of later fields such as core_count2. +Make these fields dependent on the SMBIOS version. + +Fixes: 05e27d74c7 ("hw/smbios: add core_count2 to smbios table type 4") +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2169904 + +Signed-off-by: Julia Suvorova +Message-Id: <20230223125747.254914-1-jusual@redhat.com> +Reviewed-by: Igor Mammedov +Reviewed-by: Ani Sinha +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 60d09b8dc7dd4256d664ad680795cb1327805b2b) +--- + hw/smbios/smbios.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index c5ad69237e..2d2ece3edb 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -752,14 +752,16 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) + t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores; + t->core_enabled = t->core_count; + +- t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); +- + t->thread_count = (ms->smp.threads > 255) ? 0xFF : ms->smp.threads; +- t->thread_count2 = cpu_to_le16(ms->smp.threads); + + t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ + t->processor_family2 = cpu_to_le16(0x01); /* Other */ + ++ if (tbl_len == SMBIOS_TYPE_4_LEN_V30) { ++ t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); ++ t->thread_count2 = cpu_to_le16(ms->smp.threads); ++ } ++ + SMBIOS_BUILD_TABLE_POST; + smbios_type4_count++; + } +-- +2.31.1 + diff --git a/SOURCES/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch b/SOURCES/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch deleted file mode 100644 index 44897ac..0000000 --- a/SOURCES/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 4dad0e9abbc843fba4e5fee6e7aa1b0db13f5898 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:27:35 +0200 -Subject: [PATCH 03/32] hw/virtio: Replace g_memdup() by g_memdup2() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [3/27] ae196903eb1a7aebbf999100e997cf82e5024cb6 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream 
Status: git://git.qemu.org/qemu.git - -commit d792199de55ca5cb5334016884039c740290b5c7 -Author: Philippe Mathieu-Daudé -Date: Thu May 12 19:57:46 2022 +0200 - - hw/virtio: Replace g_memdup() by g_memdup2() - - Per https://discourse.gnome.org/t/port-your-module-from-g-memdup-to-g-memdup2-now/5538 - - The old API took the size of the memory to duplicate as a guint, - whereas most memory functions take memory sizes as a gsize. This - made it easy to accidentally pass a gsize to g_memdup(). For large - values, that would lead to a silent truncation of the size from 64 - to 32 bits, and result in a heap area being returned which is - significantly smaller than what the caller expects. This can likely - be exploited in various modules to cause a heap buffer overflow. - - Replace g_memdup() by the safer g_memdup2() wrapper. - - Acked-by: Jason Wang - Acked-by: Eugenio Pérez - Signed-off-by: Philippe Mathieu-Daudé - Message-Id: <20220512175747.142058-6-eperezma@redhat.com> - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Michael S. 
Tsirkin - -Signed-off-by: Eugenio Pérez ---- - hw/net/virtio-net.c | 3 ++- - hw/virtio/virtio-crypto.c | 6 +++--- - 2 files changed, 5 insertions(+), 4 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 099e65036d..633de61513 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -1458,7 +1458,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) - } - - iov_cnt = elem->out_num; -- iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num); -+ iov2 = iov = g_memdup2(elem->out_sg, -+ sizeof(struct iovec) * elem->out_num); - s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); - iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); - if (s != sizeof(ctrl)) { -diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c -index dcd80b904d..0e31e3cc04 100644 ---- a/hw/virtio/virtio-crypto.c -+++ b/hw/virtio/virtio-crypto.c -@@ -242,7 +242,7 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) - } - - out_num = elem->out_num; -- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num); -+ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num); - out_iov = out_iov_copy; - - in_num = elem->in_num; -@@ -605,11 +605,11 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) - } - - out_num = elem->out_num; -- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num); -+ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num); - out_iov = out_iov_copy; - - in_num = elem->in_num; -- in_iov_copy = g_memdup(elem->in_sg, sizeof(in_iov[0]) * in_num); -+ in_iov_copy = g_memdup2(elem->in_sg, sizeof(in_iov[0]) * in_num); - in_iov = in_iov_copy; - - if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req)) --- -2.31.1 - diff --git a/SOURCES/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch b/SOURCES/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch deleted file mode 100644 index a1d4496..0000000 --- 
a/SOURCES/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 8452a7925e18d6d57e2ac787b192097d4136b104 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Thu, 18 Aug 2022 17:01:13 +0200 -Subject: [PATCH 2/2] i386: do kvm_put_msr_feature_control() first thing when - vCPU is reset - -RH-Author: Vitaly Kuznetsov -RH-MergeRequest: 217: i386: fix 'system_reset' when the VM is in VMX root operation -RH-Bugzilla: 2117546 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Commit: [2/2] 08d5992691ba70561ce0a5b7f4504618f96a2ee6 - -kvm_put_sregs2() fails to reset 'locked' CR4/CR0 bits upon vCPU reset when -it is in VMX root operation. Do kvm_put_msr_feature_control() before -kvm_put_sregs2() to (possibly) kick vCPU out of VMX root operation. It also -seems logical to do kvm_put_msr_feature_control() before -kvm_put_nested_state() and not after it, especially when 'real' nested -state is set. - -Signed-off-by: Vitaly Kuznetsov -Message-Id: <20220818150113.479917-3-vkuznets@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 45ed68a1a3a19754ade954d75a3c9d13ff560e5c) -Signed-off-by: Vitaly Kuznetsov ---- - target/i386/kvm/kvm.c | 17 ++++++++++++----- - 1 file changed, 12 insertions(+), 5 deletions(-) - -diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index 9feb98fe0b..ef70e2c85f 100644 ---- a/target/i386/kvm/kvm.c -+++ b/target/i386/kvm/kvm.c -@@ -4356,6 +4356,18 @@ int kvm_arch_put_registers(CPUState *cpu, int level) - - assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); - -+ /* -+ * Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX -+ * root operation upon vCPU reset. kvm_put_msr_feature_control() should also -+ * preceed kvm_put_nested_state() when 'real' nested state is set. 
-+ */ -+ if (level >= KVM_PUT_RESET_STATE) { -+ ret = kvm_put_msr_feature_control(x86_cpu); -+ if (ret < 0) { -+ return ret; -+ } -+ } -+ - /* must be before kvm_put_nested_state so that EFER.SVME is set */ - ret = has_sregs2 ? kvm_put_sregs2(x86_cpu) : kvm_put_sregs(x86_cpu); - if (ret < 0) { -@@ -4367,11 +4379,6 @@ int kvm_arch_put_registers(CPUState *cpu, int level) - if (ret < 0) { - return ret; - } -- -- ret = kvm_put_msr_feature_control(x86_cpu); -- if (ret < 0) { -- return ret; -- } - } - - if (level == KVM_PUT_FULL_STATE) { --- -2.31.1 - diff --git a/SOURCES/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch b/SOURCES/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch deleted file mode 100644 index 7fe9b99..0000000 --- a/SOURCES/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch +++ /dev/null @@ -1,95 +0,0 @@ -From b84bb71165c97b475548edc1c07decccca53cf16 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Thu, 18 Aug 2022 17:01:12 +0200 -Subject: [PATCH 1/2] i386: reset KVM nested state upon CPU reset - -RH-Author: Vitaly Kuznetsov -RH-MergeRequest: 217: i386: fix 'system_reset' when the VM is in VMX root operation -RH-Bugzilla: 2117546 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Commit: [1/2] b329f053a027761f50187e4ca7fd6b50ac32d2ad - -Make sure env->nested_state is cleaned up when a vCPU is reset, it may -be stale after an incoming migration, kvm_arch_put_registers() may -end up failing or putting vCPU in a weird state. 
- -Reviewed-by: Maxim Levitsky -Signed-off-by: Vitaly Kuznetsov -Message-Id: <20220818150113.479917-2-vkuznets@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 3cafdb67504a34a0305260f0c86a73d5a3fb000b) -Signed-off-by: Vitaly Kuznetsov ---- - target/i386/kvm/kvm.c | 37 +++++++++++++++++++++++++++---------- - 1 file changed, 27 insertions(+), 10 deletions(-) - -diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index 6d1e009443..9feb98fe0b 100644 ---- a/target/i386/kvm/kvm.c -+++ b/target/i386/kvm/kvm.c -@@ -1617,6 +1617,30 @@ static void kvm_init_xsave(CPUX86State *env) - env->xsave_buf_len); - } - -+static void kvm_init_nested_state(CPUX86State *env) -+{ -+ struct kvm_vmx_nested_state_hdr *vmx_hdr; -+ uint32_t size; -+ -+ if (!env->nested_state) { -+ return; -+ } -+ -+ size = env->nested_state->size; -+ -+ memset(env->nested_state, 0, size); -+ env->nested_state->size = size; -+ -+ if (cpu_has_vmx(env)) { -+ env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; -+ vmx_hdr = &env->nested_state->hdr.vmx; -+ vmx_hdr->vmxon_pa = -1ull; -+ vmx_hdr->vmcs12_pa = -1ull; -+ } else if (cpu_has_svm(env)) { -+ env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM; -+ } -+} -+ - int kvm_arch_init_vcpu(CPUState *cs) - { - struct { -@@ -2044,19 +2068,10 @@ int kvm_arch_init_vcpu(CPUState *cs) - assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data)); - - if (cpu_has_vmx(env) || cpu_has_svm(env)) { -- struct kvm_vmx_nested_state_hdr *vmx_hdr; -- - env->nested_state = g_malloc0(max_nested_state_len); - env->nested_state->size = max_nested_state_len; - -- if (cpu_has_vmx(env)) { -- env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; -- vmx_hdr = &env->nested_state->hdr.vmx; -- vmx_hdr->vmxon_pa = -1ull; -- vmx_hdr->vmcs12_pa = -1ull; -- } else { -- env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM; -- } -+ kvm_init_nested_state(env); - } - } - -@@ -2121,6 +2136,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) - /* enabled by 
default */ - env->poll_control_msr = 1; - -+ kvm_init_nested_state(env); -+ - sev_es_set_reset_vector(CPU(cpu)); - } - --- -2.31.1 - diff --git a/SOURCES/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch b/SOURCES/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch new file mode 100644 index 0000000..0f321e4 --- /dev/null +++ b/SOURCES/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch @@ -0,0 +1,64 @@ +From cadcc1c6a001622d971c86d44925516905e3d104 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Thu, 23 Feb 2023 14:59:21 +0800 +Subject: [PATCH 8/8] intel-iommu: fail DEVIOTLB_UNMAP without dt mode + +RH-Author: Laurent Vivier +RH-MergeRequest: 157: intel-iommu: fail DEVIOTLB_UNMAP without dt mode +RH-Bugzilla: 2156876 +RH-Acked-by: Eric Auger +RH-Acked-by: Peter Xu +RH-Acked-by: MST +RH-Commit: [1/1] eb9dbae6140ef4ba10d90b9e66abd75540f6892d (lvivier/qemu-kvm-centos) + +Without dt mode, device IOTLB notifier won't work since guest won't +send device IOTLB invalidation descriptor in this case. Let's fail +early instead of misbehaving silently. + +Reviewed-by: Laurent Vivier +Tested-by: Laurent Vivier +Tested-by: Viktor Prutyanov +Buglink: https://bugzilla.redhat.com/2156876 +Signed-off-by: Jason Wang +Message-Id: <20230223065924.42503-3-jasowang@redhat.com> +Reviewed-by: Peter Xu +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 09adb0e021207b60a0c51a68939b4539d98d3ef3) + +Conflict in hw/i386/intel_iommu.c because of missing commit: + + 4ce27463ccce ("intel-iommu: fail MAP notifier without caching mode") +--- + hw/i386/intel_iommu.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index a08ee85edf..d2983f40d3 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -3179,6 +3179,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, + { + VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); + IntelIOMMUState *s = vtd_as->iommu_state; ++ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); + + /* TODO: add support for VFIO and vhost users */ + if (s->snoop_control) { +@@ -3186,6 +3187,13 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, + "Snoop Control with vhost or VFIO is not supported"); + return -ENOTSUP; + } ++ if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) { ++ error_setg_errno(errp, ENOTSUP, ++ "device %02x.%02x.%x requires device IOTLB mode", ++ pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn), ++ PCI_FUNC(vtd_as->devfn)); ++ return -ENOTSUP; ++ } + + /* Update per-address-space notifier flags */ + vtd_as->notifier_flags = new; +-- +2.39.1 + diff --git a/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch b/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch new file mode 100644 index 0000000..22abf35 --- /dev/null +++ b/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch @@ -0,0 +1,386 @@ +From 3a29b50036b972caae5bca0e5dfc34d910b1d5e9 Mon Sep 17 00:00:00 2001 +From: "manish.mishra" +Date: Tue, 20 Dec 2022 18:44:17 +0000 +Subject: [PATCH 6/8] io: Add support for MSG_PEEK for socket channel +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 150: migration: Fix multifd crash on 
channel disorders +RH-Bugzilla: 2169732 +RH-Acked-by: quintela1 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Dr. David Alan Gilbert +RH-Commit: [1/2] 266563f3e387e97ec710d9bc179e5de26dfd09f1 (peterx/qemu-kvm) + +MSG_PEEK peeks at the channel, The data is treated as unread and +the next read shall still return this data. This support is +currently added only for socket class. Extra parameter 'flags' +is added to io_readv calls to pass extra read flags like MSG_PEEK. + +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrange +Reviewed-by: Juan Quintela +Suggested-by: Daniel P. Berrange +Signed-off-by: manish.mishra +Signed-off-by: Juan Quintela +(cherry picked from commit 84615a19ddf2bfb38d7b3a0d487d2397ee55e4f3) +Signed-off-by: Peter Xu +--- + chardev/char-socket.c | 4 ++-- + include/io/channel.h | 6 ++++++ + io/channel-buffer.c | 1 + + io/channel-command.c | 1 + + io/channel-file.c | 1 + + io/channel-null.c | 1 + + io/channel-socket.c | 19 ++++++++++++++++++- + io/channel-tls.c | 1 + + io/channel-websock.c | 1 + + io/channel.c | 16 ++++++++++++---- + migration/channel-block.c | 1 + + migration/rdma.c | 1 + + scsi/qemu-pr-helper.c | 2 +- + tests/qtest/tpm-emu.c | 2 +- + tests/unit/test-io-channel-socket.c | 1 + + util/vhost-user-server.c | 2 +- + 16 files changed, 50 insertions(+), 10 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 879564aa8a..5afce9a464 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -283,11 +283,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len) + if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { + ret = qio_channel_readv_full(s->ioc, &iov, 1, + &msgfds, &msgfds_num, +- NULL); ++ 0, NULL); + } else { + ret = qio_channel_readv_full(s->ioc, &iov, 1, + NULL, NULL, +- NULL); ++ 0, NULL); + } + + if (msgfds_num) { +diff --git a/include/io/channel.h b/include/io/channel.h +index c680ee7480..716235d496 100644 +--- a/include/io/channel.h ++++ b/include/io/channel.h 
+@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, + + #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 + ++#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1 ++ + typedef enum QIOChannelFeature QIOChannelFeature; + + enum QIOChannelFeature { +@@ -41,6 +43,7 @@ enum QIOChannelFeature { + QIO_CHANNEL_FEATURE_SHUTDOWN, + QIO_CHANNEL_FEATURE_LISTEN, + QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK, + }; + + +@@ -114,6 +117,7 @@ struct QIOChannelClass { + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp); + int (*io_close)(QIOChannel *ioc, + Error **errp); +@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: pointer to an array that will received file handles + * @nfds: pointer filled with number of elements in @fds on return ++ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * Read data from the IO channel, storing it in the +@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp); + + +diff --git a/io/channel-buffer.c b/io/channel-buffer.c +index bf52011be2..8096180f85 100644 +--- a/io/channel-buffer.c ++++ b/io/channel-buffer.c +@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); +diff --git a/io/channel-command.c b/io/channel-command.c +index 74516252ba..e7edd091af 100644 +--- a/io/channel-command.c ++++ b/io/channel-command.c +@@ -203,6 +203,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); +diff --git a/io/channel-file.c b/io/channel-file.c +index b67687c2aa..d76663e6ae 100644 +--- a/io/channel-file.c ++++ 
b/io/channel-file.c +@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); +diff --git a/io/channel-null.c b/io/channel-null.c +index 75e3781507..4fafdb770d 100644 +--- a/io/channel-null.c ++++ b/io/channel-null.c +@@ -60,6 +60,7 @@ qio_channel_null_readv(QIOChannel *ioc, + size_t niov, + int **fds G_GNUC_UNUSED, + size_t *nfds G_GNUC_UNUSED, ++ int flags, + Error **errp) + { + QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc); +diff --git a/io/channel-socket.c b/io/channel-socket.c +index b76dca9cc1..7aca84f61a 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -173,6 +173,9 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, + } + #endif + ++ qio_channel_set_feature(QIO_CHANNEL(ioc), ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); ++ + return 0; + } + +@@ -406,6 +409,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, + } + #endif /* WIN32 */ + ++ qio_channel_set_feature(QIO_CHANNEL(cioc), ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); ++ + trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd); + return cioc; + +@@ -496,6 +502,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +@@ -517,6 +524,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + + } + ++ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { ++ sflags |= MSG_PEEK; ++ } ++ + retry: + ret = recvmsg(sioc->fd, &msg, sflags); + if (ret < 0) { +@@ -624,11 +635,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + ssize_t done = 0; + ssize_t i; ++ int sflags = 0; ++ ++ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { ++ sflags |= MSG_PEEK; ++ } + + for (i = 0; i < niov; 
i++) { + ssize_t ret; +@@ -636,7 +653,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + ret = recv(sioc->fd, + iov[i].iov_base, + iov[i].iov_len, +- 0); ++ sflags); + if (ret < 0) { + if (errno == EAGAIN) { + if (done) { +diff --git a/io/channel-tls.c b/io/channel-tls.c +index 4ce890a538..c730cb8ec5 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); +diff --git a/io/channel-websock.c b/io/channel-websock.c +index fb4932ade7..a12acc27cf 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); +diff --git a/io/channel.c b/io/channel.c +index 0640941ac5..a8c7f11649 100644 +--- a/io/channel.c ++++ b/io/channel.c +@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); +@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + return -1; + } + +- return klass->io_readv(ioc, iov, niov, fds, nfds, errp); ++ if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) && ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { ++ error_setg_errno(errp, EINVAL, ++ "Channel does not support peek read"); ++ return -1; ++ } ++ ++ return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp); + } + + +@@ -146,7 +154,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc, + while ((nlocal_iov > 0) || local_fds) { + ssize_t len; + len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds, +- local_nfds, errp); ++ local_nfds, 0, errp); + if (len == QIO_CHANNEL_ERR_BLOCK) { + if 
(qemu_in_coroutine()) { + qio_channel_yield(ioc, G_IO_IN); +@@ -284,7 +292,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp); ++ return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp); + } + + +@@ -303,7 +311,7 @@ ssize_t qio_channel_read(QIOChannel *ioc, + Error **errp) + { + struct iovec iov = { .iov_base = buf, .iov_len = buflen }; +- return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp); ++ return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp); + } + + +diff --git a/migration/channel-block.c b/migration/channel-block.c +index f4ab53acdb..b7374363c3 100644 +--- a/migration/channel-block.c ++++ b/migration/channel-block.c +@@ -53,6 +53,7 @@ qio_channel_block_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc); +diff --git a/migration/rdma.c b/migration/rdma.c +index 94a55dd95b..d8b4632094 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -2854,6 +2854,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); +diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c +index 196b78c00d..199227a556 100644 +--- a/scsi/qemu-pr-helper.c ++++ b/scsi/qemu-pr-helper.c +@@ -614,7 +614,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz, + iov.iov_base = buf; + iov.iov_len = sz; + n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1, +- &fds, &nfds, errp); ++ &fds, &nfds, 0, errp); + + if (n_read == QIO_CHANNEL_ERR_BLOCK) { + qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN); +diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c +index 2994d1cf42..3cf1acaf7d 100644 +--- a/tests/qtest/tpm-emu.c ++++ b/tests/qtest/tpm-emu.c +@@ -106,7 +106,7 @@ void 
*tpm_emu_ctrl_thread(void *data) + int *pfd = NULL; + size_t nfd = 0; + +- qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort); ++ qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort); + cmd = be32_to_cpu(cmd); + g_assert_cmpint(cmd, ==, CMD_SET_DATAFD); + g_assert_cmpint(nfd, ==, 1); +diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c +index b36a5d972a..b964bb202d 100644 +--- a/tests/unit/test-io-channel-socket.c ++++ b/tests/unit/test-io-channel-socket.c +@@ -460,6 +460,7 @@ static void test_io_channel_unix_fd_pass(void) + G_N_ELEMENTS(iorecv), + &fdrecv, + &nfdrecv, ++ 0, + &error_abort); + + g_assert(nfdrecv == G_N_ELEMENTS(fdsend)); +diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c +index 232984ace6..145eb17c08 100644 +--- a/util/vhost-user-server.c ++++ b/util/vhost-user-server.c +@@ -116,7 +116,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) + * qio_channel_readv_full may have short reads, keeping calling it + * until getting VHOST_USER_HDR_SIZE or 0 bytes in total + */ +- rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err); ++ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err); + if (rc < 0) { + if (rc == QIO_CHANNEL_ERR_BLOCK) { + assert(local_err == NULL); +-- +2.31.1 + diff --git a/SOURCES/kvm-iotests-106-214-308-Read-only-one-size-line.patch b/SOURCES/kvm-iotests-106-214-308-Read-only-one-size-line.patch new file mode 100644 index 0000000..399acfc --- /dev/null +++ b/SOURCES/kvm-iotests-106-214-308-Read-only-one-size-line.patch @@ -0,0 +1,99 @@ +From 6727e92a97f8ee9f367a41111bef3f5cad4a479a Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:27:02 +0200 +Subject: [PATCH 15/20] iotests/106, 214, 308: Read only one size line + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi 
+RH-Acked-by: Stefano Garzarella +RH-Commit: [10/12] 1554e0a92b92ed101a251478ccae43f45f6e071e (hreitz/qemu-kvm-c-9-s) + +These tests read size information (sometimes disk size, sometimes +virtual size) from qemu-img info's output. Once qemu-img starts +printing info about child nodes, we are going to see multiple instances +of that per image, but these tests are only interested in the first one, +so use "head -n 1" to get it. + +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-11-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 74163adda3101b127943f7cbbf8fcccd2d472426) +Signed-off-by: Hanna Czenczek +--- + tests/qemu-iotests/106 | 4 ++-- + tests/qemu-iotests/214 | 6 ++++-- + tests/qemu-iotests/308 | 4 ++-- + 3 files changed, 8 insertions(+), 6 deletions(-) + +diff --git a/tests/qemu-iotests/106 b/tests/qemu-iotests/106 +index 9d6adb542d..ae0fc46691 100755 +--- a/tests/qemu-iotests/106 ++++ b/tests/qemu-iotests/106 +@@ -66,7 +66,7 @@ for create_mode in off falloc full; do + expected_size=$((expected_size + $GROWTH_SIZE)) + fi + +- actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size') ++ actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size' | head -n 1) + actual_size=$(echo "$actual_size" | sed -e 's/^[^0-9]*\([0-9]\+\).*$/\1/') + + # The actual size may exceed the expected size, depending on the file +@@ -105,7 +105,7 @@ for growth_mode in falloc full; do + _make_test_img -o "extent_size_hint=0" 2G + $QEMU_IMG resize -f "$IMGFMT" --preallocation=$growth_mode "$TEST_IMG" +${GROWTH_SIZE}K + +- actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size') ++ actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size' | head -n 1) + actual_size=$(echo "$actual_size" | sed -e 's/^[^0-9]*\([0-9]\+\).*$/\1/') + + if [ $actual_size -lt $GROWTH_SIZE ]; then +diff --git a/tests/qemu-iotests/214 b/tests/qemu-iotests/214 +index c66e246ba2..55ffcd7f44 
100755 +--- a/tests/qemu-iotests/214 ++++ b/tests/qemu-iotests/214 +@@ -102,7 +102,8 @@ let data_size="8 * $cluster_size" + $QEMU_IO -c "write -P 0xaa 0 $data_size" "$TEST_IMG" \ + 2>&1 | _filter_qemu_io | _filter_testdir + sizeA=$($QEMU_IMG info --output=json "$TEST_IMG" | +- sed -n '/"actual-size":/ s/[^0-9]//gp') ++ sed -n '/"actual-size":/ s/[^0-9]//gp' | ++ head -n 1) + + _make_test_img 2M -o cluster_size=$cluster_size + echo "Write compressed data:" +@@ -124,7 +125,8 @@ $QEMU_IO -c "write -P 0xcc $offset $data_size" "json:{\ + _filter_qemu_io | _filter_testdir + + sizeB=$($QEMU_IMG info --output=json "$TEST_IMG" | +- sed -n '/"actual-size":/ s/[^0-9]//gp') ++ sed -n '/"actual-size":/ s/[^0-9]//gp' | ++ head -n 1) + + if [ $sizeA -lt $sizeB ] + then +diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308 +index bde4aac2fa..09275e9a10 100755 +--- a/tests/qemu-iotests/308 ++++ b/tests/qemu-iotests/308 +@@ -217,12 +217,12 @@ echo + echo '=== Remove export ===' + + # Double-check that $EXT_MP appears as a non-empty file (the raw image) +-$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' ++$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' | head -n 1 + + fuse_export_del 'export-mp' + + # See that the file appears empty again +-$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' ++$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' | head -n 1 + + echo + echo '=== Writable export ===' +-- +2.31.1 + diff --git a/SOURCES/kvm-iotests-108-Fix-when-missing-user_allow_other.patch b/SOURCES/kvm-iotests-108-Fix-when-missing-user_allow_other.patch deleted file mode 100644 index a37ea6f..0000000 --- a/SOURCES/kvm-iotests-108-Fix-when-missing-user_allow_other.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 447bca651c9156d7aba6b7495c75f19b5e4ed53f Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Thu, 21 Apr 2022 16:24:35 +0200 -Subject: [PATCH 07/16] iotests/108: Fix when missing user_allow_other - -RH-Author: Hanna Reitz -RH-MergeRequest: 96: qcow2: Improve 
refcount structure rebuilding -RH-Commit: [4/4] a51ab8606fc9d8dea2b6539f4e795d5813892a5c (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2072379 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -FUSE exports' allow-other option defaults to "auto", which means that it -will try passing allow_other as a mount option, and fall back to not -using it when an error occurs. We make no effort to hide fusermount's -error message (because it would be difficult, and because users might -want to know about the fallback occurring), and so when allow_other does -not work (primarily when /etc/fuse.conf does not contain -user_allow_other), this error message will appear and break the -reference output. - -We do not need allow_other here, though, so we can just pass -allow-other=off to fix that. - -Reported-by: Markus Armbruster -Signed-off-by: Hanna Reitz -Message-Id: <20220421142435.569600-1-hreitz@redhat.com> -Tested-by: Markus Armbruster -Tested-by: Eric Blake -(cherry picked from commit 348a0740afc5b313599533eb69bbb2b95d2f1bba) -Signed-off-by: Hanna Reitz ---- - tests/qemu-iotests/108 | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 -index a3090e2875..4681c7c769 100755 ---- a/tests/qemu-iotests/108 -+++ b/tests/qemu-iotests/108 -@@ -326,7 +326,7 @@ else - - $QSD \ - --blockdev file,node-name=export-node,filename="$TEST_IMG" \ -- --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ -+ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off,allow-other=off \ - --pidfile "$TEST_DIR/qsd.pid" \ - & - --- -2.31.1 - diff --git a/SOURCES/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch b/SOURCES/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch deleted file mode 100644 index 7a968f6..0000000 --- a/SOURCES/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch +++ /dev/null @@ 
-1,445 +0,0 @@ -From ed69e01352b5e9a06173daab53bfa373c8535732 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Tue, 5 Apr 2022 15:46:51 +0200 -Subject: [PATCH 05/16] iotests/108: Test new refcount rebuild algorithm - -RH-Author: Hanna Reitz -RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding -RH-Commit: [2/4] b68310a9fee8465dd3f568c8e867e1b7ae52bdaf (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2072379 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -One clear problem with how qcow2's refcount structure rebuild algorithm -used to be before "qcow2: Improve refcount structure rebuilding" was -that it is prone to failure for qcow2 images on block devices: There is -generally unused space after the actual image, and if that exceeds what -one refblock covers, the old algorithm would invariably write the -reftable past the block device's end, which cannot work. The new -algorithm does not have this problem. - -Test it with three tests: -(1) Create an image with more empty space at the end than what one - refblock covers, see whether rebuilding the refcount structures - results in a change in the image file length. (It should not.) - -(2) Leave precisely enough space somewhere at the beginning of the image - for the new reftable (and the refblock for that place), see whether - the new algorithm puts the reftable there. (It should.) - -(3) Test the original problem: Create (something like) a block device - with a fixed size, then create a qcow2 image in there, write some - data, and then have qemu-img check rebuild the refcount structures. - Before HEAD^, the reftable would have been written past the image - file end, i.e. outside of what the block device provides, which - cannot work. HEAD^ should have fixed that. - ("Something like a block device" means a loop device if we can use - one ("sudo -n losetup" works), or a FUSE block export with - growable=false otherwise.) 
- -Reviewed-by: Eric Blake -Signed-off-by: Hanna Reitz -Message-Id: <20220405134652.19278-3-hreitz@redhat.com> -(cherry picked from commit 9ffd6d646d1d5ee9087a8cbf0b7d2f96c5656162) - -Conflicts: -- 108: The downstream qemu-storage-daemon does not support --daemonize, - so this switch has been replaced by a loop waiting for the PID file to - appear - -Signed-off-by: Hanna Reitz ---- - tests/qemu-iotests/108 | 263 ++++++++++++++++++++++++++++++++++++- - tests/qemu-iotests/108.out | 81 ++++++++++++ - 2 files changed, 343 insertions(+), 1 deletion(-) - -diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 -index 56339ab2c5..a3090e2875 100755 ---- a/tests/qemu-iotests/108 -+++ b/tests/qemu-iotests/108 -@@ -30,13 +30,20 @@ status=1 # failure is the default! - - _cleanup() - { -- _cleanup_test_img -+ _cleanup_test_img -+ if [ -f "$TEST_DIR/qsd.pid" ]; then -+ qsd_pid=$(cat "$TEST_DIR/qsd.pid") -+ kill -KILL "$qsd_pid" -+ fusermount -u "$TEST_DIR/fuse-export" &>/dev/null -+ fi -+ rm -f "$TEST_DIR/fuse-export" - } - trap "_cleanup; exit \$status" 0 1 2 3 15 - - # get standard environment, filters and checks - . ./common.rc - . ./common.filter -+. ./common.qemu - - # This tests qcow2-specific low-level functionality - _supported_fmt qcow2 -@@ -47,6 +54,22 @@ _supported_os Linux - # files - _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file - -+# This test either needs sudo -n losetup or FUSE exports to work -+if sudo -n losetup &>/dev/null; then -+ loopdev=true -+else -+ loopdev=false -+ -+ # QSD --export fuse will either yield "Parameter 'id' is missing" -+ # or "Invalid parameter 'fuse'", depending on whether there is -+ # FUSE support or not. 
-+ error=$($QSD --export fuse 2>&1) -+ if [[ $error = *"'fuse'"* ]]; then -+ _notrun 'Passwordless sudo for losetup or FUSE support required, but' \ -+ 'neither is available' -+ fi -+fi -+ - echo - echo '=== Repairing an image without any refcount table ===' - echo -@@ -138,6 +161,244 @@ _make_test_img 64M - poke_file "$TEST_IMG" $((0x10008)) "\xff\xff\xff\xff\xff\xff\x00\x00" - _check_test_img -r all - -+echo -+echo '=== Check rebuilt reftable location ===' -+ -+# In an earlier version of the refcount rebuild algorithm, the -+# reftable was generally placed at the image end (unless something was -+# allocated in the area covered by the refblock right before the image -+# file end, then we would try to place the reftable in that refblock). -+# This was later changed so the reftable would be placed in the -+# earliest possible location. Test this. -+ -+echo -+echo '--- Does the image size increase? ---' -+echo -+ -+# First test: Just create some image, write some data to it, and -+# resize it so there is free space at the end of the image (enough -+# that it spans at least one full refblock, which for cluster_size=512 -+# images, spans 128k). With the old algorithm, the reftable would -+# have then been placed at the end of the image file, but with the new -+# one, it will be put in that free space. -+# We want to check whether the size of the image file increases due to -+# rebuilding the refcount structures (it should not). 
-+ -+_make_test_img -o 'cluster_size=512' 1M -+# Write something -+$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io -+ -+# Add free space -+file_len=$(stat -c '%s' "$TEST_IMG") -+truncate -s $((file_len + 256 * 1024)) "$TEST_IMG" -+ -+# Corrupt the image by saying the image header was not allocated -+rt_offset=$(peek_file_be "$TEST_IMG" 48 8) -+rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) -+poke_file "$TEST_IMG" $rb_offset "\x00\x00" -+ -+# Check whether rebuilding the refcount structures increases the image -+# file size -+file_len=$(stat -c '%s' "$TEST_IMG") -+echo -+# The only leaks there can be are the old refcount structures that are -+# leaked during rebuilding, no need to clutter the output with them -+_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' -+echo -+post_repair_file_len=$(stat -c '%s' "$TEST_IMG") -+ -+if [[ $file_len -eq $post_repair_file_len ]]; then -+ echo 'OK: Image size did not change' -+else -+ echo 'ERROR: Image size differs' \ -+ "($file_len before, $post_repair_file_len after)" -+fi -+ -+echo -+echo '--- Will the reftable occupy a hole specifically left for it? ---' -+echo -+ -+# Note: With cluster_size=512, every refblock covers 128k. -+# The reftable covers 8M per reftable cluster. -+ -+# Create an image that requires two reftable clusters (just because -+# this is more interesting than a single-clustered reftable). -+_make_test_img -o 'cluster_size=512' 9M -+$QEMU_IO -c 'write 0 8M' "$TEST_IMG" | _filter_qemu_io -+ -+# Writing 8M will have resized the reftable. Unfortunately, doing so -+# will leave holes in the file, so we need to fill them up so we can -+# be sure the whole file is allocated. Do that by writing -+# consecutively smaller chunks starting from 8 MB, until the file -+# length increases even with a chunk size of 512. Then we must have -+# filled all holes. 
-+ofs=$((8 * 1024 * 1024)) -+block_len=$((16 * 1024)) -+while [[ $block_len -ge 512 ]]; do -+ file_len=$(stat -c '%s' "$TEST_IMG") -+ while [[ $(stat -c '%s' "$TEST_IMG") -eq $file_len ]]; do -+ # Do not include this in the reference output, it does not -+ # really matter which qemu-io calls we do here exactly -+ $QEMU_IO -c "write $ofs $block_len" "$TEST_IMG" >/dev/null -+ ofs=$((ofs + block_len)) -+ done -+ block_len=$((block_len / 2)) -+done -+ -+# Fill up to 9M (do not include this in the reference output either, -+# $ofs is random for all we know) -+$QEMU_IO -c "write $ofs $((9 * 1024 * 1024 - ofs))" "$TEST_IMG" >/dev/null -+ -+# Make space as follows: -+# - For the first refblock: Right at the beginning of the image (this -+# refblock is placed in the first place possible), -+# - For the reftable somewhere soon afterwards, still near the -+# beginning of the image (i.e. covered by the first refblock); the -+# reftable too is placed in the first place possible, but only after -+# all refblocks have been placed) -+# No space is needed for the other refblocks, because no refblock is -+# put before the space it covers. In this test case, we do not mind -+# if they are placed at the image file's end. -+ -+# Before we make that space, we have to find out the host offset of -+# the area that belonged to the two data clusters at guest offset 4k, -+# because we expect the reftable to be placed there, and we will have -+# to verify that it is. 
-+ -+l1_offset=$(peek_file_be "$TEST_IMG" 40 8) -+l2_offset=$(peek_file_be "$TEST_IMG" $l1_offset 8) -+l2_offset=$((l2_offset & 0x00fffffffffffe00)) -+data_4k_offset=$(peek_file_be "$TEST_IMG" \ -+ $((l2_offset + 4096 / 512 * 8)) 8) -+data_4k_offset=$((data_4k_offset & 0x00fffffffffffe00)) -+ -+$QEMU_IO -c "discard 0 512" -c "discard 4k 1k" "$TEST_IMG" | _filter_qemu_io -+ -+# Corrupt the image by saying the image header was not allocated -+rt_offset=$(peek_file_be "$TEST_IMG" 48 8) -+rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) -+poke_file "$TEST_IMG" $rb_offset "\x00\x00" -+ -+echo -+# The only leaks there can be are the old refcount structures that are -+# leaked during rebuilding, no need to clutter the output with them -+_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' -+echo -+ -+# Check whether the reftable was put where we expected -+rt_offset=$(peek_file_be "$TEST_IMG" 48 8) -+if [[ $rt_offset -eq $data_4k_offset ]]; then -+ echo 'OK: Reftable is where we expect it' -+else -+ echo "ERROR: Reftable is at $rt_offset, but was expected at $data_4k_offset" -+fi -+ -+echo -+echo '--- Rebuilding refcount structures on block devices ---' -+echo -+ -+# A block device cannot really grow, at least not during qemu-img -+# check. As mentioned in the above cases, rebuilding the refcount -+# structure may lead to new refcount structures being written after -+# the end of the image, and in the past that happened even if there -+# was more than sufficient space in the image. Such post-EOF writes -+# will not work on block devices, so test that the new algorithm -+# avoids it. -+ -+# If we have passwordless sudo and losetup, we can use those to create -+# a block device. Otherwise, we can resort to qemu's FUSE export to -+# create a file that isn't growable, which effectively tests the same -+# thing. 
-+ -+_cleanup_test_img -+truncate -s $((64 * 1024 * 1024)) "$TEST_IMG" -+ -+if $loopdev; then -+ export_mp=$(sudo -n losetup --show -f "$TEST_IMG") -+ export_mp_driver=host_device -+ sudo -n chmod go+rw "$export_mp" -+else -+ # Create non-growable FUSE export that is a bit like an empty -+ # block device -+ export_mp="$TEST_DIR/fuse-export" -+ export_mp_driver=file -+ touch "$export_mp" -+ -+ $QSD \ -+ --blockdev file,node-name=export-node,filename="$TEST_IMG" \ -+ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ -+ --pidfile "$TEST_DIR/qsd.pid" \ -+ & -+ -+ while [ ! -f "$TEST_DIR/qsd.pid" ]; do -+ sleep 0.1 -+ done -+fi -+ -+# Now create a qcow2 image on the device -- unfortunately, qemu-img -+# create force-creates the file, so we have to resort to the -+# blockdev-create job. -+_launch_qemu \ -+ --blockdev $export_mp_driver,node-name=file,filename="$export_mp" -+ -+_send_qemu_cmd \ -+ $QEMU_HANDLE \ -+ '{ "execute": "qmp_capabilities" }' \ -+ 'return' -+ -+# Small cluster size again, so the image needs multiple refblocks -+_send_qemu_cmd \ -+ $QEMU_HANDLE \ -+ '{ "execute": "blockdev-create", -+ "arguments": { -+ "job-id": "create", -+ "options": { -+ "driver": "qcow2", -+ "file": "file", -+ "size": '$((64 * 1024 * 1024))', -+ "cluster-size": 512 -+ } } }' \ -+ '"concluded"' -+ -+_send_qemu_cmd \ -+ $QEMU_HANDLE \ -+ '{ "execute": "job-dismiss", "arguments": { "id": "create" } }' \ -+ 'return' -+ -+_send_qemu_cmd \ -+ $QEMU_HANDLE \ -+ '{ "execute": "quit" }' \ -+ 'return' -+ -+wait=y _cleanup_qemu -+echo -+ -+# Write some data -+$QEMU_IO -c 'write 0 64k' "$export_mp" | _filter_qemu_io -+ -+# Corrupt the image by saying the image header was not allocated -+rt_offset=$(peek_file_be "$export_mp" 48 8) -+rb_offset=$(peek_file_be "$export_mp" $rt_offset 8) -+poke_file "$export_mp" $rb_offset "\x00\x00" -+ -+# Repairing such a simple case should just work -+# (We used to put the reftable at the end of the image 
file, which can -+# never work for non-growable devices.) -+echo -+TEST_IMG="$export_mp" _check_test_img -r all \ -+ | grep -v '^Repairing cluster.*refcount=1 reference=0' -+ -+if $loopdev; then -+ sudo -n losetup -d "$export_mp" -+else -+ qsd_pid=$(cat "$TEST_DIR/qsd.pid") -+ kill -TERM "$qsd_pid" -+ # Wait for process to exit (cannot `wait` because the QSD is daemonized) -+ while [ -f "$TEST_DIR/qsd.pid" ]; do -+ true -+ done -+fi -+ - # success, all done - echo '*** done' - rm -f $seq.full -diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out -index 75bab8dc84..b5401d788d 100644 ---- a/tests/qemu-iotests/108.out -+++ b/tests/qemu-iotests/108.out -@@ -105,6 +105,87 @@ The following inconsistencies were found and repaired: - 0 leaked clusters - 1 corruptions - -+Double checking the fixed image now... -+No errors were found on the image. -+ -+=== Check rebuilt reftable location === -+ -+--- Does the image size increase? --- -+ -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 -+wrote 65536/65536 bytes at offset 0 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+ERROR cluster 0 refcount=0 reference=1 -+Rebuilding refcount structure -+The following inconsistencies were found and repaired: -+ -+ 0 leaked clusters -+ 1 corruptions -+ -+Double checking the fixed image now... -+No errors were found on the image. -+ -+OK: Image size did not change -+ -+--- Will the reftable occupy a hole specifically left for it? 
--- -+ -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=9437184 -+wrote 8388608/8388608 bytes at offset 0 -+8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+discard 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+discard 1024/1024 bytes at offset 4096 -+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+ERROR cluster 0 refcount=0 reference=1 -+Rebuilding refcount structure -+The following inconsistencies were found and repaired: -+ -+ 0 leaked clusters -+ 1 corruptions -+ -+Double checking the fixed image now... -+No errors were found on the image. -+ -+OK: Reftable is where we expect it -+ -+--- Rebuilding refcount structures on block devices --- -+ -+{ "execute": "qmp_capabilities" } -+{"return": {}} -+{ "execute": "blockdev-create", -+ "arguments": { -+ "job-id": "create", -+ "options": { -+ "driver": "IMGFMT", -+ "file": "file", -+ "size": 67108864, -+ "cluster-size": 512 -+ } } } -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "create"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "create"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "create"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "create"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "create"}} -+{ "execute": "job-dismiss", "arguments": { "id": "create" } } -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}} -+{"return": {}} -+{ "execute": "quit" } -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+ -+wrote 65536/65536 bytes at offset 0 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+ERROR cluster 0 refcount=0 reference=1 -+Rebuilding refcount structure -+The following inconsistencies were found and repaired: -+ -+ 0 leaked clusters -+ 1 corruptions -+ - Double checking the fixed image now... - No errors were found on the image. - *** done --- -2.31.1 - diff --git a/SOURCES/kvm-iotests-Filter-child-node-information.patch b/SOURCES/kvm-iotests-Filter-child-node-information.patch new file mode 100644 index 0000000..12eee3a --- /dev/null +++ b/SOURCES/kvm-iotests-Filter-child-node-information.patch @@ -0,0 +1,171 @@ +From 3102e62f80757729c97e58e2b3d62a6a9de952a7 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:27:01 +0200 +Subject: [PATCH 14/20] iotests: Filter child node information + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [9/12] 0b0a42d54397791f7f149e53c9175b7863707e70 (hreitz/qemu-kvm-c-9-s) + +Before we let qemu-img info print child node information, have +common.filter, common.rc, and iotests.py filter it from the test output +so we get as few reference output changes as possible. 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-10-hreitz@redhat.com> +Tested-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit bcc6777ad6facede73c0cf8b1700045bf4365f7d) +Signed-off-by: Hanna Czenczek +--- + tests/qemu-iotests/common.filter | 22 ++++++++++++++-------- + tests/qemu-iotests/common.rc | 22 ++++++++++++++-------- + tests/qemu-iotests/iotests.py | 18 +++++++++++++++--- + 3 files changed, 43 insertions(+), 19 deletions(-) + +diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter +index 6a13757177..6ddda2ee64 100644 +--- a/tests/qemu-iotests/common.filter ++++ b/tests/qemu-iotests/common.filter +@@ -224,6 +224,7 @@ _filter_img_info() + + discard=0 + regex_json_spec_start='^ *"format-specific": \{' ++ regex_json_child_start='^ *"children": \[' + gsed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ + -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ + -e "s#$TEST_DIR#TEST_DIR#g" \ +@@ -252,20 +253,25 @@ _filter_img_info() + -e 's/\(compression type: \)\(zlib\|zstd\)/\1COMPRESSION_TYPE/' \ + -e "s/uuid: [-a-f0-9]\\+/uuid: 00000000-0000-0000-0000-000000000000/" | \ + while IFS='' read -r line; do +- if [[ $format_specific == 1 ]]; then +- discard=0 +- elif [[ $line == "Format specific information:" ]]; then +- discard=1 +- elif [[ $line =~ $regex_json_spec_start ]]; then +- discard=2 +- regex_json_spec_end="^${line%%[^ ]*}\\},? *$" ++ if [[ $discard == 0 ]]; then ++ if [[ $format_specific == 0 && $line == "Format specific information:" ]]; then ++ discard=1 ++ elif [[ $line =~ "Child node '/" ]]; then ++ discard=1 ++ elif [[ $line =~ $regex_json_spec_start ]]; then ++ discard=2 ++ regex_json_end="^${line%%[^ ]*}\\},? *$" ++ elif [[ $line =~ $regex_json_child_start ]]; then ++ discard=2 ++ regex_json_end="^${line%%[^ ]*}\\],? *$" ++ fi + fi + if [[ $discard == 0 ]]; then + echo "$line" + elif [[ $discard == 1 && ! 
$line ]]; then + echo + discard=0 +- elif [[ $discard == 2 && $line =~ $regex_json_spec_end ]]; then ++ elif [[ $discard == 2 && $line =~ $regex_json_end ]]; then + discard=0 + fi + done +diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc +index db757025cb..f4476b62f7 100644 +--- a/tests/qemu-iotests/common.rc ++++ b/tests/qemu-iotests/common.rc +@@ -711,6 +711,7 @@ _img_info() + + discard=0 + regex_json_spec_start='^ *"format-specific": \{' ++ regex_json_child_start='^ *"children": \[' + $QEMU_IMG info $QEMU_IMG_EXTRA_ARGS "$@" "$TEST_IMG" 2>&1 | \ + sed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ + -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ +@@ -721,20 +722,25 @@ _img_info() + -e "/^disk size:/ D" \ + -e "/actual-size/ D" | \ + while IFS='' read -r line; do +- if [[ $format_specific == 1 ]]; then +- discard=0 +- elif [[ $line == "Format specific information:" ]]; then +- discard=1 +- elif [[ $line =~ $regex_json_spec_start ]]; then +- discard=2 +- regex_json_spec_end="^${line%%[^ ]*}\\},? *$" ++ if [[ $discard == 0 ]]; then ++ if [[ $format_specific == 0 && $line == "Format specific information:" ]]; then ++ discard=1 ++ elif [[ $line =~ "Child node '/" ]]; then ++ discard=1 ++ elif [[ $format_specific == 0 && $line =~ $regex_json_spec_start ]]; then ++ discard=2 ++ regex_json_end="^${line%%[^ ]*}\\},? *$" ++ elif [[ $line =~ $regex_json_child_start ]]; then ++ discard=2 ++ regex_json_end="^${line%%[^ ]*}\\],? *$" ++ fi + fi + if [[ $discard == 0 ]]; then + echo "$line" + elif [[ $discard == 1 && ! 
$line ]]; then + echo + discard=0 +- elif [[ $discard == 2 && $line =~ $regex_json_spec_end ]]; then ++ elif [[ $discard == 2 && $line =~ $regex_json_end ]]; then + discard=0 + fi + done +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index da7d6637e1..94aeb3f3b2 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -329,7 +329,7 @@ def qemu_img_log(*args: str, check: bool = True + + def img_info_log(filename: str, filter_path: Optional[str] = None, + use_image_opts: bool = False, extra_args: Sequence[str] = (), +- check: bool = True, ++ check: bool = True, drop_child_info: bool = True, + ) -> None: + args = ['info'] + if use_image_opts: +@@ -342,7 +342,7 @@ def img_info_log(filename: str, filter_path: Optional[str] = None, + output = qemu_img(*args, check=check).stdout + if not filter_path: + filter_path = filename +- log(filter_img_info(output, filter_path)) ++ log(filter_img_info(output, filter_path, drop_child_info)) + + def qemu_io_wrap_args(args: Sequence[str]) -> List[str]: + if '-f' in args or '--image-opts' in args: +@@ -642,11 +642,23 @@ def _filter(_key, value): + def filter_generated_node_ids(msg): + return re.sub("#block[0-9]+", "NODE_NAME", msg) + +-def filter_img_info(output, filename): ++def filter_img_info(output: str, filename: str, ++ drop_child_info: bool = True) -> str: + lines = [] ++ drop_indented = False + for line in output.split('\n'): + if 'disk size' in line or 'actual-size' in line: + continue ++ ++ # Drop child node info ++ if drop_indented: ++ if line.startswith(' '): ++ continue ++ drop_indented = False ++ if drop_child_info and "Child node '/" in line: ++ drop_indented = True ++ continue ++ + line = line.replace(filename, 'TEST_IMG') + line = filter_testfiles(line) + line = line.replace(imgfmt, 'IMGFMT') +-- +2.31.1 + diff --git a/SOURCES/kvm-kvm-Atomic-memslot-updates.patch b/SOURCES/kvm-kvm-Atomic-memslot-updates.patch new file mode 100644 index 0000000..14e9e32 --- 
/dev/null +++ b/SOURCES/kvm-kvm-Atomic-memslot-updates.patch @@ -0,0 +1,286 @@ +From e13fdc97ff05cdee46c112c2dee70b6ef33e7fa7 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 16 Jan 2023 07:17:31 -0500 +Subject: [PATCH 31/31] kvm: Atomic memslot updates + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 138: accel: introduce accelerator blocker API +RH-Bugzilla: 1979276 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/3] 9f03181ebcad2474fbe859acbce7b9891caa216b (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276 + +commit f39b7d2b96e3e73c01bb678cd096f7baf0b9ab39 +Author: David Hildenbrand +Date: Fri Nov 11 10:47:58 2022 -0500 + + kvm: Atomic memslot updates + + If we update an existing memslot (e.g., resize, split), we temporarily + remove the memslot to re-add it immediately afterwards. These updates + are not atomic, especially not for KVM VCPU threads, such that we can + get spurious faults. + + Let's inhibit most KVM ioctls while performing relevant updates, such + that we can perform the update just as if it would happen atomically + without additional kernel support. + + We capture the add/del changes and apply them in the notifier commit + stage instead. There, we can check for overlaps and perform the ioctl + inhibiting only if really required (-> overlap). + + To keep things simple we don't perform additional checks that wouldn't + actually result in an overlap -- such as !RAM memory regions in some + cases (see kvm_set_phys_mem()). + + To minimize cache-line bouncing, use a separate indicator + (in_ioctl_lock) per CPU. Also, make sure to hold the kvm_slots_lock + while performing both actions (removing+re-adding). + + We have to wait until all IOCTLs were exited and block new ones from + getting executed. 
+ + This approach cannot result in a deadlock as long as the inhibitor does + not hold any locks that might hinder an IOCTL from getting finished and + exited - something fairly unusual. The inhibitor will always hold the BQL. + + AFAIKs, one possible candidate would be userfaultfd. If a page cannot be + placed (e.g., during postcopy), because we're waiting for a lock, or if the + userfaultfd thread cannot process a fault, because it is waiting for a + lock, there could be a deadlock. However, the BQL is not applicable here, + because any other guest memory access while holding the BQL would already + result in a deadlock. + + Nothing else in the kernel should block forever and wait for userspace + intervention. + + Note: pause_all_vcpus()/resume_all_vcpus() or + start_exclusive()/end_exclusive() cannot be used, as they either drop + the BQL or require to be called without the BQL - something inhibitors + cannot handle. We need a low-level locking mechanism that is + deadlock-free even when not releasing the BQL. 
+ + Signed-off-by: David Hildenbrand + Signed-off-by: Emanuele Giuseppe Esposito + Tested-by: Emanuele Giuseppe Esposito + Message-Id: <20221111154758.1372674-4-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + accel/kvm/kvm-all.c | 101 ++++++++++++++++++++++++++++++++++----- + include/sysemu/kvm_int.h | 8 ++++ + 2 files changed, 98 insertions(+), 11 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index ff660fd469..39ed30ab59 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -31,6 +31,7 @@ + #include "sysemu/kvm_int.h" + #include "sysemu/runstate.h" + #include "sysemu/cpus.h" ++#include "sysemu/accel-blocker.h" + #include "qemu/bswap.h" + #include "exec/memory.h" + #include "exec/ram_addr.h" +@@ -46,6 +47,7 @@ + #include "sysemu/hw_accel.h" + #include "kvm-cpus.h" + #include "sysemu/dirtylimit.h" ++#include "qemu/range.h" + + #include "hw/boards.h" + #include "monitor/stats.h" +@@ -1292,6 +1294,7 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) + kvm_max_slot_size = max_slot_size; + } + ++/* Called with KVMMemoryListener.slots_lock held */ + static void kvm_set_phys_mem(KVMMemoryListener *kml, + MemoryRegionSection *section, bool add) + { +@@ -1326,14 +1329,12 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + ram = memory_region_get_ram_ptr(mr) + mr_offset; + ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset; + +- kvm_slots_lock(); +- + if (!add) { + do { + slot_size = MIN(kvm_max_slot_size, size); + mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); + if (!mem) { +- goto out; ++ return; + } + if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { + /* +@@ -1371,7 +1372,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + start_addr += slot_size; + size -= slot_size; + } while (size); +- goto out; ++ return; + } + + /* register the new slot */ +@@ -1396,9 +1397,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + ram += slot_size; + 
size -= slot_size; + } while (size); +- +-out: +- kvm_slots_unlock(); + } + + static void *kvm_dirty_ring_reaper_thread(void *data) +@@ -1455,18 +1453,95 @@ static void kvm_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); ++ KVMMemoryUpdate *update; ++ ++ update = g_new0(KVMMemoryUpdate, 1); ++ update->section = *section; + +- memory_region_ref(section->mr); +- kvm_set_phys_mem(kml, section, true); ++ QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next); + } + + static void kvm_region_del(MemoryListener *listener, + MemoryRegionSection *section) + { + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); ++ KVMMemoryUpdate *update; ++ ++ update = g_new0(KVMMemoryUpdate, 1); ++ update->section = *section; ++ ++ QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next); ++} ++ ++static void kvm_region_commit(MemoryListener *listener) ++{ ++ KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, ++ listener); ++ KVMMemoryUpdate *u1, *u2; ++ bool need_inhibit = false; ++ ++ if (QSIMPLEQ_EMPTY(&kml->transaction_add) && ++ QSIMPLEQ_EMPTY(&kml->transaction_del)) { ++ return; ++ } ++ ++ /* ++ * We have to be careful when regions to add overlap with ranges to remove. ++ * We have to simulate atomic KVM memslot updates by making sure no ioctl() ++ * is currently active. ++ * ++ * The lists are order by addresses, so it's easy to find overlaps. 
++ */ ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); ++ u2 = QSIMPLEQ_FIRST(&kml->transaction_add); ++ while (u1 && u2) { ++ Range r1, r2; ++ ++ range_init_nofail(&r1, u1->section.offset_within_address_space, ++ int128_get64(u1->section.size)); ++ range_init_nofail(&r2, u2->section.offset_within_address_space, ++ int128_get64(u2->section.size)); ++ ++ if (range_overlaps_range(&r1, &r2)) { ++ need_inhibit = true; ++ break; ++ } ++ if (range_lob(&r1) < range_lob(&r2)) { ++ u1 = QSIMPLEQ_NEXT(u1, next); ++ } else { ++ u2 = QSIMPLEQ_NEXT(u2, next); ++ } ++ } ++ ++ kvm_slots_lock(); ++ if (need_inhibit) { ++ accel_ioctl_inhibit_begin(); ++ } ++ ++ /* Remove all memslots before adding the new ones. */ ++ while (!QSIMPLEQ_EMPTY(&kml->transaction_del)) { ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); ++ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_del, next); + +- kvm_set_phys_mem(kml, section, false); +- memory_region_unref(section->mr); ++ kvm_set_phys_mem(kml, &u1->section, false); ++ memory_region_unref(u1->section.mr); ++ ++ g_free(u1); ++ } ++ while (!QSIMPLEQ_EMPTY(&kml->transaction_add)) { ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_add); ++ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_add, next); ++ ++ memory_region_ref(u1->section.mr); ++ kvm_set_phys_mem(kml, &u1->section, true); ++ ++ g_free(u1); ++ } ++ ++ if (need_inhibit) { ++ accel_ioctl_inhibit_end(); ++ } ++ kvm_slots_unlock(); + } + + static void kvm_log_sync(MemoryListener *listener, +@@ -1610,8 +1685,12 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, + kml->slots[i].slot = i; + } + ++ QSIMPLEQ_INIT(&kml->transaction_add); ++ QSIMPLEQ_INIT(&kml->transaction_del); ++ + kml->listener.region_add = kvm_region_add; + kml->listener.region_del = kvm_region_del; ++ kml->listener.commit = kvm_region_commit; + kml->listener.log_start = kvm_log_start; + kml->listener.log_stop = kvm_log_stop; + kml->listener.priority = 10; +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 
3b4adcdc10..60b520a13e 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -12,6 +12,7 @@ + #include "exec/memory.h" + #include "qapi/qapi-types-common.h" + #include "qemu/accel.h" ++#include "qemu/queue.h" + #include "sysemu/kvm.h" + + typedef struct KVMSlot +@@ -31,10 +32,17 @@ typedef struct KVMSlot + ram_addr_t ram_start_offset; + } KVMSlot; + ++typedef struct KVMMemoryUpdate { ++ QSIMPLEQ_ENTRY(KVMMemoryUpdate) next; ++ MemoryRegionSection section; ++} KVMMemoryUpdate; ++ + typedef struct KVMMemoryListener { + MemoryListener listener; + KVMSlot *slots; + int as_id; ++ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add; ++ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; + } KVMMemoryListener; + + #define KVM_MSI_HASHTAB_SIZE 256 +-- +2.31.1 + diff --git a/SOURCES/kvm-kvm-don-t-use-perror-without-useful-errno.patch b/SOURCES/kvm-kvm-don-t-use-perror-without-useful-errno.patch deleted file mode 100644 index a78c089..0000000 --- a/SOURCES/kvm-kvm-don-t-use-perror-without-useful-errno.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 9ddefaedf423ec03eadaf17496c14e0d7b2381c8 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Thu, 28 Jul 2022 16:24:46 +0200 -Subject: [PATCH 30/32] kvm: don't use perror() without useful errno - -RH-Author: Cornelia Huck -RH-MergeRequest: 110: kvm: don't use perror() without useful errno -RH-Commit: [1/1] 20e51aac6767c1f89f74c7d692d1fb7689eff5f0 (cohuck/qemu-kvm-c9s) -RH-Bugzilla: 2095608 -RH-Acked-by: Eric Auger -RH-Acked-by: Thomas Huth -RH-Acked-by: Gavin Shan - -perror() is designed to append the decoded errno value to a -string. This, however, only makes sense if we called something that -actually sets errno prior to that. - -For the callers that check for split irqchip support that is not the -case, and we end up with confusing error messages that end in -"success". Use error_report() instead. 
- -Signed-off-by: Cornelia Huck -Message-Id: <20220728142446.438177-1-cohuck@redhat.com> -Signed-off-by: Paolo Bonzini - -https://bugzilla.redhat.com/show_bug.cgi?id=2095608 -(cherry picked from commit 47c182fe8b03c0c40059fb95840923e65c9bdb4f) -Signed-off-by: Cornelia Huck ---- - accel/kvm/kvm-all.c | 2 +- - target/arm/kvm.c | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 5f1377ca04..e9c7947640 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -2254,7 +2254,7 @@ static void kvm_irqchip_create(KVMState *s) - ret = kvm_arch_irqchip_create(s); - if (ret == 0) { - if (s->kernel_irqchip_split == ON_OFF_AUTO_ON) { -- perror("Split IRQ chip mode not supported."); -+ error_report("Split IRQ chip mode not supported."); - exit(1); - } else { - ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP); -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index bbf1ce7ba3..0a2ba1f8e3 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -960,7 +960,7 @@ void kvm_arch_init_irq_routing(KVMState *s) - int kvm_arch_irqchip_create(KVMState *s) - { - if (kvm_kernel_irqchip_split()) { -- perror("-machine kernel_irqchip=split is not supported on ARM."); -+ error_report("-machine kernel_irqchip=split is not supported on ARM."); - exit(1); - } - --- -2.31.1 - diff --git a/SOURCES/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch b/SOURCES/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch deleted file mode 100644 index f12b8ec..0000000 --- a/SOURCES/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 49d9c9dced7278517105e9cfec34ea4af716432d Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 9 Jun 2022 17:47:12 +0100 -Subject: [PATCH 6/6] linux-aio: explain why max batch is checked in - laio_io_unplug() - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 102: linux-aio: fix unbalanced plugged counter in laio_io_unplug() -RH-Commit: 
[2/2] b3d6421086bde50d4baad2343b2df89c5f66950e (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2092788 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -It may not be obvious why laio_io_unplug() checks max batch. I discussed -this with Stefano and have added a comment summarizing the reason. - -Cc: Stefano Garzarella -Cc: Kevin Wolf -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Stefano Garzarella -Message-id: 20220609164712.1539045-3-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 99b969fbe105117f5af6060d3afef40ca39cc9c1) -Signed-off-by: Stefan Hajnoczi ---- - block/linux-aio.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/block/linux-aio.c b/block/linux-aio.c -index 6078da7e42..9c2393a2f7 100644 ---- a/block/linux-aio.c -+++ b/block/linux-aio.c -@@ -365,6 +365,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, - assert(s->io_q.plugged); - s->io_q.plugged--; - -+ /* -+ * Why max batch checking is performed here: -+ * Another BDS may have queued requests with a higher dev_max_batch and -+ * therefore in_queue could now exceed our dev_max_batch. Re-check the max -+ * batch so we can honor our device's dev_max_batch. 
-+ */ - if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || - (!s->io_q.plugged && - !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { --- -2.31.1 - diff --git a/SOURCES/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch b/SOURCES/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch deleted file mode 100644 index ed9b5ee..0000000 --- a/SOURCES/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch +++ /dev/null @@ -1,56 +0,0 @@ -From e7326c3a7e0fc022aa5c0ae07bc1e19ad1b6f2ed Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 9 Jun 2022 17:47:11 +0100 -Subject: [PATCH 5/6] linux-aio: fix unbalanced plugged counter in - laio_io_unplug() - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 102: linux-aio: fix unbalanced plugged counter in laio_io_unplug() -RH-Commit: [1/2] 8a71da371c72521f1d70b8767ee564575e0d522b (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2092788 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -Every laio_io_plug() call has a matching laio_io_unplug() call. There is -a plugged counter that tracks the number of levels of plugging and -allows for nesting. - -The plugged counter must reflect the balance between laio_io_plug() and -laio_io_unplug() calls accurately. Otherwise I/O stalls occur since -io_submit(2) calls are skipped while plugged. - -Reported-by: Nikolay Tenev -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Stefano Garzarella -Message-id: 20220609164712.1539045-2-stefanha@redhat.com -Cc: Stefano Garzarella -Fixes: 68d7946648 ("linux-aio: add `dev_max_batch` parameter to laio_io_unplug()") -[Stefano Garzarella suggested adding a Fixes tag. 
---Stefan] -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit f387cac5af030a58ac5a0dacf64cab5e5a4fe5c7) -Signed-off-by: Stefan Hajnoczi ---- - block/linux-aio.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/block/linux-aio.c b/block/linux-aio.c -index 4c423fcccf..6078da7e42 100644 ---- a/block/linux-aio.c -+++ b/block/linux-aio.c -@@ -363,8 +363,10 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, - uint64_t dev_max_batch) - { - assert(s->io_q.plugged); -+ s->io_q.plugged--; -+ - if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || -- (--s->io_q.plugged == 0 && -+ (!s->io_q.plugged && - !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { - ioq_submit(s); - } --- -2.31.1 - diff --git a/SOURCES/kvm-linux-headers-Update-to-v6.1.patch b/SOURCES/kvm-linux-headers-Update-to-v6.1.patch new file mode 100644 index 0000000..6ce9c7d --- /dev/null +++ b/SOURCES/kvm-linux-headers-Update-to-v6.1.patch @@ -0,0 +1,577 @@ +From cbe35c6a4794107ea1ddecf0b381ba4b1c8799f5 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 7 Feb 2023 15:57:10 -0500 +Subject: [PATCH 3/8] linux-headers: Update to v6.1 + +RH-Author: Peter Xu +RH-MergeRequest: 149: Support /dev/userfaultfd +RH-Bugzilla: 2158704 +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: quintela1 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/3] 15d97026e802a0f01b5f80f81fb4414dc69b2b2d (peterx/qemu-kvm) + +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Acked-by: Cornelia Huck +Signed-off-by: Juan Quintela +(cherry picked from commit 93e0932b7be2498024cd6ba8446a0fa2cb1769bc) +Signed-off-by: Peter Xu +--- + include/standard-headers/drm/drm_fourcc.h | 34 ++++- + include/standard-headers/linux/ethtool.h | 63 +++++++- + include/standard-headers/linux/fuse.h | 6 +- + .../linux/input-event-codes.h | 1 + + include/standard-headers/linux/virtio_blk.h | 19 +++ + linux-headers/asm-generic/hugetlb_encode.h | 26 ++-- + linux-headers/asm-generic/mman-common.h | 2 + + linux-headers/asm-mips/mman.h | 2 + + linux-headers/asm-riscv/kvm.h | 4 + + linux-headers/linux/kvm.h | 1 + + linux-headers/linux/psci.h | 14 ++ + linux-headers/linux/userfaultfd.h | 4 + + linux-headers/linux/vfio.h | 142 ++++++++++++++++++ + 13 files changed, 298 insertions(+), 20 deletions(-) + +diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h +index 48b620cbef..b868488f93 100644 +--- a/include/standard-headers/drm/drm_fourcc.h ++++ b/include/standard-headers/drm/drm_fourcc.h +@@ -98,18 +98,42 @@ extern "C" { + #define DRM_FORMAT_INVALID 0 + + /* color index */ ++#define DRM_FORMAT_C1 fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */ ++#define DRM_FORMAT_C2 fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four pixels/byte */ ++#define DRM_FORMAT_C4 fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */ + #define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */ + +-/* 8 bpp Red */ ++/* 1 bpp Darkness (inverse relationship between channel value and brightness) */ ++#define DRM_FORMAT_D1 fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */ ++ ++/* 2 bpp Darkness (inverse 
relationship between channel value and brightness) */ ++#define DRM_FORMAT_D2 fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four pixels/byte */ ++ ++/* 4 bpp Darkness (inverse relationship between channel value and brightness) */ ++#define DRM_FORMAT_D4 fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */ ++ ++/* 8 bpp Darkness (inverse relationship between channel value and brightness) */ ++#define DRM_FORMAT_D8 fourcc_code('D', '8', ' ', ' ') /* [7:0] D */ ++ ++/* 1 bpp Red (direct relationship between channel value and brightness) */ ++#define DRM_FORMAT_R1 fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */ ++ ++/* 2 bpp Red (direct relationship between channel value and brightness) */ ++#define DRM_FORMAT_R2 fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four pixels/byte */ ++ ++/* 4 bpp Red (direct relationship between channel value and brightness) */ ++#define DRM_FORMAT_R4 fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 4:4 two pixels/byte */ ++ ++/* 8 bpp Red (direct relationship between channel value and brightness) */ + #define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ + +-/* 10 bpp Red */ ++/* 10 bpp Red (direct relationship between channel value and brightness) */ + #define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */ + +-/* 12 bpp Red */ ++/* 12 bpp Red (direct relationship between channel value and brightness) */ + #define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */ + +-/* 16 bpp Red */ ++/* 16 bpp Red (direct relationship between channel value and brightness) */ + #define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */ + + /* 16 bpp RG */ +@@ -204,7 +228,9 @@ extern "C" { + #define DRM_FORMAT_VYUY fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */ + + #define DRM_FORMAT_AYUV fourcc_code('A', 'Y', 'U', 'V') 
/* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */ ++#define DRM_FORMAT_AVUY8888 fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little endian */ + #define DRM_FORMAT_XYUV8888 fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */ ++#define DRM_FORMAT_XVUY8888 fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little endian */ + #define DRM_FORMAT_VUY888 fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */ + #define DRM_FORMAT_VUY101010 fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */ + +diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h +index 4537da20cc..1dc56cdc0a 100644 +--- a/include/standard-headers/linux/ethtool.h ++++ b/include/standard-headers/linux/ethtool.h +@@ -736,6 +736,51 @@ enum ethtool_module_power_mode { + ETHTOOL_MODULE_POWER_MODE_HIGH, + }; + ++/** ++ * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE ++ * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState ++ * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are ++ * unknown ++ * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled ++ * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled ++ */ ++enum ethtool_podl_pse_admin_state { ++ ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1, ++ ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED, ++ ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED, ++}; ++ ++/** ++ * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE. 
++ * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus: ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is ++ * asserted true when the PoDL PSE state diagram variable mr_pse_enable is ++ * false" ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is ++ * asserted true when either of the PSE state diagram variables ++ * pi_detecting or pi_classifying is true." ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower” ++ * is asserted true when the PoDL PSE state diagram variable pi_powered is ++ * true." ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted ++ * true when the PoDL PSE state diagram variable pi_sleeping is true." ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true ++ * when the logical combination of the PoDL PSE state diagram variables ++ * pi_prebiased*!pi_sleeping is true." ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted ++ * true when the PoDL PSE state diagram variable overload_held is true." ++ */ ++enum ethtool_podl_pse_pw_d_status { ++ ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR, ++}; ++ + /** + * struct ethtool_gstrings - string set for data tagging + * @cmd: Command number = %ETHTOOL_GSTRINGS +@@ -1840,6 +1885,20 @@ static inline int ethtool_validate_duplex(uint8_t duplex) + #define MASTER_SLAVE_STATE_SLAVE 3 + #define MASTER_SLAVE_STATE_ERR 4 + ++/* These are used to throttle the rate of data on the phy interface when the ++ * native speed of the interface is higher than the link speed. These should ++ * not be used for phy interfaces which natively support multiple speeds (e.g. 
++ * MII or SGMII). ++ */ ++/* No rate matching performed. */ ++#define RATE_MATCH_NONE 0 ++/* The phy sends pause frames to throttle the MAC. */ ++#define RATE_MATCH_PAUSE 1 ++/* The phy asserts CRS to prevent the MAC from transmitting. */ ++#define RATE_MATCH_CRS 2 ++/* The MAC is programmed with a sufficiently-large IPG. */ ++#define RATE_MATCH_OPEN_LOOP 3 ++ + /* Which connector port. */ + #define PORT_TP 0x00 + #define PORT_AUI 0x01 +@@ -2033,8 +2092,8 @@ enum ethtool_reset_flags { + * reported consistently by PHYLIB. Read-only. + * @master_slave_cfg: Master/slave port mode. + * @master_slave_state: Master/slave port state. ++ * @rate_matching: Rate adaptation performed by the PHY + * @reserved: Reserved for future use; see the note on reserved space. +- * @reserved1: Reserved for future use; see the note on reserved space. + * @link_mode_masks: Variable length bitmaps. + * + * If autonegotiation is disabled, the speed and @duplex represent the +@@ -2085,7 +2144,7 @@ struct ethtool_link_settings { + uint8_t transceiver; + uint8_t master_slave_cfg; + uint8_t master_slave_state; +- uint8_t reserved1[1]; ++ uint8_t rate_matching; + uint32_t reserved[7]; + uint32_t link_mode_masks[]; + /* layout of link_mode_masks fields: +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index bda06258be..713d259768 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -194,6 +194,9 @@ + * - add FUSE_SECURITY_CTX init flag + * - add security context to create, mkdir, symlink, and mknod requests + * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX ++ * ++ * 7.37 ++ * - add FUSE_TMPFILE + */ + + #ifndef _LINUX_FUSE_H +@@ -225,7 +228,7 @@ + #define FUSE_KERNEL_VERSION 7 + + /** Minor version number of this interface */ +-#define FUSE_KERNEL_MINOR_VERSION 36 ++#define FUSE_KERNEL_MINOR_VERSION 37 + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -533,6 +536,7 @@ enum fuse_opcode { + 
FUSE_SETUPMAPPING = 48, + FUSE_REMOVEMAPPING = 49, + FUSE_SYNCFS = 50, ++ FUSE_TMPFILE = 51, + + /* CUSE specific operations */ + CUSE_INIT = 4096, +diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h +index 50790aee5a..815f7a1dff 100644 +--- a/include/standard-headers/linux/input-event-codes.h ++++ b/include/standard-headers/linux/input-event-codes.h +@@ -862,6 +862,7 @@ + #define ABS_TOOL_WIDTH 0x1c + + #define ABS_VOLUME 0x20 ++#define ABS_PROFILE 0x21 + + #define ABS_MISC 0x28 + +diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h +index 2dcc90826a..e81715cd70 100644 +--- a/include/standard-headers/linux/virtio_blk.h ++++ b/include/standard-headers/linux/virtio_blk.h +@@ -40,6 +40,7 @@ + #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ + #define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ + #define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ ++#define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */ + + /* Legacy feature bits */ + #ifndef VIRTIO_BLK_NO_LEGACY +@@ -119,6 +120,21 @@ struct virtio_blk_config { + uint8_t write_zeroes_may_unmap; + + uint8_t unused1[3]; ++ ++ /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */ ++ /* ++ * The maximum secure erase sectors (in 512-byte sectors) for ++ * one segment. ++ */ ++ __virtio32 max_secure_erase_sectors; ++ /* ++ * The maximum number of secure erase segments in a ++ * secure erase command. ++ */ ++ __virtio32 max_secure_erase_seg; ++ /* Secure erase commands must be aligned to this number of sectors. */ ++ __virtio32 secure_erase_sector_alignment; ++ + } QEMU_PACKED; + + /* +@@ -153,6 +169,9 @@ struct virtio_blk_config { + /* Write zeroes command */ + #define VIRTIO_BLK_T_WRITE_ZEROES 13 + ++/* Secure erase command */ ++#define VIRTIO_BLK_T_SECURE_ERASE 14 ++ + #ifndef VIRTIO_BLK_NO_LEGACY + /* Barrier before this op. 
*/ + #define VIRTIO_BLK_T_BARRIER 0x80000000 +diff --git a/linux-headers/asm-generic/hugetlb_encode.h b/linux-headers/asm-generic/hugetlb_encode.h +index 4f3d5aaa11..de687009bf 100644 +--- a/linux-headers/asm-generic/hugetlb_encode.h ++++ b/linux-headers/asm-generic/hugetlb_encode.h +@@ -20,18 +20,18 @@ + #define HUGETLB_FLAG_ENCODE_SHIFT 26 + #define HUGETLB_FLAG_ENCODE_MASK 0x3f + +-#define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_16KB (14U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_64KB (16U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_512KB (19U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_1MB (20U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_2MB (21U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_8MB (23U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_16MB (24U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_32MB (25U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_256MB (28U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_512MB (29U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define 
HUGETLB_FLAG_ENCODE_1GB (30U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_2GB (31U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_16GB (34U << HUGETLB_FLAG_ENCODE_SHIFT) + + #endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */ +diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h +index 6c1aa92a92..6ce1f1ceb4 100644 +--- a/linux-headers/asm-generic/mman-common.h ++++ b/linux-headers/asm-generic/mman-common.h +@@ -77,6 +77,8 @@ + + #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ + ++#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ ++ + /* compatibility flags */ + #define MAP_FILE 0 + +diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h +index 1be428663c..c6e1fc77c9 100644 +--- a/linux-headers/asm-mips/mman.h ++++ b/linux-headers/asm-mips/mman.h +@@ -103,6 +103,8 @@ + + #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ + ++#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ ++ + /* compatibility flags */ + #define MAP_FILE 0 + +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index 7351417afd..8985ff234c 100644 +--- a/linux-headers/asm-riscv/kvm.h ++++ b/linux-headers/asm-riscv/kvm.h +@@ -48,6 +48,7 @@ struct kvm_sregs { + /* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ + struct kvm_riscv_config { + unsigned long isa; ++ unsigned long zicbom_block_size; + }; + + /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +@@ -98,6 +99,9 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_M, + KVM_RISCV_ISA_EXT_SVPBMT, + KVM_RISCV_ISA_EXT_SSTC, ++ KVM_RISCV_ISA_EXT_SVINVAL, ++ KVM_RISCV_ISA_EXT_ZIHINTPAUSE, ++ KVM_RISCV_ISA_EXT_ZICBOM, + KVM_RISCV_ISA_EXT_MAX, + }; + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index ebdafa576d..b2783c5202 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1175,6 +1175,7 @@ struct 
kvm_ppc_resize_hpt { + #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 + #define KVM_CAP_S390_ZPCI_OP 221 + #define KVM_CAP_S390_CPU_TOPOLOGY 222 ++#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 + + #ifdef KVM_CAP_IRQ_ROUTING + +diff --git a/linux-headers/linux/psci.h b/linux-headers/linux/psci.h +index 213b2a0f70..e60dfd8907 100644 +--- a/linux-headers/linux/psci.h ++++ b/linux-headers/linux/psci.h +@@ -48,12 +48,26 @@ + #define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7) + + #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) ++#define PSCI_1_0_FN_CPU_FREEZE PSCI_0_2_FN(11) ++#define PSCI_1_0_FN_CPU_DEFAULT_SUSPEND PSCI_0_2_FN(12) ++#define PSCI_1_0_FN_NODE_HW_STATE PSCI_0_2_FN(13) + #define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) + #define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) ++#define PSCI_1_0_FN_STAT_RESIDENCY PSCI_0_2_FN(16) ++#define PSCI_1_0_FN_STAT_COUNT PSCI_0_2_FN(17) ++ + #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) ++#define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) ++#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19) + ++#define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) ++#define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) + #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) ++#define PSCI_1_0_FN64_STAT_RESIDENCY PSCI_0_2_FN64(16) ++#define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17) ++ + #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) ++#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19) + + /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ + #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff +diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h +index a3a377cd44..ba5d0df52f 100644 +--- a/linux-headers/linux/userfaultfd.h ++++ b/linux-headers/linux/userfaultfd.h +@@ -12,6 +12,10 @@ + + #include + ++/* ioctls for /dev/userfaultfd */ ++#define USERFAULTFD_IOC 0xAA ++#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00) ++ + /* + * If the UFFDIO_API is upgraded 
someday, the UFFDIO_UNREGISTER and + * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index ede44b5572..bee7e42198 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -986,6 +986,148 @@ enum vfio_device_mig_state { + VFIO_DEVICE_STATE_RUNNING_P2P = 5, + }; + ++/* ++ * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power ++ * state with the platform-based power management. Device use of lower power ++ * states depends on factors managed by the runtime power management core, ++ * including system level support and coordinating support among dependent ++ * devices. Enabling device low power entry does not guarantee lower power ++ * usage by the device, nor is a mechanism provided through this feature to ++ * know the current power state of the device. If any device access happens ++ * (either from the host or through the vfio uAPI) when the device is in the ++ * low power state, then the host will move the device out of the low power ++ * state as necessary prior to the access. Once the access is completed, the ++ * device may re-enter the low power state. For single shot low power support ++ * with wake-up notification, see ++ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below. Access to mmap'd ++ * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after ++ * calling LOW_POWER_EXIT. ++ */ ++#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3 ++ ++/* ++ * This device feature has the same behavior as ++ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user ++ * provides an eventfd for wake-up notification. When the device moves out of ++ * the low power state for the wake-up, the host will not allow the device to ++ * re-enter a low power state without a subsequent user call to one of the low ++ * power entry device feature IOCTLs. 
Access to mmap'd device regions is ++ * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the ++ * low power exit. The low power exit can happen either through LOW_POWER_EXIT ++ * or through any other access (where the wake-up notification has been ++ * generated). The access to mmap'd device regions will not trigger low power ++ * exit. ++ * ++ * The notification through the provided eventfd will be generated only when ++ * the device has entered and is resumed from a low power state after ++ * calling this device feature IOCTL. A device that has not entered low power ++ * state, as managed through the runtime power management core, will not ++ * generate a notification through the provided eventfd on access. Calling the ++ * LOW_POWER_EXIT feature is optional in the case where notification has been ++ * signaled on the provided eventfd that a resume from low power has occurred. ++ */ ++struct vfio_device_low_power_entry_with_wakeup { ++ __s32 wakeup_eventfd; ++ __u32 reserved; ++}; ++ ++#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4 ++ ++/* ++ * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as ++ * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or ++ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features. ++ * This device feature IOCTL may itself generate a wakeup eventfd notification ++ * in the latter case if the device had previously entered a low power state. ++ */ ++#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5 ++ ++/* ++ * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging. ++ * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports ++ * DMA logging. ++ * ++ * DMA logging allows a device to internally record what DMAs the device is ++ * initiating and report them back to userspace. It is part of the VFIO ++ * migration infrastructure that allows implementing dirty page tracking ++ * during the pre copy phase of live migration. 
Only DMA WRITEs are logged, ++ * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. ++ * ++ * When DMA logging is started a range of IOVAs to monitor is provided and the ++ * device can optimize its logging to cover only the IOVA range given. Each ++ * DMA that the device initiates inside the range will be logged by the device ++ * for later retrieval. ++ * ++ * page_size is an input that hints what tracking granularity the device ++ * should try to achieve. If the device cannot do the hinted page size then ++ * it's the driver choice which page size to pick based on its support. ++ * On output the device will return the page size it selected. ++ * ++ * ranges is a pointer to an array of ++ * struct vfio_device_feature_dma_logging_range. ++ * ++ * The core kernel code guarantees to support by minimum num_ranges that fit ++ * into a single kernel page. User space can try higher values but should give ++ * up if the above can't be achieved as of some driver limitations. ++ * ++ * A single call to start device DMA logging can be issued and a matching stop ++ * should follow at the end. Another start is not allowed in the meantime. ++ */ ++struct vfio_device_feature_dma_logging_control { ++ __aligned_u64 page_size; ++ __u32 num_ranges; ++ __u32 __reserved; ++ __aligned_u64 ranges; ++}; ++ ++struct vfio_device_feature_dma_logging_range { ++ __aligned_u64 iova; ++ __aligned_u64 length; ++}; ++ ++#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6 ++ ++/* ++ * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started ++ * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START ++ */ ++#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7 ++ ++/* ++ * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log ++ * ++ * Query the device's DMA log for written pages within the given IOVA range. ++ * During querying the log is cleared for the IOVA range. 
++ * ++ * bitmap is a pointer to an array of u64s that will hold the output bitmap ++ * with 1 bit reporting a page_size unit of IOVA. The mapping of IOVA to bits ++ * is given by: ++ * bitmap[(addr - iova)/page_size] & (1ULL << (addr % 64)) ++ * ++ * The input page_size can be any power of two value and does not have to ++ * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START. The driver ++ * will format its internal logging to match the reporting page size, possibly ++ * by replicating bits if the internal page size is lower than requested. ++ * ++ * The LOGGING_REPORT will only set bits in the bitmap and never clear or ++ * perform any initialization of the user provided bitmap. ++ * ++ * If any error is returned userspace should assume that the dirty log is ++ * corrupted. Error recovery is to consider all memory dirty and try to ++ * restart the dirty tracking, or to abort/restart the whole migration. ++ * ++ * If DMA logging is not enabled, an error will be returned. ++ * ++ */ ++struct vfio_device_feature_dma_logging_report { ++ __aligned_u64 iova; ++ __aligned_u64 length; ++ __aligned_u64 page_size; ++ __aligned_u64 bitmap; ++}; ++ ++#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8 ++ + /* -------- API for Type1 VFIO IOMMU -------- */ + + /** +-- +2.31.1 + diff --git a/SOURCES/kvm-meson-create-have_vhost_-variables.patch b/SOURCES/kvm-meson-create-have_vhost_-variables.patch deleted file mode 100644 index fcae620..0000000 --- a/SOURCES/kvm-meson-create-have_vhost_-variables.patch +++ /dev/null @@ -1,154 +0,0 @@ -From 51c310097832724bafac26aed81399da40128400 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:50:43 +0200 -Subject: [PATCH 05/32] meson: create have_vhost_* variables -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [5/27] 
3b30f89e6d639923dc9d9a92a4261bb4509e5c83 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 2a3129a37652e5e81d12f6e16dd3c447f09831f9 -Author: Paolo Bonzini -Date: Wed Apr 20 17:34:05 2022 +0200 - - meson: create have_vhost_* variables - - When using Meson options rather than config-host.h, the "when" clauses - have to be changed to if statements (which is not necessarily great, - though at least it highlights which parts of the build are per-target - and which are not). - - Do that before moving vhost logic to meson.build, though for now - the variables are just based on config-host.mak data. - - Reviewed-by: Marc-André Lureau - Signed-off-by: Paolo Bonzini - -Signed-off-by: Eugenio Pérez ---- - meson.build | 30 ++++++++++++++++++++---------- - tests/meson.build | 2 +- - tools/meson.build | 2 +- - 3 files changed, 22 insertions(+), 12 deletions(-) - -diff --git a/meson.build b/meson.build -index 13e3323380..735f538497 100644 ---- a/meson.build -+++ b/meson.build -@@ -298,6 +298,15 @@ have_tpm = get_option('tpm') \ - .require(targetos != 'windows', error_message: 'TPM emulation only available on POSIX systems') \ - .allowed() - -+# vhost -+have_vhost_user = 'CONFIG_VHOST_USER' in config_host -+have_vhost_vdpa = 'CONFIG_VHOST_VDPA' in config_host -+have_vhost_kernel = 'CONFIG_VHOST_KERNEL' in config_host -+have_vhost_net_user = 'CONFIG_VHOST_NET_USER' in config_host -+have_vhost_net_vdpa = 'CONFIG_VHOST_NET_VDPA' in config_host -+have_vhost_net = 'CONFIG_VHOST_NET' in config_host -+have_vhost_user_crypto = 'CONFIG_VHOST_CRYPTO' in config_host -+ - # Target-specific libraries and flags - libm = cc.find_library('m', required: false) - threads = dependency('threads') -@@ -1335,7 +1344,7 @@ has_statx_mnt_id = cc.links(statx_mnt_id_test) - have_vhost_user_blk_server = 
get_option('vhost_user_blk_server') \ - .require(targetos == 'linux', - error_message: 'vhost_user_blk_server requires linux') \ -- .require('CONFIG_VHOST_USER' in config_host, -+ .require(have_vhost_user, - error_message: 'vhost_user_blk_server requires vhost-user support') \ - .disable_auto_if(not have_system) \ - .allowed() -@@ -2116,9 +2125,9 @@ host_kconfig = \ - (have_ivshmem ? ['CONFIG_IVSHMEM=y'] : []) + \ - ('CONFIG_OPENGL' in config_host ? ['CONFIG_OPENGL=y'] : []) + \ - (x11.found() ? ['CONFIG_X11=y'] : []) + \ -- ('CONFIG_VHOST_USER' in config_host ? ['CONFIG_VHOST_USER=y'] : []) + \ -- ('CONFIG_VHOST_VDPA' in config_host ? ['CONFIG_VHOST_VDPA=y'] : []) + \ -- ('CONFIG_VHOST_KERNEL' in config_host ? ['CONFIG_VHOST_KERNEL=y'] : []) + \ -+ (have_vhost_user ? ['CONFIG_VHOST_USER=y'] : []) + \ -+ (have_vhost_vdpa ? ['CONFIG_VHOST_VDPA=y'] : []) + \ -+ (have_vhost_kernel ? ['CONFIG_VHOST_KERNEL=y'] : []) + \ - (have_virtfs ? ['CONFIG_VIRTFS=y'] : []) + \ - ('CONFIG_LINUX' in config_host ? ['CONFIG_LINUX=y'] : []) + \ - ('CONFIG_PVRDMA' in config_host ? 
['CONFIG_PVRDMA=y'] : []) + \ -@@ -2799,7 +2808,7 @@ if have_system or have_user - endif - - vhost_user = not_found --if targetos == 'linux' and 'CONFIG_VHOST_USER' in config_host -+if targetos == 'linux' and have_vhost_user - libvhost_user = subproject('libvhost-user') - vhost_user = libvhost_user.get_variable('vhost_user_dep') - endif -@@ -3386,7 +3395,7 @@ if have_tools - dependencies: qemuutil, - install: true) - -- if 'CONFIG_VHOST_USER' in config_host -+ if have_vhost_user - subdir('contrib/vhost-user-blk') - subdir('contrib/vhost-user-gpu') - subdir('contrib/vhost-user-input') -@@ -3516,15 +3525,16 @@ if 'simple' in get_option('trace_backends') - endif - summary_info += {'D-Bus display': dbus_display} - summary_info += {'QOM debugging': get_option('qom_cast_debug')} --summary_info += {'vhost-kernel support': config_host.has_key('CONFIG_VHOST_KERNEL')} --summary_info += {'vhost-net support': config_host.has_key('CONFIG_VHOST_NET')} --summary_info += {'vhost-crypto support': config_host.has_key('CONFIG_VHOST_CRYPTO')} -+summary_info += {'vhost-kernel support': have_vhost_kernel} -+summary_info += {'vhost-net support': have_vhost_net} -+summary_info += {'vhost-user support': have_vhost_user} -+summary_info += {'vhost-user-crypto support': have_vhost_user_crypto} - summary_info += {'vhost-scsi support': config_host.has_key('CONFIG_VHOST_SCSI')} - summary_info += {'vhost-vsock support': config_host.has_key('CONFIG_VHOST_VSOCK')} --summary_info += {'vhost-user support': config_host.has_key('CONFIG_VHOST_USER')} - summary_info += {'vhost-user-blk server support': have_vhost_user_blk_server} - summary_info += {'vhost-user-fs support': config_host.has_key('CONFIG_VHOST_USER_FS')} - summary_info += {'vhost-vdpa support': config_host.has_key('CONFIG_VHOST_VDPA')} -+summary_info += {'vhost-vdpa support': have_vhost_vdpa} - summary_info += {'build guest agent': have_ga} - summary(summary_info, bool_yn: true, section: 'Configurable features') - -diff --git 
a/tests/meson.build b/tests/meson.build -index 1d05109eb4..bbe41c8559 100644 ---- a/tests/meson.build -+++ b/tests/meson.build -@@ -70,7 +70,7 @@ test_deps = { - 'test-qht-par': qht_bench, - } - --if have_tools and 'CONFIG_VHOST_USER' in config_host and 'CONFIG_LINUX' in config_host -+if have_tools and have_vhost_user and 'CONFIG_LINUX' in config_host - executable('vhost-user-bridge', - sources: files('vhost-user-bridge.c'), - dependencies: [qemuutil, vhost_user]) -diff --git a/tools/meson.build b/tools/meson.build -index 46977af84f..10eb3a043f 100644 ---- a/tools/meson.build -+++ b/tools/meson.build -@@ -3,7 +3,7 @@ have_virtiofsd = get_option('virtiofsd') \ - error_message: 'virtiofsd requires Linux') \ - .require(seccomp.found() and libcap_ng.found(), - error_message: 'virtiofsd requires libcap-ng-devel and seccomp-devel') \ -- .require('CONFIG_VHOST_USER' in config_host, -+ .require(have_vhost_user, - error_message: 'virtiofsd needs vhost-user-support') \ - .disable_auto_if(not have_tools and not have_system) \ - .allowed() --- -2.31.1 - diff --git a/SOURCES/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch b/SOURCES/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch deleted file mode 100644 index 99d86c1..0000000 --- a/SOURCES/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch +++ /dev/null @@ -1,213 +0,0 @@ -From a7d57a09e33275d5e6649273b5c9da1bc3c92491 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:51:53 +0200 -Subject: [PATCH 06/32] meson: use have_vhost_* variables to pick sources -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [6/27] bc3db1efb759c0bc97fde2f4fbb3d6dc404c8d3d (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: 
https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 43b6d7ee1fbc5b5fb7c85d8131fdac1863214ad6 -Author: Paolo Bonzini -Date: Wed Apr 20 17:34:06 2022 +0200 - - meson: use have_vhost_* variables to pick sources - - Reviewed-by: Marc-André Lureau - Signed-off-by: Paolo Bonzini - -Signed-off-by: Eugenio Pérez ---- - Kconfig.host | 3 --- - backends/meson.build | 8 ++++++-- - hw/net/meson.build | 8 ++++++-- - hw/virtio/Kconfig | 3 --- - hw/virtio/meson.build | 25 ++++++++++++++++--------- - meson.build | 1 + - net/meson.build | 12 +++++++----- - tests/qtest/meson.build | 4 +++- - 8 files changed, 39 insertions(+), 25 deletions(-) - -diff --git a/Kconfig.host b/Kconfig.host -index 60b9c07b5e..1165c4eacd 100644 ---- a/Kconfig.host -+++ b/Kconfig.host -@@ -22,15 +22,12 @@ config TPM - - config VHOST_USER - bool -- select VHOST - - config VHOST_VDPA - bool -- select VHOST - - config VHOST_KERNEL - bool -- select VHOST - - config VIRTFS - bool -diff --git a/backends/meson.build b/backends/meson.build -index 6e68945528..cb92f639ca 100644 ---- a/backends/meson.build -+++ b/backends/meson.build -@@ -12,9 +12,13 @@ softmmu_ss.add([files( - softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('rng-random.c')) - softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('hostmem-file.c')) - softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('hostmem-memfd.c')) --softmmu_ss.add(when: ['CONFIG_VHOST_USER', 'CONFIG_VIRTIO'], if_true: files('vhost-user.c')) -+if have_vhost_user -+ softmmu_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c')) -+endif - softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) --softmmu_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VHOST_CRYPTO'], if_true: files('cryptodev-vhost-user.c')) -+if have_vhost_user_crypto -+ softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c')) -+endif - softmmu_ss.add(when: 'CONFIG_GIO', if_true: [files('dbus-vmstate.c'), gio]) 
- softmmu_ss.add(when: 'CONFIG_SGX', if_true: files('hostmem-epc.c')) - -diff --git a/hw/net/meson.build b/hw/net/meson.build -index 685b75badb..ebac261542 100644 ---- a/hw/net/meson.build -+++ b/hw/net/meson.build -@@ -46,8 +46,12 @@ specific_ss.add(when: 'CONFIG_XILINX_ETHLITE', if_true: files('xilinx_ethlite.c' - softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('net_rx_pkt.c')) - specific_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('virtio-net.c')) - --softmmu_ss.add(when: ['CONFIG_VIRTIO_NET', 'CONFIG_VHOST_NET'], if_true: files('vhost_net.c'), if_false: files('vhost_net-stub.c')) --softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost_net-stub.c')) -+if have_vhost_net -+ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost_net.c'), if_false: files('vhost_net-stub.c')) -+ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost_net-stub.c')) -+else -+ softmmu_ss.add(files('vhost_net-stub.c')) -+endif - - softmmu_ss.add(when: 'CONFIG_ETSEC', if_true: files( - 'fsl_etsec/etsec.c', -diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig -index c144d42f9b..8ca7b3d9d6 100644 ---- a/hw/virtio/Kconfig -+++ b/hw/virtio/Kconfig -@@ -1,6 +1,3 @@ --config VHOST -- bool -- - config VIRTIO - bool - -diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build -index 67dc77e00f..30a832eb4a 100644 ---- a/hw/virtio/meson.build -+++ b/hw/virtio/meson.build -@@ -2,18 +2,22 @@ softmmu_virtio_ss = ss.source_set() - softmmu_virtio_ss.add(files('virtio-bus.c')) - softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c')) - softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) --softmmu_virtio_ss.add(when: 'CONFIG_VHOST', if_false: files('vhost-stub.c')) -- --softmmu_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss) --softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) -- --softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) - - virtio_ss = ss.source_set() - 
virtio_ss.add(files('virtio.c')) --virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c')) --virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c')) --virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-shadow-virtqueue.c', 'vhost-vdpa.c')) -+ -+if have_vhost -+ virtio_ss.add(files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c')) -+ if have_vhost_user -+ virtio_ss.add(files('vhost-user.c')) -+ endif -+ if have_vhost_vdpa -+ virtio_ss.add(files('vhost-vdpa.c', 'vhost-shadow-virtqueue.c')) -+ endif -+else -+ softmmu_virtio_ss.add(files('vhost-stub.c')) -+endif -+ - virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) - virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) - virtio_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VIRTIO_PCI'], if_true: files('virtio-crypto-pci.c')) -@@ -53,3 +57,6 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) - virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) - - specific_ss.add_all(when: 'CONFIG_VIRTIO', if_true: virtio_ss) -+softmmu_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss) -+softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) -+softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) -diff --git a/meson.build b/meson.build -index 735f538497..9ba675f098 100644 ---- a/meson.build -+++ b/meson.build -@@ -305,6 +305,7 @@ have_vhost_kernel = 'CONFIG_VHOST_KERNEL' in config_host - have_vhost_net_user = 'CONFIG_VHOST_NET_USER' in config_host - have_vhost_net_vdpa = 'CONFIG_VHOST_NET_VDPA' in config_host - have_vhost_net = 'CONFIG_VHOST_NET' in config_host -+have_vhost = have_vhost_user or have_vhost_vdpa or have_vhost_kernel - have_vhost_user_crypto = 'CONFIG_VHOST_CRYPTO' in config_host - - # Target-specific libraries and flags -diff --git a/net/meson.build b/net/meson.build -index 847bc2ac85..c965e83b26 
100644 ---- a/net/meson.build -+++ b/net/meson.build -@@ -26,10 +26,10 @@ softmmu_ss.add(when: vde, if_true: files('vde.c')) - if have_netmap - softmmu_ss.add(files('netmap.c')) - endif --vhost_user_ss = ss.source_set() --vhost_user_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c')) --softmmu_ss.add_all(when: 'CONFIG_VHOST_NET_USER', if_true: vhost_user_ss) --softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c')) -+if have_vhost_net_user -+ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c')) -+ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c')) -+endif - - softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('tap-linux.c')) - softmmu_ss.add(when: 'CONFIG_BSD', if_true: files('tap-bsd.c')) -@@ -40,6 +40,8 @@ if not config_host.has_key('CONFIG_LINUX') and not config_host.has_key('CONFIG_B - endif - softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix)) - softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c')) --softmmu_ss.add(when: 'CONFIG_VHOST_NET_VDPA', if_true: files('vhost-vdpa.c')) -+if have_vhost_net_vdpa -+ softmmu_ss.add(files('vhost-vdpa.c')) -+endif - - subdir('can') -diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 67cd32def1..9f550df900 100644 ---- a/tests/qtest/meson.build -+++ b/tests/qtest/meson.build -@@ -269,7 +269,9 @@ qos_test_ss.add( - if have_virtfs - qos_test_ss.add(files('virtio-9p-test.c')) - endif --qos_test_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user-test.c')) -+if have_vhost_user -+ qos_test_ss.add(files('vhost-user-test.c')) -+endif - if have_tools and have_vhost_user_blk_server - qos_test_ss.add(files('vhost-user-blk-test.c')) - endif --- -2.31.1 - diff --git a/SOURCES/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch b/SOURCES/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch deleted file mode 100644 index 
0da63bf..0000000 --- a/SOURCES/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 7c489b54b0bb33445113fbf16e88feb23be68013 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:30 -0300 -Subject: [PATCH 07/18] meson.build: Fix docker-test-build@alpine when - including linux/errqueue.h -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [1/11] f058eb846fcf611d527a1dd3b0cc399cdc17e3ee (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -A build error happens in alpine CI when linux/errqueue.h is included -in io/channel-socket.c, due to redefining of 'struct __kernel_timespec': - -=== -ninja: job failed: [...] -In file included from /usr/include/linux/errqueue.h:6, - from ../io/channel-socket.c:29: -/usr/include/linux/time_types.h:7:8: error: redefinition of 'struct __kernel_timespec' - 7 | struct __kernel_timespec { - | ^~~~~~~~~~~~~~~~~ -In file included from /usr/include/liburing.h:19, - from /builds/user/qemu/include/block/aio.h:18, - from /builds/user/qemu/include/io/channel.h:26, - from /builds/user/qemu/include/io/channel-socket.h:24, - from ../io/channel-socket.c:24: -/usr/include/liburing/compat.h:9:8: note: originally defined here - 9 | struct __kernel_timespec { - | ^~~~~~~~~~~~~~~~~ -ninja: subcommand failed -=== - -As above error message suggests, 'struct __kernel_timespec' was already -defined by liburing/compat.h. - -Fix alpine CI by adding test to disable liburing in configure step if a -redefinition happens between linux/errqueue.h and liburing/compat.h. - -[dgilbert: This has been fixed in Alpine issue 13813 and liburing] - -Signed-off-by: Leonardo Bras -Message-Id: <20220513062836.965425-2-leobras@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 354081d43de44ebd3497fe08f7f0121a5517d528) -Signed-off-by: Leonardo Bras ---- - meson.build | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/meson.build b/meson.build -index 5a7c10e639..13e3323380 100644 ---- a/meson.build -+++ b/meson.build -@@ -471,12 +471,23 @@ if not get_option('linux_aio').auto() or have_block - required: get_option('linux_aio'), - kwargs: static_kwargs) - endif -+ -+linux_io_uring_test = ''' -+ #include -+ #include -+ -+ int main(void) { return 0; }''' -+ - linux_io_uring = not_found - if not get_option('linux_io_uring').auto() or have_block - linux_io_uring = dependency('liburing', version: '>=0.3', - required: get_option('linux_io_uring'), - method: 'pkg-config', kwargs: static_kwargs) -+ if not cc.links(linux_io_uring_test) -+ linux_io_uring = not_found -+ endif - endif -+ - libnfs = not_found - if not get_option('libnfs').auto() or have_block - libnfs = dependency('libnfs', version: '>=1.9.3', --- -2.35.3 - diff --git a/SOURCES/kvm-migration-Add-migrate_use_tls-helper.patch b/SOURCES/kvm-migration-Add-migrate_use_tls-helper.patch deleted file mode 100644 index 0fe0d91..0000000 --- a/SOURCES/kvm-migration-Add-migrate_use_tls-helper.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 828f6c106eedcb7a48e551ffda15af56ff92a899 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:34 -0300 -Subject: [PATCH 11/18] migration: Add migrate_use_tls() helper -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [5/11] 06e945297c3b9c0ce5864885aafcdba1e5746bc2 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -A lot of places check parameters.tls_creds in order to evaluate if TLS is -in use, and sometimes call migrate_get_current() just for that test. 
- -Add new helper function migrate_use_tls() in order to simplify testing -for TLS usage. - -Signed-off-by: Leonardo Bras -Reviewed-by: Juan Quintela -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. Berrangé -Message-Id: <20220513062836.965425-6-leobras@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit d2fafb6a6814a8998607d0baf691265032996a0f) -Signed-off-by: Leonardo Bras ---- - migration/channel.c | 3 +-- - migration/migration.c | 9 +++++++++ - migration/migration.h | 1 + - migration/multifd.c | 5 +---- - 4 files changed, 12 insertions(+), 6 deletions(-) - -diff --git a/migration/channel.c b/migration/channel.c -index c4fc000a1a..086b5c0d8b 100644 ---- a/migration/channel.c -+++ b/migration/channel.c -@@ -38,8 +38,7 @@ void migration_channel_process_incoming(QIOChannel *ioc) - trace_migration_set_incoming_channel( - ioc, object_get_typename(OBJECT(ioc))); - -- if (s->parameters.tls_creds && -- *s->parameters.tls_creds && -+ if (migrate_use_tls() && - !object_dynamic_cast(OBJECT(ioc), - TYPE_QIO_CHANNEL_TLS)) { - migration_tls_channel_process_incoming(s, ioc, &local_err); -diff --git a/migration/migration.c b/migration/migration.c -index 0a6b3b9f4d..d91efb66fe 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2582,6 +2582,15 @@ bool migrate_use_zero_copy_send(void) - } - #endif - -+int migrate_use_tls(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.tls_creds && *s->parameters.tls_creds; -+} -+ - int migrate_use_xbzrle(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 5bcb7628ef..c2cabb8a14 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -381,6 +381,7 @@ bool migrate_use_zero_copy_send(void); - #else - #define migrate_use_zero_copy_send() (false) - #endif -+int migrate_use_tls(void); - int migrate_use_xbzrle(void); - uint64_t migrate_xbzrle_cache_size(void); - bool migrate_colo_enabled(void); -diff --git 
a/migration/multifd.c b/migration/multifd.c -index 76b57a7177..43998ad117 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -784,14 +784,11 @@ static bool multifd_channel_connect(MultiFDSendParams *p, - QIOChannel *ioc, - Error *error) - { -- MigrationState *s = migrate_get_current(); -- - trace_multifd_set_outgoing_channel( - ioc, object_get_typename(OBJECT(ioc)), p->tls_hostname, error); - - if (!error) { -- if (s->parameters.tls_creds && -- *s->parameters.tls_creds && -+ if (migrate_use_tls() && - !object_dynamic_cast(OBJECT(ioc), - TYPE_QIO_CHANNEL_TLS)) { - multifd_tls_channel_connect(p, ioc, &error); --- -2.35.3 - diff --git a/SOURCES/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch b/SOURCES/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch deleted file mode 100644 index 206ac3d..0000000 --- a/SOURCES/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch +++ /dev/null @@ -1,250 +0,0 @@ -From d6500340dc3c1152b5efe04ef3daa50c17a55e30 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:33 -0300 -Subject: [PATCH 10/18] migration: Add zero-copy-send parameter for QMP/HMP for - Linux -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [4/11] 514d98d595992c53ff98de750035e080ded8972e (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -Add property that allows zero-copy migration of memory pages -on the sending side, and also includes a helper function -migrate_use_zero_copy_send() to check if it's enabled. - -No code is introduced to actually do the migration, but it allow -future implementations to enable/disable this feature. - -On non-Linux builds this parameter is compiled-out. - -Signed-off-by: Leonardo Bras -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. 
Berrangé -Reviewed-by: Juan Quintela -Acked-by: Markus Armbruster -Message-Id: <20220513062836.965425-5-leobras@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit abb6295b3ace5d17c3a65936913fc346616dbf14) -Signed-off-by: Leonardo Bras ---- - migration/migration.c | 32 ++++++++++++++++++++++++++++++++ - migration/migration.h | 5 +++++ - migration/socket.c | 11 +++++++++-- - monitor/hmp-cmds.c | 6 ++++++ - qapi/migration.json | 24 ++++++++++++++++++++++++ - 5 files changed, 76 insertions(+), 2 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 695f0f2900..0a6b3b9f4d 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -899,6 +899,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) - params->multifd_zlib_level = s->parameters.multifd_zlib_level; - params->has_multifd_zstd_level = true; - params->multifd_zstd_level = s->parameters.multifd_zstd_level; -+#ifdef CONFIG_LINUX -+ params->has_zero_copy_send = true; -+ params->zero_copy_send = s->parameters.zero_copy_send; -+#endif - params->has_xbzrle_cache_size = true; - params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; - params->has_max_postcopy_bandwidth = true; -@@ -1555,6 +1559,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params, - if (params->has_multifd_compression) { - dest->multifd_compression = params->multifd_compression; - } -+#ifdef CONFIG_LINUX -+ if (params->has_zero_copy_send) { -+ dest->zero_copy_send = params->zero_copy_send; -+ } -+#endif - if (params->has_xbzrle_cache_size) { - dest->xbzrle_cache_size = params->xbzrle_cache_size; - } -@@ -1667,6 +1676,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) - if (params->has_multifd_compression) { - s->parameters.multifd_compression = params->multifd_compression; - } -+#ifdef CONFIG_LINUX -+ if (params->has_zero_copy_send) { -+ s->parameters.zero_copy_send = params->zero_copy_send; -+ } -+#endif - if 
(params->has_xbzrle_cache_size) { - s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; - xbzrle_cache_resize(params->xbzrle_cache_size, errp); -@@ -2557,6 +2571,17 @@ int migrate_multifd_zstd_level(void) - return s->parameters.multifd_zstd_level; - } - -+#ifdef CONFIG_LINUX -+bool migrate_use_zero_copy_send(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.zero_copy_send; -+} -+#endif -+ - int migrate_use_xbzrle(void) - { - MigrationState *s; -@@ -4200,6 +4225,10 @@ static Property migration_properties[] = { - DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, - parameters.multifd_zstd_level, - DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), -+#ifdef CONFIG_LINUX -+ DEFINE_PROP_BOOL("zero_copy_send", MigrationState, -+ parameters.zero_copy_send, false), -+#endif - DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, - parameters.xbzrle_cache_size, - DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), -@@ -4297,6 +4326,9 @@ static void migration_instance_init(Object *obj) - params->has_multifd_compression = true; - params->has_multifd_zlib_level = true; - params->has_multifd_zstd_level = true; -+#ifdef CONFIG_LINUX -+ params->has_zero_copy_send = true; -+#endif - params->has_xbzrle_cache_size = true; - params->has_max_postcopy_bandwidth = true; - params->has_max_cpu_throttle = true; -diff --git a/migration/migration.h b/migration/migration.h -index 2de861df01..5bcb7628ef 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -376,6 +376,11 @@ MultiFDCompression migrate_multifd_compression(void); - int migrate_multifd_zlib_level(void); - int migrate_multifd_zstd_level(void); - -+#ifdef CONFIG_LINUX -+bool migrate_use_zero_copy_send(void); -+#else -+#define migrate_use_zero_copy_send() (false) -+#endif - int migrate_use_xbzrle(void); - uint64_t migrate_xbzrle_cache_size(void); - bool migrate_colo_enabled(void); -diff --git a/migration/socket.c b/migration/socket.c -index 05705a32d8..3754d8f72c 100644 ---- 
a/migration/socket.c -+++ b/migration/socket.c -@@ -74,9 +74,16 @@ static void socket_outgoing_migration(QIOTask *task, - - if (qio_task_propagate_error(task, &err)) { - trace_migration_socket_outgoing_error(error_get_pretty(err)); -- } else { -- trace_migration_socket_outgoing_connected(data->hostname); -+ goto out; - } -+ -+ trace_migration_socket_outgoing_connected(data->hostname); -+ -+ if (migrate_use_zero_copy_send()) { -+ error_setg(&err, "Zero copy send not available in migration"); -+ } -+ -+out: - migration_channel_connect(data->s, sioc, data->hostname, err); - object_unref(OBJECT(sioc)); - } -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index 634968498b..55b48d3733 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -1309,6 +1309,12 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) - p->has_multifd_zstd_level = true; - visit_type_uint8(v, param, &p->multifd_zstd_level, &err); - break; -+#ifdef CONFIG_LINUX -+ case MIGRATION_PARAMETER_ZERO_COPY_SEND: -+ p->has_zero_copy_send = true; -+ visit_type_bool(v, param, &p->zero_copy_send, &err); -+ break; -+#endif - case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: - p->has_xbzrle_cache_size = true; - if (!visit_type_size(v, param, &cache_size, &err)) { -diff --git a/qapi/migration.json b/qapi/migration.json -index 27d7b28158..4d833ecdd6 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -741,6 +741,13 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # -+# @zero-copy-send: Controls behavior on sending memory pages on migration. -+# When true, enables a zero-copy mechanism for sending -+# memory pages, if host supports it. -+# Requires that QEMU be permitted to use locked memory -+# for guest RAM pages. -+# Defaults to false. (Since 7.1) -+# - # @block-bitmap-mapping: Maps block nodes and bitmaps on them to - # aliases for the purpose of dirty bitmap migration. 
Such - # aliases may for example be the corresponding names on the -@@ -780,6 +787,7 @@ - 'xbzrle-cache-size', 'max-postcopy-bandwidth', - 'max-cpu-throttle', 'multifd-compression', - 'multifd-zlib-level' ,'multifd-zstd-level', -+ { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, - 'block-bitmap-mapping' ] } - - ## -@@ -906,6 +914,13 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # -+# @zero-copy-send: Controls behavior on sending memory pages on migration. -+# When true, enables a zero-copy mechanism for sending -+# memory pages, if host supports it. -+# Requires that QEMU be permitted to use locked memory -+# for guest RAM pages. -+# Defaults to false. (Since 7.1) -+# - # @block-bitmap-mapping: Maps block nodes and bitmaps on them to - # aliases for the purpose of dirty bitmap migration. Such - # aliases may for example be the corresponding names on the -@@ -960,6 +975,7 @@ - '*multifd-compression': 'MultiFDCompression', - '*multifd-zlib-level': 'uint8', - '*multifd-zstd-level': 'uint8', -+ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, - '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } - - ## -@@ -1106,6 +1122,13 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # -+# @zero-copy-send: Controls behavior on sending memory pages on migration. -+# When true, enables a zero-copy mechanism for sending -+# memory pages, if host supports it. -+# Requires that QEMU be permitted to use locked memory -+# for guest RAM pages. -+# Defaults to false. (Since 7.1) -+# - # @block-bitmap-mapping: Maps block nodes and bitmaps on them to - # aliases for the purpose of dirty bitmap migration. 
Such - # aliases may for example be the corresponding names on the -@@ -1158,6 +1181,7 @@ - '*multifd-compression': 'MultiFDCompression', - '*multifd-zlib-level': 'uint8', - '*multifd-zstd-level': 'uint8', -+ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, - '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } - - ## --- -2.35.3 - diff --git a/SOURCES/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch b/SOURCES/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch deleted file mode 100644 index 29dc0ea..0000000 --- a/SOURCES/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch +++ /dev/null @@ -1,98 +0,0 @@ -From fd6f516a94e635bc42e58448f314db575814a834 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Thu, 31 Mar 2022 11:08:45 -0400 -Subject: [PATCH 18/18] migration: Allow migrate-recover to run multiple times -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 104: migration: Allow migrate-recover to run multiple times -RH-Commit: [1/1] afd726e54c069ae800e2d01f34e768d6bac7dcb9 (peterx/qemu-kvm) -RH-Bugzilla: 2096143 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Hanna Reitz -RH-Acked-by: Dr. David Alan Gilbert - -Previously migration didn't have an easy way to cleanup the listening -transport, migrate recovery only allows to execute once. That's done with a -trick flag in postcopy_recover_triggered. - -Now the facility is already there. - -Drop postcopy_recover_triggered and instead allows a new migrate-recover to -release the previous listener transport. - -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Peter Xu -Message-Id: <20220331150857.74406-8-peterx@redhat.com> -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 08401c0426bc1a5ce4609afd1cda5dd39abbf9fa) -Signed-off-by: Peter Xu ---- - migration/migration.c | 13 ++----------- - migration/migration.h | 1 - - migration/savevm.c | 3 --- - 3 files changed, 2 insertions(+), 15 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 2a141bfaf3..8fb3eae910 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2166,11 +2166,8 @@ void qmp_migrate_recover(const char *uri, Error **errp) - return; - } - -- if (qatomic_cmpxchg(&mis->postcopy_recover_triggered, -- false, true) == true) { -- error_setg(errp, "Migrate recovery is triggered already"); -- return; -- } -+ /* If there's an existing transport, release it */ -+ migration_incoming_transport_cleanup(mis); - - /* - * Note that this call will never start a real migration; it will -@@ -2178,12 +2175,6 @@ void qmp_migrate_recover(const char *uri, Error **errp) - * to continue using that newly established channel. - */ - qemu_start_incoming_migration(uri, errp); -- -- /* Safe to dereference with the assert above */ -- if (*errp) { -- /* Reset the flag so user could still retry */ -- qatomic_set(&mis->postcopy_recover_triggered, false); -- } - } - - void qmp_migrate_pause(Error **errp) -diff --git a/migration/migration.h b/migration/migration.h -index c2cabb8a14..fbc8690ec8 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -139,7 +139,6 @@ struct MigrationIncomingState { - struct PostcopyBlocktimeContext *blocktime_ctx; - - /* notify PAUSED postcopy incoming migrations to try to continue */ -- bool postcopy_recover_triggered; - QemuSemaphore postcopy_pause_sem_dst; - QemuSemaphore postcopy_pause_sem_fault; - -diff --git a/migration/savevm.c b/migration/savevm.c -index 02ed94c180..d9076897b8 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -2589,9 +2589,6 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) - - assert(migrate_postcopy_ram()); - -- /* 
Clear the triggered bit to allow one recovery */ -- mis->postcopy_recover_triggered = false; -- - /* - * Unregister yank with either from/to src would work, since ioc behind it - * is the same --- -2.35.3 - diff --git a/SOURCES/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch b/SOURCES/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch deleted file mode 100644 index 9f440eb..0000000 --- a/SOURCES/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 0753565af588dfa78b3529e359b1590e15fcbdb3 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Tue, 19 Jul 2022 09:23:45 -0300 -Subject: [PATCH 04/11] migration: Avoid false-positive on non-supported - scenarios for zero-copy-send -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 111: zero-copy-send fixes & improvements -RH-Commit: [4/6] f5c7ed6710d92668acb81d0118a71fab0b4e3d43 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 2107466 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. David Alan Gilbert - -Migration with zero-copy-send currently has it's limitations, as it can't -be used with TLS nor any kind of compression. In such scenarios, it should -output errors during parameter / capability setting. - -But currently there are some ways of setting this not-supported scenarios -without printing the error message: - -!) For 'compression' capability, it works by enabling it together with -zero-copy-send. This happens because the validity test for zero-copy uses -the helper unction migrate_use_compression(), which check for compression -presence in s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]. - -The point here is: the validity test happens before the capability gets -enabled. If all of them get enabled together, this test will not return -error. 
- -In order to fix that, replace migrate_use_compression() by directly testing -the cap_list parameter migrate_caps_check(). - -2) For features enabled by parameters such as TLS & 'multifd_compression', -there was also a possibility of setting non-supported scenarios: setting -zero-copy-send first, then setting the unsupported parameter. - -In order to fix that, also add a check for parameters conflicting with -zero-copy-send on migrate_params_check(). - -3) XBZRLE is also a compression capability, so it makes sense to also add -it to the list of capabilities which are not supported with zero-copy-send. - -Fixes: 1abaec9a1b2c ("migration: Change zero_copy_send from migration parameter to migration capability") -Signed-off-by: Leonardo Bras -Message-Id: <20220719122345.253713-1-leobras@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 90eb69e4f1a16b388d0483543bf6bfc69a9966e4) -Signed-off-by: Leonardo Bras ---- - migration/migration.c | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 3a3a7a4a50..343629d59c 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1265,7 +1265,9 @@ static bool migrate_caps_check(bool *cap_list, - #ifdef CONFIG_LINUX - if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && - (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || -- migrate_use_compression() || -+ cap_list[MIGRATION_CAPABILITY_COMPRESS] || -+ cap_list[MIGRATION_CAPABILITY_XBZRLE] || -+ migrate_multifd_compression() || - migrate_use_tls())) { - error_setg(errp, - "Zero copy only available for non-compressed non-TLS multifd migration"); -@@ -1502,6 +1504,17 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) - error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); - return false; - } -+ -+#ifdef CONFIG_LINUX -+ if (migrate_use_zero_copy_send() && -+ 
((params->has_multifd_compression && params->multifd_compression) || -+ (params->has_tls_creds && params->tls_creds && *params->tls_creds))) { -+ error_setg(errp, -+ "Zero copy only available for non-compressed non-TLS multifd migration"); -+ return false; -+ } -+#endif -+ - return true; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-migration-Change-zero_copy_send-from-migration-param.patch b/SOURCES/kvm-migration-Change-zero_copy_send-from-migration-param.patch deleted file mode 100644 index abeeeb6..0000000 --- a/SOURCES/kvm-migration-Change-zero_copy_send-from-migration-param.patch +++ /dev/null @@ -1,289 +0,0 @@ -From 7e2a037f3f349c21201152cecce32d8c8ff0bea0 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Mon, 20 Jun 2022 02:39:45 -0300 -Subject: [PATCH 17/18] migration: Change zero_copy_send from migration - parameter to migration capability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [11/11] e4a955607947896a49398ac8400241a0adac51a1 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -When originally implemented, zero_copy_send was designed as a Migration -paramenter. - -But taking into account how is that supposed to work, and how -the difference between a capability and a parameter, it only makes sense -that zero-copy-send would work better as a capability. - -Taking into account how recently the change got merged, it was decided -that it's still time to make it right, and convert zero_copy_send into -a Migration capability. - -Signed-off-by: Leonardo Bras -Reviewed-by: Juan Quintela -Acked-by: Markus Armbruster -Acked-by: Peter Xu -Signed-off-by: Juan Quintela -Signed-off-by: Dr. 
David Alan Gilbert - dgilbert: always define the capability, even on non-Linux but error if -set; avoids build problems with the capability -(cherry picked from commit 1abaec9a1b2c23f7aa94709a422128d9e42c3e0b) -Signed-off-by: Leonardo Bras ---- - migration/migration.c | 58 +++++++++++++++++++------------------------ - monitor/hmp-cmds.c | 6 ----- - qapi/migration.json | 33 +++++++----------------- - 3 files changed, 34 insertions(+), 63 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 102236fba0..2a141bfaf3 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -163,7 +163,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, - MIGRATION_CAPABILITY_COMPRESS, - MIGRATION_CAPABILITY_XBZRLE, - MIGRATION_CAPABILITY_X_COLO, -- MIGRATION_CAPABILITY_VALIDATE_UUID); -+ MIGRATION_CAPABILITY_VALIDATE_UUID, -+ MIGRATION_CAPABILITY_ZERO_COPY_SEND); - - /* When we add fault tolerance, we could have several - migrations at once. For now we don't need to add -@@ -899,10 +900,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) - params->multifd_zlib_level = s->parameters.multifd_zlib_level; - params->has_multifd_zstd_level = true; - params->multifd_zstd_level = s->parameters.multifd_zstd_level; --#ifdef CONFIG_LINUX -- params->has_zero_copy_send = true; -- params->zero_copy_send = s->parameters.zero_copy_send; --#endif - params->has_xbzrle_cache_size = true; - params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; - params->has_max_postcopy_bandwidth = true; -@@ -1263,6 +1260,24 @@ static bool migrate_caps_check(bool *cap_list, - } - } - -+#ifdef CONFIG_LINUX -+ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && -+ (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || -+ migrate_use_compression() || -+ migrate_use_tls())) { -+ error_setg(errp, -+ "Zero copy only available for non-compressed non-TLS multifd migration"); -+ return false; -+ } -+#else -+ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { -+ 
error_setg(errp, -+ "Zero copy currently only available on Linux"); -+ return false; -+ } -+#endif -+ -+ - /* incoming side only */ - if (runstate_check(RUN_STATE_INMIGRATE) && - !migrate_multifd_is_allowed() && -@@ -1485,16 +1500,6 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) - error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); - return false; - } --#ifdef CONFIG_LINUX -- if (params->zero_copy_send && -- (!migrate_use_multifd() || -- params->multifd_compression != MULTIFD_COMPRESSION_NONE || -- (params->tls_creds && *params->tls_creds))) { -- error_setg(errp, -- "Zero copy only available for non-compressed non-TLS multifd migration"); -- return false; -- } --#endif - return true; - } - -@@ -1568,11 +1573,6 @@ static void migrate_params_test_apply(MigrateSetParameters *params, - if (params->has_multifd_compression) { - dest->multifd_compression = params->multifd_compression; - } --#ifdef CONFIG_LINUX -- if (params->has_zero_copy_send) { -- dest->zero_copy_send = params->zero_copy_send; -- } --#endif - if (params->has_xbzrle_cache_size) { - dest->xbzrle_cache_size = params->xbzrle_cache_size; - } -@@ -1685,11 +1685,6 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) - if (params->has_multifd_compression) { - s->parameters.multifd_compression = params->multifd_compression; - } --#ifdef CONFIG_LINUX -- if (params->has_zero_copy_send) { -- s->parameters.zero_copy_send = params->zero_copy_send; -- } --#endif - if (params->has_xbzrle_cache_size) { - s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; - xbzrle_cache_resize(params->xbzrle_cache_size, errp); -@@ -2587,7 +2582,7 @@ bool migrate_use_zero_copy_send(void) - - s = migrate_get_current(); - -- return s->parameters.zero_copy_send; -+ return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; - } - #endif - -@@ -4243,10 +4238,6 @@ static Property migration_properties[] = { - 
DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, - parameters.multifd_zstd_level, - DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), --#ifdef CONFIG_LINUX -- DEFINE_PROP_BOOL("zero_copy_send", MigrationState, -- parameters.zero_copy_send, false), --#endif - DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, - parameters.xbzrle_cache_size, - DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), -@@ -4284,6 +4275,10 @@ static Property migration_properties[] = { - DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), - DEFINE_PROP_MIG_CAP("x-background-snapshot", - MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), -+#ifdef CONFIG_LINUX -+ DEFINE_PROP_MIG_CAP("x-zero-copy-send", -+ MIGRATION_CAPABILITY_ZERO_COPY_SEND), -+#endif - - DEFINE_PROP_END_OF_LIST(), - }; -@@ -4344,9 +4339,6 @@ static void migration_instance_init(Object *obj) - params->has_multifd_compression = true; - params->has_multifd_zlib_level = true; - params->has_multifd_zstd_level = true; --#ifdef CONFIG_LINUX -- params->has_zero_copy_send = true; --#endif - params->has_xbzrle_cache_size = true; - params->has_max_postcopy_bandwidth = true; - params->has_max_cpu_throttle = true; -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index 55b48d3733..634968498b 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -1309,12 +1309,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) - p->has_multifd_zstd_level = true; - visit_type_uint8(v, param, &p->multifd_zstd_level, &err); - break; --#ifdef CONFIG_LINUX -- case MIGRATION_PARAMETER_ZERO_COPY_SEND: -- p->has_zero_copy_send = true; -- visit_type_bool(v, param, &p->zero_copy_send, &err); -- break; --#endif - case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: - p->has_xbzrle_cache_size = true; - if (!visit_type_size(v, param, &cache_size, &err)) { -diff --git a/qapi/migration.json b/qapi/migration.json -index 4d833ecdd6..5105790cd0 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -463,6 +463,13 @@ - # procedure starts. 
The VM RAM is saved with running VM. - # (since 6.0) - # -+# @zero-copy-send: Controls behavior on sending memory pages on migration. -+# When true, enables a zero-copy mechanism for sending -+# memory pages, if host supports it. -+# Requires that QEMU be permitted to use locked memory -+# for guest RAM pages. -+# (since 7.1) -+# - # Features: - # @unstable: Members @x-colo and @x-ignore-shared are experimental. - # -@@ -476,7 +483,8 @@ - 'block', 'return-path', 'pause-before-switchover', 'multifd', - 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', - { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, -- 'validate-uuid', 'background-snapshot'] } -+ 'validate-uuid', 'background-snapshot', -+ 'zero-copy-send'] } - - ## - # @MigrationCapabilityStatus: -@@ -741,12 +749,6 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # --# @zero-copy-send: Controls behavior on sending memory pages on migration. --# When true, enables a zero-copy mechanism for sending --# memory pages, if host supports it. --# Requires that QEMU be permitted to use locked memory --# for guest RAM pages. --# Defaults to false. (Since 7.1) - # - # @block-bitmap-mapping: Maps block nodes and bitmaps on them to - # aliases for the purpose of dirty bitmap migration. Such -@@ -787,7 +789,6 @@ - 'xbzrle-cache-size', 'max-postcopy-bandwidth', - 'max-cpu-throttle', 'multifd-compression', - 'multifd-zlib-level' ,'multifd-zstd-level', -- { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, - 'block-bitmap-mapping' ] } - - ## -@@ -914,13 +915,6 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # --# @zero-copy-send: Controls behavior on sending memory pages on migration. --# When true, enables a zero-copy mechanism for sending --# memory pages, if host supports it. --# Requires that QEMU be permitted to use locked memory --# for guest RAM pages. --# Defaults to false. 
(Since 7.1) --# - # @block-bitmap-mapping: Maps block nodes and bitmaps on them to - # aliases for the purpose of dirty bitmap migration. Such - # aliases may for example be the corresponding names on the -@@ -975,7 +969,6 @@ - '*multifd-compression': 'MultiFDCompression', - '*multifd-zlib-level': 'uint8', - '*multifd-zstd-level': 'uint8', -- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, - '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } - - ## -@@ -1122,13 +1115,6 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # --# @zero-copy-send: Controls behavior on sending memory pages on migration. --# When true, enables a zero-copy mechanism for sending --# memory pages, if host supports it. --# Requires that QEMU be permitted to use locked memory --# for guest RAM pages. --# Defaults to false. (Since 7.1) --# - # @block-bitmap-mapping: Maps block nodes and bitmaps on them to - # aliases for the purpose of dirty bitmap migration. Such - # aliases may for example be the corresponding names on the -@@ -1181,7 +1167,6 @@ - '*multifd-compression': 'MultiFDCompression', - '*multifd-zlib-level': 'uint8', - '*multifd-zstd-level': 'uint8', -- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, - '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } - - ## --- -2.35.3 - diff --git a/SOURCES/kvm-migration-Fix-operator-type.patch b/SOURCES/kvm-migration-Fix-operator-type.patch deleted file mode 100644 index f6a462a..0000000 --- a/SOURCES/kvm-migration-Fix-operator-type.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 4bd48e784ae0c38c89f1a944b06c997fd28c4d37 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Thu, 19 May 2022 04:15:33 -0400 -Subject: [PATCH 16/16] migration: Fix operator type -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 92: Fix build using clang 14 -RH-Commit: [1/1] ad9980e64cf2e39085d68f1ff601444bf2afe228 
(mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 2064530 -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Dr. David Alan Gilbert - -Clang spotted an & that should have been an &&; fix it. - -Reported by: David Binderman / https://gitlab.com/dcb -Fixes: 65dacaa04fa ("migration: introduce save_normal_page()") -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/963 -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20220406102515.96320-1-dgilbert@redhat.com> -Reviewed-by: Peter Maydell -Reviewed-by: Peter Xu -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit f912ec5b2d65644116ff496b58d7c9145c19e4c0) -Signed-off-by: Miroslav Rezanina ---- - migration/ram.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 3532f64ecb..0ef4bd63eb 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1289,7 +1289,7 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, - offset | RAM_SAVE_FLAG_PAGE)); - if (async) { - qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, -- migrate_release_ram() & -+ migrate_release_ram() && - migration_in_postcopy()); - } else { - qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); --- -2.31.1 - diff --git a/SOURCES/kvm-migration-Read-state-once.patch b/SOURCES/kvm-migration-Read-state-once.patch deleted file mode 100644 index 44f2a36..0000000 --- a/SOURCES/kvm-migration-Read-state-once.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 0a808f2304731f2108b29c3c6777cdd966a03beb Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 13 Apr 2022 12:33:29 +0100 -Subject: [PATCH] migration: Read state once - -RH-Author: Dr. 
David Alan Gilbert -RH-MergeRequest: 255: migration: Read state once -RH-Bugzilla: 2168221 -RH-Acked-by: quintela1 -RH-Acked-by: Peter Xu -RH-Acked-by: Stefano Garzarella -RH-Commit: [1/1] 215b2009145df37a2caee525991021ce9325686a - -The 'status' field for the migration is updated normally using -an atomic operation from the migration thread. -Most readers of it aren't that careful, and in most cases it doesn't -matter. - -In query_migrate->fill_source_migration_info the 'state' -is read twice; the first time to decide which state fields to fill in, -and then secondly to copy the state to the status field; that can end up -with a status that's inconsistent; e.g. setting up the fields -for 'setup' and then having an 'active' status. In that case -libvirt gets upset by the lack of ram info. -The symptom is: - libvirt.libvirtError: internal error: migration was active, but no RAM info was set - -Read the state exactly once in fill_source_migration_info. - -This is a possible fix for: -https://bugzilla.redhat.com/show_bug.cgi?id=2074205 - -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20220413113329.103696-1-dgilbert@redhat.com> -Reviewed-by: Juan Quintela -Reviewed-by: Peter Xu -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 552de79bfdd5e9e53847eb3c6d6e4cd898a4370e) ---- - migration/migration.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 5e78028df4..e417d40c44 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1076,6 +1076,7 @@ static void populate_disk_info(MigrationInfo *info) - static void fill_source_migration_info(MigrationInfo *info) - { - MigrationState *s = migrate_get_current(); -+ int state = qatomic_read(&s->state); - GSList *cur_blocker = migration_blockers; - - info->blocked_reasons = NULL; -@@ -1095,7 +1096,7 @@ static void fill_source_migration_info(MigrationInfo *info) - } - info->has_blocked_reasons = info->blocked_reasons != NULL; - -- switch (s->state) { -+ switch (state) { - case MIGRATION_STATUS_NONE: - /* no migration has happened ever */ - /* do not overwrite destination migration status */ -@@ -1140,7 +1141,7 @@ static void fill_source_migration_info(MigrationInfo *info) - info->has_status = true; - break; - } -- info->status = s->state; -+ info->status = state; - } - - typedef enum WriteTrackingSupport { --- -2.31.1 - diff --git a/SOURCES/kvm-migration-add-remaining-params-has_-true-in-migratio.patch b/SOURCES/kvm-migration-add-remaining-params-has_-true-in-migratio.patch deleted file mode 100644 index bcaff3b..0000000 --- a/SOURCES/kvm-migration-add-remaining-params-has_-true-in-migratio.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 9698c0e8dd9b4f5dbc237a3f98ac46297dac85fb Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Mon, 25 Jul 2022 22:02:35 -0300 -Subject: [PATCH 05/11] migration: add remaining params->has_* = true in - migration_instance_init() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 111: zero-copy-send fixes & improvements -RH-Commit: [5/6] 50bbad254e2356b3ae16f6e00a3db8fd0b22dde9 (LeoBras/centos-qemu-kvm) 
-RH-Bugzilla: 2107466 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. David Alan Gilbert - -Some of params->has_* = true are missing in migration_instance_init, this -causes migrate_params_check() to skip some tests, allowing some -unsupported scenarios. - -Fix this by adding all missing params->has_* = true in -migration_instance_init(). - -Fixes: 69ef1f36b0 ("migration: define 'tls-creds' and 'tls-hostname' migration parameters") -Fixes: 1d58872a91 ("migration: do not wait for free thread") -Fixes: d2f1d29b95 ("migration: add support for a "tls-authz" migration parameter") -Signed-off-by: Leonardo Bras -Message-Id: <20220726010235.342927-1-leobras@redhat.com> -Reviewed-by: Peter Xu -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit df67aa3e61e2c83459da7d815962d9706f1528fc) -Signed-off-by: Leonardo Bras ---- - migration/migration.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/migration/migration.c b/migration/migration.c -index 343629d59c..5e78028df4 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -4332,6 +4332,7 @@ static void migration_instance_init(Object *obj) - /* Set has_* up only for parameter checks */ - params->has_compress_level = true; - params->has_compress_threads = true; -+ params->has_compress_wait_thread = true; - params->has_decompress_threads = true; - params->has_throttle_trigger_threshold = true; - params->has_cpu_throttle_initial = true; -@@ -4352,6 +4353,9 @@ static void migration_instance_init(Object *obj) - params->has_announce_max = true; - params->has_announce_rounds = true; - params->has_announce_step = true; -+ params->has_tls_creds = true; -+ params->has_tls_hostname = true; -+ params->has_tls_authz = true; - - qemu_sem_init(&ms->postcopy_pause_sem, 0); - qemu_sem_init(&ms->postcopy_pause_rp_sem, 0); --- -2.31.1 - diff --git a/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch 
b/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch new file mode 100644 index 0000000..387d0b3 --- /dev/null +++ b/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch @@ -0,0 +1,330 @@ +From 29eee1fbb84c0e2f0ece9e6d996afa7238ed2912 Mon Sep 17 00:00:00 2001 +From: "manish.mishra" +Date: Tue, 20 Dec 2022 18:44:18 +0000 +Subject: [PATCH 7/8] migration: check magic value for deciding the mapping of + channels +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders +RH-Bugzilla: 2169732 +RH-Acked-by: quintela1 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Dr. David Alan Gilbert +RH-Commit: [2/2] 4fb9408478923415a91fe0527bf4b1a0f022f329 (peterx/qemu-kvm) + +Current logic assumes that channel connections on the destination side are +always established in the same order as the source and the first one will +always be the main channel followed by the multifid or post-copy +preemption channel. This may not be always true, as even if a channel has a +connection established on the source side it can be in the pending state on +the destination side and a newer connection can be established first. +Basically causing out of order mapping of channels on the destination side. +Currently, all channels except post-copy preempt send a magic number, this +patch uses that magic number to decide the type of channel. This logic is +applicable only for precopy(multifd) live migration, as mentioned, the +post-copy preempt channel does not send any magic number. Also, tls live +migrations already does tls handshake before creating other channels, so +this issue is not possible with tls, hence this logic is avoided for tls +live migrations. This patch uses read peek to check the magic number of +channels so that current data/control stream management remains +un-effected. + +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. 
Berrange +Reviewed-by: Juan Quintela +Suggested-by: Daniel P. Berrange +Signed-off-by: manish.mishra +Signed-off-by: Juan Quintela +(cherry picked from commit 6720c2b32725e6ac404f22851a0ecd0a71d0cbe2) +Signed-off-by: Peter Xu +--- + migration/channel.c | 45 +++++++++++++++++++++++++++++++++ + migration/channel.h | 5 ++++ + migration/migration.c | 54 ++++++++++++++++++++++++++++------------ + migration/multifd.c | 19 +++++++------- + migration/multifd.h | 2 +- + migration/postcopy-ram.c | 5 +--- + migration/postcopy-ram.h | 2 +- + 7 files changed, 101 insertions(+), 31 deletions(-) + +diff --git a/migration/channel.c b/migration/channel.c +index 1b0815039f..ca3319a309 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -92,3 +92,48 @@ void migration_channel_connect(MigrationState *s, + migrate_fd_connect(s, error); + error_free(error); + } ++ ++ ++/** ++ * @migration_channel_read_peek - Peek at migration channel, without ++ * actually removing it from channel buffer. ++ * ++ * @ioc: the channel object ++ * @buf: the memory region to read data into ++ * @buflen: the number of bytes to read in @buf ++ * @errp: pointer to a NULL-initialized error object ++ * ++ * Returns 0 if successful, returns -1 and sets @errp if fails. ++ */ ++int migration_channel_read_peek(QIOChannel *ioc, ++ const char *buf, ++ const size_t buflen, ++ Error **errp) ++{ ++ ssize_t len = 0; ++ struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; ++ ++ while (true) { ++ len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, ++ QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp); ++ ++ if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) { ++ error_setg(errp, ++ "Failed to peek at channel"); ++ return -1; ++ } ++ ++ if (len == buflen) { ++ break; ++ } ++ ++ /* 1ms sleep. 
*/ ++ if (qemu_in_coroutine()) { ++ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); ++ } else { ++ g_usleep(1000); ++ } ++ } ++ ++ return 0; ++} +diff --git a/migration/channel.h b/migration/channel.h +index 67a461c28a..5bdb8208a7 100644 +--- a/migration/channel.h ++++ b/migration/channel.h +@@ -24,4 +24,9 @@ void migration_channel_connect(MigrationState *s, + QIOChannel *ioc, + const char *hostname, + Error *error_in); ++ ++int migration_channel_read_peek(QIOChannel *ioc, ++ const char *buf, ++ const size_t buflen, ++ Error **errp); + #endif +diff --git a/migration/migration.c b/migration/migration.c +index f485eea5fb..593dbd25de 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -31,6 +31,7 @@ + #include "migration.h" + #include "savevm.h" + #include "qemu-file.h" ++#include "channel.h" + #include "migration/vmstate.h" + #include "block/block.h" + #include "qapi/error.h" +@@ -663,10 +664,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); + +- if (multifd_load_setup(errp) != 0) { +- return false; +- } +- + if (!mis->from_src_file) { + mis->from_src_file = f; + } +@@ -733,31 +730,56 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); + Error *local_err = NULL; +- bool start_migration; + QEMUFile *f; ++ bool default_channel = true; ++ uint32_t channel_magic = 0; ++ int ret = 0; + +- if (!mis->from_src_file) { +- /* The first connection (multifd may have multiple) */ ++ if (migrate_use_multifd() && !migrate_postcopy_ram() && ++ qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { ++ /* ++ * With multiple channels, it is possible that we receive channels ++ * out of order on destination side, causing incorrect mapping of ++ * source channels on destination side. Check channel MAGIC to ++ * decide type of channel. 
Please note this is best effort, postcopy ++ * preempt channel does not send any magic number so avoid it for ++ * postcopy live migration. Also tls live migration already does ++ * tls handshake while initializing main channel so with tls this ++ * issue is not possible. ++ */ ++ ret = migration_channel_read_peek(ioc, (void *)&channel_magic, ++ sizeof(channel_magic), &local_err); ++ ++ if (ret != 0) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); ++ } else { ++ default_channel = !mis->from_src_file; ++ } ++ ++ if (multifd_load_setup(errp) != 0) { ++ error_setg(errp, "Failed to setup multifd channels"); ++ return; ++ } ++ ++ if (default_channel) { + f = qemu_file_new_input(ioc); + + if (!migration_incoming_setup(f, errp)) { + return; + } +- +- /* +- * Common migration only needs one channel, so we can start +- * right now. Some features need more than one channel, we wait. +- */ +- start_migration = !migration_needs_multiple_sockets(); + } else { + /* Multiple connections */ + assert(migration_needs_multiple_sockets()); + if (migrate_use_multifd()) { +- start_migration = multifd_recv_new_channel(ioc, &local_err); ++ multifd_recv_new_channel(ioc, &local_err); + } else { + assert(migrate_postcopy_preempt()); + f = qemu_file_new_input(ioc); +- start_migration = postcopy_preempt_new_channel(mis, f); ++ postcopy_preempt_new_channel(mis, f); + } + if (local_err) { + error_propagate(errp, local_err); +@@ -765,7 +787,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + } + } + +- if (start_migration) { ++ if (migration_has_all_channels()) { + /* If it's a recovery, we're done */ + if (postcopy_try_recover()) { + return; +diff --git a/migration/multifd.c b/migration/multifd.c +index 509bbbe3bf..c3385529cf 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -1167,9 +1167,14 @@ int multifd_load_setup(Error **errp) + uint32_t page_count = MULTIFD_PACKET_SIZE / 
qemu_target_page_size(); + uint8_t i; + +- if (!migrate_use_multifd()) { ++ /* ++ * Return successfully if multiFD recv state is already initialised ++ * or multiFD is not enabled. ++ */ ++ if (multifd_recv_state || !migrate_use_multifd()) { + return 0; + } ++ + if (!migrate_multi_channels_is_allowed()) { + error_setg(errp, "multifd is not supported by current protocol"); + return -1; +@@ -1228,11 +1233,9 @@ bool multifd_recv_all_channels_created(void) + + /* + * Try to receive all multifd channels to get ready for the migration. +- * - Return true and do not set @errp when correctly receiving all channels; +- * - Return false and do not set @errp when correctly receiving the current one; +- * - Return false and set @errp when failing to receive the current channel. ++ * Sets @errp when failing to receive the current channel. + */ +-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) ++void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + { + MultiFDRecvParams *p; + Error *local_err = NULL; +@@ -1245,7 +1248,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + "failed to receive packet" + " via multifd channel %d: ", + qatomic_read(&multifd_recv_state->count)); +- return false; ++ return; + } + trace_multifd_recv_new_channel(id); + +@@ -1255,7 +1258,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + id); + multifd_recv_terminate_threads(local_err); + error_propagate(errp, local_err); +- return false; ++ return; + } + p->c = ioc; + object_ref(OBJECT(ioc)); +@@ -1266,6 +1269,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, + QEMU_THREAD_JOINABLE); + qatomic_inc(&multifd_recv_state->count); +- return qatomic_read(&multifd_recv_state->count) == +- migrate_multifd_channels(); + } +diff --git a/migration/multifd.h b/migration/multifd.h +index 519f498643..913e4ba274 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -18,7 +18,7 
@@ void multifd_save_cleanup(void); + int multifd_load_setup(Error **errp); + int multifd_load_cleanup(Error **errp); + bool multifd_recv_all_channels_created(void); +-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); ++void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); + int multifd_send_sync_main(QEMUFile *f); + int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index 0c55df0e52..b98e95dab0 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -1538,7 +1538,7 @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd) + } + } + +-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) ++void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) + { + /* + * The new loading channel has its own threads, so it needs to be +@@ -1547,9 +1547,6 @@ bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) + qemu_file_set_blocking(file, true); + mis->postcopy_qemufile_dst = file; + trace_postcopy_preempt_new_channel(); +- +- /* Start the migration immediately */ +- return true; + } + + /* +diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h +index 6147bf7d1d..25881c4127 100644 +--- a/migration/postcopy-ram.h ++++ b/migration/postcopy-ram.h +@@ -190,7 +190,7 @@ enum PostcopyChannels { + RAM_CHANNEL_MAX, + }; + +-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); ++void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); + int postcopy_preempt_setup(MigrationState *s, Error **errp); + int postcopy_preempt_wait_channel(MigrationState *s); + +-- +2.31.1 + diff --git a/SOURCES/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch b/SOURCES/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch deleted file mode 100644 index d7b1ab3..0000000 --- 
a/SOURCES/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 78bbe28d5f5691330239041448cccfb339eed779 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Mon, 11 Jul 2022 18:11:13 -0300 -Subject: [PATCH 03/11] migration/multifd: Report to user when zerocopy not - working -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 111: zero-copy-send fixes & improvements -RH-Commit: [3/6] 4f9165325b3cb8ff16d8b3b7649ff780fae0e2ad (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 2107466 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. David Alan Gilbert - -Some errors, like the lack of Scatter-Gather support by the network -interface(NETIF_F_SG) may cause sendmsg(...,MSG_ZEROCOPY) to fail on using -zero-copy, which causes it to fall back to the default copying mechanism. - -After each full dirty-bitmap scan there should be a zero-copy flush -happening, which checks for errors each of the previous calls to -sendmsg(...,MSG_ZEROCOPY). If all of them failed to use zero-copy, then -increment dirty_sync_missed_zero_copy migration stat to let the user know -about it. - -Signed-off-by: Leonardo Bras -Reviewed-by: Daniel P. Berrangé -Acked-by: Peter Xu -Message-Id: <20220711211112.18951-4-leobras@redhat.com> -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit d59c40cc483729f2e67c80e58df769ad19976fe9) -Signed-off-by: Leonardo Bras ---- - migration/multifd.c | 2 ++ - migration/ram.c | 5 +++++ - migration/ram.h | 2 ++ - 3 files changed, 9 insertions(+) - -diff --git a/migration/multifd.c b/migration/multifd.c -index 0b5b41c53f..96e5f0a058 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -626,6 +626,8 @@ int multifd_send_sync_main(QEMUFile *f) - if (ret < 0) { - error_report_err(err); - return -1; -+ } else if (ret == 1) { -+ dirty_sync_missed_zero_copy(); - } - } - } -diff --git a/migration/ram.c b/migration/ram.c -index ee40e4a718..c437ff1b1f 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -406,6 +406,11 @@ static void ram_transferred_add(uint64_t bytes) - ram_counters.transferred += bytes; - } - -+void dirty_sync_missed_zero_copy(void) -+{ -+ ram_counters.dirty_sync_missed_zero_copy++; -+} -+ - /* used by the search for pages to send */ - struct PageSearchStatus { - /* Current block being searched */ -diff --git a/migration/ram.h b/migration/ram.h -index 2c6dc3675d..34adf5cb92 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -86,4 +86,6 @@ void ram_write_tracking_prepare(void); - int ram_write_tracking_start(void); - void ram_write_tracking_stop(void); - -+void dirty_sync_missed_zero_copy(void); -+ - #endif --- -2.31.1 - diff --git a/SOURCES/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch b/SOURCES/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch deleted file mode 100644 index ea89a9f..0000000 --- a/SOURCES/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch +++ /dev/null @@ -1,142 +0,0 @@ -From 1d280070748b604c60a7be4d4c3c3a28e3964f37 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 2 Aug 2022 10:11:21 +0200 -Subject: [PATCH 31/32] multifd: Copy pages before compressing them with zlib - -RH-Author: Thomas Huth -RH-MergeRequest: 112: Fix postcopy migration on s390x -RH-Commit: [1/2] 
fd5a0221e22b4563bd1cb7f8a8b95f0bfe8f5fc9 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2099934 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Peter Xu - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2099934 - -zlib_send_prepare() compresses pages of a running VM. zlib does not -make any thread-safety guarantees with respect to changing deflate() -input concurrently with deflate() [1]. - -One can observe problems due to this with the IBM zEnterprise Data -Compression accelerator capable zlib [2]. When the hardware -acceleration is enabled, migration/multifd/tcp/plain/zlib test fails -intermittently [3] due to sliding window corruption. The accelerator's -architecture explicitly discourages concurrent accesses [4]: - - Page 26-57, "Other Conditions": - - As observed by this CPU, other CPUs, and channel - programs, references to the parameter block, first, - second, and third operands may be multiple-access - references, accesses to these storage locations are - not necessarily block-concurrent, and the sequence - of these accesses or references is undefined. - -Mark Adler pointed out that vanilla zlib performs double fetches under -certain circumstances as well [5], therefore we need to copy data -before passing it to deflate(). - -[1] https://zlib.net/manual.html -[2] https://github.com/madler/zlib/pull/410 -[3] https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03988.html -[4] http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf -[5] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00889.html - -Signed-off-by: Ilya Leoshkevich -Message-Id: <20220705203559.2960949-1-iii@linux.ibm.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 007e179ef0e97eafda4c9ff2a9d665a1947c7c6d) -Signed-off-by: Thomas Huth ---- - migration/multifd-zlib.c | 38 ++++++++++++++++++++++++++++++-------- - 1 file changed, 30 insertions(+), 8 deletions(-) - -diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c -index 3a7ae44485..18213a9513 100644 ---- a/migration/multifd-zlib.c -+++ b/migration/multifd-zlib.c -@@ -27,6 +27,8 @@ struct zlib_data { - uint8_t *zbuff; - /* size of compressed buffer */ - uint32_t zbuff_len; -+ /* uncompressed buffer of size qemu_target_page_size() */ -+ uint8_t *buf; - }; - - /* Multifd zlib compression */ -@@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) - { - struct zlib_data *z = g_new0(struct zlib_data, 1); - z_stream *zs = &z->zs; -+ const char *err_msg; - - zs->zalloc = Z_NULL; - zs->zfree = Z_NULL; - zs->opaque = Z_NULL; - if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) { -- g_free(z); -- error_setg(errp, "multifd %u: deflate init failed", p->id); -- return -1; -+ err_msg = "deflate init failed"; -+ goto err_free_z; - } - /* This is the maxium size of the compressed buffer */ - z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE); - z->zbuff = g_try_malloc(z->zbuff_len); - if (!z->zbuff) { -- deflateEnd(&z->zs); -- g_free(z); -- error_setg(errp, "multifd %u: out of memory for zbuff", p->id); -- return -1; -+ err_msg = "out of memory for zbuff"; -+ goto err_deflate_end; -+ } -+ z->buf = g_try_malloc(qemu_target_page_size()); -+ if (!z->buf) { -+ err_msg = "out of memory for buf"; -+ goto err_free_zbuff; - } - p->data = z; - return 0; -+ -+err_free_zbuff: -+ g_free(z->zbuff); -+err_deflate_end: -+ deflateEnd(&z->zs); -+err_free_z: -+ g_free(z); -+ error_setg(errp, "multifd %u: %s", p->id, err_msg); -+ return -1; - } - - /** -@@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) - deflateEnd(&z->zs); - g_free(z->zbuff); - z->zbuff = NULL; -+ g_free(z->buf); -+ 
z->buf = NULL; - g_free(p->data); - p->data = NULL; - } -@@ -114,8 +130,14 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) - flush = Z_SYNC_FLUSH; - } - -+ /* -+ * Since the VM might be running, the page may be changing concurrently -+ * with compression. zlib does not guarantee that this is safe, -+ * therefore copy the page before calling deflate(). -+ */ -+ memcpy(z->buf, p->pages->block->host + p->normal[i], page_size); - zs->avail_in = page_size; -- zs->next_in = p->pages->block->host + p->normal[i]; -+ zs->next_in = z->buf; - - zs->avail_out = available; - zs->next_out = z->zbuff + out_size; --- -2.31.1 - diff --git a/SOURCES/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch b/SOURCES/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch deleted file mode 100644 index c7159e1..0000000 --- a/SOURCES/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch +++ /dev/null @@ -1,182 +0,0 @@ -From c1a2866d158ac67179fa0d17f1710302eb9a3866 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:37 -0300 -Subject: [PATCH 14/18] multifd: Implement zero copy write in multifd migration - (multifd-zero-copy) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [8/11] b93009cc94b2cc4b464b4f68ebfb37b870dd6f7d (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -Implement zero copy send on nocomp_send_write(), by making use of QIOChannel -writev + flags & flush interface. - -Change multifd_send_sync_main() so flush_zero_copy() can be called -after each iteration in order to make sure all dirty pages are sent before -a new iteration is started. It will also flush at the beginning and at the -end of migration. 
- -Also make it return -1 if flush_zero_copy() fails, in order to cancel -the migration process, and avoid resuming the guest in the target host -without receiving all current RAM. - -This will work fine on RAM migration because the RAM pages are not usually freed, -and there is no problem on changing the pages content between writev_zero_copy() and -the actual sending of the buffer, because this change will dirty the page and -cause it to be re-sent on a next iteration anyway. - -A lot of locked memory may be needed in order to use multifd migration -with zero-copy enabled, so disabling the feature should be necessary for -low-privileged users trying to perform multifd migrations. - -Signed-off-by: Leonardo Bras -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. Berrangé -Message-Id: <20220513062836.965425-9-leobras@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 5b1d9bab2da4fca3a3caee97c430e5709cb32b7b) -Signed-off-by: Leonardo Bras ---- - migration/migration.c | 11 ++++++++++- - migration/multifd.c | 37 +++++++++++++++++++++++++++++++++++-- - migration/multifd.h | 2 ++ - migration/socket.c | 5 +++-- - 4 files changed, 50 insertions(+), 5 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index d91efb66fe..102236fba0 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1485,7 +1485,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) - error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); - return false; - } -- -+#ifdef CONFIG_LINUX -+ if (params->zero_copy_send && -+ (!migrate_use_multifd() || -+ params->multifd_compression != MULTIFD_COMPRESSION_NONE || -+ (params->tls_creds && *params->tls_creds))) { -+ error_setg(errp, -+ "Zero copy only available for non-compressed non-TLS multifd migration"); -+ return false; -+ } -+#endif - return true; - } - -diff --git a/migration/multifd.c b/migration/multifd.c -index 8fca6c970e..0b5b41c53f 100644 ---- 
a/migration/multifd.c -+++ b/migration/multifd.c -@@ -571,6 +571,7 @@ void multifd_save_cleanup(void) - int multifd_send_sync_main(QEMUFile *f) - { - int i; -+ bool flush_zero_copy; - - if (!migrate_use_multifd()) { - return 0; -@@ -581,6 +582,20 @@ int multifd_send_sync_main(QEMUFile *f) - return -1; - } - } -+ -+ /* -+ * When using zero-copy, it's necessary to flush the pages before any of -+ * the pages can be sent again, so we'll make sure the new version of the -+ * pages will always arrive _later_ than the old pages. -+ * -+ * Currently we achieve this by flushing the zero-page requested writes -+ * per ram iteration, but in the future we could potentially optimize it -+ * to be less frequent, e.g. only after we finished one whole scanning of -+ * all the dirty bitmaps. -+ */ -+ -+ flush_zero_copy = migrate_use_zero_copy_send(); -+ - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; - -@@ -602,6 +617,17 @@ int multifd_send_sync_main(QEMUFile *f) - ram_counters.transferred += p->packet_len; - qemu_mutex_unlock(&p->mutex); - qemu_sem_post(&p->sem); -+ -+ if (flush_zero_copy && p->c) { -+ int ret; -+ Error *err = NULL; -+ -+ ret = qio_channel_flush(p->c, &err); -+ if (ret < 0) { -+ error_report_err(err); -+ return -1; -+ } -+ } - } - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; -@@ -686,8 +712,8 @@ static void *multifd_send_thread(void *opaque) - p->iov[0].iov_base = p->packet; - } - -- ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, -- &local_err); -+ ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, -+ 0, p->write_flags, &local_err); - if (ret != 0) { - break; - } -@@ -928,6 +954,13 @@ int multifd_save_setup(Error **errp) - /* We need one extra place for the packet header */ - p->iov = g_new0(struct iovec, page_count + 1); - p->normal = g_new0(ram_addr_t, page_count); -+ -+ if (migrate_use_zero_copy_send()) { -+ 
p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; -+ } else { -+ p->write_flags = 0; -+ } -+ - socket_send_channel_create(multifd_new_send_channel_async, p); - } - -diff --git a/migration/multifd.h b/migration/multifd.h -index cd495195ce..7ec688fb4f 100644 ---- a/migration/multifd.h -+++ b/migration/multifd.h -@@ -96,6 +96,8 @@ typedef struct { - uint32_t packet_len; - /* pointer to the packet */ - MultiFDPacket_t *packet; -+ /* multifd flags for sending ram */ -+ int write_flags; - /* multifd flags for each packet */ - uint32_t flags; - /* size of the next packet that contains pages */ -diff --git a/migration/socket.c b/migration/socket.c -index 3754d8f72c..4fd5e85f50 100644 ---- a/migration/socket.c -+++ b/migration/socket.c -@@ -79,8 +79,9 @@ static void socket_outgoing_migration(QIOTask *task, - - trace_migration_socket_outgoing_connected(data->hostname); - -- if (migrate_use_zero_copy_send()) { -- error_setg(&err, "Zero copy send not available in migration"); -+ if (migrate_use_zero_copy_send() && -+ !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { -+ error_setg(&err, "Zero copy send feature not detected in host kernel"); - } - - out: --- -2.35.3 - diff --git a/SOURCES/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch b/SOURCES/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch deleted file mode 100644 index 415e3a9..0000000 --- a/SOURCES/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 63255c13492f42a3236d96e706e5f8e70bb4e219 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:36 -0300 -Subject: [PATCH 13/18] multifd: Send header packet without flags if - zero-copy-send is enabled -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [7/11] 137eea685e387d3d6aff187ec3fcac05bc16b6e3 (LeoBras/centos-qemu-kvm) 
-RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -Since d48c3a0445 ("multifd: Use a single writev on the send side"), -sending the header packet and the memory pages happens in the same -writev, which can potentially make the migration faster. - -Using channel-socket as example, this works well with the default copying -mechanism of sendmsg(), but with zero-copy-send=true, it will cause -the migration to often break. - -This happens because the header packet buffer gets reused quite often, -and there is a high chance that by the time the MSG_ZEROCOPY mechanism get -to send the buffer, it has already changed, sending the wrong data and -causing the migration to abort. - -It means that, as it is, the buffer for the header packet is not suitable -for sending with MSG_ZEROCOPY. - -In order to enable zero copy for multifd, send the header packet on an -individual write(), without any flags, and the remanining pages with a -writev(), as it was happening before. This only changes how a migration -with zero-copy-send=true works, not changing any current behavior for -migrations with zero-copy-send=false. - -Signed-off-by: Leonardo Bras -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. Berrangé -Message-Id: <20220513062836.965425-8-leobras@redhat.com> -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit b7dbdd8e76cd03453c234dbb9578d20969859d74) -Signed-off-by: Leonardo Bras ---- - migration/multifd.c | 22 +++++++++++++++++++--- - 1 file changed, 19 insertions(+), 3 deletions(-) - -diff --git a/migration/multifd.c b/migration/multifd.c -index cdb57439a7..8fca6c970e 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -619,6 +619,7 @@ static void *multifd_send_thread(void *opaque) - MultiFDSendParams *p = opaque; - Error *local_err = NULL; - int ret = 0; -+ bool use_zero_copy_send = migrate_use_zero_copy_send(); - - trace_multifd_send_thread_start(p->id); - rcu_register_thread(); -@@ -641,9 +642,14 @@ static void *multifd_send_thread(void *opaque) - if (p->pending_job) { - uint64_t packet_num = p->packet_num; - uint32_t flags = p->flags; -- p->iovs_num = 1; - p->normal_num = 0; - -+ if (use_zero_copy_send) { -+ p->iovs_num = 0; -+ } else { -+ p->iovs_num = 1; -+ } -+ - for (int i = 0; i < p->pages->num; i++) { - p->normal[p->normal_num] = p->pages->offset[i]; - p->normal_num++; -@@ -667,8 +673,18 @@ static void *multifd_send_thread(void *opaque) - trace_multifd_send(p->id, packet_num, p->normal_num, flags, - p->next_packet_size); - -- p->iov[0].iov_len = p->packet_len; -- p->iov[0].iov_base = p->packet; -+ if (use_zero_copy_send) { -+ /* Send header first, without zerocopy */ -+ ret = qio_channel_write_all(p->c, (void *)p->packet, -+ p->packet_len, &local_err); -+ if (ret != 0) { -+ break; -+ } -+ } else { -+ /* Send header using the same writev call */ -+ p->iov[0].iov_len = p->packet_len; -+ p->iov[0].iov_base = p->packet; -+ } - - ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, - &local_err); --- -2.35.3 - diff --git a/SOURCES/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch b/SOURCES/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch deleted file mode 100644 index e6d726a..0000000 --- a/SOURCES/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch +++ /dev/null 
@@ -1,163 +0,0 @@ -From 4ca5375a936bc87829c6e2b4620f56c73a5efc70 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:35 -0300 -Subject: [PATCH 12/18] multifd: multifd_send_sync_main now returns negative on - error -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [6/11] c8ebdee4327d463c74f4b2eeb42d3c964f314c94 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -Even though multifd_send_sync_main() currently emits error_reports, it's -callers don't really check it before continuing. - -Change multifd_send_sync_main() to return -1 on error and 0 on success. -Also change all it's callers to make use of this change and possibly fail -earlier. - -(This change is important to next patch on multifd zero copy -implementation, to make it sure an error in zero-copy flush does not go -unnoticed. - -Signed-off-by: Leonardo Bras -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Peter Xu -Message-Id: <20220513062836.965425-7-leobras@redhat.com> -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 33d70973a3a6e8c6b62bcbc64d9e488961981007) -Signed-off-by: Leonardo Bras ---- - migration/multifd.c | 10 ++++++---- - migration/multifd.h | 2 +- - migration/ram.c | 29 ++++++++++++++++++++++------- - 3 files changed, 29 insertions(+), 12 deletions(-) - -diff --git a/migration/multifd.c b/migration/multifd.c -index 43998ad117..cdb57439a7 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -568,17 +568,17 @@ void multifd_save_cleanup(void) - multifd_send_state = NULL; - } - --void multifd_send_sync_main(QEMUFile *f) -+int multifd_send_sync_main(QEMUFile *f) - { - int i; - - if (!migrate_use_multifd()) { -- return; -+ return 0; - } - if (multifd_send_state->pages->num) { - if (multifd_send_pages(f) < 0) { - error_report("%s: multifd_send_pages fail", __func__); -- return; -+ return -1; - } - } - for (i = 0; i < migrate_multifd_channels(); i++) { -@@ -591,7 +591,7 @@ void multifd_send_sync_main(QEMUFile *f) - if (p->quit) { - error_report("%s: channel %d has already quit", __func__, i); - qemu_mutex_unlock(&p->mutex); -- return; -+ return -1; - } - - p->packet_num = multifd_send_state->packet_num++; -@@ -610,6 +610,8 @@ void multifd_send_sync_main(QEMUFile *f) - qemu_sem_wait(&p->sem_sync); - } - trace_multifd_send_sync_main(multifd_send_state->packet_num); -+ -+ return 0; - } - - static void *multifd_send_thread(void *opaque) -diff --git a/migration/multifd.h b/migration/multifd.h -index 4dda900a0b..cd495195ce 100644 ---- a/migration/multifd.h -+++ b/migration/multifd.h -@@ -22,7 +22,7 @@ int multifd_load_cleanup(Error **errp); - bool multifd_recv_all_channels_created(void); - bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); - void multifd_recv_sync_main(void); --void multifd_send_sync_main(QEMUFile *f); -+int multifd_send_sync_main(QEMUFile *f); - int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); - - /* Multifd Compression flags */ -diff --git a/migration/ram.c 
b/migration/ram.c -index 0ef4bd63eb..fb6db54642 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2903,6 +2903,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) - { - RAMState **rsp = opaque; - RAMBlock *block; -+ int ret; - - if (compress_threads_save_setup()) { - return -1; -@@ -2937,7 +2938,11 @@ static int ram_save_setup(QEMUFile *f, void *opaque) - ram_control_before_iterate(f, RAM_CONTROL_SETUP); - ram_control_after_iterate(f, RAM_CONTROL_SETUP); - -- multifd_send_sync_main(f); -+ ret = multifd_send_sync_main(f); -+ if (ret < 0) { -+ return ret; -+ } -+ - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - -@@ -3046,7 +3051,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - out: - if (ret >= 0 - && migration_is_setup_or_active(migrate_get_current()->state)) { -- multifd_send_sync_main(rs->f); -+ ret = multifd_send_sync_main(rs->f); -+ if (ret < 0) { -+ return ret; -+ } -+ - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - ram_transferred_add(8); -@@ -3106,13 +3115,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_FINISH); - } - -- if (ret >= 0) { -- multifd_send_sync_main(rs->f); -- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -- qemu_fflush(f); -+ if (ret < 0) { -+ return ret; - } - -- return ret; -+ ret = multifd_send_sync_main(rs->f); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -+ qemu_fflush(f); -+ -+ return 0; - } - - static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, --- -2.35.3 - diff --git a/SOURCES/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch b/SOURCES/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch deleted file mode 100644 index 56abcb1..0000000 --- a/SOURCES/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch +++ /dev/null @@ -1,381 +0,0 @@ -From 4a9ddf42788d3f924bdad7746f7aca615f03d7c1 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Wed, 11 May 2022 
19:49:24 -0500 -Subject: [PATCH 2/2] nbd/server: Allow MULTI_CONN for shared writable exports -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Blake -RH-MergeRequest: 90: Advertise MULTI_CONN on writeable NBD servers -RH-Commit: [2/2] 53f0e885a5ed7f6e4bb14e74fe8e7957e6afe90f (ebblake/centos-qemu-kvm) -RH-Bugzilla: 1708300 -RH-Acked-by: Nir Soffer -RH-Acked-by: Kevin Wolf -RH-Acked-by: Daniel P. Berrangé - -According to the NBD spec, a server that advertises -NBD_FLAG_CAN_MULTI_CONN promises that multiple client connections will -not see any cache inconsistencies: when properly separated by a single -flush, actions performed by one client will be visible to another -client, regardless of which client did the flush. - -We always satisfy these conditions in qemu - even when we support -multiple clients, ALL clients go through a single point of reference -into the block layer, with no local caching. The effect of one client -is instantly visible to the next client. Even if our backend were a -network device, we argue that any multi-path caching effects that -would cause inconsistencies in back-to-back actions not seeing the -effect of previous actions would be a bug in that backend, and not the -fault of caching in qemu. As such, it is safe to unconditionally -advertise CAN_MULTI_CONN for any qemu NBD server situation that -supports parallel clients. - -Note, however, that we don't want to advertise CAN_MULTI_CONN when we -know that a second client cannot connect (for historical reasons, -qemu-nbd defaults to a single connection while nbd-server-add and QMP -commands default to unlimited connections; but we already have -existing means to let either style of NBD server creation alter those -defaults). This is visible by no longer advertising MULTI_CONN for -'qemu-nbd -r' without -e, as in the iotest nbd-qemu-allocation. 
- -The harder part of this patch is setting up an iotest to demonstrate -behavior of multiple NBD clients to a single server. It might be -possible with parallel qemu-io processes, but I found it easier to do -in python with the help of libnbd, and help from Nir and Vladimir in -writing the test. - -Signed-off-by: Eric Blake -Suggested-by: Nir Soffer -Suggested-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20220512004924.417153-3-eblake@redhat.com> -Signed-off-by: Kevin Wolf - -(cherry picked from commit 58a6fdcc9efb2a7c1ef4893dca4aa5e8020ca3dc) -Conflicts: - nbd/server.c - context, e5fb29d5 not backported -Signed-off-by: Eric Blake ---- - MAINTAINERS | 1 + - blockdev-nbd.c | 5 + - docs/interop/nbd.txt | 1 + - docs/tools/qemu-nbd.rst | 3 +- - include/block/nbd.h | 3 +- - nbd/server.c | 10 +- - qapi/block-export.json | 8 +- - tests/qemu-iotests/tests/nbd-multiconn | 145 ++++++++++++++++++ - tests/qemu-iotests/tests/nbd-multiconn.out | 5 + - .../tests/nbd-qemu-allocation.out | 2 +- - 10 files changed, 172 insertions(+), 11 deletions(-) - create mode 100755 tests/qemu-iotests/tests/nbd-multiconn - create mode 100644 tests/qemu-iotests/tests/nbd-multiconn.out - -diff --git a/MAINTAINERS b/MAINTAINERS -index 4ad2451e03..2fe20a49ab 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -3370,6 +3370,7 @@ F: qemu-nbd.* - F: blockdev-nbd.c - F: docs/interop/nbd.txt - F: docs/tools/qemu-nbd.rst -+F: tests/qemu-iotests/tests/*nbd* - T: git https://repo.or.cz/qemu/ericb.git nbd - T: git https://src.openvz.org/scm/~vsementsov/qemu.git nbd - -diff --git a/blockdev-nbd.c b/blockdev-nbd.c -index add41a23af..c6d9b0324c 100644 ---- a/blockdev-nbd.c -+++ b/blockdev-nbd.c -@@ -44,6 +44,11 @@ bool nbd_server_is_running(void) - return nbd_server || qemu_nbd_connections >= 0; - } - -+int nbd_server_max_connections(void) -+{ -+ return nbd_server ? 
nbd_server->max_connections : qemu_nbd_connections; -+} -+ - static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) - { - nbd_client_put(client); -diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt -index bdb0f2a41a..f5ca25174a 100644 ---- a/docs/interop/nbd.txt -+++ b/docs/interop/nbd.txt -@@ -68,3 +68,4 @@ NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE - * 4.2: NBD_FLAG_CAN_MULTI_CONN for shareable read-only exports, - NBD_CMD_FLAG_FAST_ZERO - * 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth" -+* 7.1: NBD_FLAG_CAN_MULTI_CONN for shareable writable exports -diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst -index 4c950f6199..8e08a29e89 100644 ---- a/docs/tools/qemu-nbd.rst -+++ b/docs/tools/qemu-nbd.rst -@@ -139,8 +139,7 @@ driver options if :option:`--image-opts` is specified. - .. option:: -e, --shared=NUM - - Allow up to *NUM* clients to share the device (default -- ``1``), 0 for unlimited. Safe for readers, but for now, -- consistency is not guaranteed between multiple writers. -+ ``1``), 0 for unlimited. - - .. option:: -t, --persistent - -diff --git a/include/block/nbd.h b/include/block/nbd.h -index c5a29ce1c6..c74b7a9d2e 100644 ---- a/include/block/nbd.h -+++ b/include/block/nbd.h -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2016-2020 Red Hat, Inc. -+ * Copyright (C) 2016-2022 Red Hat, Inc. - * Copyright (C) 2005 Anthony Liguori - * - * Network Block Device -@@ -346,6 +346,7 @@ void nbd_client_put(NBDClient *client); - - void nbd_server_is_qemu_nbd(int max_connections); - bool nbd_server_is_running(void); -+int nbd_server_max_connections(void); - void nbd_server_start(SocketAddress *addr, const char *tls_creds, - const char *tls_authz, uint32_t max_connections, - Error **errp); -diff --git a/nbd/server.c b/nbd/server.c -index c5644fd3f6..6e2157acfa 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2016-2021 Red Hat, Inc. -+ * Copyright (C) 2016-2022 Red Hat, Inc. 
- * Copyright (C) 2005 Anthony Liguori - * - * Network Block Device Server Side -@@ -1642,7 +1642,6 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, - int64_t size; - uint64_t perm, shared_perm; - bool readonly = !exp_args->writable; -- bool shared = !exp_args->writable; - strList *bitmaps; - size_t i; - int ret; -@@ -1693,11 +1692,12 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, - exp->description = g_strdup(arg->description); - exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH | - NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE); -+ -+ if (nbd_server_max_connections() != 1) { -+ exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN; -+ } - if (readonly) { - exp->nbdflags |= NBD_FLAG_READ_ONLY; -- if (shared) { -- exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN; -- } - } else { - exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES | - NBD_FLAG_SEND_FAST_ZERO); -diff --git a/qapi/block-export.json b/qapi/block-export.json -index 1e34927f85..755ccc89b1 100644 ---- a/qapi/block-export.json -+++ b/qapi/block-export.json -@@ -21,7 +21,9 @@ - # recreated on the fly while the NBD server is active. - # If missing, it will default to denying access (since 4.0). - # @max-connections: The maximum number of connections to allow at the same --# time, 0 for unlimited. (since 5.2; default: 0) -+# time, 0 for unlimited. Setting this to 1 also stops -+# the server from advertising multiple client support -+# (since 5.2; default: 0) - # - # Since: 4.2 - ## -@@ -50,7 +52,9 @@ - # recreated on the fly while the NBD server is active. - # If missing, it will default to denying access (since 4.0). - # @max-connections: The maximum number of connections to allow at the same --# time, 0 for unlimited. (since 5.2; default: 0) -+# time, 0 for unlimited. Setting this to 1 also stops -+# the server from advertising multiple client support -+# (since 5.2; default: 0). - # - # Returns: error if the server is already running. 
- # -diff --git a/tests/qemu-iotests/tests/nbd-multiconn b/tests/qemu-iotests/tests/nbd-multiconn -new file mode 100755 -index 0000000000..b121f2e363 ---- /dev/null -+++ b/tests/qemu-iotests/tests/nbd-multiconn -@@ -0,0 +1,145 @@ -+#!/usr/bin/env python3 -+# group: rw auto quick -+# -+# Test cases for NBD multi-conn advertisement -+# -+# Copyright (C) 2022 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . 
-+ -+import os -+from contextlib import contextmanager -+import iotests -+from iotests import qemu_img_create, qemu_io -+ -+ -+disk = os.path.join(iotests.test_dir, 'disk') -+size = '4M' -+nbd_sock = os.path.join(iotests.sock_dir, 'nbd_sock') -+nbd_uri = 'nbd+unix:///{}?socket=' + nbd_sock -+ -+ -+@contextmanager -+def open_nbd(export_name): -+ h = nbd.NBD() -+ try: -+ h.connect_uri(nbd_uri.format(export_name)) -+ yield h -+ finally: -+ h.shutdown() -+ -+class TestNbdMulticonn(iotests.QMPTestCase): -+ def setUp(self): -+ qemu_img_create('-f', iotests.imgfmt, disk, size) -+ qemu_io('-c', 'w -P 1 0 2M', '-c', 'w -P 2 2M 2M', disk) -+ -+ self.vm = iotests.VM() -+ self.vm.launch() -+ result = self.vm.qmp('blockdev-add', { -+ 'driver': 'qcow2', -+ 'node-name': 'n', -+ 'file': {'driver': 'file', 'filename': disk} -+ }) -+ self.assert_qmp(result, 'return', {}) -+ -+ def tearDown(self): -+ self.vm.shutdown() -+ os.remove(disk) -+ try: -+ os.remove(nbd_sock) -+ except OSError: -+ pass -+ -+ @contextmanager -+ def run_server(self, max_connections=None): -+ args = { -+ 'addr': { -+ 'type': 'unix', -+ 'data': {'path': nbd_sock} -+ } -+ } -+ if max_connections is not None: -+ args['max-connections'] = max_connections -+ -+ result = self.vm.qmp('nbd-server-start', args) -+ self.assert_qmp(result, 'return', {}) -+ yield -+ -+ result = self.vm.qmp('nbd-server-stop') -+ self.assert_qmp(result, 'return', {}) -+ -+ def add_export(self, name, writable=None): -+ args = { -+ 'type': 'nbd', -+ 'id': name, -+ 'node-name': 'n', -+ 'name': name, -+ } -+ if writable is not None: -+ args['writable'] = writable -+ -+ result = self.vm.qmp('block-export-add', args) -+ self.assert_qmp(result, 'return', {}) -+ -+ def test_default_settings(self): -+ with self.run_server(): -+ self.add_export('r') -+ self.add_export('w', writable=True) -+ with open_nbd('r') as h: -+ self.assertTrue(h.can_multi_conn()) -+ with open_nbd('w') as h: -+ self.assertTrue(h.can_multi_conn()) -+ -+ def 
test_limited_connections(self): -+ with self.run_server(max_connections=1): -+ self.add_export('r') -+ self.add_export('w', writable=True) -+ with open_nbd('r') as h: -+ self.assertFalse(h.can_multi_conn()) -+ with open_nbd('w') as h: -+ self.assertFalse(h.can_multi_conn()) -+ -+ def test_parallel_writes(self): -+ with self.run_server(): -+ self.add_export('w', writable=True) -+ -+ clients = [nbd.NBD() for _ in range(3)] -+ for c in clients: -+ c.connect_uri(nbd_uri.format('w')) -+ self.assertTrue(c.can_multi_conn()) -+ -+ initial_data = clients[0].pread(1024 * 1024, 0) -+ self.assertEqual(initial_data, b'\x01' * 1024 * 1024) -+ -+ updated_data = b'\x03' * 1024 * 1024 -+ clients[1].pwrite(updated_data, 0) -+ clients[2].flush() -+ current_data = clients[0].pread(1024 * 1024, 0) -+ -+ self.assertEqual(updated_data, current_data) -+ -+ for i in range(3): -+ clients[i].shutdown() -+ -+ -+if __name__ == '__main__': -+ try: -+ # Easier to use libnbd than to try and set up parallel -+ # 'qemu-nbd --list' or 'qemu-io' processes, but not all systems -+ # have libnbd installed. -+ import nbd # type: ignore -+ -+ iotests.main(supported_fmts=['qcow2']) -+ except ImportError: -+ iotests.notrun('libnbd not installed') -diff --git a/tests/qemu-iotests/tests/nbd-multiconn.out b/tests/qemu-iotests/tests/nbd-multiconn.out -new file mode 100644 -index 0000000000..8d7e996700 ---- /dev/null -+++ b/tests/qemu-iotests/tests/nbd-multiconn.out -@@ -0,0 +1,5 @@ -+... 
-+---------------------------------------------------------------------- -+Ran 3 tests -+ -+OK -diff --git a/tests/qemu-iotests/tests/nbd-qemu-allocation.out b/tests/qemu-iotests/tests/nbd-qemu-allocation.out -index 0bf1abb063..9d938db24e 100644 ---- a/tests/qemu-iotests/tests/nbd-qemu-allocation.out -+++ b/tests/qemu-iotests/tests/nbd-qemu-allocation.out -@@ -17,7 +17,7 @@ wrote 2097152/2097152 bytes at offset 1048576 - exports available: 1 - export: '' - size: 4194304 -- flags: 0x58f ( readonly flush fua df multi cache ) -+ flags: 0x48f ( readonly flush fua df cache ) - min block: 1 - opt block: 4096 - max block: 33554432 --- -2.31.1 - diff --git a/SOURCES/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch b/SOURCES/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch new file mode 100644 index 0000000..707c80f --- /dev/null +++ b/SOURCES/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch @@ -0,0 +1,325 @@ +From e5834364958a3914d7b8b46b985a1b054728b466 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 19 Jan 2023 11:16:45 +0100 +Subject: [PATCH 2/8] net: stream: add a new option to automatically reconnect +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect +RH-Bugzilla: 2169232 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Acked-by: David Gibson (Red Hat) +RH-Commit: [2/2] 9b87647a9ed2e7c1b91bdfa9d0a736e091c892a5 (lvivier/qemu-kvm-centos) + +In stream mode, if the server shuts down there is currently +no way to reconnect the client to a new server without removing +the NIC device and the netdev backend (or to reboot). + +This patch introduces a reconnect option that specifies a delay +to try to reconnect with the same parameters. + +Add a new test in qtest to test the reconnect option and the +connect/disconnect events. 
+ +Signed-off-by: Laurent Vivier +Signed-off-by: Jason Wang +(cherry picked from commit b95c0d4440950fba6dbef0f781962911fa42abdb) +--- + net/stream.c | 53 ++++++++++++++++++- + qapi/net.json | 7 ++- + qemu-options.hx | 6 +-- + tests/qtest/netdev-socket.c | 101 ++++++++++++++++++++++++++++++++++++ + 4 files changed, 162 insertions(+), 5 deletions(-) + +diff --git a/net/stream.c b/net/stream.c +index 37ff727e0c..9204b4c96e 100644 +--- a/net/stream.c ++++ b/net/stream.c +@@ -39,6 +39,8 @@ + #include "io/channel-socket.h" + #include "io/net-listener.h" + #include "qapi/qapi-events-net.h" ++#include "qapi/qapi-visit-sockets.h" ++#include "qapi/clone-visitor.h" + + typedef struct NetStreamState { + NetClientState nc; +@@ -49,11 +51,15 @@ typedef struct NetStreamState { + guint ioc_write_tag; + SocketReadState rs; + unsigned int send_index; /* number of bytes sent*/ ++ uint32_t reconnect; ++ guint timer_tag; ++ SocketAddress *addr; + } NetStreamState; + + static void net_stream_listen(QIONetListener *listener, + QIOChannelSocket *cioc, + void *opaque); ++static void net_stream_arm_reconnect(NetStreamState *s); + + static gboolean net_stream_writable(QIOChannel *ioc, + GIOCondition condition, +@@ -170,6 +176,7 @@ static gboolean net_stream_send(QIOChannel *ioc, + qemu_set_info_str(&s->nc, "%s", ""); + + qapi_event_send_netdev_stream_disconnected(s->nc.name); ++ net_stream_arm_reconnect(s); + + return G_SOURCE_REMOVE; + } +@@ -187,6 +194,14 @@ static gboolean net_stream_send(QIOChannel *ioc, + static void net_stream_cleanup(NetClientState *nc) + { + NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); ++ if (s->timer_tag) { ++ g_source_remove(s->timer_tag); ++ s->timer_tag = 0; ++ } ++ if (s->addr) { ++ qapi_free_SocketAddress(s->addr); ++ s->addr = NULL; ++ } + if (s->ioc) { + if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) { + if (s->ioc_read_tag) { +@@ -346,12 +361,37 @@ static void net_stream_client_connected(QIOTask *task, gpointer opaque) + error: + 
object_unref(OBJECT(s->ioc)); + s->ioc = NULL; ++ net_stream_arm_reconnect(s); ++} ++ ++static gboolean net_stream_reconnect(gpointer data) ++{ ++ NetStreamState *s = data; ++ QIOChannelSocket *sioc; ++ ++ s->timer_tag = 0; ++ ++ sioc = qio_channel_socket_new(); ++ s->ioc = QIO_CHANNEL(sioc); ++ qio_channel_socket_connect_async(sioc, s->addr, ++ net_stream_client_connected, s, ++ NULL, NULL); ++ return G_SOURCE_REMOVE; ++} ++ ++static void net_stream_arm_reconnect(NetStreamState *s) ++{ ++ if (s->reconnect && s->timer_tag == 0) { ++ s->timer_tag = g_timeout_add_seconds(s->reconnect, ++ net_stream_reconnect, s); ++ } + } + + static int net_stream_client_init(NetClientState *peer, + const char *model, + const char *name, + SocketAddress *addr, ++ uint32_t reconnect, + Error **errp) + { + NetStreamState *s; +@@ -364,6 +404,10 @@ static int net_stream_client_init(NetClientState *peer, + s->ioc = QIO_CHANNEL(sioc); + s->nc.link_down = true; + ++ s->reconnect = reconnect; ++ if (reconnect) { ++ s->addr = QAPI_CLONE(SocketAddress, addr); ++ } + qio_channel_socket_connect_async(sioc, addr, + net_stream_client_connected, s, + NULL, NULL); +@@ -380,7 +424,14 @@ int net_init_stream(const Netdev *netdev, const char *name, + sock = &netdev->u.stream; + + if (!sock->has_server || !sock->server) { +- return net_stream_client_init(peer, "stream", name, sock->addr, errp); ++ return net_stream_client_init(peer, "stream", name, sock->addr, ++ sock->has_reconnect ? 
sock->reconnect : 0, ++ errp); ++ } ++ if (sock->has_reconnect) { ++ error_setg(errp, "'reconnect' option is incompatible with " ++ "socket in server mode"); ++ return -1; + } + return net_stream_server_init(peer, "stream", name, sock->addr, errp); + } +diff --git a/qapi/net.json b/qapi/net.json +index 522ac582ed..d6eb30008b 100644 +--- a/qapi/net.json ++++ b/qapi/net.json +@@ -585,6 +585,10 @@ + # @addr: socket address to listen on (server=true) + # or connect to (server=false) + # @server: create server socket (default: false) ++# @reconnect: For a client socket, if a socket is disconnected, ++# then attempt a reconnect after the given number of seconds. ++# Setting this to zero disables this function. (default: 0) ++# (since 8.0) + # + # Only SocketAddress types 'unix', 'inet' and 'fd' are supported. + # +@@ -593,7 +597,8 @@ + { 'struct': 'NetdevStreamOptions', + 'data': { + 'addr': 'SocketAddress', +- '*server': 'bool' } } ++ '*server': 'bool', ++ '*reconnect': 'uint32' } } + + ## + # @NetdevDgramOptions: +diff --git a/qemu-options.hx b/qemu-options.hx +index ea02ca3a45..48eef4aa2c 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2766,9 +2766,9 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, + "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n" + " configure a network backend to connect to another network\n" + " using an UDP tunnel\n" +- "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off]\n" +- "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off]\n" +- "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n" ++ "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]\n" ++ "-netdev 
stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]\n" ++ "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]\n" + " configure a network backend to connect to another network\n" + " using a socket connection in stream mode.\n" + "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n" +diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c +index 6ba256e173..acc32c378b 100644 +--- a/tests/qtest/netdev-socket.c ++++ b/tests/qtest/netdev-socket.c +@@ -11,6 +11,10 @@ + #include + #include "../unit/socket-helpers.h" + #include "libqtest.h" ++#include "qapi/qmp/qstring.h" ++#include "qemu/sockets.h" ++#include "qapi/qobject-input-visitor.h" ++#include "qapi/qapi-visit-sockets.h" + + #define CONNECTION_TIMEOUT 5 + +@@ -142,6 +146,101 @@ static void test_stream_inet_ipv4(void) + qtest_quit(qts0); + } + ++static void wait_stream_connected(QTestState *qts, const char *id, ++ SocketAddress **addr) ++{ ++ QDict *resp, *data; ++ QString *qstr; ++ QObject *obj; ++ Visitor *v = NULL; ++ ++ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_CONNECTED"); ++ g_assert_nonnull(resp); ++ data = qdict_get_qdict(resp, "data"); ++ g_assert_nonnull(data); ++ ++ qstr = qobject_to(QString, qdict_get(data, "netdev-id")); ++ g_assert_nonnull(data); ++ ++ g_assert(!strcmp(qstring_get_str(qstr), id)); ++ ++ obj = qdict_get(data, "addr"); ++ ++ v = qobject_input_visitor_new(obj); ++ visit_type_SocketAddress(v, NULL, addr, NULL); ++ visit_free(v); ++ qobject_unref(resp); ++} ++ ++static void wait_stream_disconnected(QTestState *qts, const char *id) ++{ ++ QDict *resp, *data; ++ QString *qstr; ++ ++ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_DISCONNECTED"); ++ g_assert_nonnull(resp); ++ data = qdict_get_qdict(resp, "data"); ++ g_assert_nonnull(data); ++ ++ qstr = qobject_to(QString, qdict_get(data, "netdev-id")); ++ 
g_assert_nonnull(data); ++ ++ g_assert(!strcmp(qstring_get_str(qstr), id)); ++ qobject_unref(resp); ++} ++ ++static void test_stream_inet_reconnect(void) ++{ ++ QTestState *qts0, *qts1; ++ int port; ++ SocketAddress *addr; ++ ++ port = inet_get_free_port(false); ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,server=false,id=st0,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off,reconnect=1," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ wait_stream_connected(qts0, "st0", &addr); ++ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); ++ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); ++ qapi_free_SocketAddress(addr); ++ ++ /* kill server */ ++ qtest_quit(qts0); ++ ++ /* check client has been disconnected */ ++ wait_stream_disconnected(qts1, "st0"); ++ ++ /* restart server */ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ /* wait connection events*/ ++ wait_stream_connected(qts0, "st0", &addr); ++ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); ++ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); ++ qapi_free_SocketAddress(addr); ++ ++ wait_stream_connected(qts1, "st0", &addr); ++ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); ++ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); ++ g_assert_cmpint(atoi(addr->u.inet.port), ==, port); ++ qapi_free_SocketAddress(addr); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ + static void test_stream_inet_ipv6(void) + { + QTestState *qts0, *qts1; +@@ -418,6 +517,8 @@ int main(int argc, char **argv) + #ifndef _WIN32 + qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast); + #endif ++ 
qtest_add_func("/netdev/stream/inet/reconnect", ++ test_stream_inet_reconnect); + } + if (has_ipv6) { + qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6); +-- +2.31.1 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch b/SOURCES/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch deleted file mode 100644 index 1bb8ea5..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 03996a8a826c9186e4a16e1b4757f1ef5947a503 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 5 Aug 2022 11:42:14 +0200 -Subject: [PATCH 07/11] pc-bios/s390-ccw: Fix booting with logical block size < - physical block size - -RH-Author: Thomas Huth -RH-MergeRequest: 113: pc-bios/s390-ccw: Fix booting with logical block size < physical block size -RH-Commit: [1/1] a45ff477bc7d7011ea6c4d42a1aade213d1e4690 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2112303 -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Claudio Imbrenda - -For accessing single blocks during boot, it's the logical block size that -matters. (Physical block sizes are rather interesting e.g. for creating -file systems with the correct alignment for speed reasons etc.). -So the s390-ccw bios has to use the logical block size for calculating -sector numbers during the boot phase, the "physical_block_exp" shift -value must not be taken into account. This change fixes the boot process -when the guest hast been installed on a disk where the logical block size -differs from the physical one, e.g. 
if the guest has been installed -like this: - - qemu-system-s390x -nographic -accel kvm -m 2G \ - -drive if=none,id=d1,file=fedora.iso,format=raw,media=cdrom \ - -device virtio-scsi -device scsi-cd,drive=d1 \ - -drive if=none,id=d2,file=test.qcow2,format=qcow2 - -device virtio-blk,drive=d2,physical_block_size=4096,logical_block_size=512 - -Linux correctly uses the logical block size of 512 for the installation, -but the s390-ccw bios tries to boot from a disk with 4096 block size so -far, as long as this patch has not been applied yet (well, it used to work -by accident in the past due to the virtio_assume_scsi() hack that used to -enforce 512 byte sectors on all virtio-block disks, but that hack has been -well removed in commit 5447de2619050a0a4d to fix other scenarios). - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2112303 -Message-Id: <20220805094214.285223-1-thuth@redhat.com> -Reviewed-by: Cornelia Huck -Reviewed-by: Eric Farman -Signed-off-by: Thomas Huth -(cherry picked from commit 393296de19650e1400ca265914cfdeb313725363) ---- - pc-bios/s390-ccw/virtio-blkdev.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c -index 8271c47296..794f99b42c 100644 ---- a/pc-bios/s390-ccw/virtio-blkdev.c -+++ b/pc-bios/s390-ccw/virtio-blkdev.c -@@ -173,7 +173,7 @@ int virtio_get_block_size(void) - - switch (vdev->senseid.cu_model) { - case VIRTIO_ID_BLOCK: -- return vdev->config.blk.blk_size << vdev->config.blk.physical_block_exp; -+ return vdev->config.blk.blk_size; - case VIRTIO_ID_SCSI: - return vdev->scsi_block_size; - } --- -2.31.1 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch b/SOURCES/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch deleted file mode 100644 index b212194..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch +++ /dev/null @@ -1,180 +0,0 @@ -From 
2e38b4ec5c53b2b98539a70105d3046e1c452ab8 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 13/17] pc-bios/s390-ccw: Split virtio-scsi code from - virtio_blk_setup_device() - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [8/10] f49c5fb77e05c9dc09ed9f037e37f6a461e4bba6 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit cf30b7c4a9b2c64518be8037c2e6670aacdb00b9 -Author: Thomas Huth -Date: Mon Jul 4 13:19:00 2022 +0200 - - pc-bios/s390-ccw: Split virtio-scsi code from virtio_blk_setup_device() - - The next patch is going to add more virtio-block specific code to - virtio_blk_setup_device(), and if the virtio-scsi code is also in - there, this is more cumbersome. And the calling function virtio_setup() - in main.c looks at the device type already anyway, so it's more - logical to separate the virtio-scsi stuff into a new function in - virtio-scsi.c instead. 
- - Message-Id: <20220704111903.62400-10-thuth@redhat.com> - Reviewed-by: Eric Farman - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/main.c | 24 +++++++++++++++++------- - pc-bios/s390-ccw/virtio-blkdev.c | 20 ++------------------ - pc-bios/s390-ccw/virtio-scsi.c | 19 ++++++++++++++++++- - pc-bios/s390-ccw/virtio-scsi.h | 2 +- - 4 files changed, 38 insertions(+), 27 deletions(-) - -diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c -index 5d2b7ba94d..13e1d8fdf7 100644 ---- a/pc-bios/s390-ccw/main.c -+++ b/pc-bios/s390-ccw/main.c -@@ -14,6 +14,7 @@ - #include "s390-ccw.h" - #include "cio.h" - #include "virtio.h" -+#include "virtio-scsi.h" - #include "dasd-ipl.h" - - char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE))); -@@ -218,6 +219,7 @@ static int virtio_setup(void) - { - VDev *vdev = virtio_get_device(); - QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS; -+ int ret; - - memcpy(&qipl, early_qipl, sizeof(QemuIplParameters)); - -@@ -225,18 +227,26 @@ static int virtio_setup(void) - menu_setup(); - } - -- if (virtio_get_device_type() == VIRTIO_ID_NET) { -+ switch (vdev->senseid.cu_model) { -+ case VIRTIO_ID_NET: - sclp_print("Network boot device detected\n"); - vdev->netboot_start_addr = qipl.netboot_start_addr; -- } else { -- int ret = virtio_blk_setup_device(blk_schid); -- if (ret) { -- return ret; -- } -+ return 0; -+ case VIRTIO_ID_BLOCK: -+ ret = virtio_blk_setup_device(blk_schid); -+ break; -+ case VIRTIO_ID_SCSI: -+ ret = virtio_scsi_setup_device(blk_schid); -+ break; -+ default: -+ panic("\n! 
No IPL device available !\n"); -+ } -+ -+ if (!ret) { - IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); - } - -- return 0; -+ return ret; - } - - static void ipl_boot_device(void) -diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c -index db1f7f44aa..c175b66a47 100644 ---- a/pc-bios/s390-ccw/virtio-blkdev.c -+++ b/pc-bios/s390-ccw/virtio-blkdev.c -@@ -222,27 +222,11 @@ uint64_t virtio_get_blocks(void) - int virtio_blk_setup_device(SubChannelId schid) - { - VDev *vdev = virtio_get_device(); -- int ret = 0; - - vdev->schid = schid; - virtio_setup_ccw(vdev); - -- switch (vdev->senseid.cu_model) { -- case VIRTIO_ID_BLOCK: -- sclp_print("Using virtio-blk.\n"); -- break; -- case VIRTIO_ID_SCSI: -- IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, -- "Config: sense size mismatch"); -- IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE, -- "Config: CDB size mismatch"); -+ sclp_print("Using virtio-blk.\n"); - -- sclp_print("Using virtio-scsi.\n"); -- ret = virtio_scsi_setup(vdev); -- break; -- default: -- panic("\n! 
No IPL device available !\n"); -- } -- -- return ret; -+ return 0; - } -diff --git a/pc-bios/s390-ccw/virtio-scsi.c b/pc-bios/s390-ccw/virtio-scsi.c -index 2c8d0f3097..3b7069270c 100644 ---- a/pc-bios/s390-ccw/virtio-scsi.c -+++ b/pc-bios/s390-ccw/virtio-scsi.c -@@ -329,7 +329,7 @@ static void scsi_parse_capacity_report(void *data, - } - } - --int virtio_scsi_setup(VDev *vdev) -+static int virtio_scsi_setup(VDev *vdev) - { - int retry_test_unit_ready = 3; - uint8_t data[256]; -@@ -430,3 +430,20 @@ int virtio_scsi_setup(VDev *vdev) - - return 0; - } -+ -+int virtio_scsi_setup_device(SubChannelId schid) -+{ -+ VDev *vdev = virtio_get_device(); -+ -+ vdev->schid = schid; -+ virtio_setup_ccw(vdev); -+ -+ IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, -+ "Config: sense size mismatch"); -+ IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE, -+ "Config: CDB size mismatch"); -+ -+ sclp_print("Using virtio-scsi.\n"); -+ -+ return virtio_scsi_setup(vdev); -+} -diff --git a/pc-bios/s390-ccw/virtio-scsi.h b/pc-bios/s390-ccw/virtio-scsi.h -index 4b14c2c2f9..e6b6cd4815 100644 ---- a/pc-bios/s390-ccw/virtio-scsi.h -+++ b/pc-bios/s390-ccw/virtio-scsi.h -@@ -67,8 +67,8 @@ static inline bool virtio_scsi_response_ok(const VirtioScsiCmdResp *r) - return r->response == VIRTIO_SCSI_S_OK && r->status == CDB_STATUS_GOOD; - } - --int virtio_scsi_setup(VDev *vdev); - int virtio_scsi_read_many(VDev *vdev, - ulong sector, void *load_addr, int sec_num); -+int virtio_scsi_setup_device(SubChannelId schid); - - #endif /* VIRTIO_SCSI_H */ --- -2.31.1 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch b/SOURCES/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch deleted file mode 100644 index 231a8a0..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 64fa56e0520215e3909e442f09d8073c1870648a Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 
2022 20:49:01 +0200 -Subject: [PATCH 07/17] pc-bios/s390-ccw/bootmap: Improve the guessing logic in - zipl_load_vblk() - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [2/10] ca8f5e847617cf4ac2fd6c38edb2982f32fa3eba (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit 422865f6672ee1482b98d18321b55c1ecfb06c82 -Author: Thomas Huth -Date: Mon Jul 4 13:18:54 2022 +0200 - - pc-bios/s390-ccw/bootmap: Improve the guessing logic in zipl_load_vblk() - - The logic of trying an final ISO or ECKD boot on virtio-block devices is - very weird: Since the geometry hardly ever matches in virtio_disk_is_scsi(), - virtio_blk_setup_device() always sets a "guessed" disk geometry via - virtio_assume_scsi() (which is certainly also wrong in a lot of cases). - - zipl_load_vblk() then sees that there's been a "virtio_guessed_disk_nature" - and tries to fix up the geometry again via virtio_assume_iso9660() before - always trying to do ipl_iso_el_torito(). That's a very brain-twisting - way of attempting to boot from ISO images, which won't work anymore after - the following patches that will clean up the virtio_assume_scsi() mess - (and thus get rid of the "virtio_guessed_disk_nature" here). - - Let's try a better approach instead: ISO files always have a magic - string "CD001" at offset 0x8001 (see e.g. the ECMA-119 specification) - which we can use to decide whether we should try to boot in ISO 9660 - mode (which we should also try if we see a sector size of 2048). - - And if we were not able to boot in ISO mode here, the final boot attempt - before panicking is to boot in ECKD mode. 
Since this is our last boot - attempt anyway, simply always assume the ECKD geometry here (if the sector - size was not 4096 yet), so that we also do not depend on the guessed disk - geometry from virtio_blk_setup_device() here anymore. - - Message-Id: <20220704111903.62400-4-thuth@redhat.com> - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/bootmap.c | 27 +++++++++++++++++++++++---- - 1 file changed, 23 insertions(+), 4 deletions(-) - -diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c -index 56411ab3b6..994e59c0b0 100644 ---- a/pc-bios/s390-ccw/bootmap.c -+++ b/pc-bios/s390-ccw/bootmap.c -@@ -780,18 +780,37 @@ static void ipl_iso_el_torito(void) - } - } - -+/** -+ * Detect whether we're trying to boot from an .ISO image. -+ * These always have a signature string "CD001" at offset 0x8001. -+ */ -+static bool has_iso_signature(void) -+{ -+ int blksize = virtio_get_block_size(); -+ -+ if (!blksize || virtio_read(0x8000 / blksize, sec)) { -+ return false; -+ } -+ -+ return !memcmp("CD001", &sec[1], 5); -+} -+ - /*********************************************************************** - * Bus specific IPL sequences - */ - - static void zipl_load_vblk(void) - { -- if (virtio_guessed_disk_nature()) { -- virtio_assume_iso9660(); -+ int blksize = virtio_get_block_size(); -+ -+ if (blksize == VIRTIO_ISO_BLOCK_SIZE || has_iso_signature()) { -+ if (blksize != VIRTIO_ISO_BLOCK_SIZE) { -+ virtio_assume_iso9660(); -+ } -+ ipl_iso_el_torito(); - } -- ipl_iso_el_torito(); - -- if (virtio_guessed_disk_nature()) { -+ if (blksize != VIRTIO_DASD_DEFAULT_BLOCK_SIZE) { - sclp_print("Using guessed DASD geometry.\n"); - virtio_assume_eckd(); - } --- -2.31.1 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch b/SOURCES/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch deleted file mode 100644 index 00601aa..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch 
+++ /dev/null @@ -1,78 +0,0 @@ -From 56674ee1f25f12978a6a8a1390e11b55b3e0fabe Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 15/17] pc-bios/s390-ccw/netboot.mak: Ignore Clang's warnings - about GNU extensions - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [10/10] 037dab4df23ebb2b42871bca8c842a53a7204b50 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit e2269220acb03e6c6a460c3090d804835e202239 -Author: Thomas Huth -Date: Mon Jul 4 13:19:03 2022 +0200 - - pc-bios/s390-ccw/netboot.mak: Ignore Clang's warnings about GNU extensions - - When compiling the s390-ccw bios with Clang (v14.0), there is currently - an unuseful warning like this: - - CC pc-bios/s390-ccw/ipv6.o - ../../roms/SLOF/lib/libnet/ipv6.c:447:18: warning: variable length array - folded to constant array as an extension [-Wgnu-folding-constant] - unsigned short raw[ip6size]; - ^ - - SLOF is currently GCC-only and cannot be compiled with Clang yet, so - it is expected that such extensions sneak in there - and as long as - we don't want to compile the code with a compiler that is neither GCC - or Clang, it is also not necessary to avoid such extensions. - - Thus these GNU-extension related warnings are completely useless in - the s390-ccw bios, especially in the code that is coming from SLOF, - so we should simply disable the related warnings here now. 
- - Message-Id: <20220704111903.62400-13-thuth@redhat.com> - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/netboot.mak | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/pc-bios/s390-ccw/netboot.mak b/pc-bios/s390-ccw/netboot.mak -index 68b4d7edcb..ad41898cb6 100644 ---- a/pc-bios/s390-ccw/netboot.mak -+++ b/pc-bios/s390-ccw/netboot.mak -@@ -16,9 +16,12 @@ s390-netboot.elf: $(NETOBJS) libnet.a libc.a - s390-netboot.img: s390-netboot.elf - $(call quiet-command,$(STRIP) --strip-unneeded $< -o $@,"STRIP","$(TARGET_DIR)$@") - -+# SLOF is GCC-only, so ignore warnings about GNU extensions with Clang here -+NO_GNU_WARN := $(call cc-option,-Werror $(QEMU_CFLAGS),-Wno-gnu) -+ - # libc files: - --LIBC_CFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(LIBC_INC) $(LIBNET_INC) \ -+LIBC_CFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(NO_GNU_WARN) $(LIBC_INC) $(LIBNET_INC) \ - -MMD -MP -MT $@ -MF $(@:%.o=%.d) - - CTYPE_OBJS = isdigit.o isxdigit.o toupper.o -@@ -52,7 +55,7 @@ libc.a: $(LIBCOBJS) - - LIBNETOBJS := args.o dhcp.o dns.o icmpv6.o ipv6.o tcp.o udp.o bootp.o \ - dhcpv6.o ethernet.o ipv4.o ndp.o tftp.o pxelinux.o --LIBNETCFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(LIBC_INC) $(LIBNET_INC) \ -+LIBNETCFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(NO_GNU_WARN) $(LIBC_INC) $(LIBNET_INC) \ - -DDHCPARCH=0x1F -MMD -MP -MT $@ -MF $(@:%.o=%.d) - - %.o : $(SLOF_DIR)/lib/libnet/%.c --- -2.31.1 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch deleted file mode 100644 index 5e4b689..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 430e76fd964390db86c8486f76b916a1cf7f74c2 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 12/17] pc-bios/s390-ccw/virtio: Beautify the code for reading - virtqueue configuration - -RH-Author: Thomas Huth 
-RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [7/10] b15c06b4c5431837672b6cb5d57d09da20718441 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit 070824885741f5d2a66626d3c4ecb2773c8e0552 -Author: Thomas Huth -Date: Mon Jul 4 13:18:59 2022 +0200 - - pc-bios/s390-ccw/virtio: Beautify the code for reading virtqueue configuration - - It looks nicer if we separate the run_ccw() from the IPL_assert() - statement, and the error message should talk about "virtio device" - instead of "block device", since this code is nowadays used for - non-block (i.e. network) devices, too. - - Message-Id: <20220704111903.62400-9-thuth@redhat.com> - Reviewed-by: Cornelia Huck - Reviewed-by: Eric Farman - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/virtio.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c -index d8c2b52710..f37510f312 100644 ---- a/pc-bios/s390-ccw/virtio.c -+++ b/pc-bios/s390-ccw/virtio.c -@@ -289,9 +289,8 @@ void virtio_setup_ccw(VDev *vdev) - .num = 0, - }; - -- IPL_assert( -- run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false) == 0, -- "Could not get block device VQ configuration"); -+ rc = run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false); -+ IPL_assert(rc == 0, "Could not get virtio device VQ configuration"); - info.num = config.num; - vring_init(&vdev->vrings[i], &info); - vdev->vrings[i].schid = vdev->schid; --- -2.31.1 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch deleted file mode 100644 index 04ab605..0000000 --- 
a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 7d4f2454f95bfc087ad3f2fe3bc4625dcea3568e Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 06/17] pc-bios/s390-ccw/virtio: Introduce a macro for the DASD - block size - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [1/10] 71033934e1e9988bcf71362e02665ceb7449009d (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit 1f2c2ee48e87ea743f8e23cc7569dd26c4cf9623 -Author: Thomas Huth -Date: Mon Jul 4 13:18:53 2022 +0200 - - pc-bios/s390-ccw/virtio: Introduce a macro for the DASD block size - - Use VIRTIO_DASD_DEFAULT_BLOCK_SIZE instead of the magic value 4096. - - Message-Id: <20220704111903.62400-3-thuth@redhat.com> - Reviewed-by: Eric Farman - Reviewed-by: Cornelia Huck - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/virtio-blkdev.c | 2 +- - pc-bios/s390-ccw/virtio.h | 1 + - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c -index 7d35050292..6483307630 100644 ---- a/pc-bios/s390-ccw/virtio-blkdev.c -+++ b/pc-bios/s390-ccw/virtio-blkdev.c -@@ -155,7 +155,7 @@ void virtio_assume_eckd(void) - vdev->config.blk.physical_block_exp = 0; - switch (vdev->senseid.cu_model) { - case VIRTIO_ID_BLOCK: -- vdev->config.blk.blk_size = 4096; -+ vdev->config.blk.blk_size = VIRTIO_DASD_DEFAULT_BLOCK_SIZE; - break; - case VIRTIO_ID_SCSI: - vdev->config.blk.blk_size = vdev->scsi_block_size; -diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h -index 19fceb6495..9e410bde6f 100644 ---- a/pc-bios/s390-ccw/virtio.h -+++ b/pc-bios/s390-ccw/virtio.h -@@ -198,6 +198,7 
@@ extern int virtio_read_many(ulong sector, void *load_addr, int sec_num); - #define VIRTIO_SECTOR_SIZE 512 - #define VIRTIO_ISO_BLOCK_SIZE 2048 - #define VIRTIO_SCSI_BLOCK_SIZE 512 -+#define VIRTIO_DASD_DEFAULT_BLOCK_SIZE 4096 - - static inline ulong virtio_sector_adjust(ulong sector) - { --- -2.31.1 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch deleted file mode 100644 index 41ae538..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 20f8724d0837acbe642c8c7698a4b256f34c1209 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 11/17] pc-bios/s390-ccw/virtio: Read device config after - feature negotiation - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [6/10] 54d21e430b2dfba9e0a0823d6bb8ec7e7f8ff2ff (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit aa5c69ce99411c4886bcd051f288afc02b6d968d -Author: Thomas Huth -Date: Mon Jul 4 13:18:58 2022 +0200 - - pc-bios/s390-ccw/virtio: Read device config after feature negotiation - - Feature negotiation should be done first, since some fields in the - config area can depend on the negotiated features and thus should - rather be read afterwards. - - While we're at it, also adjust the error message here a little bit - (the code is nowadays used for non-block virtio devices, too). 
- - Message-Id: <20220704111903.62400-8-thuth@redhat.com> - Reviewed-by: Eric Farman - Reviewed-by: Cornelia Huck - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/virtio.c | 7 +++---- - 1 file changed, 3 insertions(+), 4 deletions(-) - -diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c -index 4e85a2eb82..d8c2b52710 100644 ---- a/pc-bios/s390-ccw/virtio.c -+++ b/pc-bios/s390-ccw/virtio.c -@@ -262,10 +262,6 @@ void virtio_setup_ccw(VDev *vdev) - rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); - IPL_assert(rc == 0, "Could not write DRIVER status to host"); - -- IPL_assert( -- run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0, -- "Could not get block device configuration"); -- - /* Feature negotiation */ - for (i = 0; i < ARRAY_SIZE(vdev->guest_features); i++) { - feats.features = 0; -@@ -278,6 +274,9 @@ void virtio_setup_ccw(VDev *vdev) - IPL_assert(rc == 0, "Could not set features bits"); - } - -+ rc = run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false); -+ IPL_assert(rc == 0, "Could not get virtio device configuration"); -+ - for (i = 0; i < vdev->nr_vqs; i++) { - VqInfo info = { - .queue = (unsigned long long) ring_area + (i * VIRTIO_RING_SIZE), --- -2.31.1 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch deleted file mode 100644 index e976047..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 303fb3ddcdbbd1373c5b1aa28e03f90507e217f3 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 10/17] pc-bios/s390-ccw/virtio: Set missing status bits while - initializing - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [5/10] 
4bc44d9adae055fb60b79d04a2f08535b4d38d2b (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit 175aa06a152ef6b58ba9b2e47a1296b024dea70c -Author: Thomas Huth -Date: Mon Jul 4 13:18:57 2022 +0200 - - pc-bios/s390-ccw/virtio: Set missing status bits while initializing - - According chapter "3.1.1 Driver Requirements: Device Initialization" - of the Virtio specification (v1.1), a driver for a device has to set - the ACKNOWLEDGE and DRIVER bits in the status field after resetting - the device. The s390-ccw bios skipped these steps so far and seems - like QEMU never cared. Anyway, it's better to follow the spec, so - let's set these bits now in the right spots, too. - - Message-Id: <20220704111903.62400-7-thuth@redhat.com> - Acked-by: Christian Borntraeger - Reviewed-by: Cornelia Huck - Reviewed-by: Eric Farman - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/virtio.c | 18 ++++++++++++++---- - 1 file changed, 14 insertions(+), 4 deletions(-) - -diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c -index 5d2c6e3381..4e85a2eb82 100644 ---- a/pc-bios/s390-ccw/virtio.c -+++ b/pc-bios/s390-ccw/virtio.c -@@ -220,7 +220,7 @@ int virtio_run(VDev *vdev, int vqid, VirtioCmd *cmd) - void virtio_setup_ccw(VDev *vdev) - { - int i, rc, cfg_size = 0; -- unsigned char status = VIRTIO_CONFIG_S_DRIVER_OK; -+ uint8_t status; - struct VirtioFeatureDesc { - uint32_t features; - uint8_t index; -@@ -234,6 +234,10 @@ void virtio_setup_ccw(VDev *vdev) - - run_ccw(vdev, CCW_CMD_VDEV_RESET, NULL, 0, false); - -+ status = VIRTIO_CONFIG_S_ACKNOWLEDGE; -+ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); -+ IPL_assert(rc == 0, "Could not write ACKNOWLEDGE status to host"); -+ - switch (vdev->senseid.cu_model) { - case VIRTIO_ID_NET: - vdev->nr_vqs = 2; -@@ -253,6 +257,11 @@ void 
virtio_setup_ccw(VDev *vdev) - default: - panic("Unsupported virtio device\n"); - } -+ -+ status |= VIRTIO_CONFIG_S_DRIVER; -+ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); -+ IPL_assert(rc == 0, "Could not write DRIVER status to host"); -+ - IPL_assert( - run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0, - "Could not get block device configuration"); -@@ -291,9 +300,10 @@ void virtio_setup_ccw(VDev *vdev) - run_ccw(vdev, CCW_CMD_SET_VQ, &info, sizeof(info), false) == 0, - "Cannot set VQ info"); - } -- IPL_assert( -- run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false) == 0, -- "Could not write status to host"); -+ -+ status |= VIRTIO_CONFIG_S_DRIVER_OK; -+ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); -+ IPL_assert(rc == 0, "Could not write DRIVER_OK status to host"); - } - - bool virtio_is_supported(SubChannelId schid) --- -2.31.1 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch b/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch deleted file mode 100644 index 109b98e..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch +++ /dev/null @@ -1,101 +0,0 @@ -From d3335a98a7b6e084aadf4907968536a67cf8e64c Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 09/17] pc-bios/s390-ccw/virtio-blkdev: Remove - virtio_assume_scsi() - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [4/10] bf27f75344f220a03475a2918ed49ec9cd5ba317 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit 5447de2619050a0a4dd480b97f88a9b58da360d1 -Author: Thomas Huth -Date: Mon Jul 4 13:18:56 2022 +0200 - - 
pc-bios/s390-ccw/virtio-blkdev: Remove virtio_assume_scsi() - - The virtio_assume_scsi() function is very questionable: First, it - is only called for virtio-blk, and not for virtio-scsi, so the naming - is already quite confusing. Second, it is called if we detected a - "invalid" IPL disk, trying to fix it by blindly setting a sector - size of 512. This of course won't work in most cases since disks - might have a different sector size for a reason. - - Thus let's remove this strange function now. The calling code can - also be removed completely, since there is another spot in main.c - that does "IPL_assert(virtio_ipl_disk_is_valid(), ...)" to make - sure that we do not try to IPL from an invalid device. - - Message-Id: <20220704111903.62400-6-thuth@redhat.com> - Reviewed-by: Eric Farman - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/virtio-blkdev.c | 24 ------------------------ - pc-bios/s390-ccw/virtio.h | 1 - - 2 files changed, 25 deletions(-) - -diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c -index 7e13155589..db1f7f44aa 100644 ---- a/pc-bios/s390-ccw/virtio-blkdev.c -+++ b/pc-bios/s390-ccw/virtio-blkdev.c -@@ -112,23 +112,6 @@ VirtioGDN virtio_guessed_disk_nature(void) - return virtio_get_device()->guessed_disk_nature; - } - --void virtio_assume_scsi(void) --{ -- VDev *vdev = virtio_get_device(); -- -- switch (vdev->senseid.cu_model) { -- case VIRTIO_ID_BLOCK: -- vdev->guessed_disk_nature = VIRTIO_GDN_SCSI; -- vdev->config.blk.blk_size = VIRTIO_SCSI_BLOCK_SIZE; -- vdev->config.blk.physical_block_exp = 0; -- vdev->blk_factor = 1; -- break; -- case VIRTIO_ID_SCSI: -- vdev->scsi_block_size = VIRTIO_SCSI_BLOCK_SIZE; -- break; -- } --} -- - void virtio_assume_iso9660(void) - { - VDev *vdev = virtio_get_device(); -@@ -247,13 +230,6 @@ int virtio_blk_setup_device(SubChannelId schid) - switch (vdev->senseid.cu_model) { - case VIRTIO_ID_BLOCK: - sclp_print("Using virtio-blk.\n"); -- if 
(!virtio_ipl_disk_is_valid()) { -- /* make sure all getters but blocksize return 0 for -- * invalid IPL disk -- */ -- memset(&vdev->config.blk, 0, sizeof(vdev->config.blk)); -- virtio_assume_scsi(); -- } - break; - case VIRTIO_ID_SCSI: - IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, -diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h -index 241730effe..600ba5052b 100644 ---- a/pc-bios/s390-ccw/virtio.h -+++ b/pc-bios/s390-ccw/virtio.h -@@ -182,7 +182,6 @@ enum guessed_disk_nature_type { - typedef enum guessed_disk_nature_type VirtioGDN; - - VirtioGDN virtio_guessed_disk_nature(void); --void virtio_assume_scsi(void); - void virtio_assume_eckd(void); - void virtio_assume_iso9660(void); - --- -2.31.1 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch b/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch deleted file mode 100644 index 8bc7a11..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch +++ /dev/null @@ -1,63 +0,0 @@ -From db58915fcaf3d24b64fe2c34cc15b5596b9a81bb Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 14/17] pc-bios/s390-ccw/virtio-blkdev: Request the right - feature bits - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [9/10] 9dcd8c2f659f366f9487ab6473d1f0d7778b40a7 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit 9125a314cca4a1838b09305a87d8efb98f80ab67 -Author: Thomas Huth -Date: Mon Jul 4 13:19:01 2022 +0200 - - pc-bios/s390-ccw/virtio-blkdev: Request the right feature bits - - The virtio-blk code uses the block size and geometry fields in the - config area. 
According to the virtio-spec, these have to be negotiated - with the right feature bits during initialization, otherwise they - might not be available. QEMU is so far very forgiving and always - provides them, but we should not rely on this behavior, so let's - better request them properly via the VIRTIO_BLK_F_GEOMETRY and - VIRTIO_BLK_F_BLK_SIZE feature bits. - - Message-Id: <20220704111903.62400-11-thuth@redhat.com> - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/virtio-blkdev.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c -index c175b66a47..8271c47296 100644 ---- a/pc-bios/s390-ccw/virtio-blkdev.c -+++ b/pc-bios/s390-ccw/virtio-blkdev.c -@@ -13,6 +13,9 @@ - #include "virtio.h" - #include "virtio-scsi.h" - -+#define VIRTIO_BLK_F_GEOMETRY (1 << 4) -+#define VIRTIO_BLK_F_BLK_SIZE (1 << 6) -+ - static int virtio_blk_read_many(VDev *vdev, ulong sector, void *load_addr, - int sec_num) - { -@@ -223,6 +226,7 @@ int virtio_blk_setup_device(SubChannelId schid) - { - VDev *vdev = virtio_get_device(); - -+ vdev->guest_features[0] = VIRTIO_BLK_F_GEOMETRY | VIRTIO_BLK_F_BLK_SIZE; - vdev->schid = schid; - virtio_setup_ccw(vdev); - --- -2.31.1 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch b/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch deleted file mode 100644 index 818e515..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch +++ /dev/null @@ -1,124 +0,0 @@ -From f07e4629a7c58407f903810a038660c88c6a6315 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 08/17] pc-bios/s390-ccw/virtio-blkdev: Simplify/fix - virtio_ipl_disk_is_valid() - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [3/10] 
fb06830a3e50d9da3d84913b50bb227865cc44b3 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit bbf615f7b707f009ef8e757d170902ad33b90644 -Author: Thomas Huth -Date: Mon Jul 4 13:18:55 2022 +0200 - - pc-bios/s390-ccw/virtio-blkdev: Simplify/fix virtio_ipl_disk_is_valid() - - The s390-ccw bios fails to boot if the boot disk is a virtio-blk - disk with a sector size of 4096. For example: - - dasdfmt -b 4096 -d cdl -y -p -M quick /dev/dasdX - fdasd -a /dev/dasdX - install a guest onto /dev/dasdX1 using virtio-blk - qemu-system-s390x -nographic -hda /dev/dasdX1 - - The bios then bails out with: - - ! Cannot read block 0 ! - - Looking at virtio_ipl_disk_is_valid() and especially the function - virtio_disk_is_scsi(), it does not really make sense that we expect - only such a limited disk geometry (like a block size of 512) for - our boot disks. Let's relax the check and allow everything that - remotely looks like a sane disk. 
- - Message-Id: <20220704111903.62400-5-thuth@redhat.com> - Reviewed-by: Eric Farman - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/virtio-blkdev.c | 41 ++++++-------------------------- - pc-bios/s390-ccw/virtio.h | 2 -- - 2 files changed, 7 insertions(+), 36 deletions(-) - -diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c -index 6483307630..7e13155589 100644 ---- a/pc-bios/s390-ccw/virtio-blkdev.c -+++ b/pc-bios/s390-ccw/virtio-blkdev.c -@@ -166,46 +166,19 @@ void virtio_assume_eckd(void) - virtio_eckd_sectors_for_block_size(vdev->config.blk.blk_size); - } - --bool virtio_disk_is_scsi(void) --{ -- VDev *vdev = virtio_get_device(); -- -- if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI) { -- return true; -- } -- switch (vdev->senseid.cu_model) { -- case VIRTIO_ID_BLOCK: -- return (vdev->config.blk.geometry.heads == 255) -- && (vdev->config.blk.geometry.sectors == 63) -- && (virtio_get_block_size() == VIRTIO_SCSI_BLOCK_SIZE); -- case VIRTIO_ID_SCSI: -- return true; -- } -- return false; --} -- --bool virtio_disk_is_eckd(void) -+bool virtio_ipl_disk_is_valid(void) - { -+ int blksize = virtio_get_block_size(); - VDev *vdev = virtio_get_device(); -- const int block_size = virtio_get_block_size(); - -- if (vdev->guessed_disk_nature == VIRTIO_GDN_DASD) { -+ if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI || -+ vdev->guessed_disk_nature == VIRTIO_GDN_DASD) { - return true; - } -- switch (vdev->senseid.cu_model) { -- case VIRTIO_ID_BLOCK: -- return (vdev->config.blk.geometry.heads == 15) -- && (vdev->config.blk.geometry.sectors == -- virtio_eckd_sectors_for_block_size(block_size)); -- case VIRTIO_ID_SCSI: -- return false; -- } -- return false; --} - --bool virtio_ipl_disk_is_valid(void) --{ -- return virtio_disk_is_scsi() || virtio_disk_is_eckd(); -+ return (vdev->senseid.cu_model == VIRTIO_ID_BLOCK || -+ vdev->senseid.cu_model == VIRTIO_ID_SCSI) && -+ blksize >= 512 && blksize <= 4096; - } - - int 
virtio_get_block_size(void) -diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h -index 9e410bde6f..241730effe 100644 ---- a/pc-bios/s390-ccw/virtio.h -+++ b/pc-bios/s390-ccw/virtio.h -@@ -186,8 +186,6 @@ void virtio_assume_scsi(void); - void virtio_assume_eckd(void); - void virtio_assume_iso9660(void); - --extern bool virtio_disk_is_scsi(void); --extern bool virtio_disk_is_eckd(void); - extern bool virtio_ipl_disk_is_valid(void); - extern int virtio_get_block_size(void); - extern uint8_t virtio_get_heads(void); --- -2.31.1 - diff --git a/SOURCES/kvm-physmem-add-missing-memory-barrier.patch b/SOURCES/kvm-physmem-add-missing-memory-barrier.patch new file mode 100644 index 0000000..3eafa78 --- /dev/null +++ b/SOURCES/kvm-physmem-add-missing-memory-barrier.patch @@ -0,0 +1,55 @@ +From 0dd4be411e35f00d006d89a15d9161f5d8783c1d Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 10/12] physmem: add missing memory barrier + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [7/9] ee4875cb8c564f0510e48b00a5d95c0e6ea6301b (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit 33828ca11da08436e1b32f3e79dabce3061a0427 +Author: Paolo Bonzini +Date: Fri Mar 3 14:36:32 2023 +0100 + + physmem: add missing memory barrier + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + softmmu/physmem.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/softmmu/physmem.c b/softmmu/physmem.c +index 1b606a3002..772c9896cd 100644 +--- a/softmmu/physmem.c ++++ b/softmmu/physmem.c +@@ -3117,6 +3117,8 @@ void cpu_register_map_client(QEMUBH *bh) + qemu_mutex_lock(&map_client_list_lock); + 
client->bh = bh; + QLIST_INSERT_HEAD(&map_client_list, client, link); ++ /* Write map_client_list before reading in_use. */ ++ smp_mb(); + if (!qatomic_read(&bounce.in_use)) { + cpu_notify_map_clients_locked(); + } +@@ -3309,6 +3311,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, + qemu_vfree(bounce.buffer); + bounce.buffer = NULL; + memory_region_unref(bounce.mr); ++ /* Clear in_use before reading map_client_list. */ + qatomic_mb_set(&bounce.in_use, false); + cpu_notify_map_clients(); + } +-- +2.39.1 + diff --git a/SOURCES/kvm-qapi-machine.json-Add-cluster-id.patch b/SOURCES/kvm-qapi-machine.json-Add-cluster-id.patch deleted file mode 100644 index 2b2a22a..0000000 --- a/SOURCES/kvm-qapi-machine.json-Add-cluster-id.patch +++ /dev/null @@ -1,126 +0,0 @@ -From e97c563f7146098119839aa146a6f25070eb7148 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 11 May 2022 18:01:02 +0800 -Subject: [PATCH 01/16] qapi/machine.json: Add cluster-id - -RH-Author: Gavin Shan -RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology -RH-Commit: [1/6] 44d7d83008c6d28485ae44f7cced792f4987b919 (gwshan/qemu-rhel-9) -RH-Bugzilla: 2041823 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 - -This adds cluster-id in CPU instance properties, which will be used -by arm/virt machine. Besides, the cluster-id is also verified or -dumped in various spots: - - * hw/core/machine.c::machine_set_cpu_numa_node() to associate - CPU with its NUMA node. - - * hw/core/machine.c::machine_numa_finish_cpu_init() to record - CPU slots with no NUMA mapping set. - - * hw/core/machine-hmp-cmds.c::hmp_hotpluggable_cpus() to dump - cluster-id. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Yanan Wang -Acked-by: Igor Mammedov -Message-id: 20220503140304.855514-2-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 1dcf7001d4bae651129d46d5628b29e93a411d0b) -Signed-off-by: Gavin Shan ---- - hw/core/machine-hmp-cmds.c | 4 ++++ - hw/core/machine.c | 16 ++++++++++++++++ - qapi/machine.json | 6 ++++-- - 3 files changed, 24 insertions(+), 2 deletions(-) - -diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c -index 4e2f319aeb..5cb5eecbfc 100644 ---- a/hw/core/machine-hmp-cmds.c -+++ b/hw/core/machine-hmp-cmds.c -@@ -77,6 +77,10 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict) - if (c->has_die_id) { - monitor_printf(mon, " die-id: \"%" PRIu64 "\"\n", c->die_id); - } -+ if (c->has_cluster_id) { -+ monitor_printf(mon, " cluster-id: \"%" PRIu64 "\"\n", -+ c->cluster_id); -+ } - if (c->has_core_id) { - monitor_printf(mon, " core-id: \"%" PRIu64 "\"\n", c->core_id); - } -diff --git a/hw/core/machine.c b/hw/core/machine.c -index dffc3ef4ab..168f4de910 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -890,6 +890,11 @@ void machine_set_cpu_numa_node(MachineState *machine, - return; - } - -+ if (props->has_cluster_id && !slot->props.has_cluster_id) { -+ error_setg(errp, "cluster-id is not supported"); -+ return; -+ } -+ - if (props->has_socket_id && !slot->props.has_socket_id) { - error_setg(errp, "socket-id is not supported"); - return; -@@ -909,6 +914,11 @@ void machine_set_cpu_numa_node(MachineState *machine, - continue; - } - -+ if (props->has_cluster_id && -+ props->cluster_id != slot->props.cluster_id) { -+ continue; -+ } -+ - if (props->has_die_id && props->die_id != slot->props.die_id) { - continue; - } -@@ -1203,6 +1213,12 @@ static char *cpu_slot_to_string(const CPUArchId *cpu) - } - g_string_append_printf(s, "die-id: %"PRId64, cpu->props.die_id); - } -+ if (cpu->props.has_cluster_id) { -+ if (s->len) { -+ g_string_append_printf(s, ", "); -+ } -+ 
g_string_append_printf(s, "cluster-id: %"PRId64, cpu->props.cluster_id); -+ } - if (cpu->props.has_core_id) { - if (s->len) { - g_string_append_printf(s, ", "); -diff --git a/qapi/machine.json b/qapi/machine.json -index d25a481ce4..4c417e32a5 100644 ---- a/qapi/machine.json -+++ b/qapi/machine.json -@@ -868,10 +868,11 @@ - # @node-id: NUMA node ID the CPU belongs to - # @socket-id: socket number within node/board the CPU belongs to - # @die-id: die number within socket the CPU belongs to (since 4.1) --# @core-id: core number within die the CPU belongs to -+# @cluster-id: cluster number within die the CPU belongs to (since 7.1) -+# @core-id: core number within cluster the CPU belongs to - # @thread-id: thread number within core the CPU belongs to - # --# Note: currently there are 5 properties that could be present -+# Note: currently there are 6 properties that could be present - # but management should be prepared to pass through other - # properties with device_add command to allow for future - # interface extension. 
This also requires the filed names to be kept in -@@ -883,6 +884,7 @@ - 'data': { '*node-id': 'int', - '*socket-id': 'int', - '*die-id': 'int', -+ '*cluster-id': 'int', - '*core-id': 'int', - '*thread-id': 'int' - } --- -2.31.1 - diff --git a/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch b/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch new file mode 100644 index 0000000..acc8c7d --- /dev/null +++ b/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch @@ -0,0 +1,177 @@ +From 1fdc864f9ac927f3ea407f35f6771a4b2e8f509f Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 04/12] qatomic: add smp_mb__before/after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [1/9] e8d0b64670bff778d275b1fb477dcee0c109251a (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit ff00bed1897c3d27adc5b0cec6f6eeb5a7d13176 +Author: Paolo Bonzini +Date: Thu Mar 2 11:10:56 2023 +0100 + + qatomic: add smp_mb__before/after_rmw() + + On ARM, seqcst loads and stores (which QEMU does not use) are compiled + respectively as LDAR and STLR instructions. Even though LDAR is + also used for load-acquire operations, it also waits for all STLRs to + leave the store buffer. Thus, LDAR and STLR alone are load-acquire + and store-release operations, but LDAR also provides store-against-load + ordering as long as the previous store is a STLR. + + Compare this to ARMv7, where store-release is DMB+STR and load-acquire + is LDR+DMB, but an additional DMB is needed between store-seqcst and + load-seqcst (e.g. DMB+STR+DMB+LDR+DMB); or with x86, where MOV provides + load-acquire and store-release semantics and the two can be reordered. 
+ + Likewise, on ARM sequentially consistent read-modify-write operations only + need to use LDAXR and STLXR respectively for the load and the store, while + on x86 they need to use the stronger LOCK prefix. + + In a strange twist of events, however, the _stronger_ semantics + of the ARM instructions can end up causing bugs on ARM, not on x86. + The problems occur when seqcst atomics are mixed with relaxed atomics. + + QEMU's atomics try to bridge the Linux API (that most of the developers + are familiar with) and the C11 API, and the two have a substantial + difference: + + - in Linux, strongly-ordered atomics such as atomic_add_return() affect + the global ordering of _all_ memory operations, including for example + READ_ONCE()/WRITE_ONCE() + + - in C11, sequentially consistent atomics (except for seq-cst fences) + only affect the ordering of sequentially consistent operations. + In particular, since relaxed loads are done with LDR on ARM, they are + not ordered against seqcst stores (which are done with STLR). + + QEMU implements high-level synchronization primitives with the idea that + the primitives contain the necessary memory barriers, and the callers can + use relaxed atomics (qatomic_read/qatomic_set) or even regular accesses. + This is very much incompatible with the C11 view that seqcst accesses + are only ordered against other seqcst accesses, and requires using seqcst + fences as in the following example: + + qatomic_set(&y, 1); qatomic_set(&x, 1); + smp_mb(); smp_mb(); + ... qatomic_read(&x) ... ... qatomic_read(&y) ... + + When a qatomic_*() read-modify write operation is used instead of one + or both stores, developers that are more familiar with the Linux API may + be tempted to omit the smp_mb(), which will work on x86 but not on ARM. + + This nasty difference between Linux and C11 read-modify-write operations + has already caused issues in util/async.c and more are being found. 
+ Provide something similar to Linux smp_mb__before/after_atomic(); this + has the double function of documenting clearly why there is a memory + barrier, and avoiding a double barrier on x86 and s390x systems. + + The new macro can already be put to use in qatomic_mb_set(). + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + docs/devel/atomics.rst | 26 +++++++++++++++++++++----- + include/qemu/atomic.h | 17 ++++++++++++++++- + 2 files changed, 37 insertions(+), 6 deletions(-) + +diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst +index 52baa0736d..10fbfc58bb 100644 +--- a/docs/devel/atomics.rst ++++ b/docs/devel/atomics.rst +@@ -25,7 +25,8 @@ provides macros that fall in three camps: + + - weak atomic access and manual memory barriers: ``qatomic_read()``, + ``qatomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``, +- ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``; ++ ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``, ++ ``smp_mb__before_rmw()``, ``smp_mb__after_rmw()``; + + - sequentially consistent atomic access: everything else. + +@@ -470,7 +471,7 @@ and memory barriers, and the equivalents in QEMU: + sequential consistency. + + - in QEMU, ``qatomic_read()`` and ``qatomic_set()`` do not participate in +- the total ordering enforced by sequentially-consistent operations. ++ the ordering enforced by read-modify-write operations. + This is because QEMU uses the C11 memory model. The following example + is correct in Linux but not in QEMU: + +@@ -486,9 +487,24 @@ and memory barriers, and the equivalents in QEMU: + because the read of ``y`` can be moved (by either the processor or the + compiler) before the write of ``x``. + +- Fixing this requires an ``smp_mb()`` memory barrier between the write +- of ``x`` and the read of ``y``. 
In the common case where only one thread +- writes ``x``, it is also possible to write it like this: ++ Fixing this requires a full memory barrier between the write of ``x`` and ++ the read of ``y``. QEMU provides ``smp_mb__before_rmw()`` and ++ ``smp_mb__after_rmw()``; they act both as an optimization, ++ avoiding the memory barrier on processors where it is unnecessary, ++ and as a clarification of this corner case of the C11 memory model: ++ ++ +--------------------------------+ ++ | QEMU (correct) | ++ +================================+ ++ | :: | ++ | | ++ | a = qatomic_fetch_add(&x, 2);| ++ | smp_mb__after_rmw(); | ++ | b = qatomic_read(&y); | ++ +--------------------------------+ ++ ++ In the common case where only one thread writes ``x``, it is also possible ++ to write it like this: + + +--------------------------------+ + | QEMU (correct) | +diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h +index 874134fd19..f85834ee8b 100644 +--- a/include/qemu/atomic.h ++++ b/include/qemu/atomic.h +@@ -245,6 +245,20 @@ + #define smp_wmb() smp_mb_release() + #define smp_rmb() smp_mb_acquire() + ++/* ++ * SEQ_CST is weaker than the older __sync_* builtins and Linux ++ * kernel read-modify-write atomics. Provide a macro to obtain ++ * the same semantics. ++ */ ++#if !defined(QEMU_SANITIZE_THREAD) && \ ++ (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) ++# define smp_mb__before_rmw() signal_barrier() ++# define smp_mb__after_rmw() signal_barrier() ++#else ++# define smp_mb__before_rmw() smp_mb() ++# define smp_mb__after_rmw() smp_mb() ++#endif ++ + /* qatomic_mb_read/set semantics map Java volatile variables. They are + * less expensive on some platforms (notably POWER) than fully + * sequentially consistent operations. +@@ -259,7 +273,8 @@ + #if !defined(QEMU_SANITIZE_THREAD) && \ + (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) + /* This is more efficient than a store plus a fence. 
*/ +-# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i)) ++# define qatomic_mb_set(ptr, i) \ ++ ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); }) + #else + # define qatomic_mb_set(ptr, i) \ + ({ qatomic_store_release(ptr, i); smp_mb(); }) +-- +2.39.1 + diff --git a/SOURCES/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch b/SOURCES/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch deleted file mode 100644 index 9010d3d..0000000 --- a/SOURCES/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 5e385a0e49a520550a83299632be175857b63f19 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Tue, 5 Apr 2022 15:46:52 +0200 -Subject: [PATCH 06/16] qcow2: Add errp to rebuild_refcount_structure() - -RH-Author: Hanna Reitz -RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding -RH-Commit: [3/4] 937b89a7eab6ec6b18618d59bc1526976ad03290 (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2072379 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -Instead of fprint()-ing error messages in rebuild_refcount_structure() -and its rebuild_refcounts_write_refblocks() helper, pass them through an -Error object to qcow2_check_refcounts() (which will then print it). 
- -Suggested-by: Eric Blake -Signed-off-by: Hanna Reitz -Message-Id: <20220405134652.19278-4-hreitz@redhat.com> -Reviewed-by: Eric Blake -(cherry picked from commit 0423f75351ab83b844a31349218b0eadd830e07a) -Signed-off-by: Hanna Reitz ---- - block/qcow2-refcount.c | 33 +++++++++++++++++++-------------- - 1 file changed, 19 insertions(+), 14 deletions(-) - -diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c -index c5669eaa51..ed0ecfaa89 100644 ---- a/block/qcow2-refcount.c -+++ b/block/qcow2-refcount.c -@@ -2465,7 +2465,8 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, - static int rebuild_refcounts_write_refblocks( - BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, - int64_t first_cluster, int64_t end_cluster, -- uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr -+ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr, -+ Error **errp - ) - { - BDRVQcow2State *s = bs->opaque; -@@ -2516,8 +2517,8 @@ static int rebuild_refcounts_write_refblocks( - nb_clusters, - &first_free_cluster); - if (refblock_offset < 0) { -- fprintf(stderr, "ERROR allocating refblock: %s\n", -- strerror(-refblock_offset)); -+ error_setg_errno(errp, -refblock_offset, -+ "ERROR allocating refblock"); - return refblock_offset; - } - -@@ -2539,6 +2540,7 @@ static int rebuild_refcounts_write_refblocks( - on_disk_reftable_entries * - REFTABLE_ENTRY_SIZE); - if (!on_disk_reftable) { -+ error_setg(errp, "ERROR allocating reftable memory"); - return -ENOMEM; - } - -@@ -2562,7 +2564,7 @@ static int rebuild_refcounts_write_refblocks( - ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, - s->cluster_size, false); - if (ret < 0) { -- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); -+ error_setg_errno(errp, -ret, "ERROR writing refblock"); - return ret; - } - -@@ -2578,7 +2580,7 @@ static int rebuild_refcounts_write_refblocks( - ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, - 
s->cluster_size); - if (ret < 0) { -- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); -+ error_setg_errno(errp, -ret, "ERROR writing refblock"); - return ret; - } - -@@ -2601,7 +2603,8 @@ static int rebuild_refcounts_write_refblocks( - static int rebuild_refcount_structure(BlockDriverState *bs, - BdrvCheckResult *res, - void **refcount_table, -- int64_t *nb_clusters) -+ int64_t *nb_clusters, -+ Error **errp) - { - BDRVQcow2State *s = bs->opaque; - int64_t reftable_offset = -1; -@@ -2652,7 +2655,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, - rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, - 0, *nb_clusters, - &on_disk_reftable, -- &on_disk_reftable_entries); -+ &on_disk_reftable_entries, errp); - if (reftable_size_changed < 0) { - res->check_errors++; - ret = reftable_size_changed; -@@ -2676,8 +2679,8 @@ static int rebuild_refcount_structure(BlockDriverState *bs, - refcount_table, nb_clusters, - &first_free_cluster); - if (reftable_offset < 0) { -- fprintf(stderr, "ERROR allocating reftable: %s\n", -- strerror(-reftable_offset)); -+ error_setg_errno(errp, -reftable_offset, -+ "ERROR allocating reftable"); - res->check_errors++; - ret = reftable_offset; - goto fail; -@@ -2695,7 +2698,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, - reftable_start_cluster, - reftable_end_cluster, - &on_disk_reftable, -- &on_disk_reftable_entries); -+ &on_disk_reftable_entries, errp); - if (reftable_size_changed < 0) { - res->check_errors++; - ret = reftable_size_changed; -@@ -2725,7 +2728,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, - ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, - false); - if (ret < 0) { -- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); -+ error_setg_errno(errp, -ret, "ERROR writing reftable"); - goto fail; - } - -@@ -2733,7 +2736,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, - ret = bdrv_pwrite(bs->file, 
reftable_offset, on_disk_reftable, - reftable_length); - if (ret < 0) { -- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); -+ error_setg_errno(errp, -ret, "ERROR writing reftable"); - goto fail; - } - -@@ -2746,7 +2749,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, - &reftable_offset_and_clusters, - sizeof(reftable_offset_and_clusters)); - if (ret < 0) { -- fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret)); -+ error_setg_errno(errp, -ret, "ERROR setting reftable"); - goto fail; - } - -@@ -2814,11 +2817,13 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, - if (rebuild && (fix & BDRV_FIX_ERRORS)) { - BdrvCheckResult old_res = *res; - int fresh_leaks = 0; -+ Error *local_err = NULL; - - fprintf(stderr, "Rebuilding refcount structure\n"); - ret = rebuild_refcount_structure(bs, res, &refcount_table, -- &nb_clusters); -+ &nb_clusters, &local_err); - if (ret < 0) { -+ error_report_err(local_err); - goto fail; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch b/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch new file mode 100644 index 0000000..7f39f4a --- /dev/null +++ b/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch @@ -0,0 +1,67 @@ +From 46ead2c391924b68741d6da28f28f909b80f5914 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:51 +0100 +Subject: [PATCH 01/20] qcow2: Fix theoretical corruption in store_bitmap() + error path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2150180 +RH-Acked-by: Thomas Huth +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefano Garzarella +RH-Commit: [1/4] a6a497947179431567d330d0501247a3749fb9fd (kmwolf/centos-qemu-kvm) + +In order to write the bitmap table to the image file, it is converted to 
+big endian. If the write fails, it is passed to clear_bitmap_table() to +free all of the clusters it had allocated before. However, if we don't +convert it back to native endianness first, we'll free things at a wrong +offset. + +In practical terms, the offsets will be so high that we won't actually +free any allocated clusters, but just run into an error, but in theory +this can cause image corruption. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-2-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit b03dd9613bcf8fe948581b2b3585510cb525c382) +Signed-off-by: Kevin Wolf +--- + block/qcow2-bitmap.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c +index bcad567c0c..3dff99ba06 100644 +--- a/block/qcow2-bitmap.c ++++ b/block/qcow2-bitmap.c +@@ -115,7 +115,7 @@ static int update_header_sync(BlockDriverState *bs) + return bdrv_flush(bs->file->bs); + } + +-static inline void bitmap_table_to_be(uint64_t *bitmap_table, size_t size) ++static inline void bitmap_table_bswap_be(uint64_t *bitmap_table, size_t size) + { + size_t i; + +@@ -1401,9 +1401,10 @@ static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp) + goto fail; + } + +- bitmap_table_to_be(tb, tb_size); ++ bitmap_table_bswap_be(tb, tb_size); + ret = bdrv_pwrite(bs->file, tb_offset, tb_size * sizeof(tb[0]), tb, 0); + if (ret < 0) { ++ bitmap_table_bswap_be(tb, tb_size); + error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file", + bm_name); + goto fail; +-- +2.31.1 + diff --git a/SOURCES/kvm-qcow2-Improve-refcount-structure-rebuilding.patch b/SOURCES/kvm-qcow2-Improve-refcount-structure-rebuilding.patch deleted file mode 100644 index cdc92b8..0000000 --- a/SOURCES/kvm-qcow2-Improve-refcount-structure-rebuilding.patch +++ /dev/null @@ -1,465 +0,0 @@ -From 
b453cf6be8429f4438d51eb24fcf49e7d9f14db6 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Tue, 5 Apr 2022 15:46:50 +0200 -Subject: [PATCH 04/16] qcow2: Improve refcount structure rebuilding - -RH-Author: Hanna Reitz -RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding -RH-Commit: [1/4] a3606b7abcaebb4930b566e95b1090aead62dfae (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2072379 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -When rebuilding the refcount structures (when qemu-img check -r found -errors with refcount = 0, but reference count > 0), the new refcount -table defaults to being put at the image file end[1]. There is no good -reason for that except that it means we will not have to rewrite any -refblocks we already wrote to disk. - -Changing the code to rewrite those refblocks is not too difficult, -though, so let us do that. That is beneficial for images on block -devices, where we cannot really write beyond the end of the image file. - -Use this opportunity to add extensive comments to the code, and refactor -it a bit, getting rid of the backwards-jumping goto. - -[1] Unless there is something allocated in the area pointed to by the - last refblock, so we have to write that refblock. In that case, we - try to put the reftable in there. 
- -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1519071 -Closes: https://gitlab.com/qemu-project/qemu/-/issues/941 -Reviewed-by: Eric Blake -Signed-off-by: Hanna Reitz -Message-Id: <20220405134652.19278-2-hreitz@redhat.com> -(cherry picked from commit a8c07ec287554dcefd33733f0e5888a281ddc95e) -Signed-off-by: Hanna Reitz ---- - block/qcow2-refcount.c | 332 +++++++++++++++++++++++++++++------------ - 1 file changed, 235 insertions(+), 97 deletions(-) - -diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c -index b91499410c..c5669eaa51 100644 ---- a/block/qcow2-refcount.c -+++ b/block/qcow2-refcount.c -@@ -2438,111 +2438,140 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, - } - - /* -- * Creates a new refcount structure based solely on the in-memory information -- * given through *refcount_table. All necessary allocations will be reflected -- * in that array. -+ * Helper function for rebuild_refcount_structure(). - * -- * On success, the old refcount structure is leaked (it will be covered by the -- * new refcount structure). -+ * Scan the range of clusters [first_cluster, end_cluster) for allocated -+ * clusters and write all corresponding refblocks to disk. The refblock -+ * and allocation data is taken from the in-memory refcount table -+ * *refcount_table[] (of size *nb_clusters), which is basically one big -+ * (unlimited size) refblock for the whole image. -+ * -+ * For these refblocks, clusters are allocated using said in-memory -+ * refcount table. Care is taken that these allocations are reflected -+ * in the refblocks written to disk. -+ * -+ * The refblocks' offsets are written into a reftable, which is -+ * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr). If -+ * that reftable is of insufficient size, it will be resized to fit. -+ * This reftable is not written to disk. 
-+ * -+ * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed -+ * to point to existing valid refblocks that do not need to be allocated -+ * again.) -+ * -+ * Return whether the on-disk reftable array was resized (true/false), -+ * or -errno on error. - */ --static int rebuild_refcount_structure(BlockDriverState *bs, -- BdrvCheckResult *res, -- void **refcount_table, -- int64_t *nb_clusters) -+static int rebuild_refcounts_write_refblocks( -+ BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, -+ int64_t first_cluster, int64_t end_cluster, -+ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr -+ ) - { - BDRVQcow2State *s = bs->opaque; -- int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0; -+ int64_t cluster; - int64_t refblock_offset, refblock_start, refblock_index; -- uint32_t reftable_size = 0; -- uint64_t *on_disk_reftable = NULL; -+ int64_t first_free_cluster = 0; -+ uint64_t *on_disk_reftable = *on_disk_reftable_ptr; -+ uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr; - void *on_disk_refblock; -- int ret = 0; -- struct { -- uint64_t reftable_offset; -- uint32_t reftable_clusters; -- } QEMU_PACKED reftable_offset_and_clusters; -- -- qcow2_cache_empty(bs, s->refcount_block_cache); -+ bool reftable_grown = false; -+ int ret; - --write_refblocks: -- for (; cluster < *nb_clusters; cluster++) { -+ for (cluster = first_cluster; cluster < end_cluster; cluster++) { -+ /* Check all clusters to find refblocks that contain non-zero entries */ - if (!s->get_refcount(*refcount_table, cluster)) { - continue; - } - -+ /* -+ * This cluster is allocated, so we need to create a refblock -+ * for it. The data we will write to disk is just the -+ * respective slice from *refcount_table, so it will contain -+ * accurate refcounts for all clusters belonging to this -+ * refblock. After we have written it, we will therefore skip -+ * all remaining clusters in this refblock. 
-+ */ -+ - refblock_index = cluster >> s->refcount_block_bits; - refblock_start = refblock_index << s->refcount_block_bits; - -- /* Don't allocate a cluster in a refblock already written to disk */ -- if (first_free_cluster < refblock_start) { -- first_free_cluster = refblock_start; -- } -- refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, -- nb_clusters, &first_free_cluster); -- if (refblock_offset < 0) { -- fprintf(stderr, "ERROR allocating refblock: %s\n", -- strerror(-refblock_offset)); -- res->check_errors++; -- ret = refblock_offset; -- goto fail; -- } -+ if (on_disk_reftable_entries > refblock_index && -+ on_disk_reftable[refblock_index]) -+ { -+ /* -+ * We can get here after a `goto write_refblocks`: We have a -+ * reftable from a previous run, and the refblock is already -+ * allocated. No need to allocate it again. -+ */ -+ refblock_offset = on_disk_reftable[refblock_index]; -+ } else { -+ int64_t refblock_cluster_index; - -- if (reftable_size <= refblock_index) { -- uint32_t old_reftable_size = reftable_size; -- uint64_t *new_on_disk_reftable; -+ /* Don't allocate a cluster in a refblock already written to disk */ -+ if (first_free_cluster < refblock_start) { -+ first_free_cluster = refblock_start; -+ } -+ refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, -+ nb_clusters, -+ &first_free_cluster); -+ if (refblock_offset < 0) { -+ fprintf(stderr, "ERROR allocating refblock: %s\n", -+ strerror(-refblock_offset)); -+ return refblock_offset; -+ } - -- reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, -- s->cluster_size) / REFTABLE_ENTRY_SIZE; -- new_on_disk_reftable = g_try_realloc(on_disk_reftable, -- reftable_size * -- REFTABLE_ENTRY_SIZE); -- if (!new_on_disk_reftable) { -- res->check_errors++; -- ret = -ENOMEM; -- goto fail; -+ refblock_cluster_index = refblock_offset / s->cluster_size; -+ if (refblock_cluster_index >= end_cluster) { -+ /* -+ * We must write the refblock that holds this refblock's -+ * refcount -+ 
*/ -+ end_cluster = refblock_cluster_index + 1; - } -- on_disk_reftable = new_on_disk_reftable; - -- memset(on_disk_reftable + old_reftable_size, 0, -- (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE); -+ if (on_disk_reftable_entries <= refblock_index) { -+ on_disk_reftable_entries = -+ ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, -+ s->cluster_size) / REFTABLE_ENTRY_SIZE; -+ on_disk_reftable = -+ g_try_realloc(on_disk_reftable, -+ on_disk_reftable_entries * -+ REFTABLE_ENTRY_SIZE); -+ if (!on_disk_reftable) { -+ return -ENOMEM; -+ } - -- /* The offset we have for the reftable is now no longer valid; -- * this will leak that range, but we can easily fix that by running -- * a leak-fixing check after this rebuild operation */ -- reftable_offset = -1; -- } else { -- assert(on_disk_reftable); -- } -- on_disk_reftable[refblock_index] = refblock_offset; -+ memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0, -+ (on_disk_reftable_entries - -+ *on_disk_reftable_entries_ptr) * -+ REFTABLE_ENTRY_SIZE); - -- /* If this is apparently the last refblock (for now), try to squeeze the -- * reftable in */ -- if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits && -- reftable_offset < 0) -- { -- uint64_t reftable_clusters = size_to_clusters(s, reftable_size * -- REFTABLE_ENTRY_SIZE); -- reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, -- refcount_table, nb_clusters, -- &first_free_cluster); -- if (reftable_offset < 0) { -- fprintf(stderr, "ERROR allocating reftable: %s\n", -- strerror(-reftable_offset)); -- res->check_errors++; -- ret = reftable_offset; -- goto fail; -+ *on_disk_reftable_ptr = on_disk_reftable; -+ *on_disk_reftable_entries_ptr = on_disk_reftable_entries; -+ -+ reftable_grown = true; -+ } else { -+ assert(on_disk_reftable); - } -+ on_disk_reftable[refblock_index] = refblock_offset; - } - -+ /* Refblock is allocated, write it to disk */ -+ - ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, - 
s->cluster_size, false); - if (ret < 0) { - fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); -- goto fail; -+ return ret; - } - -- /* The size of *refcount_table is always cluster-aligned, therefore the -- * write operation will not overflow */ -+ /* -+ * The refblock is simply a slice of *refcount_table. -+ * Note that the size of *refcount_table is always aligned to -+ * whole clusters, so the write operation will not result in -+ * out-of-bounds accesses. -+ */ - on_disk_refblock = (void *)((char *) *refcount_table + - refblock_index * s->cluster_size); - -@@ -2550,23 +2579,99 @@ write_refblocks: - s->cluster_size); - if (ret < 0) { - fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); -- goto fail; -+ return ret; - } - -- /* Go to the end of this refblock */ -+ /* This refblock is done, skip to its end */ - cluster = refblock_start + s->refcount_block_size - 1; - } - -- if (reftable_offset < 0) { -- uint64_t post_refblock_start, reftable_clusters; -+ return reftable_grown; -+} -+ -+/* -+ * Creates a new refcount structure based solely on the in-memory information -+ * given through *refcount_table (this in-memory information is basically just -+ * the concatenation of all refblocks). All necessary allocations will be -+ * reflected in that array. -+ * -+ * On success, the old refcount structure is leaked (it will be covered by the -+ * new refcount structure). 
-+ */ -+static int rebuild_refcount_structure(BlockDriverState *bs, -+ BdrvCheckResult *res, -+ void **refcount_table, -+ int64_t *nb_clusters) -+{ -+ BDRVQcow2State *s = bs->opaque; -+ int64_t reftable_offset = -1; -+ int64_t reftable_length = 0; -+ int64_t reftable_clusters; -+ int64_t refblock_index; -+ uint32_t on_disk_reftable_entries = 0; -+ uint64_t *on_disk_reftable = NULL; -+ int ret = 0; -+ int reftable_size_changed = 0; -+ struct { -+ uint64_t reftable_offset; -+ uint32_t reftable_clusters; -+ } QEMU_PACKED reftable_offset_and_clusters; -+ -+ qcow2_cache_empty(bs, s->refcount_block_cache); -+ -+ /* -+ * For each refblock containing entries, we try to allocate a -+ * cluster (in the in-memory refcount table) and write its offset -+ * into on_disk_reftable[]. We then write the whole refblock to -+ * disk (as a slice of the in-memory refcount table). -+ * This is done by rebuild_refcounts_write_refblocks(). -+ * -+ * Once we have scanned all clusters, we try to find space for the -+ * reftable. This will dirty the in-memory refcount table (i.e. -+ * make it differ from the refblocks we have already written), so we -+ * need to run rebuild_refcounts_write_refblocks() again for the -+ * range of clusters where the reftable has been allocated. -+ * -+ * This second run might make the reftable grow again, in which case -+ * we will need to allocate another space for it, which is why we -+ * repeat all this until the reftable stops growing. -+ * -+ * (This loop will terminate, because with every cluster the -+ * reftable grows, it can accomodate a multitude of more refcounts, -+ * so that at some point this must be able to cover the reftable -+ * and all refblocks describing it.) -+ * -+ * We then convert the reftable to big-endian and write it to disk. -+ * -+ * Note that we never free any reftable allocations. Doing so would -+ * needlessly complicate the algorithm: The eventual second check -+ * run we do will clean up all leaks we have caused. 
-+ */ -+ -+ reftable_size_changed = -+ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, -+ 0, *nb_clusters, -+ &on_disk_reftable, -+ &on_disk_reftable_entries); -+ if (reftable_size_changed < 0) { -+ res->check_errors++; -+ ret = reftable_size_changed; -+ goto fail; -+ } -+ -+ /* -+ * There was no reftable before, so rebuild_refcounts_write_refblocks() -+ * must have increased its size (from 0 to something). -+ */ -+ assert(reftable_size_changed); -+ -+ do { -+ int64_t reftable_start_cluster, reftable_end_cluster; -+ int64_t first_free_cluster = 0; -+ -+ reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE; -+ reftable_clusters = size_to_clusters(s, reftable_length); - -- post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size); -- reftable_clusters = -- size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE); -- /* Not pretty but simple */ -- if (first_free_cluster < post_refblock_start) { -- first_free_cluster = post_refblock_start; -- } - reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, - refcount_table, nb_clusters, - &first_free_cluster); -@@ -2578,24 +2683,55 @@ write_refblocks: - goto fail; - } - -- goto write_refblocks; -- } -+ /* -+ * We need to update the affected refblocks, so re-run the -+ * write_refblocks loop for the reftable's range of clusters. -+ */ -+ assert(offset_into_cluster(s, reftable_offset) == 0); -+ reftable_start_cluster = reftable_offset / s->cluster_size; -+ reftable_end_cluster = reftable_start_cluster + reftable_clusters; -+ reftable_size_changed = -+ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, -+ reftable_start_cluster, -+ reftable_end_cluster, -+ &on_disk_reftable, -+ &on_disk_reftable_entries); -+ if (reftable_size_changed < 0) { -+ res->check_errors++; -+ ret = reftable_size_changed; -+ goto fail; -+ } -+ -+ /* -+ * If the reftable size has changed, we will need to find a new -+ * allocation, repeating the loop. 
-+ */ -+ } while (reftable_size_changed); - -- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { -+ /* The above loop must have run at least once */ -+ assert(reftable_offset >= 0); -+ -+ /* -+ * All allocations are done, all refblocks are written, convert the -+ * reftable to big-endian and write it to disk. -+ */ -+ -+ for (refblock_index = 0; refblock_index < on_disk_reftable_entries; -+ refblock_index++) -+ { - cpu_to_be64s(&on_disk_reftable[refblock_index]); - } - -- ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, -- reftable_size * REFTABLE_ENTRY_SIZE, -+ ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, - false); - if (ret < 0) { - fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); - goto fail; - } - -- assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE); -+ assert(reftable_length < INT_MAX); - ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, -- reftable_size * REFTABLE_ENTRY_SIZE); -+ reftable_length); - if (ret < 0) { - fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); - goto fail; -@@ -2604,7 +2740,7 @@ write_refblocks: - /* Enter new reftable into the image header */ - reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset); - reftable_offset_and_clusters.reftable_clusters = -- cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE)); -+ cpu_to_be32(reftable_clusters); - ret = bdrv_pwrite_sync(bs->file, - offsetof(QCowHeader, refcount_table_offset), - &reftable_offset_and_clusters, -@@ -2614,12 +2750,14 @@ write_refblocks: - goto fail; - } - -- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { -+ for (refblock_index = 0; refblock_index < on_disk_reftable_entries; -+ refblock_index++) -+ { - be64_to_cpus(&on_disk_reftable[refblock_index]); - } - s->refcount_table = on_disk_reftable; - s->refcount_table_offset = reftable_offset; -- s->refcount_table_size = reftable_size; -+ 
s->refcount_table_size = on_disk_reftable_entries; - update_max_refcount_table_index(s); - - return 0; --- -2.31.1 - diff --git a/SOURCES/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch b/SOURCES/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch new file mode 100644 index 0000000..d2dacbc --- /dev/null +++ b/SOURCES/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch @@ -0,0 +1,84 @@ +From f628a08d20b9ab6be24c2ab18b38a934a314c78b Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:40:56 +0100 +Subject: [PATCH 14/31] qed: Don't yield in bdrv_qed_co_drain_begin() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [2/16] f18e9aebb7e04a62e309b656bac8f2ab83df657f (sgarzarella/qemu-kvm-c-9-s) + +We want to change .bdrv_co_drained_begin() back to be a non-coroutine +callback, so in preparation, avoid yielding in its implementation. + +Because we increase bs->in_flight and bdrv_drained_begin() polls, the +behaviour is unchanged. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Emanuele Giuseppe Esposito +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 6d47eb0c8bf2d50682c7dccae74d24104076fe23) +Signed-off-by: Stefano Garzarella +--- + block/qed.c | 20 +++++++++++++++++--- + 1 file changed, 17 insertions(+), 3 deletions(-) + +diff --git a/block/qed.c b/block/qed.c +index 2f36ad342c..013f826c44 100644 +--- a/block/qed.c ++++ b/block/qed.c +@@ -282,9 +282,8 @@ static void coroutine_fn qed_unplug_allocating_write_reqs(BDRVQEDState *s) + qemu_co_mutex_unlock(&s->table_lock); + } + +-static void coroutine_fn qed_need_check_timer_entry(void *opaque) ++static void coroutine_fn qed_need_check_timer(BDRVQEDState *s) + { +- BDRVQEDState *s = opaque; + int ret; + + trace_qed_need_check_timer_cb(s); +@@ -310,9 +309,20 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque) + (void) ret; + } + ++static void coroutine_fn qed_need_check_timer_entry(void *opaque) ++{ ++ BDRVQEDState *s = opaque; ++ ++ qed_need_check_timer(opaque); ++ bdrv_dec_in_flight(s->bs); ++} ++ + static void qed_need_check_timer_cb(void *opaque) + { ++ BDRVQEDState *s = opaque; + Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque); ++ ++ bdrv_inc_in_flight(s->bs); + qemu_coroutine_enter(co); + } + +@@ -363,8 +373,12 @@ static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) + * header is flushed. 
+ */ + if (s->need_check_timer && timer_pending(s->need_check_timer)) { ++ Coroutine *co; ++ + qed_cancel_need_check_timer(s); +- qed_need_check_timer_entry(s); ++ co = qemu_coroutine_create(qed_need_check_timer_entry, s); ++ bdrv_inc_in_flight(bs); ++ aio_co_enter(bdrv_get_aio_context(bs), co); + } + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch b/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch new file mode 100644 index 0000000..86e94db --- /dev/null +++ b/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch @@ -0,0 +1,75 @@ +From 7a9907c65e3e2bbb0c119acdbbeb4381e7f1d902 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 09/12] qemu-coroutine-lock: add smp_mb__after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [6/9] 4b1723b1ad670ec4c85240390b4fc15ff361154f (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit e3a3b6ec8169eab2feb241b4982585001512cd55 +Author: Paolo Bonzini +Date: Fri Mar 3 10:52:59 2023 +0100 + + qemu-coroutine-lock: add smp_mb__after_rmw() + + mutex->from_push and mutex->handoff in qemu-coroutine-lock implement + the familiar pattern: + + write a write b + smp_mb() smp_mb() + read b read a + + The memory barrier is required by the C memory model even after a + SEQ_CST read-modify-write operation such as QSLIST_INSERT_HEAD_ATOMIC. + Add it and avoid the unclear qatomic_mb_read() operation. 
+ + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/qemu-coroutine-lock.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c +index 45c6b57374..c5897bd963 100644 +--- a/util/qemu-coroutine-lock.c ++++ b/util/qemu-coroutine-lock.c +@@ -202,10 +202,16 @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx, + trace_qemu_co_mutex_lock_entry(mutex, self); + push_waiter(mutex, &w); + ++ /* ++ * Add waiter before reading mutex->handoff. Pairs with qatomic_mb_set ++ * in qemu_co_mutex_unlock. ++ */ ++ smp_mb__after_rmw(); ++ + /* This is the "Responsibility Hand-Off" protocol; a lock() picks from + * a concurrent unlock() the responsibility of waking somebody up. + */ +- old_handoff = qatomic_mb_read(&mutex->handoff); ++ old_handoff = qatomic_read(&mutex->handoff); + if (old_handoff && + has_waiters(mutex) && + qatomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) { +@@ -304,6 +310,7 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex) + } + + our_handoff = mutex->sequence; ++ /* Set handoff before checking for waiters. 
*/ + qatomic_mb_set(&mutex->handoff, our_handoff); + if (!has_waiters(mutex)) { + /* The concurrent lock has not added itself yet, so it +-- +2.39.1 + diff --git a/SOURCES/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch b/SOURCES/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch new file mode 100644 index 0000000..eff4d2e --- /dev/null +++ b/SOURCES/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch @@ -0,0 +1,197 @@ +From b1970c733dc46b2a8f648997a7e1c5d12900ff54 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:27:04 +0200 +Subject: [PATCH 17/20] qemu-img: Change info key names for protocol nodes + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [12/12] 67c260aaa05466410503fecee6210bf9d47e8c7c (hreitz/qemu-kvm-c-9-s) + +Currently, when querying a qcow2 image, qemu-img info reports something +like this: + +image: test.qcow2 +file format: qcow2 +virtual size: 64 MiB (67108864 bytes) +disk size: 196 KiB +cluster_size: 65536 +Format specific information: + compat: 1.1 + compression type: zlib + lazy refcounts: false + refcount bits: 16 + corrupt: false + extended l2: false +Child node '/file': + image: test.qcow2 + file format: file + virtual size: 192 KiB (197120 bytes) + disk size: 196 KiB + Format specific information: + extent size hint: 1048576 + +Notably, the way the keys are named is specific for image files: The +filename is shown under "image", the BDS driver under "file format", and +the BDS length under "virtual size". This does not make much sense for +nodes that are not actually supposed to be guest images, like the /file +child node shown above. 
+ +Give bdrv_node_info_dump() a @protocol parameter that gives a hint that +the respective node is probably just used for data storage and does not +necessarily present the data for a VM guest disk. This renames the keys +so that with this patch, the output becomes: + +image: test.qcow2 +[...] +Child node '/file': + filename: test.qcow2 + protocol type: file + file length: 192 KiB (197120 bytes) + disk size: 196 KiB + Format specific information: + extent size hint: 1048576 + +(Perhaps we should also rename "Format specific information", but I +could not come up with anything better that will not become problematic +if we guess wrong with the protocol "heuristic".) + +This change affects iotest 302, which has protocol node information in +its reference output. + +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-13-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit d570177b50c389f379f93183155a27d44856ab46) +Signed-off-by: Hanna Czenczek +--- + block/monitor/block-hmp-cmds.c | 2 +- + block/qapi.c | 39 ++++++++++++++++++++++++++++------ + include/block/qapi.h | 2 +- + qemu-img.c | 3 ++- + tests/qemu-iotests/302.out | 6 +++--- + 5 files changed, 39 insertions(+), 13 deletions(-) + +diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c +index 72824d4e2e..4d83339a5d 100644 +--- a/block/monitor/block-hmp-cmds.c ++++ b/block/monitor/block-hmp-cmds.c +@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, + monitor_printf(mon, "\nImages:\n"); + image_info = inserted->image; + while (1) { +- bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0); ++ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0, false); + if (image_info->has_backing_image) { + image_info = image_info->backing_image; + } else { +diff --git a/block/qapi.c b/block/qapi.c +index 3e35603f0c..56f398c500 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -934,24 +934,49 @@ void 
bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, + visit_free(v); + } + +-void bdrv_node_info_dump(BlockNodeInfo *info, int indentation) ++/** ++ * Print the given @info object in human-readable form. Every field is indented ++ * using the given @indentation (four spaces per indentation level). ++ * ++ * When using this to print a whole block graph, @protocol can be set to true to ++ * signify that the given information is associated with a protocol node, i.e. ++ * just data storage for an image, such that the data it presents is not really ++ * a full VM disk. If so, several fields change name: For example, "virtual ++ * size" is printed as "file length". ++ * (Consider a qcow2 image, which is represented by a qcow2 node and a file ++ * node. Printing a "virtual size" for the file node does not make sense, ++ * because without the qcow2 node, it is not really a guest disk, so it does not ++ * have a "virtual size". Therefore, we call it "file length" instead.) ++ * ++ * @protocol is ignored when @indentation is 0, because we take that to mean ++ * that the associated node is the root node in the queried block graph, and ++ * thus is always to be interpreted as a standalone guest disk. 
++ */ ++void bdrv_node_info_dump(BlockNodeInfo *info, int indentation, bool protocol) + { + char *size_buf, *dsize_buf; + g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, ""); + ++ if (indentation == 0) { ++ /* Top level, consider this a normal image */ ++ protocol = false; ++ } ++ + if (!info->has_actual_size) { + dsize_buf = g_strdup("unavailable"); + } else { + dsize_buf = size_to_str(info->actual_size); + } + size_buf = size_to_str(info->virtual_size); +- qemu_printf("%simage: %s\n" +- "%sfile format: %s\n" +- "%svirtual size: %s (%" PRId64 " bytes)\n" ++ qemu_printf("%s%s: %s\n" ++ "%s%s: %s\n" ++ "%s%s: %s (%" PRId64 " bytes)\n" + "%sdisk size: %s\n", +- ind_s, info->filename, +- ind_s, info->format, +- ind_s, size_buf, info->virtual_size, ++ ind_s, protocol ? "filename" : "image", info->filename, ++ ind_s, protocol ? "protocol type" : "file format", ++ info->format, ++ ind_s, protocol ? "file length" : "virtual size", ++ size_buf, info->virtual_size, + ind_s, dsize_buf); + g_free(size_buf); + g_free(dsize_buf); +diff --git a/include/block/qapi.h b/include/block/qapi.h +index 38855f2ae9..26113da21a 100644 +--- a/include/block/qapi.h ++++ b/include/block/qapi.h +@@ -51,5 +51,5 @@ void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); + void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, + const char *prefix, + int indentation); +-void bdrv_node_info_dump(BlockNodeInfo *info, int indentation); ++void bdrv_node_info_dump(BlockNodeInfo *info, int indentation, bool protocol); + #endif +diff --git a/qemu-img.c b/qemu-img.c +index e281011245..2943625c67 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -2853,7 +2853,8 @@ static void dump_human_image_info(BlockGraphInfo *info, int indentation, + { + BlockChildInfoList *children_list; + +- bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation); ++ bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation, ++ info->children == NULL); + + for (children_list = info->children; 
children_list; + children_list = children_list->next) +diff --git a/tests/qemu-iotests/302.out b/tests/qemu-iotests/302.out +index edfa1c4f05..7b5014cdd8 100644 +--- a/tests/qemu-iotests/302.out ++++ b/tests/qemu-iotests/302.out +@@ -5,9 +5,9 @@ file format: raw + virtual size: 448 KiB (458752 bytes) + disk size: unavailable + Child node '/file': +- image: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock +- file format: nbd +- virtual size: 448 KiB (458752 bytes) ++ filename: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock ++ protocol type: nbd ++ file length: 448 KiB (458752 bytes) + disk size: unavailable + + === Converted image info === +-- +2.31.1 + diff --git a/SOURCES/kvm-qemu-img-Let-info-print-block-graph.patch b/SOURCES/kvm-qemu-img-Let-info-print-block-graph.patch new file mode 100644 index 0000000..536df69 --- /dev/null +++ b/SOURCES/kvm-qemu-img-Let-info-print-block-graph.patch @@ -0,0 +1,261 @@ +From ea73e9de42b446ce1049805c23f7706e4f87ed1f Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:27:03 +0200 +Subject: [PATCH 16/20] qemu-img: Let info print block graph + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [11/12] 2c1b8a03c918484449e876acf4c6663766848ad8 (hreitz/qemu-kvm-c-9-s) + +For every node in the backing chain, collect its BlockGraphInfo struct +using bdrv_query_block_graph_info(). Print all nodes' information, +indenting child nodes and labelling them with a path constructed from +the child names leading to the node from the root (e.g. /file/file). + +Note that we open each image with BDRV_O_NO_BACKING, so its backing +child is omitted from this graph, and thus presented in the previous +manner: By simply concatenating all images' information, separated with +blank lines. 
+ +This affects two iotests: +- 065: Here we try to get the format node's format specific information. + The pre-patch code does so by taking all lines from "Format specific + information:" until an empty line. This format specific information + is no longer followed by an empty line, though, but by child node + information, so limit the range by "Child node '/file':". +- 302: Calls qemu_img() for qemu-img info directly, which does not + filter the output, so the child node information ends up in the + output. + +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-12-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit c04d0ab026201d21873a63f768cb69c4554dfec1) +Signed-off-by: Hanna Czenczek +--- + qapi/block-core.json | 4 +-- + qemu-img.c | 69 ++++++++++++++++++++++++++------------ + tests/qemu-iotests/065 | 2 +- + tests/qemu-iotests/302.out | 5 +++ + 4 files changed, 56 insertions(+), 24 deletions(-) + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index d703e0fb16..7f331eb8ea 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -5831,9 +5831,9 @@ + ## + # @DummyBlockCoreForceArrays: + # +-# Not used by QMP; hack to let us use BlockNodeInfoList internally ++# Not used by QMP; hack to let us use BlockGraphInfoList internally + # + # Since: 8.0 + ## + { 'struct': 'DummyBlockCoreForceArrays', +- 'data': { 'unused-block-node-info': ['BlockNodeInfo'] } } ++ 'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } } +diff --git a/qemu-img.c b/qemu-img.c +index 30b4ea58bb..e281011245 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -2816,13 +2816,13 @@ static void dump_snapshots(BlockDriverState *bs) + g_free(sn_tab); + } + +-static void dump_json_block_node_info_list(BlockNodeInfoList *list) ++static void dump_json_block_graph_info_list(BlockGraphInfoList *list) + { + GString *str; + QObject *obj; + Visitor *v = qobject_output_visitor_new(&obj); + +- visit_type_BlockNodeInfoList(v, NULL, 
&list, &error_abort); ++ visit_type_BlockGraphInfoList(v, NULL, &list, &error_abort); + visit_complete(v, &obj); + str = qobject_to_json_pretty(obj, true); + assert(str != NULL); +@@ -2832,13 +2832,13 @@ static void dump_json_block_node_info_list(BlockNodeInfoList *list) + g_string_free(str, true); + } + +-static void dump_json_block_node_info(BlockNodeInfo *info) ++static void dump_json_block_graph_info(BlockGraphInfo *info) + { + GString *str; + QObject *obj; + Visitor *v = qobject_output_visitor_new(&obj); + +- visit_type_BlockNodeInfo(v, NULL, &info, &error_abort); ++ visit_type_BlockGraphInfo(v, NULL, &info, &error_abort); + visit_complete(v, &obj); + str = qobject_to_json_pretty(obj, true); + assert(str != NULL); +@@ -2848,9 +2848,29 @@ static void dump_json_block_node_info(BlockNodeInfo *info) + g_string_free(str, true); + } + +-static void dump_human_image_info_list(BlockNodeInfoList *list) ++static void dump_human_image_info(BlockGraphInfo *info, int indentation, ++ const char *path) + { +- BlockNodeInfoList *elem; ++ BlockChildInfoList *children_list; ++ ++ bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation); ++ ++ for (children_list = info->children; children_list; ++ children_list = children_list->next) ++ { ++ BlockChildInfo *child = children_list->value; ++ g_autofree char *child_path = NULL; ++ ++ printf("%*sChild node '%s%s':\n", ++ indentation * 4, "", path, child->name); ++ child_path = g_strdup_printf("%s%s/", path, child->name); ++ dump_human_image_info(child->info, indentation + 1, child_path); ++ } ++} ++ ++static void dump_human_image_info_list(BlockGraphInfoList *list) ++{ ++ BlockGraphInfoList *elem; + bool delim = false; + + for (elem = list; elem; elem = elem->next) { +@@ -2859,7 +2879,7 @@ static void dump_human_image_info_list(BlockNodeInfoList *list) + } + delim = true; + +- bdrv_node_info_dump(elem->value, 0); ++ dump_human_image_info(elem->value, 0, "/"); + } + } + +@@ -2869,7 +2889,7 @@ static gboolean 
str_equal_func(gconstpointer a, gconstpointer b) + } + + /** +- * Open an image file chain and return an BlockNodeInfoList ++ * Open an image file chain and return an BlockGraphInfoList + * + * @filename: topmost image filename + * @fmt: topmost image format (may be NULL to autodetect) +@@ -2880,13 +2900,13 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b) + * opening an image file. If there was an error a message will have been + * printed to stderr. + */ +-static BlockNodeInfoList *collect_image_info_list(bool image_opts, +- const char *filename, +- const char *fmt, +- bool chain, bool force_share) ++static BlockGraphInfoList *collect_image_info_list(bool image_opts, ++ const char *filename, ++ const char *fmt, ++ bool chain, bool force_share) + { +- BlockNodeInfoList *head = NULL; +- BlockNodeInfoList **tail = &head; ++ BlockGraphInfoList *head = NULL; ++ BlockGraphInfoList **tail = &head; + GHashTable *filenames; + Error *err = NULL; + +@@ -2895,7 +2915,7 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, + while (filename) { + BlockBackend *blk; + BlockDriverState *bs; +- BlockNodeInfo *info; ++ BlockGraphInfo *info; + + if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) { + error_report("Backing file '%s' creates an infinite loop.", +@@ -2912,7 +2932,14 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, + } + bs = blk_bs(blk); + +- bdrv_query_block_node_info(bs, &info, &err); ++ /* ++ * Note that the returned BlockGraphInfo object will not have ++ * information about this image's backing node, because we have opened ++ * it with BDRV_O_NO_BACKING. Printing this object will therefore not ++ * duplicate the backing chain information that we obtain by walking ++ * the chain manually here. 
++ */ ++ bdrv_query_block_graph_info(bs, &info, &err); + if (err) { + error_report_err(err); + blk_unref(blk); +@@ -2945,7 +2972,7 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, + return head; + + err: +- qapi_free_BlockNodeInfoList(head); ++ qapi_free_BlockGraphInfoList(head); + g_hash_table_destroy(filenames); + return NULL; + } +@@ -2956,7 +2983,7 @@ static int img_info(int argc, char **argv) + OutputFormat output_format = OFORMAT_HUMAN; + bool chain = false; + const char *filename, *fmt, *output; +- BlockNodeInfoList *list; ++ BlockGraphInfoList *list; + bool image_opts = false; + bool force_share = false; + +@@ -3035,14 +3062,14 @@ static int img_info(int argc, char **argv) + break; + case OFORMAT_JSON: + if (chain) { +- dump_json_block_node_info_list(list); ++ dump_json_block_graph_info_list(list); + } else { +- dump_json_block_node_info(list->value); ++ dump_json_block_graph_info(list->value); + } + break; + } + +- qapi_free_BlockNodeInfoList(list); ++ qapi_free_BlockGraphInfoList(list); + return 0; + } + +diff --git a/tests/qemu-iotests/065 b/tests/qemu-iotests/065 +index b724c89c7c..b76701c71e 100755 +--- a/tests/qemu-iotests/065 ++++ b/tests/qemu-iotests/065 +@@ -56,7 +56,7 @@ class TestQemuImgInfo(TestImageInfoSpecific): + def test_human(self): + data = qemu_img('info', '--output=human', test_img).stdout.split('\n') + data = data[(data.index('Format specific information:') + 1) +- :data.index('')] ++ :data.index("Child node '/file':")] + for field in data: + self.assertTrue(re.match('^ {4}[^ ]', field) is not None) + data = [line.strip() for line in data] +diff --git a/tests/qemu-iotests/302.out b/tests/qemu-iotests/302.out +index 3e7c281b91..edfa1c4f05 100644 +--- a/tests/qemu-iotests/302.out ++++ b/tests/qemu-iotests/302.out +@@ -4,6 +4,11 @@ image: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock + file format: raw + virtual size: 448 KiB (458752 bytes) + disk size: unavailable ++Child node '/file': ++ image: 
nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock ++ file format: nbd ++ virtual size: 448 KiB (458752 bytes) ++ disk size: unavailable + + === Converted image info === + image: TEST_IMG +-- +2.31.1 + diff --git a/SOURCES/kvm-qemu-img-Use-BlockNodeInfo.patch b/SOURCES/kvm-qemu-img-Use-BlockNodeInfo.patch new file mode 100644 index 0000000..7bfb7e6 --- /dev/null +++ b/SOURCES/kvm-qemu-img-Use-BlockNodeInfo.patch @@ -0,0 +1,241 @@ +From dca4cbe680baff837ca8ac8bd39b77b46af3f64b Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:26:57 +0200 +Subject: [PATCH 10/20] qemu-img: Use BlockNodeInfo + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [5/12] b599af3ec05951a0ba11d9eae2ee19148d6bf624 (hreitz/qemu-kvm-c-9-s) + +qemu-img info never uses ImageInfo's backing-image field, because it +opens the backing chain one by one with BDRV_O_NO_BACKING, and prints +all backing chain nodes' information consecutively. Use BlockNodeInfo +to make it clear that we only print information about a single node, and +that we are not using the backing-image field. + +Notably, bdrv_image_info_dump() does not evaluate the backing-image +field, so we can easily make it take a BlockNodeInfo pointer (and +consequentially rename it to bdrv_node_info_dump()). It makes more +sense this way, because again, the interface now makes it syntactically +clear that backing-image is ignored by this function. 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-6-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit b1f4cd1589a16fec02f264a09bd3560e4ccce3c2) +Signed-off-by: Hanna Czenczek +--- + block/monitor/block-hmp-cmds.c | 2 +- + block/qapi.c | 2 +- + include/block/qapi.h | 2 +- + qapi/block-core.json | 4 +-- + qemu-img.c | 48 +++++++++++++++++----------------- + 5 files changed, 29 insertions(+), 29 deletions(-) + +diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c +index b6135e9bfe..aa37faa601 100644 +--- a/block/monitor/block-hmp-cmds.c ++++ b/block/monitor/block-hmp-cmds.c +@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, + monitor_printf(mon, "\nImages:\n"); + image_info = inserted->image; + while (1) { +- bdrv_image_info_dump(image_info); ++ bdrv_node_info_dump(qapi_ImageInfo_base(image_info)); + if (image_info->has_backing_image) { + image_info = image_info->backing_image; + } else { +diff --git a/block/qapi.c b/block/qapi.c +index e5022b4481..ad88bf9b38 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -865,7 +865,7 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, + visit_free(v); + } + +-void bdrv_image_info_dump(ImageInfo *info) ++void bdrv_node_info_dump(BlockNodeInfo *info) + { + char *size_buf, *dsize_buf; + if (!info->has_actual_size) { +diff --git a/include/block/qapi.h b/include/block/qapi.h +index c7de4e3fa9..22198dcd0c 100644 +--- a/include/block/qapi.h ++++ b/include/block/qapi.h +@@ -45,5 +45,5 @@ void bdrv_query_image_info(BlockDriverState *bs, + void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); + void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, + const char *prefix); +-void bdrv_image_info_dump(ImageInfo *info); ++void bdrv_node_info_dump(BlockNodeInfo *info); + #endif +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 7720da0498..4cf2deeb6c 100644 +--- a/qapi/block-core.json ++++ 
b/qapi/block-core.json +@@ -5796,9 +5796,9 @@ + ## + # @DummyBlockCoreForceArrays: + # +-# Not used by QMP; hack to let us use ImageInfoList internally ++# Not used by QMP; hack to let us use BlockNodeInfoList internally + # + # Since: 8.0 + ## + { 'struct': 'DummyBlockCoreForceArrays', +- 'data': { 'unused-image-info': ['ImageInfo'] } } ++ 'data': { 'unused-block-node-info': ['BlockNodeInfo'] } } +diff --git a/qemu-img.c b/qemu-img.c +index 2f85bb7ede..3b2ca3bbcb 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -2816,13 +2816,13 @@ static void dump_snapshots(BlockDriverState *bs) + g_free(sn_tab); + } + +-static void dump_json_image_info_list(ImageInfoList *list) ++static void dump_json_block_node_info_list(BlockNodeInfoList *list) + { + GString *str; + QObject *obj; + Visitor *v = qobject_output_visitor_new(&obj); + +- visit_type_ImageInfoList(v, NULL, &list, &error_abort); ++ visit_type_BlockNodeInfoList(v, NULL, &list, &error_abort); + visit_complete(v, &obj); + str = qobject_to_json_pretty(obj, true); + assert(str != NULL); +@@ -2832,13 +2832,13 @@ static void dump_json_image_info_list(ImageInfoList *list) + g_string_free(str, true); + } + +-static void dump_json_image_info(ImageInfo *info) ++static void dump_json_block_node_info(BlockNodeInfo *info) + { + GString *str; + QObject *obj; + Visitor *v = qobject_output_visitor_new(&obj); + +- visit_type_ImageInfo(v, NULL, &info, &error_abort); ++ visit_type_BlockNodeInfo(v, NULL, &info, &error_abort); + visit_complete(v, &obj); + str = qobject_to_json_pretty(obj, true); + assert(str != NULL); +@@ -2848,9 +2848,9 @@ static void dump_json_image_info(ImageInfo *info) + g_string_free(str, true); + } + +-static void dump_human_image_info_list(ImageInfoList *list) ++static void dump_human_image_info_list(BlockNodeInfoList *list) + { +- ImageInfoList *elem; ++ BlockNodeInfoList *elem; + bool delim = false; + + for (elem = list; elem; elem = elem->next) { +@@ -2859,7 +2859,7 @@ static void 
dump_human_image_info_list(ImageInfoList *list) + } + delim = true; + +- bdrv_image_info_dump(elem->value); ++ bdrv_node_info_dump(elem->value); + } + } + +@@ -2869,24 +2869,24 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b) + } + + /** +- * Open an image file chain and return an ImageInfoList ++ * Open an image file chain and return an BlockNodeInfoList + * + * @filename: topmost image filename + * @fmt: topmost image format (may be NULL to autodetect) + * @chain: true - enumerate entire backing file chain + * false - only topmost image file + * +- * Returns a list of ImageInfo objects or NULL if there was an error opening an +- * image file. If there was an error a message will have been printed to +- * stderr. ++ * Returns a list of BlockNodeInfo objects or NULL if there was an error ++ * opening an image file. If there was an error a message will have been ++ * printed to stderr. + */ +-static ImageInfoList *collect_image_info_list(bool image_opts, +- const char *filename, +- const char *fmt, +- bool chain, bool force_share) ++static BlockNodeInfoList *collect_image_info_list(bool image_opts, ++ const char *filename, ++ const char *fmt, ++ bool chain, bool force_share) + { +- ImageInfoList *head = NULL; +- ImageInfoList **tail = &head; ++ BlockNodeInfoList *head = NULL; ++ BlockNodeInfoList **tail = &head; + GHashTable *filenames; + Error *err = NULL; + +@@ -2895,7 +2895,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, + while (filename) { + BlockBackend *blk; + BlockDriverState *bs; +- ImageInfo *info; ++ BlockNodeInfo *info; + + if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) { + error_report("Backing file '%s' creates an infinite loop.", +@@ -2912,7 +2912,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, + } + bs = blk_bs(blk); + +- bdrv_query_image_info(bs, &info, &err); ++ bdrv_query_block_node_info(bs, &info, &err); + if (err) { + error_report_err(err); + blk_unref(blk); +@@ 
-2945,7 +2945,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, + return head; + + err: +- qapi_free_ImageInfoList(head); ++ qapi_free_BlockNodeInfoList(head); + g_hash_table_destroy(filenames); + return NULL; + } +@@ -2956,7 +2956,7 @@ static int img_info(int argc, char **argv) + OutputFormat output_format = OFORMAT_HUMAN; + bool chain = false; + const char *filename, *fmt, *output; +- ImageInfoList *list; ++ BlockNodeInfoList *list; + bool image_opts = false; + bool force_share = false; + +@@ -3035,14 +3035,14 @@ static int img_info(int argc, char **argv) + break; + case OFORMAT_JSON: + if (chain) { +- dump_json_image_info_list(list); ++ dump_json_block_node_info_list(list); + } else { +- dump_json_image_info(list->value); ++ dump_json_block_node_info(list->value); + } + break; + } + +- qapi_free_ImageInfoList(list); ++ qapi_free_BlockNodeInfoList(list); + return 0; + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch b/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch new file mode 100644 index 0000000..693049c --- /dev/null +++ b/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch @@ -0,0 +1,70 @@ +From d0d3d694b3a8d200442484ae0c9d263e0439cd04 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:53 +0100 +Subject: [PATCH 03/20] qemu-img bitmap: Report errors while closing the image +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2150180 +RH-Acked-by: Thomas Huth +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefano Garzarella +RH-Commit: [3/4] 4a704fec2e3bcb47b2be1529e27fd1833d58c517 (kmwolf/centos-qemu-kvm) + +blk_unref() can't report any errors that happen while closing the image. 
+For example, if qcow2 hits an -ENOSPC error while writing out dirty +bitmaps when it's closed, it prints error messages to stderr, but +'qemu-img bitmap' won't see any error return value and will therefore +look successful with exit code 0. + +In order to fix this, manually inactivate the image first before calling +blk_unref(). This already performs the operations that would be most +likely to fail while closing the image, but it can still return errors. + +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1330 +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-4-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit c5e477110dcb8ef4642dce399777c3dee68fa96c) +Signed-off-by: Kevin Wolf +--- + qemu-img.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/qemu-img.c b/qemu-img.c +index 3cbdda9f76..2f85bb7ede 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -4646,6 +4646,7 @@ static int img_bitmap(int argc, char **argv) + QSIMPLEQ_HEAD(, ImgBitmapAction) actions; + ImgBitmapAction *act, *act_next; + const char *op; ++ int inactivate_ret; + + QSIMPLEQ_INIT(&actions); + +@@ -4830,6 +4831,16 @@ static int img_bitmap(int argc, char **argv) + ret = 0; + + out: ++ /* ++ * Manually inactivate the images first because this way we can know whether ++ * an error occurred. blk_unref() doesn't tell us about failures. 
++ */ ++ inactivate_ret = bdrv_inactivate_all(); ++ if (inactivate_ret < 0) { ++ error_report("Error while closing the image: %s", strerror(-inactivate_ret)); ++ ret = 1; ++ } ++ + blk_unref(src); + blk_unref(blk); + qemu_opts_del(opts); +-- +2.31.1 + diff --git a/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch b/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch new file mode 100644 index 0000000..5cac3ba --- /dev/null +++ b/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch @@ -0,0 +1,67 @@ +From 2f5369f0effaa23be746f9b5d9f6a0bfc346fb7d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:52 +0100 +Subject: [PATCH 02/20] qemu-img commit: Report errors while closing the image + +RH-Author: Kevin Wolf +RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2150180 +RH-Acked-by: Thomas Huth +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefano Garzarella +RH-Commit: [2/4] faedd43355463b1210a3f21ecd430f478bd06f5a (kmwolf/centos-qemu-kvm) + +blk_unref() can't report any errors that happen while closing the image. +For example, if qcow2 hits an -ENOSPC error while writing out dirty +bitmaps when it's closed, it prints error messages to stderr, but +'qemu-img commit' won't see any error return value and will therefore +look successful with exit code 0. + +In order to fix this, manually inactivate the image first before calling +blk_unref(). This already performs the operations that would be most +likely to fail while closing the image, but it can still return errors. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-3-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Signed-off-by: Kevin Wolf +(cherry picked from commit 44efba2d713aca076c411594d0c1a2b99155eeb3) +Signed-off-by: Kevin Wolf +--- + qemu-img.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/qemu-img.c b/qemu-img.c +index a9b3a8103c..3cbdda9f76 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -449,6 +449,11 @@ static BlockBackend *img_open(bool image_opts, + blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet, + force_share); + } ++ ++ if (blk) { ++ blk_set_force_allow_inactivate(blk); ++ } ++ + return blk; + } + +@@ -1119,6 +1124,14 @@ unref_backing: + done: + qemu_progress_end(); + ++ /* ++ * Manually inactivate the image first because this way we can know whether ++ * an error occurred. blk_unref() doesn't tell us about failures. ++ */ ++ ret = bdrv_inactivate_all(); ++ if (ret < 0 && !local_err) { ++ error_setg_errno(&local_err, -ret, "Error while closing the image"); ++ } + blk_unref(blk); + + if (local_err) { +-- +2.31.1 + diff --git a/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch b/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch new file mode 100644 index 0000000..6b88e5c --- /dev/null +++ b/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch @@ -0,0 +1,166 @@ +From 06030aa79fcb2d90d6a670e75d959aa0c3204b5c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:54 +0100 +Subject: [PATCH 04/20] qemu-iotests: Test qemu-img bitmap/commit exit code on + error + +RH-Author: Kevin Wolf +RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2150180 +RH-Acked-by: Thomas Huth +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefano Garzarella +RH-Commit: [4/4] b96bb671bcfb7ae18015fda14db70f42a83a6ea7 (kmwolf/centos-qemu-kvm) + +This tests that when an error happens while writing back bitmaps to the +image file 
in qcow2_inactivate(), 'qemu-img bitmap/commit' actually +return an error value in their exit code instead of making the operation +look successful to scripts. + +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-5-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Signed-off-by: Kevin Wolf +(cherry picked from commit 07a4e1f8e5418f36424cd57d5d061b090a238c65) +Signed-off-by: Kevin Wolf +--- + .../qemu-iotests/tests/qemu-img-close-errors | 96 +++++++++++++++++++ + .../tests/qemu-img-close-errors.out | 23 +++++ + 2 files changed, 119 insertions(+) + create mode 100755 tests/qemu-iotests/tests/qemu-img-close-errors + create mode 100644 tests/qemu-iotests/tests/qemu-img-close-errors.out + +diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors b/tests/qemu-iotests/tests/qemu-img-close-errors +new file mode 100755 +index 0000000000..50bfb6cfa2 +--- /dev/null ++++ b/tests/qemu-iotests/tests/qemu-img-close-errors +@@ -0,0 +1,96 @@ ++#!/usr/bin/env bash ++# group: rw auto quick ++# ++# Check that errors while closing the image, in particular writing back dirty ++# bitmaps, are correctly reported with a failing qemu-img exit code. ++# ++# Copyright (C) 2023 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see <http://www.gnu.org/licenses/>. ++# ++ ++# creator ++owner=kwolf@redhat.com ++ ++seq="$(basename $0)" ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! 
++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. ./common.filter ++ ++_supported_fmt qcow2 ++_supported_proto file ++_supported_os Linux ++ ++size=1G ++ ++# The error we are going to use is ENOSPC. Depending on how many bitmaps we ++# create in the backing file (and therefore increase the used up space), we get ++# failures in different places. With a low number, only merging the bitmap ++# fails, whereas with a higher number, already 'qemu-img commit' fails. ++for max_bitmap in 6 7; do ++ echo ++ echo "=== Test with $max_bitmap bitmaps ===" ++ ++ TEST_IMG="$TEST_IMG.base" _make_test_img -q $size ++ for i in $(seq 1 $max_bitmap); do ++ $QEMU_IMG bitmap --add "$TEST_IMG.base" "stale-bitmap-$i" ++ done ++ ++ # Simulate a block device of 128 MB by resizing the image file accordingly ++ # and then enforcing the size with the raw driver ++ $QEMU_IO -f raw -c "truncate 128M" "$TEST_IMG.base" ++ BASE_JSON='json:{ ++ "driver": "qcow2", ++ "file": { ++ "driver": "raw", ++ "size": 134217728, ++ "file": { ++ "driver": "file", ++ "filename":"'"$TEST_IMG.base"'" ++ } ++ } ++ }' ++ ++ _make_test_img -q -b "$BASE_JSON" -F $IMGFMT ++ $QEMU_IMG bitmap --add "$TEST_IMG" "good-bitmap" ++ ++ $QEMU_IO -c 'write 0 126m' "$TEST_IMG" | _filter_qemu_io ++ ++ $QEMU_IMG commit -d "$TEST_IMG" 2>&1 | _filter_generated_node_ids ++ echo "qemu-img commit exit code: ${PIPESTATUS[0]}" ++ ++ $QEMU_IMG bitmap --add "$BASE_JSON" "good-bitmap" ++ echo "qemu-img bitmap --add exit code: $?" 
++ ++ $QEMU_IMG bitmap --merge "good-bitmap" -b "$TEST_IMG" "$BASE_JSON" \ ++ "good-bitmap" 2>&1 | _filter_generated_node_ids ++ echo "qemu-img bitmap --merge exit code: ${PIPESTATUS[0]}" ++done ++ ++# success, all done ++echo "*** done" ++rm -f $seq.full ++status=0 ++ +diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors.out b/tests/qemu-iotests/tests/qemu-img-close-errors.out +new file mode 100644 +index 0000000000..1bfe88f176 +--- /dev/null ++++ b/tests/qemu-iotests/tests/qemu-img-close-errors.out +@@ -0,0 +1,23 @@ ++QA output created by qemu-img-close-errors ++ ++=== Test with 6 bitmaps === ++wrote 132120576/132120576 bytes at offset 0 ++126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++Image committed. ++qemu-img commit exit code: 0 ++qemu-img bitmap --add exit code: 0 ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device ++qemu-img: Error while closing the image: Invalid argument ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device ++qemu-img bitmap --merge exit code: 1 ++ ++=== Test with 7 bitmaps === ++wrote 132120576/132120576 bytes at offset 0 ++126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device ++qemu-img: Error while closing the image: Invalid argument ++qemu-img commit exit code: 1 ++qemu-img bitmap --add exit code: 0 ++qemu-img bitmap --merge exit code: 0 ++*** done +-- +2.31.1 + diff --git a/SOURCES/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch b/SOURCES/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch deleted file mode 100644 
index 9acff58..0000000 --- a/SOURCES/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch +++ /dev/null @@ -1,92 +0,0 @@ -From e6aae1d0368a152924c38775e517f4e83c1d898b Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Wed, 11 May 2022 19:49:23 -0500 -Subject: [PATCH 1/2] qemu-nbd: Pass max connections to blockdev layer -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Blake -RH-MergeRequest: 90: Advertise MULTI_CONN on writeable NBD servers -RH-Commit: [1/2] b0e33fd125bf3523b8b9a4dead3c8bb2342bfd4e (ebblake/centos-qemu-kvm) -RH-Bugzilla: 1708300 -RH-Acked-by: Nir Soffer -RH-Acked-by: Kevin Wolf -RH-Acked-by: Daniel P. Berrangé - -The next patch wants to adjust whether the NBD server code advertises -MULTI_CONN based on whether it is known if the server limits to -exactly one client. For a server started by QMP, this information is -obtained through nbd_server_start (which can support more than one -export); but for qemu-nbd (which supports exactly one export), it is -controlled only by the command-line option -e/--shared. Since we -already have a hook function used by qemu-nbd, it's easiest to just -alter its signature to fit our needs. 
- -Signed-off-by: Eric Blake -Message-Id: <20220512004924.417153-2-eblake@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit a5fced40212ed73c715ca298a2929dd4d99c9999) -Signed-off-by: Eric Blake ---- - blockdev-nbd.c | 8 ++++---- - include/block/nbd.h | 2 +- - qemu-nbd.c | 2 +- - 3 files changed, 6 insertions(+), 6 deletions(-) - -diff --git a/blockdev-nbd.c b/blockdev-nbd.c -index 9840d25a82..add41a23af 100644 ---- a/blockdev-nbd.c -+++ b/blockdev-nbd.c -@@ -30,18 +30,18 @@ typedef struct NBDServerData { - } NBDServerData; - - static NBDServerData *nbd_server; --static bool is_qemu_nbd; -+static int qemu_nbd_connections = -1; /* Non-negative if this is qemu-nbd */ - - static void nbd_update_server_watch(NBDServerData *s); - --void nbd_server_is_qemu_nbd(bool value) -+void nbd_server_is_qemu_nbd(int max_connections) - { -- is_qemu_nbd = value; -+ qemu_nbd_connections = max_connections; - } - - bool nbd_server_is_running(void) - { -- return nbd_server || is_qemu_nbd; -+ return nbd_server || qemu_nbd_connections >= 0; - } - - static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) -diff --git a/include/block/nbd.h b/include/block/nbd.h -index a98eb665da..c5a29ce1c6 100644 ---- a/include/block/nbd.h -+++ b/include/block/nbd.h -@@ -344,7 +344,7 @@ void nbd_client_new(QIOChannelSocket *sioc, - void nbd_client_get(NBDClient *client); - void nbd_client_put(NBDClient *client); - --void nbd_server_is_qemu_nbd(bool value); -+void nbd_server_is_qemu_nbd(int max_connections); - bool nbd_server_is_running(void); - void nbd_server_start(SocketAddress *addr, const char *tls_creds, - const char *tls_authz, uint32_t max_connections, -diff --git a/qemu-nbd.c b/qemu-nbd.c -index 713e7557a9..8c25ae93df 100644 ---- a/qemu-nbd.c -+++ b/qemu-nbd.c -@@ -1087,7 +1087,7 @@ int main(int argc, char **argv) - - bs->detect_zeroes = detect_zeroes; - -- nbd_server_is_qemu_nbd(true); -+ nbd_server_is_qemu_nbd(shared); - - export_opts = g_new(BlockExportOptions, 1); - 
*export_opts = (BlockExportOptions) { --- -2.31.1 - diff --git a/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch b/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch new file mode 100644 index 0000000..25f30ff --- /dev/null +++ b/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch @@ -0,0 +1,146 @@ +From aa61e4c437d29a791ea09a01f7230231f1e53356 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 05/12] qemu-thread-posix: cleanup, fix, document QemuEvent + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [2/9] c3bdf75f884e137c667316aaac96bb4a0b9ec2d9 (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit 9586a1329f5dce6c1d7f4de53cf0536644d7e593 +Author: Paolo Bonzini +Date: Thu Mar 2 11:19:52 2023 +0100 + + qemu-thread-posix: cleanup, fix, document QemuEvent + + QemuEvent is currently broken on ARM due to missing memory barriers + after qatomic_*(). Apart from adding the memory barrier, a closer look + reveals some unpaired memory barriers too. Document more clearly what + is going on. 
+ + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/qemu-thread-posix.c | 69 ++++++++++++++++++++++++++++------------ + 1 file changed, 49 insertions(+), 20 deletions(-) + +diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c +index bae938c670..cc74f4ede0 100644 +--- a/util/qemu-thread-posix.c ++++ b/util/qemu-thread-posix.c +@@ -379,13 +379,21 @@ void qemu_event_destroy(QemuEvent *ev) + + void qemu_event_set(QemuEvent *ev) + { +- /* qemu_event_set has release semantics, but because it *loads* ++ assert(ev->initialized); ++ ++ /* ++ * Pairs with both qemu_event_reset() and qemu_event_wait(). ++ * ++ * qemu_event_set has release semantics, but because it *loads* + * ev->value we need a full memory barrier here. + */ +- assert(ev->initialized); + smp_mb(); + if (qatomic_read(&ev->value) != EV_SET) { +- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { ++ int old = qatomic_xchg(&ev->value, EV_SET); ++ ++ /* Pairs with memory barrier in kernel futex_wait system call. */ ++ smp_mb__after_rmw(); ++ if (old == EV_BUSY) { + /* There were waiters, wake them up. */ + qemu_futex_wake(ev, INT_MAX); + } +@@ -394,18 +402,19 @@ void qemu_event_set(QemuEvent *ev) + + void qemu_event_reset(QemuEvent *ev) + { +- unsigned value; +- + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); +- if (value == EV_SET) { +- /* +- * If there was a concurrent reset (or even reset+wait), +- * do nothing. Otherwise change EV_SET->EV_FREE. +- */ +- qatomic_or(&ev->value, EV_FREE); +- } ++ ++ /* ++ * If there was a concurrent reset (or even reset+wait), ++ * do nothing. Otherwise change EV_SET->EV_FREE. ++ */ ++ qatomic_or(&ev->value, EV_FREE); ++ ++ /* ++ * Order reset before checking the condition in the caller. ++ * Pairs with the first memory barrier in qemu_event_set(). 
++ */ ++ smp_mb__after_rmw(); + } + + void qemu_event_wait(QemuEvent *ev) +@@ -413,20 +422,40 @@ void qemu_event_wait(QemuEvent *ev) + unsigned value; + + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); ++ ++ /* ++ * qemu_event_wait must synchronize with qemu_event_set even if it does ++ * not go down the slow path, so this load-acquire is needed that ++ * synchronizes with the first memory barrier in qemu_event_set(). ++ * ++ * If we do go down the slow path, there is no requirement at all: we ++ * might miss a qemu_event_set() here but ultimately the memory barrier in ++ * qemu_futex_wait() will ensure the check is done correctly. ++ */ ++ value = qatomic_load_acquire(&ev->value); + if (value != EV_SET) { + if (value == EV_FREE) { + /* +- * Leave the event reset and tell qemu_event_set that there +- * are waiters. No need to retry, because there cannot be +- * a concurrent busy->free transition. After the CAS, the +- * event will be either set or busy. ++ * Leave the event reset and tell qemu_event_set that there are ++ * waiters. No need to retry, because there cannot be a concurrent ++ * busy->free transition. After the CAS, the event will be either ++ * set or busy. ++ * ++ * This cmpxchg doesn't have particular ordering requirements if it ++ * succeeds (moving the store earlier can only cause qemu_event_set() ++ * to issue _more_ wakeups), the failing case needs acquire semantics ++ * like the load above. + */ + if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { + return; + } + } ++ ++ /* ++ * This is the final check for a concurrent set, so it does need ++ * a smp_mb() pairing with the second barrier of qemu_event_set(). ++ * The barrier is inside the FUTEX_WAIT system call. 
++ */ + qemu_futex_wait(ev, EV_BUSY); + } + } +-- +2.39.1 + diff --git a/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch b/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch new file mode 100644 index 0000000..631d541 --- /dev/null +++ b/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch @@ -0,0 +1,162 @@ +From 02347869410fe53d814487501fb586f7dc614375 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 06/12] qemu-thread-win32: cleanup, fix, document QemuEvent + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [3/9] d228e9d6a4a75dd1f0a23a6dceaf4fea23d69192 (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit 6c5df4b48f0c52a61342ecb307a43f4c2a3565c4 +Author: Paolo Bonzini +Date: Thu Mar 2 11:22:50 2023 +0100 + + qemu-thread-win32: cleanup, fix, document QemuEvent + + QemuEvent is currently broken on ARM due to missing memory barriers + after qatomic_*(). Apart from adding the memory barrier, a closer look + reveals some unpaired memory barriers that are not really needed and + complicated the functions unnecessarily. Also, it is relying on + a memory barrier in ResetEvent(); the barrier _ought_ to be there + but there is really no documentation about it, so make it explicit. 
+ + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/qemu-thread-win32.c | 82 +++++++++++++++++++++++++++------------- + 1 file changed, 56 insertions(+), 26 deletions(-) + +diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c +index 69db254ac7..a7fe3cc345 100644 +--- a/util/qemu-thread-win32.c ++++ b/util/qemu-thread-win32.c +@@ -272,12 +272,20 @@ void qemu_event_destroy(QemuEvent *ev) + void qemu_event_set(QemuEvent *ev) + { + assert(ev->initialized); +- /* qemu_event_set has release semantics, but because it *loads* ++ ++ /* ++ * Pairs with both qemu_event_reset() and qemu_event_wait(). ++ * ++ * qemu_event_set has release semantics, but because it *loads* + * ev->value we need a full memory barrier here. + */ + smp_mb(); + if (qatomic_read(&ev->value) != EV_SET) { +- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { ++ int old = qatomic_xchg(&ev->value, EV_SET); ++ ++ /* Pairs with memory barrier after ResetEvent. */ ++ smp_mb__after_rmw(); ++ if (old == EV_BUSY) { + /* There were waiters, wake them up. */ + SetEvent(ev->event); + } +@@ -286,17 +294,19 @@ void qemu_event_set(QemuEvent *ev) + + void qemu_event_reset(QemuEvent *ev) + { +- unsigned value; +- + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); +- if (value == EV_SET) { +- /* If there was a concurrent reset (or even reset+wait), +- * do nothing. Otherwise change EV_SET->EV_FREE. +- */ +- qatomic_or(&ev->value, EV_FREE); +- } ++ ++ /* ++ * If there was a concurrent reset (or even reset+wait), ++ * do nothing. Otherwise change EV_SET->EV_FREE. ++ */ ++ qatomic_or(&ev->value, EV_FREE); ++ ++ /* ++ * Order reset before checking the condition in the caller. ++ * Pairs with the first memory barrier in qemu_event_set(). 
++ */ ++ smp_mb__after_rmw(); + } + + void qemu_event_wait(QemuEvent *ev) +@@ -304,29 +314,49 @@ void qemu_event_wait(QemuEvent *ev) + unsigned value; + + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); ++ ++ /* ++ * qemu_event_wait must synchronize with qemu_event_set even if it does ++ * not go down the slow path, so this load-acquire is needed that ++ * synchronizes with the first memory barrier in qemu_event_set(). ++ * ++ * If we do go down the slow path, there is no requirement at all: we ++ * might miss a qemu_event_set() here but ultimately the memory barrier in ++ * qemu_futex_wait() will ensure the check is done correctly. ++ */ ++ value = qatomic_load_acquire(&ev->value); + if (value != EV_SET) { + if (value == EV_FREE) { +- /* qemu_event_set is not yet going to call SetEvent, but we are +- * going to do another check for EV_SET below when setting EV_BUSY. +- * At that point it is safe to call WaitForSingleObject. ++ /* ++ * Here the underlying kernel event is reset, but qemu_event_set is ++ * not yet going to call SetEvent. However, there will be another ++ * check for EV_SET below when setting EV_BUSY. At that point it ++ * is safe to call WaitForSingleObject. + */ + ResetEvent(ev->event); + +- /* Tell qemu_event_set that there are waiters. No need to retry +- * because there cannot be a concurrent busy->free transition. +- * After the CAS, the event will be either set or busy. ++ /* ++ * It is not clear whether ResetEvent provides this barrier; kernel ++ * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry! ++ */ ++ smp_mb(); ++ ++ /* ++ * Leave the event reset and tell qemu_event_set that there are ++ * waiters. No need to retry, because there cannot be a concurrent ++ * busy->free transition. After the CAS, the event will be either ++ * set or busy. 
+ */ + if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { +- value = EV_SET; +- } else { +- value = EV_BUSY; ++ return; + } + } +- if (value == EV_BUSY) { +- WaitForSingleObject(ev->event, INFINITE); +- } ++ ++ /* ++ * ev->value is now EV_BUSY. Since we didn't observe EV_SET, ++ * qemu_event_set() must observe EV_BUSY and call SetEvent(). ++ */ ++ WaitForSingleObject(ev->event, INFINITE); + } + } + +-- +2.39.1 + diff --git a/SOURCES/kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch b/SOURCES/kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch new file mode 100644 index 0000000..d039212 --- /dev/null +++ b/SOURCES/kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch @@ -0,0 +1,49 @@ +From 48f45171b89b8ed24f2b2484d63b00ea7818b5c3 Mon Sep 17 00:00:00 2001 +From: Kfir Manor +Date: Sun, 22 Jan 2023 17:33:07 +0200 +Subject: [PATCH 9/9] qga/linux: add usb support to guest-get-fsinfo + +RH-Author: Kostiantyn Kostiuk +RH-MergeRequest: 140: qga/linux: add usb support to guest-get-fsinfo +RH-Bugzilla: 2149191 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: yvugenfi +RH-Commit: [1/1] bae929a2d0d0ad20e7308ede69c26499fc2119c7 (kostyanf14/redhat_centos-stream_src_qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2149191 +Upstream patch: https://patchew.org/QEMU/20230122153307.1050593-1-kfir@daynix.com/ + +Signed-off-by: Kfir Manor +Reviewed-by: Konstantin Kostiuk +Signed-off-by: Konstantin Kostiuk +--- + qga/commands-posix.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index 32493d6383..f1b2b87c13 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -877,7 +877,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, + g_str_equal(driver, "sym53c8xx") || + g_str_equal(driver, "virtio-pci") || + g_str_equal(driver, "ahci") || +- g_str_equal(driver, "nvme"))) { ++ g_str_equal(driver, "nvme") || ++ g_str_equal(driver, "xhci_hcd") || ++ 
g_str_equal(driver, "ehci-pci"))) { + break; + } + +@@ -974,6 +976,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, + } + } else if (strcmp(driver, "nvme") == 0) { + disk->bus_type = GUEST_DISK_BUS_TYPE_NVME; ++ } else if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) { ++ disk->bus_type = GUEST_DISK_BUS_TYPE_USB; + } else { + g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath); + goto cleanup; +-- +2.31.1 + diff --git a/SOURCES/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch b/SOURCES/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch deleted file mode 100644 index 9c2ac99..0000000 --- a/SOURCES/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch +++ /dev/null @@ -1,100 +0,0 @@ -From a039ed652e6d2f5edcef9d5d1d3baec17ce7f929 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 11 May 2022 18:01:35 +0800 -Subject: [PATCH 04/16] qtest/numa-test: Correct CPU and NUMA association in - aarch64_numa_cpu() - -RH-Author: Gavin Shan -RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology -RH-Commit: [4/6] 64e9908a179eb4fb586d662f70f275a81808e50c (gwshan/qemu-rhel-9) -RH-Bugzilla: 2041823 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 - -In aarch64_numa_cpu(), the CPU and NUMA association is something -like below. Two threads in the same core/cluster/socket are -associated with two individual NUMA nodes, which is unreal as -Igor Mammedov mentioned. We don't expect the association to break -NUMA-to-socket boundary, which matches with the real world. - -NUMA-node socket cluster core thread ------------------------------------------- -0 0 0 0 0 -1 0 0 0 1 - -This corrects the topology for CPUs and their association with -NUMA nodes. After this patch is applied, the CPU and NUMA -association becomes something like below, which looks real. 
-Besides, socket/cluster/core/thread IDs are all checked when -the NUMA node IDs are verified. It helps to check if the CPU -topology is properly populated or not. - -NUMA-node socket cluster core thread ------------------------------------------- -0 1 0 0 0 -1 0 0 0 0 - -Suggested-by: Igor Mammedov -Signed-off-by: Gavin Shan -Acked-by: Igor Mammedov -Message-id: 20220503140304.855514-5-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit e280ecb39bc1629f74ea5479d464fd1608dc8f76) -Signed-off-by: Gavin Shan ---- - tests/qtest/numa-test.c | 18 ++++++++++++------ - 1 file changed, 12 insertions(+), 6 deletions(-) - -diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c -index aeda8c774c..32e35daaae 100644 ---- a/tests/qtest/numa-test.c -+++ b/tests/qtest/numa-test.c -@@ -224,17 +224,17 @@ static void aarch64_numa_cpu(const void *data) - g_autofree char *cli = NULL; - - cli = make_cli(data, "-machine " -- "smp.cpus=2,smp.sockets=1,smp.clusters=1,smp.cores=1,smp.threads=2 " -+ "smp.cpus=2,smp.sockets=2,smp.clusters=1,smp.cores=1,smp.threads=1 " - "-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 " -- "-numa cpu,node-id=1,thread-id=0 " -- "-numa cpu,node-id=0,thread-id=1"); -+ "-numa cpu,node-id=0,socket-id=1,cluster-id=0,core-id=0,thread-id=0 " -+ "-numa cpu,node-id=1,socket-id=0,cluster-id=0,core-id=0,thread-id=0"); - qts = qtest_init(cli); - cpus = get_cpus(qts, &resp); - g_assert(cpus); - - while ((e = qlist_pop(cpus))) { - QDict *cpu, *props; -- int64_t thread, node; -+ int64_t socket, cluster, core, thread, node; - - cpu = qobject_to(QDict, e); - g_assert(qdict_haskey(cpu, "props")); -@@ -242,12 +242,18 @@ static void aarch64_numa_cpu(const void *data) - - g_assert(qdict_haskey(props, "node-id")); - node = qdict_get_int(props, "node-id"); -+ g_assert(qdict_haskey(props, "socket-id")); -+ socket = qdict_get_int(props, "socket-id"); -+ g_assert(qdict_haskey(props, "cluster-id")); -+ cluster = qdict_get_int(props, "cluster-id"); -+ 
g_assert(qdict_haskey(props, "core-id")); -+ core = qdict_get_int(props, "core-id"); - g_assert(qdict_haskey(props, "thread-id")); - thread = qdict_get_int(props, "thread-id"); - -- if (thread == 0) { -+ if (socket == 0 && cluster == 0 && core == 0 && thread == 0) { - g_assert_cmpint(node, ==, 1); -- } else if (thread == 1) { -+ } else if (socket == 1 && cluster == 0 && core == 0 && thread == 0) { - g_assert_cmpint(node, ==, 0); - } else { - g_assert(false); --- -2.31.1 - diff --git a/SOURCES/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch b/SOURCES/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch deleted file mode 100644 index a87abc0..0000000 --- a/SOURCES/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 66f3928b40991d8467a3da086688f73d061886c8 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 11 May 2022 18:01:35 +0800 -Subject: [PATCH 02/16] qtest/numa-test: Specify CPU topology in - aarch64_numa_cpu() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gavin Shan -RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology -RH-Commit: [2/6] b851e7ad59e057825392ddf75e9040cc102a0385 (gwshan/qemu-rhel-9) -RH-Bugzilla: 2041823 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 - -The CPU topology isn't enabled on arm/virt machine yet, but we're -going to do it in next patch. After the CPU topology is enabled by -next patch, "thread-id=1" becomes invalid because the CPU core is -preferred on arm/virt machine. It means these two CPUs have 0/1 -as their core IDs, but their thread IDs are all 0. 
It will trigger -test failure as the following message indicates: - -[14/21 qemu:qtest+qtest-aarch64 / qtest-aarch64/numa-test ERROR -1.48s killed by signal 6 SIGABRT ->>> G_TEST_DBUS_DAEMON=/home/gavin/sandbox/qemu.main/tests/dbus-vmstate-daemon.sh \ -QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon \ -QTEST_QEMU_BINARY=./qemu-system-aarch64 \ -QTEST_QEMU_IMG=./qemu-img MALLOC_PERTURB_=83 \ -/home/gavin/sandbox/qemu.main/build/tests/qtest/numa-test --tap -k -―――――――――――――――――――――――――――――――――――――――――――――― -stderr: -qemu-system-aarch64: -numa cpu,node-id=0,thread-id=1: no match found - -This fixes the issue by providing comprehensive SMP configurations -in aarch64_numa_cpu(). The SMP configurations aren't used before -the CPU topology is enabled in next patch. - -Signed-off-by: Gavin Shan -Reviewed-by: Yanan Wang -Message-id: 20220503140304.855514-3-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit ac7199a2523ce2ccf8e685087a5d177eeca89b09) -Signed-off-by: Gavin Shan ---- - tests/qtest/numa-test.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c -index 90bf68a5b3..aeda8c774c 100644 ---- a/tests/qtest/numa-test.c -+++ b/tests/qtest/numa-test.c -@@ -223,7 +223,8 @@ static void aarch64_numa_cpu(const void *data) - QTestState *qts; - g_autofree char *cli = NULL; - -- cli = make_cli(data, "-machine smp.cpus=2 " -+ cli = make_cli(data, "-machine " -+ "smp.cpus=2,smp.sockets=1,smp.clusters=1,smp.cores=1,smp.threads=2 " - "-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 " - "-numa cpu,node-id=1,thread-id=0 " - "-numa cpu,node-id=0,thread-id=1"); --- -2.31.1 - diff --git a/SOURCES/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch b/SOURCES/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch deleted file mode 100644 index f027c45..0000000 --- a/SOURCES/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch +++ /dev/null @@ 
-1,106 +0,0 @@ -From 236f216309261bc924e49014267998fdc2ef7f46 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 Jul 2022 16:55:34 +0200 -Subject: [PATCH 28/32] redhat: Update linux-headers/linux/kvm.h to v5.18-rc6 - -RH-Author: Thomas Huth -RH-MergeRequest: 109: Honor storage keys during emulation of I/O instructions -RH-Commit: [1/2] f306d7ff8efa64b14158388b95815ac556a25d8a (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2111994 -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Claudio Imbrenda - -Upstream Status: RHEL-only -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2111994 - -Based on upstream commit e4082063e47e9731dbeb1c26174c17f6038f577f -("linux-headers: Update to v5.18-rc6"), but this is focusing on -the file linux-headers/linux/kvm.h only (since the other changes -related to the VFIO renaming might break some stuff). - -Signed-off-by: Thomas Huth ---- - linux-headers/linux/kvm.h | 27 +++++++++++++++++++++------ - 1 file changed, 21 insertions(+), 6 deletions(-) - -diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h -index d232feaae9..0d05d02ee4 100644 ---- a/linux-headers/linux/kvm.h -+++ b/linux-headers/linux/kvm.h -@@ -445,7 +445,11 @@ struct kvm_run { - #define KVM_SYSTEM_EVENT_RESET 2 - #define KVM_SYSTEM_EVENT_CRASH 3 - __u32 type; -- __u64 flags; -+ __u32 ndata; -+ union { -+ __u64 flags; -+ __u64 data[16]; -+ }; - } system_event; - /* KVM_EXIT_S390_STSI */ - struct { -@@ -562,9 +566,12 @@ struct kvm_s390_mem_op { - __u32 op; /* type of operation */ - __u64 buf; /* buffer in userspace */ - union { -- __u8 ar; /* the access register number */ -+ struct { -+ __u8 ar; /* the access register number */ -+ __u8 key; /* access key, ignored if flag unset */ -+ }; - __u32 sida_offset; /* offset into the sida */ -- __u8 reserved[32]; /* should be set to 0 */ -+ __u8 reserved[32]; /* ignored */ - }; - }; - /* types for kvm_s390_mem_op->op */ -@@ -572,9 +579,12 @@ struct kvm_s390_mem_op { - #define 
KVM_S390_MEMOP_LOGICAL_WRITE 1 - #define KVM_S390_MEMOP_SIDA_READ 2 - #define KVM_S390_MEMOP_SIDA_WRITE 3 -+#define KVM_S390_MEMOP_ABSOLUTE_READ 4 -+#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 - /* flags for kvm_s390_mem_op->flags */ - #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) - #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) -+#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) - - /* for KVM_INTERRUPT */ - struct kvm_interrupt { -@@ -1134,6 +1144,12 @@ struct kvm_ppc_resize_hpt { - #define KVM_CAP_VM_GPA_BITS 207 - #define KVM_CAP_XSAVE2 208 - #define KVM_CAP_SYS_ATTRIBUTES 209 -+#define KVM_CAP_PPC_AIL_MODE_3 210 -+#define KVM_CAP_S390_MEM_OP_EXTENSION 211 -+#define KVM_CAP_PMU_CAPABILITY 212 -+#define KVM_CAP_DISABLE_QUIRKS2 213 -+/* #define KVM_CAP_VM_TSC_CONTROL 214 */ -+#define KVM_CAP_SYSTEM_EVENT_DATA 215 - - #ifdef KVM_CAP_IRQ_ROUTING - -@@ -1624,9 +1640,6 @@ struct kvm_enc_region { - #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) - #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) - --/* Available with KVM_CAP_XSAVE2 */ --#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) -- - struct kvm_s390_pv_sec_parm { - __u64 origin; - __u64 length; -@@ -1973,6 +1986,8 @@ struct kvm_dirty_gfn { - #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) - #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) - -+#define KVM_PMU_CAP_DISABLE (1 << 0) -+ - /** - * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. - * @flags: Some extra information for header, always 0 for now. 
--- -2.31.1 - diff --git a/SOURCES/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch b/SOURCES/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch new file mode 100644 index 0000000..1a2e863 --- /dev/null +++ b/SOURCES/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch @@ -0,0 +1,43 @@ +From 546e4213c4e8a7b2e369315a71bc9aec091eed6e Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Mon, 19 Dec 2022 10:30:26 +0100 +Subject: redhat: fix virt-rhel9.2.0 compat props + +RH-Author: Cornelia Huck +RH-MergeRequest: 127: redhat: fix virt-rhel9.2.0 compat props +RH-Bugzilla: 2154640 +RH-Acked-by: Eric Auger +RH-Acked-by: Gavin Shan +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 49635fdc1d9a934ece78abd160b07c19909f876a (cohuck/qemu-kvm-c9s) + +We need to include arm_rhel_compat props in the latest machine. + +Signed-off-by: Cornelia Huck +--- + hw/arm/virt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 0a94f31dd1..bf18838b87 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3520,6 +3520,7 @@ type_init(rhel_machine_init); + + static void rhel920_virt_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + } + DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) + +@@ -3529,7 +3530,6 @@ static void rhel900_virt_options(MachineClass *mc) + + rhel920_virt_options(mc); + +- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ +-- +2.38.1 + diff --git a/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch b/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch new file mode 100644 index 0000000..8bf1f61 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch @@ -0,0 +1,125 @@ +From ed90f91b61844abd2dff2eb970f721a6cf072235 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: 
Fri, 28 Oct 2022 15:47:57 -0400 +Subject: [PATCH 6/9] s390x/pci: coalesce unmap operations +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset +RH-Bugzilla: 2163701 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/3] 80c3a2c1d720057ae2a80b338ea06c9c6c804532 (clegoate/qemu-kvm-c9s) + +Currently, each unmapped page is handled as an individual iommu +region notification. Attempt to group contiguous unmap operations +into fewer notifications to reduce overhead. + +Signed-off-by: Matthew Rosato +Message-Id: <20221028194758.204007-3-mjrosato@linux.ibm.com> +Reviewed-by: Eric Farman +Signed-off-by: Thomas Huth +(cherry picked from commit ef536007c3301bbd6a787e4c2210ea289adaa6f0) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-inst.c | 51 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 51 insertions(+) + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 7cc4bcf850..66e764f901 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -640,6 +640,8 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, + } + g_hash_table_remove(iommu->iotlb, &entry->iova); + inc_dma_avail(iommu); ++ /* Don't notify the iommu yet, maybe we can bundle contiguous unmaps */ ++ goto out; + } else { + if (cache) { + if (cache->perm == entry->perm && +@@ -663,15 +665,44 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, + dec_dma_avail(iommu); + } + ++ /* ++ * All associated iotlb entries have already been cleared, trigger the ++ * unmaps. ++ */ + memory_region_notify_iommu(&iommu->iommu_mr, 0, event); + + out: + return iommu->dma_limit ? 
iommu->dma_limit->avail : 1; + } + ++static void s390_pci_batch_unmap(S390PCIIOMMU *iommu, uint64_t iova, ++ uint64_t len) ++{ ++ uint64_t remain = len, start = iova, end = start + len - 1, mask, size; ++ IOMMUTLBEvent event = { ++ .type = IOMMU_NOTIFIER_UNMAP, ++ .entry = { ++ .target_as = &address_space_memory, ++ .translated_addr = 0, ++ .perm = IOMMU_NONE, ++ }, ++ }; ++ ++ while (remain >= TARGET_PAGE_SIZE) { ++ mask = dma_aligned_pow2_mask(start, end, 64); ++ size = mask + 1; ++ event.entry.iova = start; ++ event.entry.addr_mask = mask; ++ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); ++ start += size; ++ remain -= size; ++ } ++} ++ + int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + { + CPUS390XState *env = &cpu->env; ++ uint64_t iova, coalesce = 0; + uint32_t fh; + uint16_t error = 0; + S390PCIBusDevice *pbdev; +@@ -742,6 +773,21 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + break; + } + ++ /* ++ * If this is an unmap of a PTE, let's try to coalesce multiple unmaps ++ * into as few notifier events as possible. 
++ */ ++ if (entry.perm == IOMMU_NONE && entry.len == TARGET_PAGE_SIZE) { ++ if (coalesce == 0) { ++ iova = entry.iova; ++ } ++ coalesce += entry.len; ++ } else if (coalesce > 0) { ++ /* Unleash the coalesced unmap before processing a new map */ ++ s390_pci_batch_unmap(iommu, iova, coalesce); ++ coalesce = 0; ++ } ++ + start += entry.len; + while (entry.iova < start && entry.iova < end) { + if (dma_avail > 0 || entry.perm == IOMMU_NONE) { +@@ -759,6 +805,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + } + } + } ++ if (coalesce) { ++ /* Unleash the coalesced unmap before finishing rpcit */ ++ s390_pci_batch_unmap(iommu, iova, coalesce); ++ coalesce = 0; ++ } + if (again && dma_avail > 0) + goto retry; + err: +-- +2.31.1 + diff --git a/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch b/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch new file mode 100644 index 0000000..bbe2595 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch @@ -0,0 +1,147 @@ +From 1ed1f8fc20a4883bc0bc1f58d299b0278abc5442 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 9 Dec 2022 14:57:00 -0500 +Subject: [PATCH 8/9] s390x/pci: reset ISM passthrough devices on shutdown and + system reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset +RH-Bugzilla: 2163701 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/3] c531352b9d57f51ba938d4c46ee19a5706ade697 (clegoate/qemu-kvm-c9s) + +ISM device firmware stores unique state information that can +can cause a wholesale unmap of the associated IOMMU (e.g. 
when +we get a termination signal for QEMU) to trigger firmware errors +because firmware believes we are attempting to invalidate entries +that are still in-use by the guest OS (when in fact that guest is +in the process of being terminated or rebooted). +To alleviate this, register both a shutdown notifier (for unexpected +termination cases e.g. virsh destroy) as well as a reset callback +(for cases like guest OS reboot). For each of these scenarios, trigger +PCI device reset; this is enough to indicate to firmware that the IOMMU +is no longer in-use by the guest OS, making it safe to invalidate any +associated IOMMU entries. + +Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X") +Signed-off-by: Matthew Rosato +Message-Id: <20221209195700.263824-1-mjrosato@linux.ibm.com> +Reviewed-by: Eric Farman +[thuth: Adjusted the hunk in s390-pci-vfio.c due to different context] +Signed-off-by: Thomas Huth +(cherry picked from commit 03451953c79e6b31f7860ee0c35b28e181d573c1) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++ + hw/s390x/s390-pci-vfio.c | 2 ++ + include/hw/s390x/s390-pci-bus.h | 5 +++++ + 3 files changed, 35 insertions(+) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 977e7daa15..02751f3597 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -24,6 +24,8 @@ + #include "hw/pci/msi.h" + #include "qemu/error-report.h" + #include "qemu/module.h" ++#include "sysemu/reset.h" ++#include "sysemu/runstate.h" + + #ifndef DEBUG_S390PCI_BUS + #define DEBUG_S390PCI_BUS 0 +@@ -150,10 +152,30 @@ out: + psccb->header.response_code = cpu_to_be16(rc); + } + ++static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) ++{ ++ S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice, ++ shutdown_notifier); ++ ++ pci_device_reset(pbdev->pdev); ++} ++ ++static void s390_pci_reset_cb(void *opaque) ++{ ++ S390PCIBusDevice *pbdev = opaque; ++ ++ 
pci_device_reset(pbdev->pdev); ++} ++ + static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) + { + HotplugHandler *hotplug_ctrl; + ++ if (pbdev->pft == ZPCI_PFT_ISM) { ++ notifier_remove(&pbdev->shutdown_notifier); ++ qemu_unregister_reset(s390_pci_reset_cb, pbdev); ++ } ++ + /* Unplug the PCI device */ + if (pbdev->pdev) { + DeviceState *pdev = DEVICE(pbdev->pdev); +@@ -1111,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + pbdev->fh |= FH_SHM_VFIO; + pbdev->forwarding_assist = false; + } ++ /* Register shutdown notifier and reset callback for ISM devices */ ++ if (pbdev->pft == ZPCI_PFT_ISM) { ++ pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; ++ qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); ++ qemu_register_reset(s390_pci_reset_cb, pbdev); ++ } + } else { + pbdev->fh |= FH_SHM_EMUL; + /* Always intercept emulated devices */ +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index f7bf36cec8..f51190d466 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -124,6 +124,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + /* The following values remain 0 until we support other FMB formats */ + pbdev->zpci_fn.fmbl = 0; + pbdev->zpci_fn.pft = 0; ++ /* Store function type separately for type-specific behavior */ ++ pbdev->pft = cap->pft; + + /* + * If appropriate, reduce the size of the supported DMA aperture reported +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 1c46e3a269..e0a9f9385b 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -39,6 +39,9 @@ + #define UID_CHECKING_ENABLED 0x01 + #define ZPCI_DTSM 0x40 + ++/* zPCI Function Types */ ++#define ZPCI_PFT_ISM 5 ++ + OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE) + OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS) + OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBusDevice, S390_PCI_DEVICE) +@@ -344,6 +347,7 
@@ struct S390PCIBusDevice { + uint16_t noi; + uint16_t maxstbl; + uint8_t sum; ++ uint8_t pft; + S390PCIGroup *pci_group; + ClpRspQueryPci zpci_fn; + S390MsixInfo msix; +@@ -352,6 +356,7 @@ struct S390PCIBusDevice { + MemoryRegion msix_notify_mr; + IndAddr *summary_ind; + IndAddr *indicator; ++ Notifier shutdown_notifier; + bool pci_unplug_request_processed; + bool unplug_requested; + bool interp; +-- +2.31.1 + diff --git a/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch b/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch new file mode 100644 index 0000000..0992724 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch @@ -0,0 +1,91 @@ +From ee69c8c57fe62fc200f749c4ce3927c88803644d Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 28 Oct 2022 15:47:58 -0400 +Subject: [PATCH 7/9] s390x/pci: shrink DMA aperture to be bound by vfio DMA + limit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset +RH-Bugzilla: 2163701 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/3] 0956bbb4773dd0085f6aed59d6284c704b4fed3b (clegoate/qemu-kvm-c9s) + +Currently, s390x-pci performs accounting against the vfio DMA +limit and triggers the guest to clean up mappings when the limit +is reached. Let's go a step further and also limit the size of +the supported DMA aperture reported to the guest based upon the +initial vfio DMA limit reported for the container (if less than +than the size reported by the firmware/host zPCI layer). 
This +avoids processing sections of the guest DMA table during global +refresh that, for common use cases, will never be used anway, and +makes exhausting the vfio DMA limit due to mismatch between guest +aperture size and host limit far less likely and more indicitive +of an error. + +Signed-off-by: Matthew Rosato +Message-Id: <20221028194758.204007-4-mjrosato@linux.ibm.com> +Reviewed-by: Eric Farman +Signed-off-by: Thomas Huth +(cherry picked from commit df202e3ff3fccb49868e08f20d0bda86cb953fbe) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-vfio.c | 11 +++++++++++ + include/hw/s390x/s390-pci-bus.h | 1 + + 2 files changed, 12 insertions(+) + +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 5f0adb0b4a..f7bf36cec8 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -84,6 +84,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, + cnt->users = 1; + cnt->avail = avail; + QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); ++ pbdev->iommu->max_dma_limit = avail; + return cnt; + } + +@@ -103,6 +104,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + struct vfio_info_cap_header *hdr; + struct vfio_device_info_cap_zpci_base *cap; + VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); ++ uint64_t vfio_size; + + hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); + +@@ -122,6 +124,15 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + /* The following values remain 0 until we support other FMB formats */ + pbdev->zpci_fn.fmbl = 0; + pbdev->zpci_fn.pft = 0; ++ ++ /* ++ * If appropriate, reduce the size of the supported DMA aperture reported ++ * to the guest based upon the vfio DMA limit. 
++ */ ++ vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS; ++ if (vfio_size < (cap->end_dma - cap->start_dma + 1)) { ++ pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1; ++ } + } + + static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info, +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 0605fcea24..1c46e3a269 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -278,6 +278,7 @@ struct S390PCIIOMMU { + uint64_t g_iota; + uint64_t pba; + uint64_t pal; ++ uint64_t max_dma_limit; + GHashTable *iotlb; + S390PCIDMACount *dma_limit; + }; +-- +2.31.1 + diff --git a/SOURCES/kvm-s390x-pv-Implement-a-CGS-check-helper.patch b/SOURCES/kvm-s390x-pv-Implement-a-CGS-check-helper.patch new file mode 100644 index 0000000..c3383af --- /dev/null +++ b/SOURCES/kvm-s390x-pv-Implement-a-CGS-check-helper.patch @@ -0,0 +1,109 @@ +From 9452246e59a5f16f44fdf9a7d514b947faf1d5fc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 16 Jan 2023 18:46:05 +0100 +Subject: [PATCH 5/9] s390x/pv: Implement a CGS check helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 139: s390x/pv: Implement a CGS check helper +RH-Bugzilla: 2122523 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Commit: [1/1] 8551ce772b10de653b4e1c8be60aae60ec98b421 (clegoate/qemu-kvm-c9s) + +When a protected VM is started with the maximum number of CPUs (248), +the service call providing information on the CPUs requires more +buffer space than allocated and QEMU disgracefully aborts : + + LOADPARM=[........] + Using virtio-blk. + Using SCSI scheme. + ................................................................................... 
+ qemu-system-s390x: KVM_S390_MEM_OP failed: Argument list too long + +When protected virtualization is initialized, compute the maximum +number of vCPUs supported by the machine and return useful information +to the user before the machine starts in case of error. + +Suggested-by: Thomas Huth +Reviewed-by: Thomas Huth +Signed-off-by: Cédric Le Goater +Message-Id: <20230116174607.2459498-2-clg@kaod.org> +Signed-off-by: Thomas Huth +(cherry picked from commit 75d7150c636569f6687f7e70a33be893be43eb5f) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/pv.c | 40 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index 8dfe92d8df..8a1c71436b 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -20,6 +20,7 @@ + #include "exec/confidential-guest-support.h" + #include "hw/s390x/ipl.h" + #include "hw/s390x/pv.h" ++#include "hw/s390x/sclp.h" + #include "target/s390x/kvm/kvm_s390x.h" + + static bool info_valid; +@@ -249,6 +250,41 @@ struct S390PVGuestClass { + ConfidentialGuestSupportClass parent_class; + }; + ++/* ++ * If protected virtualization is enabled, the amount of data that the ++ * Read SCP Info Service Call can use is limited to one page. The ++ * available space also depends on the Extended-Length SCCB (ELS) ++ * feature which can take more buffer space to store feature ++ * information. This impacts the maximum number of CPUs supported in ++ * the machine. ++ */ ++static uint32_t s390_pv_get_max_cpus(void) ++{ ++ int offset_cpu = s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB) ? 
++ offsetof(ReadInfo, entries) : SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET; ++ ++ return (TARGET_PAGE_SIZE - offset_cpu) / sizeof(CPUEntry); ++} ++ ++static bool s390_pv_check_cpus(Error **errp) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ uint32_t pv_max_cpus = s390_pv_get_max_cpus(); ++ ++ if (ms->smp.max_cpus > pv_max_cpus) { ++ error_setg(errp, "Protected VMs support a maximum of %d CPUs", ++ pv_max_cpus); ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp) ++{ ++ return s390_pv_check_cpus(errp); ++} ++ + int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) { +@@ -261,6 +297,10 @@ int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return -1; + } + ++ if (!s390_pv_guest_check(cgs, errp)) { ++ return -1; ++ } ++ + cgs->ready = true; + + return 0; +-- +2.31.1 + diff --git a/SOURCES/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch b/SOURCES/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch new file mode 100644 index 0000000..42114a1 --- /dev/null +++ b/SOURCES/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch @@ -0,0 +1,70 @@ +From 51fcf352a97f2e99a6a3fb8ae663b45436304120 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 10 Jan 2023 14:25:34 +0100 +Subject: [PATCH 11/31] s390x/s390-virtio-ccw: Activate zPCI features on + s390-ccw-virtio-rhel8.6.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 133: s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0 +RH-Bugzilla: 2159408 +RH-Acked-by: Thomas Huth +RH-Acked-by: David Hildenbrand +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 1ed82e56fe74a283a1726c4893dc3387e645072c (clegoate/qemu-kvm-c9s) + +commit c7b14d3af7 ("s390x/s390-virtio-ccw: Switch off zPCI 
enhancements +on older machines") activated zPCI enhancement features (interpretation +and forward assist) silently on the s390-ccw-virtio-rhel8.6.0 machine +for RHEL8.8. It didn't seem to be a problem since migration is not +possible but it broke LEAPP upgrade to RHEL9 when the machine is +defined with a passthrough device. Activate the zPCI features also on +RHEL9.2 for the machines to be alike in both latest RHEL distros. + +Upstream Status: RHEL-only +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2159408 + +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-virtio-ccw.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index aa142a1a4e..4cdd59c394 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1234,8 +1234,14 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) + + static void ccw_machine_rhel860_class_options(MachineClass *mc) + { ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "on", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "on", }, ++ }; ++ + ccw_machine_rhel900_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + + /* All RHEL machines for prior major releases are deprecated */ + mc->deprecation_reason = rhel_old_machine_deprecation; +@@ -1259,8 +1265,14 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, ++ }; ++ + ccw_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + 
mc->smp_props.prefer_sockets = true; + } + DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); +-- +2.31.1 + diff --git a/SOURCES/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch b/SOURCES/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch deleted file mode 100644 index cee10e7..0000000 --- a/SOURCES/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch +++ /dev/null @@ -1,96 +0,0 @@ -From e5360c1e76fee8b8dcbcba7efbb1e36f0b48ac40 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 22 Aug 2022 14:53:20 +0200 -Subject: [PATCH 01/23] scsi-generic: Fix emulated block limits VPD page - -RH-Author: Kevin Wolf -RH-MergeRequest: 115: scsi-generic: Fix emulated block limits VPD page -RH-Bugzilla: 2120275 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Reitz -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/1] 336ba583311a80beeadd1900336056404f63211a (kmwolf/centos-qemu-kvm) -Commits 01ef8185b80 amd 24b36e9813e updated the way that the maximum -transfer length is calculated for patching block limits VPD page in an -INQUIRY response. - -The same updates also need to be made for the case where the host device -does not support the block limits VPD page at all and we emulate the -whole page. - -Without this fix, on host block devices a maximum transfer length of -(INT_MAX - sector_size) bytes is advertised to the guest, resulting in -I/O errors when a request that exceeds the host limits is made by the -guest. (Prior to commit 24b36e9813e, this code path would use the -max_transfer value from the host instead of INT_MAX, but still miss the -fix from 01ef8185b80 where max_transfer is also capped to max_iov -host pages, so it would be less wrong, but still wrong.) 
- -Cc: qemu-stable@nongnu.org -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2096251 -Fixes: 01ef8185b809af9d287e1a03a3f9d8ea8231118a -Fixes: 24b36e9813ec15da7db62e3b3621730710c5f020 -Signed-off-by: Kevin Wolf -Message-Id: <20220822125320.48257-1-kwolf@redhat.com> -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Kevin Wolf -(cherry picked from commit 51e15194b0a091e5c40aab2eb234a1d36c5c58ee) - -Resolved conflict: qemu_real_host_page_size() is a getter function in -current upstream, but still just a public global variable downstream. - -Signed-off-by: Kevin Wolf ---- - hw/scsi/scsi-generic.c | 21 ++++++++++++++------- - 1 file changed, 14 insertions(+), 7 deletions(-) - -diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c -index 0306ccc7b1..3742899839 100644 ---- a/hw/scsi/scsi-generic.c -+++ b/hw/scsi/scsi-generic.c -@@ -147,6 +147,18 @@ static int execute_command(BlockBackend *blk, - return 0; - } - -+static uint64_t calculate_max_transfer(SCSIDevice *s) -+{ -+ uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk); -+ uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk); -+ -+ assert(max_transfer); -+ max_transfer = MIN_NON_ZERO(max_transfer, -+ max_iov * qemu_real_host_page_size); -+ -+ return max_transfer / s->blocksize; -+} -+ - static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len) - { - uint8_t page, page_idx; -@@ -179,12 +191,7 @@ static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len) - (r->req.cmd.buf[1] & 0x01)) { - page = r->req.cmd.buf[2]; - if (page == 0xb0) { -- uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk); -- uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk); -- -- assert(max_transfer); -- max_transfer = MIN_NON_ZERO(max_transfer, max_iov * qemu_real_host_page_size) -- / s->blocksize; -+ uint64_t max_transfer = calculate_max_transfer(s); - stl_be_p(&r->buf[8], max_transfer); - /* Also take care of the opt xfer len. 
*/ - stl_be_p(&r->buf[12], -@@ -230,7 +237,7 @@ static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s) - uint8_t buf[64]; - - SCSIBlockLimits bl = { -- .max_io_sectors = blk_get_max_transfer(s->conf.blk) / s->blocksize -+ .max_io_sectors = calculate_max_transfer(s), - }; - - memset(r->buf, 0, r->buflen); --- -2.31.1 - diff --git a/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch b/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch new file mode 100644 index 0000000..ca61286 --- /dev/null +++ b/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch @@ -0,0 +1,176 @@ +From 0a4f5bcc2a6f8ac31431e971c1dce9e6ab2191c2 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 21 Feb 2023 16:22:16 -0500 +Subject: [PATCH 01/12] scsi: protect req->aiocb with AioContext lock + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread +RH-Bugzilla: 2155748 +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf +RH-Acked-by: Laszlo Ersek +RH-Commit: [1/3] 61727297bd31dfe18220b61f1d265ced0649c60d (stefanha/centos-stream-qemu-kvm) + +If requests are being processed in the IOThread when a SCSIDevice is +unplugged, scsi_device_purge_requests() -> scsi_req_cancel_async() races +with I/O completion callbacks. Both threads load and store req->aiocb. +This can lead to assert(r->req.aiocb == NULL) failures and undefined +behavior. + +Protect r->req.aiocb with the AioContext lock to prevent the race. 
+ +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230221212218.1378734-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 7b7fc3d0102dafe8eb44802493036a526e921a71) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/scsi-disk.c | 23 ++++++++++++++++------- + hw/scsi/scsi-generic.c | 11 ++++++----- + 2 files changed, 22 insertions(+), 12 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index e493c28814..5327f93f4c 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -273,9 +273,11 @@ static void scsi_aio_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + if (scsi_disk_req_check_error(r, ret, true)) { + goto done; + } +@@ -357,10 +359,11 @@ static void scsi_dma_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -393,10 +396,11 @@ static void scsi_read_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -446,10 +450,11 @@ static void scsi_do_read_cb(void 
*opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -530,10 +535,11 @@ static void scsi_write_complete(void * opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -1737,10 +1743,11 @@ static void scsi_unmap_complete(void *opaque, int ret) + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (scsi_disk_req_check_error(r, ret, true)) { + scsi_req_unref(&r->req); + g_free(data); +@@ -1816,9 +1823,11 @@ static void scsi_write_same_complete(void *opaque, int ret) + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + if (scsi_disk_req_check_error(r, ret, true)) { + goto done; + } +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 92cce20a4d..ac9fa662b4 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -111,10 +111,11 @@ static void scsi_command_complete(void *opaque, int ret) + 
SCSIGenericReq *r = (SCSIGenericReq *)opaque; + SCSIDevice *s = r->req.dev; + ++ aio_context_acquire(blk_get_aio_context(s->conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); + scsi_command_complete_noio(r, ret); + aio_context_release(blk_get_aio_context(s->conf.blk)); + } +@@ -269,11 +270,11 @@ static void scsi_read_complete(void * opaque, int ret) + SCSIDevice *s = r->req.dev; + int len; + ++ aio_context_acquire(blk_get_aio_context(s->conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); +- + if (ret || r->req.io_canceled) { + scsi_command_complete_noio(r, ret); + goto done; +@@ -386,11 +387,11 @@ static void scsi_write_complete(void * opaque, int ret) + + trace_scsi_generic_write_complete(ret); + ++ aio_context_acquire(blk_get_aio_context(s->conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); +- + if (ret || r->req.io_canceled) { + scsi_command_complete_noio(r, ret); + goto done; +-- +2.39.1 + diff --git a/SOURCES/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch b/SOURCES/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch new file mode 100644 index 0000000..a8e3957 --- /dev/null +++ b/SOURCES/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch @@ -0,0 +1,159 @@ +From 5defda06ec4c24818a34126c5048be5e274b63f5 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:04 +0100 +Subject: [PATCH 22/31] stream: Replace subtree drain with a single node drain + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [10/16] a93250b1f6ef296e903df0ba5d8b29bc2ed540a8 (sgarzarella/qemu-kvm-c-9-s) + +The 
subtree drain was introduced in commit b1e1af394d9 as a way to avoid +graph changes between finding the base node and changing the block graph +as necessary on completion of the image streaming job. + +The block graph could change between these two points because +bdrv_set_backing_hd() first drains the parent node, which involved +polling and can do anything. + +Subtree draining was an imperfect way to make this less likely (because +with it, fewer callbacks are called during this window). Everyone agreed +that it's not really the right solution, and it was only committed as a +stopgap solution. + +This replaces the subtree drain with a solution that simply drains the +parent node before we try to find the base node, and then call a version +of bdrv_set_backing_hd() that doesn't drain, but just asserts that the +parent node is already drained. + +This way, any graph changes caused by draining happen before we start +looking at the graph and things stay consistent between finding the base +node and changing the graph. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-10-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 92140b9f3f07d80e2c27edcc6e32f392be2135e6) +Signed-off-by: Stefano Garzarella +--- + block.c | 17 ++++++++++++++--- + block/stream.c | 26 ++++++++++++++++---------- + include/block/block-global-state.h | 3 +++ + 3 files changed, 33 insertions(+), 13 deletions(-) + +diff --git a/block.c b/block.c +index b3449a312e..5330e89903 100644 +--- a/block.c ++++ b/block.c +@@ -3403,14 +3403,15 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs, + return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); + } + +-int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, +- Error **errp) ++int bdrv_set_backing_hd_drained(BlockDriverState *bs, ++ BlockDriverState *backing_hd, ++ Error **errp) + { + int ret; + Transaction *tran = tran_new(); + + GLOBAL_STATE_CODE(); +- bdrv_drained_begin(bs); ++ assert(bs->quiesce_counter > 0); + + ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); + if (ret < 0) { +@@ -3420,7 +3421,17 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + ret = bdrv_refresh_perms(bs, errp); + out: + tran_finalize(tran, ret); ++ return ret; ++} + ++int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, ++ Error **errp) ++{ ++ int ret; ++ GLOBAL_STATE_CODE(); ++ ++ bdrv_drained_begin(bs); ++ ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); + bdrv_drained_end(bs); + + return ret; +diff --git a/block/stream.c b/block/stream.c +index 694709bd25..8744ad103f 100644 +--- a/block/stream.c ++++ b/block/stream.c +@@ -64,13 +64,16 @@ static int stream_prepare(Job *job) + bdrv_cor_filter_drop(s->cor_filter_bs); + s->cor_filter_bs = NULL; + +- bdrv_subtree_drained_begin(s->above_base); ++ /* ++ * bdrv_set_backing_hd() requires that unfiltered_bs is drained. 
Drain ++ * already here and use bdrv_set_backing_hd_drained() instead because ++ * the polling during drained_begin() might change the graph, and if we do ++ * this only later, we may end up working with the wrong base node (or it ++ * might even have gone away by the time we want to use it). ++ */ ++ bdrv_drained_begin(unfiltered_bs); + + base = bdrv_filter_or_cow_bs(s->above_base); +- if (base) { +- bdrv_ref(base); +- } +- + unfiltered_base = bdrv_skip_filters(base); + + if (bdrv_cow_child(unfiltered_bs)) { +@@ -82,7 +85,13 @@ static int stream_prepare(Job *job) + } + } + +- bdrv_set_backing_hd(unfiltered_bs, base, &local_err); ++ bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err); ++ ++ /* ++ * This call will do I/O, so the graph can change again from here on. ++ * We have already completed the graph change, so we are not in danger ++ * of operating on the wrong node any more if this happens. ++ */ + ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false); + if (local_err) { + error_report_err(local_err); +@@ -92,10 +101,7 @@ static int stream_prepare(Job *job) + } + + out: +- if (base) { +- bdrv_unref(base); +- } +- bdrv_subtree_drained_end(s->above_base); ++ bdrv_drained_end(unfiltered_bs); + return ret; + } + +diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h +index c7bd4a2088..00e0cf8aea 100644 +--- a/include/block/block-global-state.h ++++ b/include/block/block-global-state.h +@@ -82,6 +82,9 @@ int bdrv_open_file_child(const char *filename, + BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); + int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp); ++int bdrv_set_backing_hd_drained(BlockDriverState *bs, ++ BlockDriverState *backing_hd, ++ Error **errp); + int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + const char *bdref_key, Error **errp); + BlockDriverState *bdrv_open(const char *filename, const char 
*reference, +-- +2.31.1 + diff --git a/SOURCES/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch b/SOURCES/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch deleted file mode 100644 index 8fd2e16..0000000 --- a/SOURCES/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 74b3e92dcb9e343e135a681259514b4fd28086ea Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Fri, 6 May 2022 15:25:09 +0200 -Subject: [PATCH 4/5] sysemu: tpm: Add a stub function for TPM_IS_CRB - -RH-Author: Eric Auger -RH-MergeRequest: 84: vfio/common: Remove spurious tpm-crb-cmd misalignment warning -RH-Commit: [1/2] 0ab55ca1aa12a3a7cbdef5a378928f75e030e536 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2037612 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2037612 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45166961 -Upstream Status: YES -Tested: With TPM-CRB and VFIO - -In a subsequent patch, VFIO will need to recognize if -a memory region owner is a TPM CRB device. Hence VFIO -needs to use TPM_IS_CRB() even if CONFIG_TPM is unset. So -let's add a stub function. 
- -Signed-off-by: Eric Auger -Suggested-by: Cornelia Huck -Reviewed-by: Stefan Berger -Link: https://lore.kernel.org/r/20220506132510.1847942-2-eric.auger@redhat.com -Signed-off-by: Alex Williamson -(cherry picked from commit 4168cdad398843ed53d650a27651868b4d3e21c9) -Signed-off-by: Eric Auger ---- - include/sysemu/tpm.h | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h -index 68b2206463..fb40e30ff6 100644 ---- a/include/sysemu/tpm.h -+++ b/include/sysemu/tpm.h -@@ -80,6 +80,12 @@ static inline TPMVersion tpm_get_version(TPMIf *ti) - #define tpm_init() (0) - #define tpm_cleanup() - -+/* needed for an alignment check in non-tpm code */ -+static inline Object *TPM_IS_CRB(Object *obj) -+{ -+ return NULL; -+} -+ - #endif /* CONFIG_TPM */ - - #endif /* QEMU_TPM_H */ --- -2.31.1 - diff --git a/SOURCES/kvm-target-arm-deprecate-named-CPU-models.patch b/SOURCES/kvm-target-arm-deprecate-named-CPU-models.patch deleted file mode 100644 index dbe8d24..0000000 --- a/SOURCES/kvm-target-arm-deprecate-named-CPU-models.patch +++ /dev/null @@ -1,129 +0,0 @@ -From 1f8528b71d96c01dd6106f11681f4a4e2776ef5f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Mon, 21 Mar 2022 12:05:42 +0000 -Subject: [PATCH 06/18] target/arm: deprecate named CPU models -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrangé -RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models -RH-Commit: [6/6] afddeb9e898206fd04499f01c48caf7dc1a8b8ef (berrange/centos-src-qemu) -RH-Bugzilla: 2060839 -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck - -KVM requires use of the 'host' CPU model, so named CPU models are only -needed for TCG. Since we don't consider TCG to be supported we can -deprecate all the named CPU models. TCG users can rely on 'max' model. 
- -Note: this has the effect of deprecating the default built-in CPU -model 'cortex-a57'. Applications using QEMU are expected to make an -explicit choice about which CPU model they want, since no builtin -default can suit all purposes. - -https://bugzilla.redhat.com/show_bug.cgi?id=2060839 -Signed-off-by: Daniel P. Berrangé ---- - target/arm/cpu-qom.h | 1 + - target/arm/cpu.c | 5 +++++ - target/arm/cpu.h | 2 ++ - target/arm/cpu64.c | 8 +++++++- - target/arm/helper.c | 2 ++ - 5 files changed, 17 insertions(+), 1 deletion(-) - -diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h -index 64c44cef2d..82e97249bc 100644 ---- a/target/arm/cpu-qom.h -+++ b/target/arm/cpu-qom.h -@@ -35,6 +35,7 @@ typedef struct ARMCPUInfo { - const char *name; - void (*initfn)(Object *obj); - void (*class_init)(ObjectClass *oc, void *data); -+ const char *deprecation_note; - } ARMCPUInfo; - - void arm_cpu_register(const ARMCPUInfo *info); -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 5d4ca7a227..c74b0fb462 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2105,8 +2105,13 @@ static void arm_cpu_instance_init(Object *obj) - static void cpu_register_class_init(ObjectClass *oc, void *data) - { - ARMCPUClass *acc = ARM_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - acc->info = data; -+ -+ if (acc->info->deprecation_note) { -+ cc->deprecation_note = acc->info->deprecation_note; -+ } - } - - void arm_cpu_register(const ARMCPUInfo *info) -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 23879de5fa..c0c9f680e5 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -33,6 +33,8 @@ - #define KVM_HAVE_MCE_INJECTION 1 - #endif - -+#define RHEL_CPU_DEPRECATION "use 'host' / 'max'" -+ - #define EXCP_UDEF 1 /* undefined instruction */ - #define EXCP_SWI 2 /* software interrupt */ - #define EXCP_PREFETCH_ABORT 3 -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index e80b831073..c8f152891c 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -975,7 
+975,8 @@ static void aarch64_a64fx_initfn(Object *obj) - #endif /* disabled for RHEL */ - - static const ARMCPUInfo aarch64_cpus[] = { -- { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, -+ { .name = "cortex-a57", .initfn = aarch64_a57_initfn, -+ .deprecation_note = RHEL_CPU_DEPRECATION }, - #if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, - { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, -@@ -1052,8 +1053,13 @@ static void aarch64_cpu_instance_init(Object *obj) - static void cpu_register_class_init(ObjectClass *oc, void *data) - { - ARMCPUClass *acc = ARM_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - acc->info = data; -+ -+ if (acc->info->deprecation_note) { -+ cc->deprecation_note = acc->info->deprecation_note; -+ } - } - - void aarch64_cpu_register(const ARMCPUInfo *info) -diff --git a/target/arm/helper.c b/target/arm/helper.c -index 7d14650615..3d34f63e49 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -8560,6 +8560,7 @@ void arm_cpu_list(void) - static void arm_cpu_add_definition(gpointer data, gpointer user_data) - { - ObjectClass *oc = data; -+ CPUClass *cc = CPU_CLASS(oc); - CpuDefinitionInfoList **cpu_list = user_data; - CpuDefinitionInfo *info; - const char *typename; -@@ -8569,6 +8570,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) - info->name = g_strndup(typename, - strlen(typename) - strlen("-" TYPE_ARM_CPU)); - info->q_typename = g_strdup(typename); -+ info->deprecated = !!cc->deprecation_note; - - QAPI_LIST_PREPEND(*cpu_list, info); - } --- -2.35.3 - diff --git a/SOURCES/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch b/SOURCES/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch new file mode 100644 index 0000000..52e73e7 --- /dev/null +++ b/SOURCES/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch @@ -0,0 +1,144 @@ +From e419493e6ec188461aa6f06c1b1cdc8a698859df Mon Sep 17 00:00:00 2001 
+From: Richard Henderson +Date: Sat, 14 Jan 2023 15:21:03 -1000 +Subject: [PATCH 6/8] target/i386: Fix 32-bit AD[CO]X insns in 64-bit mode +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions +RH-Bugzilla: 2173590 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Bandan Das +RH-Commit: [6/7] 0fa4d3858319d4f877a5b3f31776121a72e2c57a (bonzini/rhel-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 +Upstream-Status: merged + +Failure to truncate the inputs results in garbage for the carry-out. + +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1373 +Signed-off-by: Richard Henderson +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20230115012103.3131796-1-richard.henderson@linaro.org> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 6fbef9426bac7184b5d5887589d8386e732865eb) +--- + target/i386/tcg/emit.c.inc | 2 + + tests/tcg/x86_64/Makefile.target | 3 ++ + tests/tcg/x86_64/adox.c | 69 ++++++++++++++++++++++++++++++++ + 3 files changed, 74 insertions(+) + create mode 100644 tests/tcg/x86_64/adox.c + +diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc +index 0d7c6e80ae..e61ae9a2e9 100644 +--- a/target/i386/tcg/emit.c.inc ++++ b/target/i386/tcg/emit.c.inc +@@ -1037,6 +1037,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) + #ifdef TARGET_X86_64 + case MO_32: + /* If TL is 64-bit just do everything in 64-bit arithmetic. 
*/ ++ tcg_gen_ext32u_tl(s->T0, s->T0); ++ tcg_gen_ext32u_tl(s->T1, s->T1); + tcg_gen_add_i64(s->T0, s->T0, s->T1); + tcg_gen_add_i64(s->T0, s->T0, carry_in); + tcg_gen_shri_i64(carry_out, s->T0, 32); +diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target +index 4eac78293f..e64aab1b81 100644 +--- a/tests/tcg/x86_64/Makefile.target ++++ b/tests/tcg/x86_64/Makefile.target +@@ -12,11 +12,14 @@ ifeq ($(filter %-linux-user, $(TARGET)),$(TARGET)) + X86_64_TESTS += vsyscall + X86_64_TESTS += noexec + X86_64_TESTS += cmpxchg ++X86_64_TESTS += adox + TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64 + else + TESTS=$(MULTIARCH_TESTS) + endif + ++adox: CFLAGS=-O2 ++ + run-test-i386-ssse3: QEMU_OPTS += -cpu max + run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max + +diff --git a/tests/tcg/x86_64/adox.c b/tests/tcg/x86_64/adox.c +new file mode 100644 +index 0000000000..36be644c8b +--- /dev/null ++++ b/tests/tcg/x86_64/adox.c +@@ -0,0 +1,69 @@ ++/* See if ADOX give expected results */ ++ ++#include ++#include ++#include ++ ++static uint64_t adoxq(bool *c_out, uint64_t a, uint64_t b, bool c) ++{ ++ asm ("addl $0x7fffffff, %k1\n\t" ++ "adoxq %2, %0\n\t" ++ "seto %b1" ++ : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c)); ++ *c_out = c; ++ return a; ++} ++ ++static uint64_t adoxl(bool *c_out, uint64_t a, uint64_t b, bool c) ++{ ++ asm ("addl $0x7fffffff, %k1\n\t" ++ "adoxl %k2, %k0\n\t" ++ "seto %b1" ++ : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c)); ++ *c_out = c; ++ return a; ++} ++ ++int main() ++{ ++ uint64_t r; ++ bool c; ++ ++ r = adoxq(&c, 0, 0, 0); ++ assert(r == 0); ++ assert(c == 0); ++ ++ r = adoxl(&c, 0, 0, 0); ++ assert(r == 0); ++ assert(c == 0); ++ ++ r = adoxl(&c, 0x100000000, 0, 0); ++ assert(r == 0); ++ assert(c == 0); ++ ++ r = adoxq(&c, 0, 0, 1); ++ assert(r == 1); ++ assert(c == 0); ++ ++ r = adoxl(&c, 0, 0, 1); ++ assert(r == 1); ++ assert(c == 0); ++ ++ r = adoxq(&c, -1, -1, 0); ++ assert(r == -2); ++ assert(c == 1); ++ ++ r = adoxl(&c, -1, 
-1, 0); ++ assert(r == 0xfffffffe); ++ assert(c == 1); ++ ++ r = adoxq(&c, -1, -1, 1); ++ assert(r == -1); ++ assert(c == 1); ++ ++ r = adoxl(&c, -1, -1, 1); ++ assert(r == 0xffffffff); ++ assert(c == 1); ++ ++ return 0; ++} +-- +2.39.1 + diff --git a/SOURCES/kvm-target-i386-Fix-BEXTR-instruction.patch b/SOURCES/kvm-target-i386-Fix-BEXTR-instruction.patch new file mode 100644 index 0000000..0c28c7e --- /dev/null +++ b/SOURCES/kvm-target-i386-Fix-BEXTR-instruction.patch @@ -0,0 +1,110 @@ +From a019c203f0148e5fbb20e102a17453806f5296b6 Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Sat, 14 Jan 2023 13:05:42 -1000 +Subject: [PATCH 3/8] target/i386: Fix BEXTR instruction + +RH-Author: Paolo Bonzini +RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions +RH-Bugzilla: 2173590 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Bandan Das +RH-Commit: [3/7] bd1e3b26c72d7152b44be2d34308fd40dc106424 (bonzini/rhel-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 +Upstream-Status: merged + +There were two problems here: not limiting the input to operand bits, +and not correctly handling large extraction length. 
+ +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1372 +Signed-off-by: Richard Henderson +Message-Id: <20230114230542.3116013-3-richard.henderson@linaro.org> +Cc: qemu-stable@nongnu.org +Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) +Signed-off-by: Paolo Bonzini +(cherry picked from commit b14c0098975264ed03144f145bca0179a6763a07) +--- + target/i386/tcg/emit.c.inc | 22 +++++++++++----------- + tests/tcg/i386/test-i386-bmi2.c | 12 ++++++++++++ + 2 files changed, 23 insertions(+), 11 deletions(-) + +diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc +index 7037ff91c6..99f6ba6e19 100644 +--- a/target/i386/tcg/emit.c.inc ++++ b/target/i386/tcg/emit.c.inc +@@ -1078,30 +1078,30 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) + static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) + { + MemOp ot = decode->op[0].ot; +- TCGv bound, zero; ++ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); ++ TCGv zero = tcg_constant_tl(0); ++ TCGv mone = tcg_constant_tl(-1); + + /* + * Extract START, and shift the operand. + * Shifts larger than operand size get zeros. + */ + tcg_gen_ext8u_tl(s->A0, s->T1); ++ if (TARGET_LONG_BITS == 64 && ot == MO_32) { ++ tcg_gen_ext32u_tl(s->T0, s->T0); ++ } + tcg_gen_shr_tl(s->T0, s->T0, s->A0); + +- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); +- zero = tcg_constant_tl(0); + tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero); + + /* +- * Extract the LEN into a mask. Lengths larger than +- * operand size get all ones. ++ * Extract the LEN into an inverse mask. Lengths larger than ++ * operand size get all zeros, length 0 gets all ones. 
+ */ + tcg_gen_extract_tl(s->A0, s->T1, 8, 8); +- tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, s->A0, bound); +- +- tcg_gen_movi_tl(s->T1, 1); +- tcg_gen_shl_tl(s->T1, s->T1, s->A0); +- tcg_gen_subi_tl(s->T1, s->T1, 1); +- tcg_gen_and_tl(s->T0, s->T0, s->T1); ++ tcg_gen_shl_tl(s->T1, mone, s->A0); ++ tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero); ++ tcg_gen_andc_tl(s->T0, s->T0, s->T1); + + gen_op_update1_cc(s); + set_cc_op(s, CC_OP_LOGICB + ot); +diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c +index 3c3ef85513..982d4abda4 100644 +--- a/tests/tcg/i386/test-i386-bmi2.c ++++ b/tests/tcg/i386/test-i386-bmi2.c +@@ -99,6 +99,9 @@ int main(int argc, char *argv[]) { + result = bextrq(mask, 0x10f8); + assert(result == 0); + ++ result = bextrq(0xfedcba9876543210ull, 0x7f00); ++ assert(result == 0xfedcba9876543210ull); ++ + result = blsiq(0x30); + assert(result == 0x10); + +@@ -164,6 +167,15 @@ int main(int argc, char *argv[]) { + result = bextrl(mask, 0x1038); + assert(result == 0); + ++ result = bextrl((reg_t)0x8f635a775ad3b9b4ull, 0x3018); ++ assert(result == 0x5a); ++ ++ result = bextrl((reg_t)0xfedcba9876543210ull, 0x7f00); ++ assert(result == 0x76543210u); ++ ++ result = bextrl(-1, 0); ++ assert(result == 0); ++ + result = blsil(0xffff); + assert(result == 1); + +-- +2.39.1 + diff --git a/SOURCES/kvm-target-i386-Fix-BZHI-instruction.patch b/SOURCES/kvm-target-i386-Fix-BZHI-instruction.patch new file mode 100644 index 0000000..bcf79f4 --- /dev/null +++ b/SOURCES/kvm-target-i386-Fix-BZHI-instruction.patch @@ -0,0 +1,77 @@ +From d49e5d193dfccf6f5cfa98ccce5bd491478d563d Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Sat, 14 Jan 2023 13:32:06 -1000 +Subject: [PATCH 7/8] target/i386: Fix BZHI instruction + +RH-Author: Paolo Bonzini +RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions +RH-Bugzilla: 2173590 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Vitaly Kuznetsov 
+RH-Acked-by: Bandan Das +RH-Commit: [7/7] ad6b343c09c0304ac32cc68670c49d1fc12d8cf8 (bonzini/rhel-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 +Upstream-Status: merged + +We did not correctly handle N >= operand size. + +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1374 +Signed-off-by: Richard Henderson +Message-Id: <20230114233206.3118472-1-richard.henderson@linaro.org> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9ad2ba6e8e7fc195d0dd0b76ab38bd2fceb1bdd4) +--- + target/i386/tcg/emit.c.inc | 14 +++++++------- + tests/tcg/i386/test-i386-bmi2.c | 3 +++ + 2 files changed, 10 insertions(+), 7 deletions(-) + +diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc +index e61ae9a2e9..0d01e13002 100644 +--- a/target/i386/tcg/emit.c.inc ++++ b/target/i386/tcg/emit.c.inc +@@ -1147,20 +1147,20 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) + static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) + { + MemOp ot = decode->op[0].ot; +- TCGv bound; ++ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); ++ TCGv zero = tcg_constant_tl(0); ++ TCGv mone = tcg_constant_tl(-1); + +- tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]); +- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); ++ tcg_gen_ext8u_tl(s->T1, s->T1); + + /* + * Note that since we're using BMILG (in order to get O + * cleared) we need to store the inverse into C. 
+ */ +- tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, s->T1, bound); +- tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, bound, bound, s->T1); ++ tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src, s->T1, bound); + +- tcg_gen_movi_tl(s->A0, -1); +- tcg_gen_shl_tl(s->A0, s->A0, s->T1); ++ tcg_gen_shl_tl(s->A0, mone, s->T1); ++ tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero); + tcg_gen_andc_tl(s->T0, s->T0, s->A0); + + gen_op_update1_cc(s); +diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c +index 982d4abda4..0244df7987 100644 +--- a/tests/tcg/i386/test-i386-bmi2.c ++++ b/tests/tcg/i386/test-i386-bmi2.c +@@ -123,6 +123,9 @@ int main(int argc, char *argv[]) { + result = bzhiq(mask, 0x1f); + assert(result == (mask & ~(-1 << 30))); + ++ result = bzhiq(mask, 0x40); ++ assert(result == mask); ++ + result = rorxq(0x2132435465768798, 8); + assert(result == 0x9821324354657687); + +-- +2.39.1 + diff --git a/SOURCES/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch b/SOURCES/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch new file mode 100644 index 0000000..7f3051f --- /dev/null +++ b/SOURCES/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch @@ -0,0 +1,60 @@ +From cb2b591e1677db2837810eaedac534a7ff3a7b1c Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Sat, 14 Jan 2023 08:06:01 -1000 +Subject: [PATCH 4/8] target/i386: Fix C flag for BLSI, BLSMSK, BLSR + +RH-Author: Paolo Bonzini +RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions +RH-Bugzilla: 2173590 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Bandan Das +RH-Commit: [4/7] 173e23c492c830da6c5a4be0cfc20a69ac655b59 (bonzini/rhel-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 +Upstream-Status: merged + +We forgot to set cc_src, which is used for computing C. 
+ +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1370 +Signed-off-by: Richard Henderson +Message-Id: <20230114180601.2993644-1-richard.henderson@linaro.org> +Cc: qemu-stable@nongnu.org +Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) +Signed-off-by: Paolo Bonzini +(cherry picked from commit 99282098dc74c2055bde5652bde6cf0067d0c370) +--- + target/i386/tcg/emit.c.inc | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc +index 99f6ba6e19..4d7702c106 100644 +--- a/target/i386/tcg/emit.c.inc ++++ b/target/i386/tcg/emit.c.inc +@@ -1111,6 +1111,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) + { + MemOp ot = decode->op[0].ot; + ++ tcg_gen_mov_tl(cpu_cc_src, s->T0); + tcg_gen_neg_tl(s->T1, s->T0); + tcg_gen_and_tl(s->T0, s->T0, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); +@@ -1121,6 +1122,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode + { + MemOp ot = decode->op[0].ot; + ++ tcg_gen_mov_tl(cpu_cc_src, s->T0); + tcg_gen_subi_tl(s->T1, s->T0, 1); + tcg_gen_xor_tl(s->T0, s->T0, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); +@@ -1131,6 +1133,7 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) + { + MemOp ot = decode->op[0].ot; + ++ tcg_gen_mov_tl(cpu_cc_src, s->T0); + tcg_gen_subi_tl(s->T1, s->T0, 1); + tcg_gen_and_tl(s->T0, s->T0, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); +-- +2.39.1 + diff --git a/SOURCES/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch b/SOURCES/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch deleted file mode 100644 index d63bfdb..0000000 --- a/SOURCES/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch +++ /dev/null @@ -1,273 +0,0 @@ -From 577b04770e47aed0f88acb4a415ed04ddbe087f1 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Thu, 17 Mar 2022 17:59:22 
+0000 -Subject: [PATCH 04/18] target/i386: deprecate CPUs older than x86_64-v2 ABI -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrangé -RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models -RH-Commit: [4/6] 71f6043f11b31ffa841a2e14d24972e571c18a9e (berrange/centos-src-qemu) -RH-Bugzilla: 2060839 -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck - -RHEL-9 is compiled with the x86_64-v2 ABI. We use this as a baseline to -select which CPUs we want to support, such that there is at least one -supported guest CPU that can be launched for every physical machine -capable of running RHEL-9 KVM. - -Supported CPUs: - - * QEMU models - - base (QEMU internal) - host (host passthrough) - max (host passthrough for KVM, - all emulated features for TCG) - - * Intel models - - Icelake-Server - Icelake-Server-noTSX - Cascadelake-Server (2019) - Cascadelake-Server-noTSX (2019) - Skylake-Server (2016) - Skylake-Server-IBRS (2016) - Skylake-Server-noTSX-IBRS (2016) - Skylake-Client (2015) - Skylake-Client-IBRS (2015) - Skylake-Client-noTSX-IBRS (2015) - Broadwell (2014) - Broadwell-IBRS (2014) - Broadwell-noTSX (2014) - Broadwell-noTSX-IBRS (2014) - Haswell (2013) - Haswell-IBRS (2013) - Haswell-noTSX (2013) - Haswell-noTSX-IBRS (2013) - IvyBridge (2012) - IvyBridge-IBRS (2012) - SandyBridge (2011) - SandyBridge-IBRS (2011) - Westmere (2010) - Westmere-IBRS (2010) - Nehalem (2008) - Nehalem-IBRS (2008) - - Cooperlake (2020) - Snowridge (2019) - KnightsMill (2017) - Denverton (2016) - - * AMD models - - EPYC-Milan (2021) - EPYC-Rome (2019) - EPYC (2017) - EPYC-IBPB (2017) - Opteron_G5 (2012) - Opteron_G4 (2011) - - * Other - - Dhyana (2018) - -(I've omitted the many -vNNN versions for brevity) - -Deprecated CPUs: - - 486 - athlon - Conroe - core2duo - coreduo - Icelake-Client (already deprecated upstream) - Icelake-Client-noTSX (already deprecated upstream) 
- kvm32 - kvm64 - n270 - Opteron_G1 - Opteron_G2 - Opteron_G3 - Penryn - pentium2 - pentium3 - pentium - phenom - qemu32 - qemu64 - -The deprecated CPU models are subject to removal in a future -major version of RHEL. - -Note: this has the effect of deprecating the default built-in CPU -model 'qemu64'. Applications using QEMU are expected to make an -explicit choice about which CPU model they want, since no builtin -default can suit all purposes. - -https://bugzilla.redhat.com/show_bug.cgi?id=2060839 -Signed-off-by: Daniel P. Berrangé ---- - target/i386/cpu.c | 21 +++++++++++++++++++++ - 1 file changed, 21 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index cb6b5467d0..87cb641b5f 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1780,9 +1780,13 @@ static const CPUCaches epyc_milan_cache_info = { - * PT in VMX operation - */ - -+#define RHEL_CPU_DEPRECATION \ -+ "use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max'" -+ - static const X86CPUDefinition builtin_x86_defs[] = { - { - .name = "qemu64", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 0xd, - .vendor = CPUID_VENDOR_AMD, - .family = 15, -@@ -1803,6 +1807,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "phenom", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 5, - .vendor = CPUID_VENDOR_AMD, - .family = 16, -@@ -1835,6 +1840,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "core2duo", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 10, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, -@@ -1877,6 +1883,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "kvm64", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 0xd, - .vendor = CPUID_VENDOR_INTEL, - .family = 15, -@@ -1918,6 +1925,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "qemu32", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 4, - .vendor = CPUID_VENDOR_INTEL, - .family 
= 6, -@@ -1932,6 +1940,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "kvm32", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 5, - .vendor = CPUID_VENDOR_INTEL, - .family = 15, -@@ -1962,6 +1971,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "coreduo", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 10, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, -@@ -1995,6 +2005,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "486", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 1, - .vendor = CPUID_VENDOR_INTEL, - .family = 4, -@@ -2007,6 +2018,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "pentium", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 1, - .vendor = CPUID_VENDOR_INTEL, - .family = 5, -@@ -2019,6 +2031,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "pentium2", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 2, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, -@@ -2031,6 +2044,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "pentium3", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 3, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, -@@ -2043,6 +2057,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "athlon", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 2, - .vendor = CPUID_VENDOR_AMD, - .family = 6, -@@ -2058,6 +2073,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "n270", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 10, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, -@@ -2083,6 +2099,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "Conroe", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 10, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, -@@ -2123,6 +2140,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name 
= "Penryn", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 10, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, -@@ -3832,6 +3850,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "Opteron_G1", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 5, - .vendor = CPUID_VENDOR_AMD, - .family = 15, -@@ -3852,6 +3871,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "Opteron_G2", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 5, - .vendor = CPUID_VENDOR_AMD, - .family = 15, -@@ -3874,6 +3894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "Opteron_G3", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 5, - .vendor = CPUID_VENDOR_AMD, - .family = 16, --- -2.35.3 - diff --git a/SOURCES/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch b/SOURCES/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch new file mode 100644 index 0000000..72ae8ee --- /dev/null +++ b/SOURCES/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch @@ -0,0 +1,205 @@ +From 54d3e58aabf9716f9a07aeb7044d7b7997e28123 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 31 Jan 2023 09:48:03 +0100 +Subject: [PATCH 5/8] target/i386: fix ADOX followed by ADCX + +RH-Author: Paolo Bonzini +RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions +RH-Bugzilla: 2173590 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Bandan Das +RH-Commit: [5/7] 64dbe4e602f08e4a88fdeacee5a8993ca4383563 (bonzini/rhel-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 +Upstream-Status: merged + +When ADCX is followed by ADOX or vice versa, the second instruction's +carry comes from EFLAGS and the condition codes use the CC_OP_ADCOX +operation. Retrieving the carry from EFLAGS is handled by this bit +of gen_ADCOX: + + tcg_gen_extract_tl(carry_in, cpu_cc_src, + ctz32(cc_op == CC_OP_ADCX ? 
CC_C : CC_O), 1); + +Unfortunately, in this case cc_op has been overwritten by the previous +"if" statement to CC_OP_ADCOX. This works by chance when the first +instruction is ADCX; however, if the first instruction is ADOX, +ADCX will incorrectly take its carry from OF instead of CF. + +Fix by moving the computation of the new cc_op at the end of the function. +The included exhaustive test case fails without this patch and passes +afterwards. + +Because ADCX/ADOX need not be invoked through the VEX prefix, this +regression bisects to commit 16fc5726a6e2 ("target/i386: reimplement +0x0f 0x38, add AVX", 2022-10-18). However, the mistake happened a +little earlier, when BMI instructions were rewritten using the new +decoder framework. + +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1471 +Reported-by: Paul Jolly +Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +(cherry picked from commit 60c7dd22e1383754d5f150bc9f7c2785c662a7b6) +--- + target/i386/tcg/emit.c.inc | 20 +++++---- + tests/tcg/i386/Makefile.target | 6 ++- + tests/tcg/i386/test-i386-adcox.c | 75 ++++++++++++++++++++++++++++++++ + 3 files changed, 91 insertions(+), 10 deletions(-) + create mode 100644 tests/tcg/i386/test-i386-adcox.c + +diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc +index 4d7702c106..0d7c6e80ae 100644 +--- a/target/i386/tcg/emit.c.inc ++++ b/target/i386/tcg/emit.c.inc +@@ -1015,6 +1015,7 @@ VSIB_AVX(VPGATHERQ, vpgatherq) + + static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) + { ++ int opposite_cc_op; + TCGv carry_in = NULL; + TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2); + TCGv zero; +@@ -1022,14 +1023,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) + if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) { + /* Re-use the carry-out from a previous round. 
*/ + carry_in = carry_out; +- cc_op = s->cc_op; +- } else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) { +- /* Merge with the carry-out from the opposite instruction. */ +- cc_op = CC_OP_ADCOX; +- } +- +- /* If we don't have a carry-in, get it out of EFLAGS. */ +- if (!carry_in) { ++ } else { ++ /* We don't have a carry-in, get it out of EFLAGS. */ + if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) { + gen_compute_eflags(s); + } +@@ -1053,7 +1048,14 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) + tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero); + break; + } +- set_cc_op(s, cc_op); ++ ++ opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX; ++ if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) { ++ /* Merge with the carry-out from the opposite instruction. */ ++ set_cc_op(s, CC_OP_ADCOX); ++ } else { ++ set_cc_op(s, cc_op); ++ } + } + + static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target +index 81831cafbc..bafd8c2180 100644 +--- a/tests/tcg/i386/Makefile.target ++++ b/tests/tcg/i386/Makefile.target +@@ -14,7 +14,7 @@ config-cc.mak: Makefile + I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c)) + ALL_X86_TESTS=$(I386_SRCS:.c=) + SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx +-X86_64_TESTS:=$(filter test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) ++X86_64_TESTS:=$(filter test-i386-adcox test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) + + test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse + run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max +@@ -28,6 +28,10 @@ test-i386-bmi2: CFLAGS=-O2 + run-test-i386-bmi2: QEMU_OPTS += -cpu max + run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max + ++test-i386-adcox: CFLAGS=-O2 ++run-test-i386-adcox: QEMU_OPTS += -cpu max ++run-plugin-test-i386-adcox-%: QEMU_OPTS += -cpu max ++ + # + # hello-i386 is a barebones app + # 
+diff --git a/tests/tcg/i386/test-i386-adcox.c b/tests/tcg/i386/test-i386-adcox.c +new file mode 100644 +index 0000000000..16169efff8 +--- /dev/null ++++ b/tests/tcg/i386/test-i386-adcox.c +@@ -0,0 +1,75 @@ ++/* See if various BMI2 instructions give expected results */ ++#include ++#include ++#include ++ ++#define CC_C 1 ++#define CC_O (1 << 11) ++ ++#ifdef __x86_64__ ++#define REG uint64_t ++#else ++#define REG uint32_t ++#endif ++ ++void test_adox_adcx(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand) ++{ ++ REG flags; ++ REG out_adcx, out_adox; ++ ++ asm("pushf; pop %0" : "=r"(flags)); ++ flags &= ~(CC_C | CC_O); ++ flags |= (in_c ? CC_C : 0); ++ flags |= (in_o ? CC_O : 0); ++ ++ out_adcx = adcx_operand; ++ out_adox = adox_operand; ++ asm("push %0; popf;" ++ "adox %3, %2;" ++ "adcx %3, %1;" ++ "pushf; pop %0" ++ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox) ++ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox)); ++ ++ assert(out_adcx == in_c + adcx_operand - 1); ++ assert(out_adox == in_o + adox_operand - 1); ++ assert(!!(flags & CC_C) == (in_c || adcx_operand)); ++ assert(!!(flags & CC_O) == (in_o || adox_operand)); ++} ++ ++void test_adcx_adox(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand) ++{ ++ REG flags; ++ REG out_adcx, out_adox; ++ ++ asm("pushf; pop %0" : "=r"(flags)); ++ flags &= ~(CC_C | CC_O); ++ flags |= (in_c ? CC_C : 0); ++ flags |= (in_o ? 
CC_O : 0); ++ ++ out_adcx = adcx_operand; ++ out_adox = adox_operand; ++ asm("push %0; popf;" ++ "adcx %3, %1;" ++ "adox %3, %2;" ++ "pushf; pop %0" ++ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox) ++ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox)); ++ ++ assert(out_adcx == in_c + adcx_operand - 1); ++ assert(out_adox == in_o + adox_operand - 1); ++ assert(!!(flags & CC_C) == (in_c || adcx_operand)); ++ assert(!!(flags & CC_O) == (in_o || adox_operand)); ++} ++ ++int main(int argc, char *argv[]) { ++ /* try all combinations of input CF, input OF, CF from op1+op2, OF from op2+op1 */ ++ int i; ++ for (i = 0; i <= 15; i++) { ++ printf("%d\n", i); ++ test_adcx_adox(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8)); ++ test_adox_adcx(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8)); ++ } ++ return 0; ++} ++ +-- +2.39.1 + diff --git a/SOURCES/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch b/SOURCES/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch new file mode 100644 index 0000000..81a0003 --- /dev/null +++ b/SOURCES/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch @@ -0,0 +1,77 @@ +From f4ddcdd2395e0944c20f6683c66068ed0ac7d757 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Sat, 7 Jan 2023 18:14:20 +0100 +Subject: [PATCH 1/8] target/i386: fix operand size of unary SSE operations + +RH-Author: Paolo Bonzini +RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions +RH-Bugzilla: 2173590 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Bandan Das +RH-Commit: [1/7] 7041f3e30e19add6bd8e5355d8bebf92390a5c2e (bonzini/rhel-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 +Upstream-Status: merged + +VRCPSS, VRSQRTSS and VCVTSx2Sx have a 32-bit or 64-bit memory operand, +which is represented in the decoding tables by X86_VEX_REPScalar. 
Add it +to the tables, and make validate_vex() handle the case of an instruction +that is in exception type 4 without the REP prefix and exception type 5 +with it; this is the cas of VRCP and VRSQRT. + +Reported-by: yongwoo +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1377 +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3d304620ec6c95f31db17acc132f42f243369299) +--- + target/i386/tcg/decode-new.c.inc | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc +index 80c579164f..d5fd8d965c 100644 +--- a/target/i386/tcg/decode-new.c.inc ++++ b/target/i386/tcg/decode-new.c.inc +@@ -105,6 +105,7 @@ + #define vex3 .vex_class = 3, + #define vex4 .vex_class = 4, + #define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned, ++#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar, + #define vex5 .vex_class = 5, + #define vex6 .vex_class = 6, + #define vex7 .vex_class = 7, +@@ -839,8 +840,8 @@ static const X86OpEntry opcodes_0F[256] = { + + [0x50] = X86_OP_ENTRY3(MOVMSK, G,y, None,None, U,x, vex7 p_00_66), + [0x51] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), +- [0x52] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3), +- [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3), ++ [0x52] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), ++ [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), + [0x54] = X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 p_00_66), /* vand */ + [0x55] = X86_OP_ENTRY3(PANDN, V,x, H,x, W,x, vex4 p_00_66), /* vandn */ + [0x56] = X86_OP_ENTRY3(POR, V,x, H,x, W,x, vex4 p_00_66), /* vor */ +@@ -878,7 +879,7 @@ static const X86OpEntry opcodes_0F[256] = { + + [0x58] = X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + [0x59] = X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), +- [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex3 
p_00_66_f3_f2), ++ [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + [0x5b] = X86_OP_GROUP0(0F5B), + [0x5c] = X86_OP_ENTRY3(VSUB, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + [0x5d] = X86_OP_ENTRY3(VMIN, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), +@@ -1447,9 +1448,9 @@ static bool validate_vex(DisasContext *s, X86DecodedInsn *decode) + * Instructions which differ between 00/66 and F2/F3 in the + * exception classification and the size of the memory operand. + */ +- assert(e->vex_class == 1 || e->vex_class == 2); ++ assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4); + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { +- e->vex_class = 3; ++ e->vex_class = e->vex_class < 4 ? 3 : 5; + if (s->vex_l) { + goto illegal; + } +-- +2.39.1 + diff --git a/SOURCES/kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch b/SOURCES/kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch deleted file mode 100644 index 3c80759..0000000 --- a/SOURCES/kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 0423b2a79991c6ae7aa65123e0d4f52294c006ee Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 18 Jan 2023 11:08:30 -0500 -Subject: [PATCH] target/i386/kvm: fix kvmclock_current_nsec: Assertion - `time.tsc_timestamp <= migration_tsc' failed - -RH-Author: Jon Maloy -RH-MergeRequest: 248: target/i386/kvm: fix kvmclock_current_nsec: Assertion `time.tsc_timestamp <= migration_tsc' failed -RH-Bugzilla: 2134896 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Paolo Bonzini -RH-Commit: [1/1] f7b46dad79581f7751a3f00a52d766207652e048 (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2134896 -Upstream: Merged - -commit c4ef867f2949bf2a2ae18a4e27cf1a34bbc8aecb -Author: Ray Zhang -Date: Thu Sep 22 18:05:23 2022 +0800 - - target/i386/kvm: fix kvmclock_current_nsec: Assertion `time.tsc_timestamp <= migration_tsc' failed - - 
New KVM_CLOCK flags were added in the kernel.(c68dc1b577eabd5605c6c7c08f3e07ae18d30d5d) - ``` - + #define KVM_CLOCK_VALID_FLAGS \ - + (KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC) - - case KVM_CAP_ADJUST_CLOCK: - - r = KVM_CLOCK_TSC_STABLE; - + r = KVM_CLOCK_VALID_FLAGS; - ``` - - kvm_has_adjust_clock_stable needs to handle additional flags, - so that s->clock_is_reliable can be true and kvmclock_current_nsec doesn't need to be called. - - Signed-off-by: Ray Zhang - Message-Id: <20220922100523.2362205-1-zhanglei002@gmail.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Jon Maloy ---- - target/i386/kvm/kvm.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index ef70e2c85f..2c603df792 100644 ---- a/target/i386/kvm/kvm.c -+++ b/target/i386/kvm/kvm.c -@@ -153,7 +153,7 @@ bool kvm_has_adjust_clock_stable(void) - { - int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK); - -- return (ret == KVM_CLOCK_TSC_STABLE); -+ return (ret & KVM_CLOCK_TSC_STABLE); - } - - bool kvm_has_adjust_clock(void) --- -2.31.1 - diff --git a/SOURCES/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch b/SOURCES/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch deleted file mode 100644 index c940cdb..0000000 --- a/SOURCES/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 39642d0d37e2ef61ce7fde0bc284d37a365e4482 Mon Sep 17 00:00:00 2001 -From: Murilo Opsfelder Araujo -Date: Mon, 2 May 2022 17:59:11 -0300 -Subject: [PATCH 2/2] target/ppc/cpu-models: Fix ppc_cpu_aliases list for RHEL -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Murilo Opsfelder Araújo -RH-MergeRequest: 81: target/ppc/cpu-models: remove extraneous "#endif" -RH-Commit: [1/1] 5fff003ad3deb84c6a8e69ab90552a31edb3b058 (mopsfelder/centos-stream-src-qemu-kvm) -RH-Bugzilla: 2081022 -RH-Acked-by: Miroslav 
Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Laurent Vivier - -The commit b9d28ecdedaf ("Enable/disable devices for RHEL") removed the -"#if 0" from the beginning of the ppc_cpu_aliases list, which broke the -build on ppc64le: - - ../target/ppc/cpu-models.c:904:2: error: #endif without #if - #endif - ^ - 1 error generated. - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2081022 - -Fixes: b9d28ecdedaf (Enable/disable devices for RHEL) -Signed-off-by: Murilo Opsfelder Araujo ---- - target/ppc/cpu-models.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index dd78883410..528467eac1 100644 ---- a/target/ppc/cpu-models.c -+++ b/target/ppc/cpu-models.c -@@ -746,6 +746,7 @@ - /* PowerPC CPU aliases */ - - PowerPCCPUAlias ppc_cpu_aliases[] = { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { "405", "405d4" }, - { "405cr", "405crc" }, - { "405gp", "405gpd" }, --- -2.35.1 - diff --git a/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch b/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch new file mode 100644 index 0000000..b9536c3 --- /dev/null +++ b/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch @@ -0,0 +1,50 @@ +From b330bf0a2ad5af73d3c62997f7f0fa5b61f1796b Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 14 Feb 2023 14:48:37 +0100 +Subject: [PATCH 8/8] target/s390x/arch_dump: Fix memory corruption in + s390x_write_elf64_notes() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 152: Fix memory corruption in s390x_write_elf64_notes() +RH-Bugzilla: 2168172 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cédric Le Goater +RH-Commit: [1/1] 37a2c997b2c8b7524e0b6299891bf3ea7c9a46d0 (thuth/qemu-kvm-cs9) + +Bugzilla: https://bugzilla.redhat.com/2168172 +Upstream-Status: Posted (and reviewed, but not merged yet) + 
+"note_size" can be smaller than sizeof(note), so unconditionally calling +memset(notep, 0, sizeof(note)) could cause a memory corruption here in +case notep has been allocated dynamically, thus let's use note_size as +length argument for memset() instead. + +Fixes: 113d8f4e95 ("s390x: pv: Add dump support") +Message-Id: <20230214141056.680969-1-thuth@redhat.com> +Reviewed-by: Janosch Frank +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Thomas Huth +--- + target/s390x/arch_dump.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c +index a2329141e8..a7c44ba49d 100644 +--- a/target/s390x/arch_dump.c ++++ b/target/s390x/arch_dump.c +@@ -248,7 +248,7 @@ static int s390x_write_elf64_notes(const char *note_name, + notep = g_malloc(note_size); + } + +- memset(notep, 0, sizeof(note)); ++ memset(notep, 0, note_size); + + /* Setup note header data */ + notep->hdr.n_descsz = cpu_to_be32(content_size); +-- +2.31.1 + diff --git a/SOURCES/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch b/SOURCES/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch deleted file mode 100644 index 212900d..0000000 --- a/SOURCES/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch +++ /dev/null @@ -1,194 +0,0 @@ -From 8459c305914e2a7a19dcd1662d54a89def7acfa6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Thu, 17 Mar 2022 17:59:22 +0000 -Subject: [PATCH 05/18] target/s390x: deprecate CPUs older than z14 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrangé -RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models -RH-Commit: [5/6] 2da9e06cf452287673f94f880a7eb8b2b37b7278 (berrange/centos-src-qemu) -RH-Bugzilla: 2060839 -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck - -RHEL-9 is compiled with the z14 ABI. 
We use this as a baseline to -select which CPUs we want to support, such that there is at least one -supported guest CPU that can be launched for every physical -machine capable of running RHEL-9 KVM. - -Supported CPUs: - - gen15a-base - gen15a - gen15b-base - gen15b - gen16a-base - gen16a - gen16b-base - gen16b - max - qemu - z14.2-base - z14.2 - z14-base - z14 - z14ZR1-base - z14ZR1 - -Deprecated CPUs: - - z10BC.2-base - z10BC.2 - z10BC-base - z10BC - z10EC.2-base - z10EC.2 - z10EC.3-base - z10EC.3 - z10EC-base - z10EC - z114-base - z114 - z13.2-base - z13.2 - z13-base - z13s-base - z13s - z13 - z196.2-base - z196.2 - z196-base - z196 - z800-base - z800 - z890.2-base - z890.2 - z890.3-base - z890.3 - z890-base - z890 - z900.2-base - z900.2 - z900.3-base - z900.3 - z900-base - z900 - z990.2-base - z990.2 - z990.3-base - z990.3 - z990.4-base - z990.4 - z990.5-base - z990.5 - z990-base - z990 - z9BC.2-base - z9BC.2 - z9BC-base - z9BC - z9EC.2-base - z9EC.2 - z9EC.3-base - z9EC.3 - z9EC-base - z9EC - zBC12-base - zBC12 - zEC12.2-base - zEC12.2 - zEC12-base - zEC12 - -https://bugzilla.redhat.com/show_bug.cgi?id=2060839 -Signed-off-by: Daniel P. Berrangé ---- - target/s390x/cpu_models.c | 11 +++++++++++ - target/s390x/cpu_models.h | 2 ++ - target/s390x/cpu_models_sysemu.c | 2 ++ - 3 files changed, 15 insertions(+) - -diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 6d71428056..9b9fc41676 100644 ---- a/target/s390x/cpu_models.c -+++ b/target/s390x/cpu_models.c -@@ -45,6 +45,9 @@ - * of a following release have been a superset of the previous release. With - * generation 15 one base feature and one optional feature have been deprecated. 
- */ -+ -+#define RHEL_CPU_DEPRECATION "use at least 'z14', or 'host' / 'qemu' / 'max'" -+ - static S390CPUDef s390_cpu_defs[] = { - CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), - CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), -@@ -852,22 +855,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) - static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) - { - S390CPUClass *xcc = S390_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - /* all base models are migration safe */ - xcc->cpu_def = (const S390CPUDef *) data; - xcc->is_migration_safe = true; - xcc->is_static = true; - xcc->desc = xcc->cpu_def->desc; -+ if (xcc->cpu_def->gen < 14) { -+ cc->deprecation_note = RHEL_CPU_DEPRECATION; -+ } - } - - static void s390_cpu_model_class_init(ObjectClass *oc, void *data) - { - S390CPUClass *xcc = S390_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - /* model that can change between QEMU versions */ - xcc->cpu_def = (const S390CPUDef *) data; - xcc->is_migration_safe = true; - xcc->desc = xcc->cpu_def->desc; -+ if (xcc->cpu_def->gen < 14) { -+ cc->deprecation_note = RHEL_CPU_DEPRECATION; -+ } - } - - static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) -diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h -index 74d1f87e4f..372160bcd7 100644 ---- a/target/s390x/cpu_models.h -+++ b/target/s390x/cpu_models.h -@@ -38,6 +38,8 @@ struct S390CPUDef { - S390FeatBitmap full_feat; - /* used to init full_feat from generated data */ - S390FeatInit full_init; -+ /* if deprecated, provides a suggestion */ -+ const char *deprecation_note; - }; - - /* CPU model based on a CPU definition */ -diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c -index 6a04ccab1b..f3b7c304ec 100644 ---- a/target/s390x/cpu_models_sysemu.c -+++ b/target/s390x/cpu_models_sysemu.c -@@ -61,6 +61,7 @@ static void create_cpu_model_list(ObjectClass 
*klass, void *opaque) - CpuDefinitionInfo *info; - char *name = g_strdup(object_class_get_name(klass)); - S390CPUClass *scc = S390_CPU_CLASS(klass); -+ CPUClass *cc = CPU_CLASS(klass); - - /* strip off the -s390x-cpu */ - g_strrstr(name, "-" TYPE_S390_CPU)[0] = 0; -@@ -70,6 +71,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) - info->migration_safe = scc->is_migration_safe; - info->q_static = scc->is_static; - info->q_typename = g_strdup(object_class_get_name(klass)); -+ info->deprecated = !!cc->deprecation_note; - /* check for unavailable features */ - if (cpu_list_data->model) { - Object *obj; --- -2.35.3 - diff --git a/SOURCES/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch b/SOURCES/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch deleted file mode 100644 index 61752c7..0000000 --- a/SOURCES/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 27c1d979a994f5afc59c3520af58d15aa5aae723 Mon Sep 17 00:00:00 2001 -From: Janis Schoetterl-Glausch -Date: Fri, 6 May 2022 17:39:56 +0200 -Subject: [PATCH 29/32] target/s390x: kvm: Honor storage keys during emulation - -RH-Author: Thomas Huth -RH-MergeRequest: 109: Honor storage keys during emulation of I/O instructions -RH-Commit: [2/2] 346dee1e13bfe1c074e4c6a4417091711d852f9c (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2111994 -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Claudio Imbrenda - -Storage key controlled protection is currently not honored when -emulating instructions. -If available, enable key protection for the MEM_OP ioctl, thereby -enabling it for the s390_cpu_virt_mem_* functions, when using kvm. -As a result, the emulation of the following instructions honors storage -keys: - -* CLP - The Synch I/O CLP command would need special handling in order - to support storage keys, but is currently not supported. 
-* CHSC - Performing commands asynchronously would require special - handling, but commands are currently always synchronous. -* STSI -* TSCH - Must (and does) not change channel if terminated due to - protection. -* MSCH - Suppressed on protection, works because fetching instruction. -* SSCH - Suppressed on protection, works because fetching instruction. -* STSCH -* STCRW - Suppressed on protection, this works because no partial store is - possible, because the operand cannot span multiple pages. -* PCISTB -* MPCIFC -* STPCIFC - -Signed-off-by: Janis Schoetterl-Glausch -Message-Id: <20220506153956.2217601-3-scgl@linux.ibm.com> -Signed-off-by: Thomas Huth - -(cherry picked from commit 54354861d21b69ec0781f43e67b8d4f6edad7e3f) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2111994 -Signed-off-by: Thomas Huth ---- - target/s390x/kvm/kvm.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c -index 74f089d87f..1f1d1a33b8 100644 ---- a/target/s390x/kvm/kvm.c -+++ b/target/s390x/kvm/kvm.c -@@ -152,12 +152,15 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { - static int cap_sync_regs; - static int cap_async_pf; - static int cap_mem_op; -+static int cap_mem_op_extension; - static int cap_s390_irq; - static int cap_ri; - static int cap_hpage_1m; - static int cap_vcpu_resets; - static int cap_protected; - -+static bool mem_op_storage_key_support; -+ - static int active_cmma; - - static int kvm_s390_query_mem_limit(uint64_t *memory_limit) -@@ -355,6 +358,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s) - cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS); - cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); - cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); -+ cap_mem_op_extension = kvm_check_extension(s, KVM_CAP_S390_MEM_OP_EXTENSION); -+ mem_op_storage_key_support = cap_mem_op_extension > 0; - cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); - 
cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); - cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); -@@ -843,6 +848,7 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, - : KVM_S390_MEMOP_LOGICAL_READ, - .buf = (uint64_t)hostbuf, - .ar = ar, -+ .key = (cpu->env.psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY, - }; - int ret; - -@@ -852,6 +858,9 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, - if (!hostbuf) { - mem_op.flags |= KVM_S390_MEMOP_F_CHECK_ONLY; - } -+ if (mem_op_storage_key_support) { -+ mem_op.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; -+ } - - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_S390_MEM_OP, &mem_op); - if (ret < 0) { --- -2.31.1 - diff --git a/SOURCES/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch b/SOURCES/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch new file mode 100644 index 0000000..268c263 --- /dev/null +++ b/SOURCES/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch @@ -0,0 +1,153 @@ +From 093c4a6834f3ec5a05390a3630ae4edec80885b8 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:40:57 +0100 +Subject: [PATCH 15/31] test-bdrv-drain: Don't yield in + .bdrv_co_drained_begin/end() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [3/16] 5282d3e13cb85dfb480edb11b7eb2769248465df (sgarzarella/qemu-kvm-c-9-s) + +We want to change .bdrv_co_drained_begin/end() back to be non-coroutine +callbacks, so in preparation, avoid yielding in their implementation. + +This does almost the same as the existing logic in bdrv_drain_invoke(), +by creating and entering coroutines internally. 
However, since the test +case is by far the heaviest user of coroutine code in drain callbacks, +it is preferable to have the complexity in the test case rather than the +drain core, which is already complicated enough without this. + +The behaviour for bdrv_drain_begin() is unchanged because we increase +bs->in_flight and this is still polled. However, bdrv_drain_end() +doesn't wait for the spawned coroutine to complete any more. This is +fine, we don't rely on bdrv_drain_end() restarting all operations +immediately before the next aio_poll(). + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Emanuele Giuseppe Esposito +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 7bce1c299834557bffd92294608ea528648cfe75) +Signed-off-by: Stefano Garzarella +--- + tests/unit/test-bdrv-drain.c | 64 ++++++++++++++++++++++++++---------- + 1 file changed, 46 insertions(+), 18 deletions(-) + +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index 09dc4a4891..24f34e24ad 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -38,12 +38,22 @@ typedef struct BDRVTestState { + bool sleep_in_drain_begin; + } BDRVTestState; + ++static void coroutine_fn sleep_in_drain_begin(void *opaque) ++{ ++ BlockDriverState *bs = opaque; ++ ++ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); ++ bdrv_dec_in_flight(bs); ++} ++ + static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) + { + BDRVTestState *s = bs->opaque; + s->drain_count++; + if (s->sleep_in_drain_begin) { +- qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); ++ Coroutine *co = qemu_coroutine_create(sleep_in_drain_begin, bs); ++ bdrv_inc_in_flight(bs); ++ aio_co_enter(bdrv_get_aio_context(bs), co); + } + } + +@@ -1916,6 +1926,21 @@ static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs, + return 0; + } + ++static void coroutine_fn 
bdrv_replace_test_drain_co(void *opaque) ++{ ++ BlockDriverState *bs = opaque; ++ BDRVReplaceTestState *s = bs->opaque; ++ ++ /* Keep waking io_co up until it is done */ ++ while (s->io_co) { ++ aio_co_wake(s->io_co); ++ s->io_co = NULL; ++ qemu_coroutine_yield(); ++ } ++ s->drain_co = NULL; ++ bdrv_dec_in_flight(bs); ++} ++ + /** + * If .drain_count is 0, wake up .io_co if there is one; and set + * .was_drained. +@@ -1926,20 +1951,27 @@ static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) + BDRVReplaceTestState *s = bs->opaque; + + if (!s->drain_count) { +- /* Keep waking io_co up until it is done */ +- s->drain_co = qemu_coroutine_self(); +- while (s->io_co) { +- aio_co_wake(s->io_co); +- s->io_co = NULL; +- qemu_coroutine_yield(); +- } +- s->drain_co = NULL; +- ++ s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); ++ bdrv_inc_in_flight(bs); ++ aio_co_enter(bdrv_get_aio_context(bs), s->drain_co); + s->was_drained = true; + } + s->drain_count++; + } + ++static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) ++{ ++ BlockDriverState *bs = opaque; ++ char data; ++ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); ++ int ret; ++ ++ /* Queue a read request post-drain */ ++ ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); ++ g_assert(ret >= 0); ++ bdrv_dec_in_flight(bs); ++} ++ + /** + * Reduce .drain_count, set .was_undrained once it reaches 0. 
+ * If .drain_count reaches 0 and the node has a backing file, issue a +@@ -1951,17 +1983,13 @@ static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) + + g_assert(s->drain_count > 0); + if (!--s->drain_count) { +- int ret; +- + s->was_undrained = true; + + if (bs->backing) { +- char data; +- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); +- +- /* Queue a read request post-drain */ +- ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); +- g_assert(ret >= 0); ++ Coroutine *co = qemu_coroutine_create(bdrv_replace_test_read_entry, ++ bs); ++ bdrv_inc_in_flight(bs); ++ aio_co_enter(bdrv_get_aio_context(bs), co); + } + } + } +-- +2.31.1 + diff --git a/SOURCES/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch b/SOURCES/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch deleted file mode 100644 index 4fcf786..0000000 --- a/SOURCES/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch +++ /dev/null @@ -1,157 +0,0 @@ -From f52aa60217634c96fef59ce76b803a94610bf5c8 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Wed, 15 Jun 2022 15:28:27 +0200 -Subject: [PATCH 01/18] tests/avocado: update aarch64_virt test to exercise - -cpu max -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrangé -RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models -RH-Commit: [1/6] df6839e567180a4c32afd98852f68b2279e00f7c (berrange/centos-src-qemu) -RH-Bugzilla: 2060839 -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 - -commit 11593544df6f8febb3ce87015c22b429bf43c4c7 -Author: Alex Bennée -Date: Tue Apr 19 10:09:56 2022 +0100 - - tests/avocado: update aarch64_virt test to exercise -cpu max - - The Fedora 29 kernel is quite old and importantly fails when running - in LPA2 scenarios. 
As it's not really exercising much of the CPU space - replace it with a custom 5.16.12 kernel with all the architecture - options turned on. There is a minimal buildroot initramfs included in - the kernel which has a few tools for stress testing the memory - subsystem. The userspace also targets the Neoverse N1 processor so - would fail with a v8.0 cpu like cortex-a53. - - While we are at it move the test into its own file so it can have an - assigned maintainer. - - Signed-off-by: Alex Bennée - Acked-by: Richard Henderson - Tested-by: Richard Henderson - Message-Id: <20220419091020.3008144-2-alex.bennee@linaro.org> - -Signed-off-by: Andrew Jones ---- - MAINTAINERS | 1 + - tests/avocado/boot_linux_console.py | 25 ------------- - tests/avocado/machine_aarch64_virt.py | 51 +++++++++++++++++++++++++++ - 3 files changed, 52 insertions(+), 25 deletions(-) - create mode 100644 tests/avocado/machine_aarch64_virt.py - -diff --git a/MAINTAINERS b/MAINTAINERS -index 2fe20a49ab..bfe8806f60 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -942,6 +942,7 @@ S: Maintained - F: hw/arm/virt* - F: include/hw/arm/virt.h - F: docs/system/arm/virt.rst -+F: tests/avocado/machine_aarch64_virt.py - - Xilinx Zynq - M: Edgar E. 
Iglesias -diff --git a/tests/avocado/boot_linux_console.py b/tests/avocado/boot_linux_console.py -index b40a3abc81..45a2ceda22 100644 ---- a/tests/avocado/boot_linux_console.py -+++ b/tests/avocado/boot_linux_console.py -@@ -325,31 +325,6 @@ def test_mips_malta32el_nanomips_64k_dbg(self): - kernel_hash = '18d1c68f2e23429e266ca39ba5349ccd0aeb7180' - self.do_test_mips_malta32el_nanomips(kernel_url, kernel_hash) - -- def test_aarch64_virt(self): -- """ -- :avocado: tags=arch:aarch64 -- :avocado: tags=machine:virt -- :avocado: tags=accel:tcg -- :avocado: tags=cpu:cortex-a53 -- """ -- kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' -- '/linux/releases/29/Everything/aarch64/os/images/pxeboot' -- '/vmlinuz') -- kernel_hash = '8c73e469fc6ea06a58dc83a628fc695b693b8493' -- kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) -- -- self.vm.set_console() -- kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + -- 'console=ttyAMA0') -- self.require_accelerator("tcg") -- self.vm.add_args('-cpu', 'cortex-a53', -- '-accel', 'tcg', -- '-kernel', kernel_path, -- '-append', kernel_command_line) -- self.vm.launch() -- console_pattern = 'Kernel command line: %s' % kernel_command_line -- self.wait_for_console_pattern(console_pattern) -- - def test_aarch64_xlnx_versal_virt(self): - """ - :avocado: tags=arch:aarch64 -diff --git a/tests/avocado/machine_aarch64_virt.py b/tests/avocado/machine_aarch64_virt.py -new file mode 100644 -index 0000000000..21848cba70 ---- /dev/null -+++ b/tests/avocado/machine_aarch64_virt.py -@@ -0,0 +1,51 @@ -+# Functional test that boots a Linux kernel and checks the console -+# -+# Copyright (c) 2022 Linaro Ltd. 
-+# -+# Author: -+# Alex Bennée -+# -+# SPDX-License-Identifier: GPL-2.0-or-later -+ -+import time -+ -+from avocado_qemu import QemuSystemTest -+from avocado_qemu import wait_for_console_pattern -+from avocado_qemu import exec_command -+ -+class Aarch64VirtMachine(QemuSystemTest): -+ KERNEL_COMMON_COMMAND_LINE = 'printk.time=0 ' -+ -+ def wait_for_console_pattern(self, success_message, vm=None): -+ wait_for_console_pattern(self, success_message, -+ failure_message='Kernel panic - not syncing', -+ vm=vm) -+ -+ def test_aarch64_virt(self): -+ """ -+ :avocado: tags=arch:aarch64 -+ :avocado: tags=machine:virt -+ :avocado: tags=accel:tcg -+ :avocado: tags=cpu:max -+ """ -+ kernel_url = ('https://fileserver.linaro.org/s/' -+ 'z6B2ARM7DQT3HWN/download') -+ -+ kernel_hash = 'ed11daab50c151dde0e1e9c9cb8b2d9bd3215347' -+ kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) -+ -+ self.vm.set_console() -+ kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + -+ 'console=ttyAMA0') -+ self.require_accelerator("tcg") -+ self.vm.add_args('-cpu', 'max,pauth-impdef=on', -+ '-accel', 'tcg', -+ '-kernel', kernel_path, -+ '-append', kernel_command_line) -+ self.vm.launch() -+ self.wait_for_console_pattern('Welcome to Buildroot') -+ time.sleep(0.1) -+ exec_command(self, 'root') -+ time.sleep(0.1) -+ exec_command(self, 'cat /proc/self/maps') -+ time.sleep(0.1) --- -2.35.3 - diff --git a/SOURCES/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch b/SOURCES/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch deleted file mode 100644 index 7b9a8f3..0000000 --- a/SOURCES/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch +++ /dev/null @@ -1,119 +0,0 @@ -From cea7b15c613a11ea15a1458d6990be7044df6643 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Thu, 18 Nov 2021 12:57:33 +0100 -Subject: [PATCH 17/17] tests/qtest/fdc-test: Add a regression test for - CVE-2021-3507 -MIME-Version: 1.0 -Content-Type: text/plain; 
charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -RH-MergeRequest: 107: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) -RH-Commit: [2/2] 067c052df790959c28c1fcc16547676d36523bd9 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1951522 -RH-Acked-by: Hanna Reitz -RH-Acked-by: Miroslav Rezanina - -Add the reproducer from https://gitlab.com/qemu-project/qemu/-/issues/339 - -Without the previous commit, when running 'make check-qtest-i386' -with QEMU configured with '--enable-sanitizers' we get: - - ==4028352==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x619000062a00 at pc 0x5626d03c491a bp 0x7ffdb4199410 sp 0x7ffdb4198bc0 - READ of size 786432 at 0x619000062a00 thread T0 - #0 0x5626d03c4919 in __asan_memcpy (qemu-system-i386+0x1e65919) - #1 0x5626d1c023cc in flatview_write_continue softmmu/physmem.c:2787:13 - #2 0x5626d1bf0c0f in flatview_write softmmu/physmem.c:2822:14 - #3 0x5626d1bf0798 in address_space_write softmmu/physmem.c:2914:18 - #4 0x5626d1bf0f37 in address_space_rw softmmu/physmem.c:2924:16 - #5 0x5626d1bf14c8 in cpu_physical_memory_rw softmmu/physmem.c:2933:5 - #6 0x5626d0bd5649 in cpu_physical_memory_write include/exec/cpu-common.h:82:5 - #7 0x5626d0bd0a07 in i8257_dma_write_memory hw/dma/i8257.c:452:9 - #8 0x5626d09f825d in fdctrl_transfer_handler hw/block/fdc.c:1616:13 - #9 0x5626d0a048b4 in fdctrl_start_transfer hw/block/fdc.c:1539:13 - #10 0x5626d09f4c3e in fdctrl_write_data hw/block/fdc.c:2266:13 - #11 0x5626d09f22f7 in fdctrl_write hw/block/fdc.c:829:9 - #12 0x5626d1c20bc5 in portio_write softmmu/ioport.c:207:17 - - 0x619000062a00 is located 0 bytes to the right of 512-byte region [0x619000062800,0x619000062a00) - allocated by thread T0 here: - #0 0x5626d03c66ec in posix_memalign (qemu-system-i386+0x1e676ec) - #1 0x5626d2b988d4 in qemu_try_memalign util/oslib-posix.c:210:11 - #2 0x5626d2b98b0c in qemu_memalign util/oslib-posix.c:226:27 - #3 0x5626d09fbaf0 in fdctrl_realize_common hw/block/fdc.c:2341:20 - 
#4 0x5626d0a150ed in isabus_fdc_realize hw/block/fdc-isa.c:113:5 - #5 0x5626d2367935 in device_set_realized hw/core/qdev.c:531:13 - - SUMMARY: AddressSanitizer: heap-buffer-overflow (qemu-system-i386+0x1e65919) in __asan_memcpy - Shadow bytes around the buggy address: - 0x0c32800044f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa - 0x0c3280004500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - 0x0c3280004510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - 0x0c3280004520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - 0x0c3280004530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - =>0x0c3280004540:[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa - 0x0c3280004550: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa - 0x0c3280004560: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa - 0x0c3280004570: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa - 0x0c3280004580: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa - 0x0c3280004590: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd - Shadow byte legend (one shadow byte represents 8 application bytes): - Addressable: 00 - Heap left redzone: fa - Freed heap region: fd - ==4028352==ABORTING - -[ kwolf: Added snapshot=on to prevent write file lock failure ] - -Reported-by: Alexander Bulekov -Signed-off-by: Philippe Mathieu-Daudé -Reviewed-by: Alexander Bulekov -Signed-off-by: Kevin Wolf -(cherry picked from commit 46609b90d9e3a6304def11038a76b58ff43f77bc) -Signed-off-by: Jon Maloy ---- - tests/qtest/fdc-test.c | 21 +++++++++++++++++++++ - 1 file changed, 21 insertions(+) - -diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c -index b0d40012e6..1d4f852128 100644 ---- a/tests/qtest/fdc-test.c -+++ b/tests/qtest/fdc-test.c -@@ -583,6 +583,26 @@ static void test_cve_2021_20196(void) - qtest_quit(s); - } - -+static void test_cve_2021_3507(void) -+{ -+ QTestState *s; -+ -+ s = qtest_initf("-nographic -m 32M -nodefaults " -+ "-drive file=%s,format=raw,if=floppy,snapshot=on", -+ test_image); -+ qtest_outl(s, 0x9, 
0x0a0206); -+ qtest_outw(s, 0x3f4, 0x1600); -+ qtest_outw(s, 0x3f4, 0x0000); -+ qtest_outw(s, 0x3f4, 0x0000); -+ qtest_outw(s, 0x3f4, 0x0000); -+ qtest_outw(s, 0x3f4, 0x0200); -+ qtest_outw(s, 0x3f4, 0x0200); -+ qtest_outw(s, 0x3f4, 0x0000); -+ qtest_outw(s, 0x3f4, 0x0000); -+ qtest_outw(s, 0x3f4, 0x0000); -+ qtest_quit(s); -+} -+ - int main(int argc, char **argv) - { - int fd; -@@ -614,6 +634,7 @@ int main(int argc, char **argv) - qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19); - qtest_add_func("/fdc/fuzz-registers", fuzz_registers); - qtest_add_func("/fdc/fuzz/cve_2021_20196", test_cve_2021_20196); -+ qtest_add_func("/fdc/fuzz/cve_2021_3507", test_cve_2021_3507); - - ret = g_test_run(); - --- -2.31.1 - diff --git a/SOURCES/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch b/SOURCES/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch new file mode 100644 index 0000000..ebd52cd --- /dev/null +++ b/SOURCES/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch @@ -0,0 +1,505 @@ +From 39d5761fe1f546e764dedf2ea32c55d8f5222696 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Wed, 18 Jan 2023 13:04:05 +0100 +Subject: [PATCH 1/8] tests/qtest: netdev: test stream and dgram backends +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect +RH-Bugzilla: 2169232 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Acked-by: David Gibson (Red Hat) +RH-Commit: [1/2] 75c71b47eea072e14651a96612d402b50d2b8f1e (lvivier/qemu-kvm-centos) + +Signed-off-by: Laurent Vivier +Acked-by: Michael S. 
Tsirkin +Message-Id: <20230118120405.1876329-1-lvivier@redhat.com> +Signed-off-by: Thomas Huth +(cherry picked from commit c95031a19f0d7f418a597243f6f84b031a858997) +--- + tests/qtest/meson.build | 2 + + tests/qtest/netdev-socket.c | 448 ++++++++++++++++++++++++++++++++++++ + 2 files changed, 450 insertions(+) + create mode 100644 tests/qtest/netdev-socket.c + +diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build +index 9df3f9f8b9..2e7c6fe5e3 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -27,6 +27,7 @@ qtests_generic = [ + 'test-hmp', + 'qos-test', + 'readconfig-test', ++ 'netdev-socket', + ] + if config_host.has_key('CONFIG_MODULES') + qtests_generic += [ 'modules-test' ] +@@ -299,6 +300,7 @@ qtests = { + 'tpm-tis-device-swtpm-test': [io, tpmemu_files, 'tpm-tis-util.c'], + 'tpm-tis-device-test': [io, tpmemu_files, 'tpm-tis-util.c'], + 'vmgenid-test': files('boot-sector.c', 'acpi-utils.c'), ++ 'netdev-socket': files('netdev-socket.c', '../unit/socket-helpers.c'), + } + + gvnc = dependency('gvnc-1.0', required: false) +diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c +new file mode 100644 +index 0000000000..6ba256e173 +--- /dev/null ++++ b/tests/qtest/netdev-socket.c +@@ -0,0 +1,448 @@ ++/* ++ * QTest testcase for netdev stream and dgram ++ * ++ * Copyright (c) 2022 Red Hat, Inc. 
++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/sockets.h" ++#include ++#include "../unit/socket-helpers.h" ++#include "libqtest.h" ++ ++#define CONNECTION_TIMEOUT 5 ++ ++#define EXPECT_STATE(q, e, t) \ ++do { \ ++ char *resp = NULL; \ ++ g_test_timer_start(); \ ++ do { \ ++ g_free(resp); \ ++ resp = qtest_hmp(q, "info network"); \ ++ if (t) { \ ++ strrchr(resp, t)[0] = 0; \ ++ } \ ++ if (g_str_equal(resp, e)) { \ ++ break; \ ++ } \ ++ } while (g_test_timer_elapsed() < CONNECTION_TIMEOUT); \ ++ g_assert_cmpstr(resp, ==, e); \ ++ g_free(resp); \ ++} while (0) ++ ++static gchar *tmpdir; ++ ++static int inet_get_free_port_socket_ipv4(int sock) ++{ ++ struct sockaddr_in addr; ++ socklen_t len; ++ ++ memset(&addr, 0, sizeof(addr)); ++ addr.sin_family = AF_INET; ++ addr.sin_addr.s_addr = INADDR_ANY; ++ addr.sin_port = 0; ++ if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) { ++ return -1; ++ } ++ ++ len = sizeof(addr); ++ if (getsockname(sock, (struct sockaddr *)&addr, &len) < 0) { ++ return -1; ++ } ++ ++ return ntohs(addr.sin_port); ++} ++ ++static int inet_get_free_port_socket_ipv6(int sock) ++{ ++ struct sockaddr_in6 addr; ++ socklen_t len; ++ ++ memset(&addr, 0, sizeof(addr)); ++ addr.sin6_family = AF_INET6; ++ addr.sin6_addr = in6addr_any; ++ addr.sin6_port = 0; ++ if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) { ++ return -1; ++ } ++ ++ len = sizeof(addr); ++ if (getsockname(sock, (struct sockaddr *)&addr, &len) < 0) { ++ return -1; ++ } ++ ++ return ntohs(addr.sin6_port); ++} ++ ++static int inet_get_free_port_multiple(int nb, int *port, bool ipv6) ++{ ++ int sock[nb]; ++ int i; ++ ++ for (i = 0; i < nb; i++) { ++ sock[i] = socket(ipv6 ? AF_INET6 : AF_INET, SOCK_STREAM, 0); ++ if (sock[i] < 0) { ++ break; ++ } ++ port[i] = ipv6 ? 
inet_get_free_port_socket_ipv6(sock[i]) : ++ inet_get_free_port_socket_ipv4(sock[i]); ++ if (port[i] == -1) { ++ break; ++ } ++ } ++ ++ nb = i; ++ for (i = 0; i < nb; i++) { ++ closesocket(sock[i]); ++ } ++ ++ return nb; ++} ++ ++static int inet_get_free_port(bool ipv6) ++{ ++ int nb, port; ++ ++ nb = inet_get_free_port_multiple(1, &port, ipv6); ++ g_assert_cmpint(nb, ==, 1); ++ ++ return port; ++} ++ ++static void test_stream_inet_ipv4(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ int port; ++ ++ port = inet_get_free_port(false); ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,server=false,id=st0,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ expect = g_strdup_printf("st0: index=0,type=stream,tcp:127.0.0.1:%d\r\n", ++ port); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ /* the port is unknown, check only the address */ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,tcp:127.0.0.1", ':'); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++static void test_stream_inet_ipv6(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ int port; ++ ++ port = inet_get_free_port(true); ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true,addr.type=inet," ++ "addr.ipv4=off,addr.ipv6=on," ++ "addr.host=::1,addr.port=%d", port); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,server=false,id=st0,addr.type=inet," ++ "addr.ipv4=off,addr.ipv6=on," ++ "addr.host=::1,addr.port=%d", port); ++ ++ expect = g_strdup_printf("st0: index=0,type=stream,tcp:::1:%d\r\n", ++ port); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ /* the 
port is unknown, check only the address */ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,tcp:::1", ':'); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++static void test_stream_unix(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ gchar *path; ++ ++ path = g_strconcat(tmpdir, "/stream_unix", NULL); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true," ++ "addr.type=unix,addr.path=%s,", ++ path); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=false," ++ "addr.type=unix,addr.path=%s", ++ path); ++ ++ expect = g_strdup_printf("st0: index=0,type=stream,unix:%s\r\n", path); ++ EXPECT_STATE(qts1, expect, 0); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ g_free(path); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++#ifdef CONFIG_LINUX ++static void test_stream_unix_abstract(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ gchar *path; ++ ++ path = g_strconcat(tmpdir, "/stream_unix_abstract", NULL); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true," ++ "addr.type=unix,addr.path=%s," ++ "addr.abstract=on", ++ path); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=false," ++ "addr.type=unix,addr.path=%s,addr.abstract=on", ++ path); ++ ++ expect = g_strdup_printf("st0: index=0,type=stream,unix:%s\r\n", path); ++ EXPECT_STATE(qts1, expect, 0); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ g_free(path); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++#endif ++ ++#ifndef _WIN32 ++static void test_stream_fd(void) ++{ ++ QTestState *qts0, *qts1; ++ int sock[2]; ++ int ret; ++ ++ ret = socketpair(AF_LOCAL, SOCK_STREAM, 0, sock); ++ g_assert_true(ret == 0); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,addr.type=fd,addr.str=%d", ++ sock[0]); ++ 
++ EXPECT_STATE(qts0, "st0: index=0,type=stream,unix:\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,addr.type=fd,addr.str=%d", ++ sock[1]); ++ ++ EXPECT_STATE(qts1, "st0: index=0,type=stream,unix:\r\n", 0); ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,unix:\r\n", 0); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++ ++ closesocket(sock[0]); ++ closesocket(sock[1]); ++} ++#endif ++ ++static void test_dgram_inet(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ int port[2]; ++ int nb; ++ ++ nb = inet_get_free_port_multiple(2, port, false); ++ g_assert_cmpint(nb, ==, 2); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0," ++ "local.type=inet,local.host=127.0.0.1,local.port=%d," ++ "remote.type=inet,remote.host=127.0.0.1,remote.port=%d", ++ port[0], port[1]); ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram," ++ "udp=127.0.0.1:%d/127.0.0.1:%d\r\n", ++ port[0], port[1]); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0," ++ "local.type=inet,local.host=127.0.0.1,local.port=%d," ++ "remote.type=inet,remote.host=127.0.0.1,remote.port=%d", ++ port[1], port[0]); ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram," ++ "udp=127.0.0.1:%d/127.0.0.1:%d\r\n", ++ port[1], port[0]); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++#ifndef _WIN32 ++static void test_dgram_mcast(void) ++{ ++ QTestState *qts; ++ ++ qts = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0," ++ "remote.type=inet,remote.host=230.0.0.1,remote.port=1234"); ++ ++ EXPECT_STATE(qts, "st0: index=0,type=dgram,mcast=230.0.0.1:1234\r\n", 0); ++ ++ qtest_quit(qts); ++} ++ ++static void test_dgram_unix(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ gchar *path0, *path1; ++ ++ path0 = g_strconcat(tmpdir, "/dgram_unix0", NULL); ++ path1 = g_strconcat(tmpdir, "/dgram_unix1", NULL); ++ ++ 
qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0,local.type=unix,local.path=%s," ++ "remote.type=unix,remote.path=%s", ++ path0, path1); ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram,udp=%s:%s\r\n", ++ path0, path1); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0,local.type=unix,local.path=%s," ++ "remote.type=unix,remote.path=%s", ++ path1, path0); ++ ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram,udp=%s:%s\r\n", ++ path1, path0); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ unlink(path0); ++ g_free(path0); ++ unlink(path1); ++ g_free(path1); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++static void test_dgram_fd(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ int ret; ++ int sv[2]; ++ ++ ret = socketpair(PF_UNIX, SOCK_DGRAM, 0, sv); ++ g_assert_cmpint(ret, !=, -1); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0,local.type=fd,local.str=%d", ++ sv[0]); ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram,fd=%d unix\r\n", sv[0]); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0,local.type=fd,local.str=%d", ++ sv[1]); ++ ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram,fd=%d unix\r\n", sv[1]); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++ ++ closesocket(sv[0]); ++ closesocket(sv[1]); ++} ++#endif ++ ++int main(int argc, char **argv) ++{ ++ int ret; ++ bool has_ipv4, has_ipv6, has_afunix; ++ g_autoptr(GError) err = NULL; ++ ++ socket_init(); ++ g_test_init(&argc, &argv, NULL); ++ ++ if (socket_check_protocol_support(&has_ipv4, &has_ipv6) < 0) { ++ g_error("socket_check_protocol_support() failed\n"); ++ } ++ ++ tmpdir = g_dir_make_tmp("netdev-socket.XXXXXX", &err); ++ if (tmpdir == NULL) { ++ g_error("Can't create temporary directory in %s: %s", ++ 
g_get_tmp_dir(), err->message); ++ } ++ ++ if (has_ipv4) { ++ qtest_add_func("/netdev/stream/inet/ipv4", test_stream_inet_ipv4); ++ qtest_add_func("/netdev/dgram/inet", test_dgram_inet); ++#ifndef _WIN32 ++ qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast); ++#endif ++ } ++ if (has_ipv6) { ++ qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6); ++ } ++ ++ socket_check_afunix_support(&has_afunix); ++ if (has_afunix) { ++#ifndef _WIN32 ++ qtest_add_func("/netdev/dgram/unix", test_dgram_unix); ++#endif ++ qtest_add_func("/netdev/stream/unix", test_stream_unix); ++#ifdef CONFIG_LINUX ++ qtest_add_func("/netdev/stream/unix/abstract", ++ test_stream_unix_abstract); ++#endif ++#ifndef _WIN32 ++ qtest_add_func("/netdev/stream/fd", test_stream_fd); ++ qtest_add_func("/netdev/dgram/fd", test_dgram_fd); ++#endif ++ } ++ ++ ret = g_test_run(); ++ ++ g_rmdir(tmpdir); ++ g_free(tmpdir); ++ ++ return ret; ++} +-- +2.31.1 + diff --git a/SOURCES/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch b/SOURCES/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch new file mode 100644 index 0000000..14388fe --- /dev/null +++ b/SOURCES/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch @@ -0,0 +1,299 @@ +From 120db3dfeb88c447f0e115c19b7ede704f8f80cb Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Sat, 14 Jan 2023 13:05:41 -1000 +Subject: [PATCH 2/8] tests/tcg/i386: Introduce and use reg_t consistently +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions +RH-Bugzilla: 2173590 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Bandan Das +RH-Commit: [2/7] 843a677555414170392db21c828bef3dc3c29300 (bonzini/rhel-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 +Upstream-Status: merged + +Define reg_t based on the actual register 
width. +Define the inlines using that type. This will allow +input registers to 32-bit insns to be set to 64-bit +values on x86-64, which allows testing various edge cases. + +Signed-off-by: Richard Henderson +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20230114230542.3116013-2-richard.henderson@linaro.org> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 5d62d6649cd367b5b4a3676e7514d2f9ca86cb03) +--- + tests/tcg/i386/test-i386-bmi2.c | 182 ++++++++++++++++---------------- + 1 file changed, 93 insertions(+), 89 deletions(-) + +diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c +index 5fadf47510..3c3ef85513 100644 +--- a/tests/tcg/i386/test-i386-bmi2.c ++++ b/tests/tcg/i386/test-i386-bmi2.c +@@ -3,34 +3,40 @@ + #include + #include + ++#ifdef __x86_64 ++typedef uint64_t reg_t; ++#else ++typedef uint32_t reg_t; ++#endif ++ + #define insn1q(name, arg0) \ +-static inline uint64_t name##q(uint64_t arg0) \ ++static inline reg_t name##q(reg_t arg0) \ + { \ +- uint64_t result64; \ ++ reg_t result64; \ + asm volatile (#name "q %1, %0" : "=r"(result64) : "rm"(arg0)); \ + return result64; \ + } + + #define insn1l(name, arg0) \ +-static inline uint32_t name##l(uint32_t arg0) \ ++static inline reg_t name##l(reg_t arg0) \ + { \ +- uint32_t result32; \ ++ reg_t result32; \ + asm volatile (#name "l %k1, %k0" : "=r"(result32) : "rm"(arg0)); \ + return result32; \ + } + + #define insn2q(name, arg0, c0, arg1, c1) \ +-static inline uint64_t name##q(uint64_t arg0, uint64_t arg1) \ ++static inline reg_t name##q(reg_t arg0, reg_t arg1) \ + { \ +- uint64_t result64; \ ++ reg_t result64; \ + asm volatile (#name "q %2, %1, %0" : "=r"(result64) : c0(arg0), c1(arg1)); \ + return result64; \ + } + + #define insn2l(name, arg0, c0, arg1, c1) \ +-static inline uint32_t name##l(uint32_t arg0, uint32_t arg1) \ ++static inline reg_t name##l(reg_t arg0, reg_t arg1) \ + { \ +- uint32_t result32; \ ++ reg_t result32; \ + asm volatile (#name "l %k2, %k1, %k0" : 
"=r"(result32) : c0(arg0), c1(arg1)); \ + return result32; \ + } +@@ -65,130 +71,128 @@ insn1l(blsr, src) + int main(int argc, char *argv[]) { + uint64_t ehlo = 0x202020204f4c4845ull; + uint64_t mask = 0xa080800302020001ull; +- uint32_t result32; ++ reg_t result; + + #ifdef __x86_64 +- uint64_t result64; +- + /* 64 bits */ +- result64 = andnq(mask, ehlo); +- assert(result64 == 0x002020204d4c4844); ++ result = andnq(mask, ehlo); ++ assert(result == 0x002020204d4c4844); + +- result64 = pextq(ehlo, mask); +- assert(result64 == 133); ++ result = pextq(ehlo, mask); ++ assert(result == 133); + +- result64 = pdepq(result64, mask); +- assert(result64 == (ehlo & mask)); ++ result = pdepq(result, mask); ++ assert(result == (ehlo & mask)); + +- result64 = pextq(-1ull, mask); +- assert(result64 == 511); /* mask has 9 bits set */ ++ result = pextq(-1ull, mask); ++ assert(result == 511); /* mask has 9 bits set */ + +- result64 = pdepq(-1ull, mask); +- assert(result64 == mask); ++ result = pdepq(-1ull, mask); ++ assert(result == mask); + +- result64 = bextrq(mask, 0x3f00); +- assert(result64 == (mask & ~INT64_MIN)); ++ result = bextrq(mask, 0x3f00); ++ assert(result == (mask & ~INT64_MIN)); + +- result64 = bextrq(mask, 0x1038); +- assert(result64 == 0xa0); ++ result = bextrq(mask, 0x1038); ++ assert(result == 0xa0); + +- result64 = bextrq(mask, 0x10f8); +- assert(result64 == 0); ++ result = bextrq(mask, 0x10f8); ++ assert(result == 0); + +- result64 = blsiq(0x30); +- assert(result64 == 0x10); ++ result = blsiq(0x30); ++ assert(result == 0x10); + +- result64 = blsiq(0x30ull << 32); +- assert(result64 == 0x10ull << 32); ++ result = blsiq(0x30ull << 32); ++ assert(result == 0x10ull << 32); + +- result64 = blsmskq(0x30); +- assert(result64 == 0x1f); ++ result = blsmskq(0x30); ++ assert(result == 0x1f); + +- result64 = blsrq(0x30); +- assert(result64 == 0x20); ++ result = blsrq(0x30); ++ assert(result == 0x20); + +- result64 = blsrq(0x30ull << 32); +- assert(result64 == 0x20ull << 
32); ++ result = blsrq(0x30ull << 32); ++ assert(result == 0x20ull << 32); + +- result64 = bzhiq(mask, 0x3f); +- assert(result64 == (mask & ~INT64_MIN)); ++ result = bzhiq(mask, 0x3f); ++ assert(result == (mask & ~INT64_MIN)); + +- result64 = bzhiq(mask, 0x1f); +- assert(result64 == (mask & ~(-1 << 30))); ++ result = bzhiq(mask, 0x1f); ++ assert(result == (mask & ~(-1 << 30))); + +- result64 = rorxq(0x2132435465768798, 8); +- assert(result64 == 0x9821324354657687); ++ result = rorxq(0x2132435465768798, 8); ++ assert(result == 0x9821324354657687); + +- result64 = sarxq(0xffeeddccbbaa9988, 8); +- assert(result64 == 0xffffeeddccbbaa99); ++ result = sarxq(0xffeeddccbbaa9988, 8); ++ assert(result == 0xffffeeddccbbaa99); + +- result64 = sarxq(0x77eeddccbbaa9988, 8 | 64); +- assert(result64 == 0x0077eeddccbbaa99); ++ result = sarxq(0x77eeddccbbaa9988, 8 | 64); ++ assert(result == 0x0077eeddccbbaa99); + +- result64 = shrxq(0xffeeddccbbaa9988, 8); +- assert(result64 == 0x00ffeeddccbbaa99); ++ result = shrxq(0xffeeddccbbaa9988, 8); ++ assert(result == 0x00ffeeddccbbaa99); + +- result64 = shrxq(0x77eeddccbbaa9988, 8 | 192); +- assert(result64 == 0x0077eeddccbbaa99); ++ result = shrxq(0x77eeddccbbaa9988, 8 | 192); ++ assert(result == 0x0077eeddccbbaa99); + +- result64 = shlxq(0xffeeddccbbaa9988, 8); +- assert(result64 == 0xeeddccbbaa998800); ++ result = shlxq(0xffeeddccbbaa9988, 8); ++ assert(result == 0xeeddccbbaa998800); + #endif + + /* 32 bits */ +- result32 = andnl(mask, ehlo); +- assert(result32 == 0x04d4c4844); ++ result = andnl(mask, ehlo); ++ assert(result == 0x04d4c4844); + +- result32 = pextl((uint32_t) ehlo, mask); +- assert(result32 == 5); ++ result = pextl((uint32_t) ehlo, mask); ++ assert(result == 5); + +- result32 = pdepl(result32, mask); +- assert(result32 == (uint32_t)(ehlo & mask)); ++ result = pdepl(result, mask); ++ assert(result == (uint32_t)(ehlo & mask)); + +- result32 = pextl(-1u, mask); +- assert(result32 == 7); /* mask has 3 bits set */ ++ result = 
pextl(-1u, mask); ++ assert(result == 7); /* mask has 3 bits set */ + +- result32 = pdepl(-1u, mask); +- assert(result32 == (uint32_t)mask); ++ result = pdepl(-1u, mask); ++ assert(result == (uint32_t)mask); + +- result32 = bextrl(mask, 0x1f00); +- assert(result32 == (mask & ~INT32_MIN)); ++ result = bextrl(mask, 0x1f00); ++ assert(result == (mask & ~INT32_MIN)); + +- result32 = bextrl(ehlo, 0x1018); +- assert(result32 == 0x4f); ++ result = bextrl(ehlo, 0x1018); ++ assert(result == 0x4f); + +- result32 = bextrl(mask, 0x1038); +- assert(result32 == 0); ++ result = bextrl(mask, 0x1038); ++ assert(result == 0); + +- result32 = blsil(0xffff); +- assert(result32 == 1); ++ result = blsil(0xffff); ++ assert(result == 1); + +- result32 = blsmskl(0x300); +- assert(result32 == 0x1ff); ++ result = blsmskl(0x300); ++ assert(result == 0x1ff); + +- result32 = blsrl(0xffc); +- assert(result32 == 0xff8); ++ result = blsrl(0xffc); ++ assert(result == 0xff8); + +- result32 = bzhil(mask, 0xf); +- assert(result32 == 1); ++ result = bzhil(mask, 0xf); ++ assert(result == 1); + +- result32 = rorxl(0x65768798, 8); +- assert(result32 == 0x98657687); ++ result = rorxl(0x65768798, 8); ++ assert(result == 0x98657687); + +- result32 = sarxl(0xffeeddcc, 8); +- assert(result32 == 0xffffeedd); ++ result = sarxl(0xffeeddcc, 8); ++ assert(result == 0xffffeedd); + +- result32 = sarxl(0x77eeddcc, 8 | 32); +- assert(result32 == 0x0077eedd); ++ result = sarxl(0x77eeddcc, 8 | 32); ++ assert(result == 0x0077eedd); + +- result32 = shrxl(0xffeeddcc, 8); +- assert(result32 == 0x00ffeedd); ++ result = shrxl(0xffeeddcc, 8); ++ assert(result == 0x00ffeedd); + +- result32 = shrxl(0x77eeddcc, 8 | 128); +- assert(result32 == 0x0077eedd); ++ result = shrxl(0x77eeddcc, 8 | 128); ++ assert(result == 0x0077eedd); + +- result32 = shlxl(0xffeeddcc, 8); +- assert(result32 == 0xeeddcc00); ++ result = shlxl(0xffeeddcc, 8); ++ assert(result == 0xeeddcc00); + + return 0; + } +-- +2.39.1 + diff --git 
a/SOURCES/kvm-util-Return-void-on-iova_tree_remove.patch b/SOURCES/kvm-util-Return-void-on-iova_tree_remove.patch deleted file mode 100644 index 07c6f8e..0000000 --- a/SOURCES/kvm-util-Return-void-on-iova_tree_remove.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 74c829f82eafa8e42ae94f7ace55c8aaed3bb5f4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 27 Apr 2022 17:49:31 +0200 -Subject: [PATCH 05/23] util: Return void on iova_tree_remove -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/21] 252287acca896eba7b5d2b62fc6247cfc565ba57 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: Merged - -It always returns IOVA_OK so nobody uses it. - -Acked-by: Jason Wang -Reviewed-by: Peter Xu -Signed-off-by: Eugenio Pérez -Message-Id: <20220427154931.3166388-1-eperezma@redhat.com> -Signed-off-by: Laurent Vivier -(cherry picked from commit 832fef7cc14d65f99d523f883ef384014e6476a7) ---- - include/qemu/iova-tree.h | 4 +--- - util/iova-tree.c | 4 +--- - 2 files changed, 2 insertions(+), 6 deletions(-) - -diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h -index c938fb0793..16bbfdf5f8 100644 ---- a/include/qemu/iova-tree.h -+++ b/include/qemu/iova-tree.h -@@ -72,10 +72,8 @@ int iova_tree_insert(IOVATree *tree, const DMAMap *map); - * provided. The range does not need to be exactly what has inserted, - * all the mappings that are included in the provided range will be - * removed from the tree. Here map->translated_addr is meaningless. -- * -- * Return: 0 if succeeded, or <0 if error. 
- */ --int iova_tree_remove(IOVATree *tree, const DMAMap *map); -+void iova_tree_remove(IOVATree *tree, const DMAMap *map); - - /** - * iova_tree_find: -diff --git a/util/iova-tree.c b/util/iova-tree.c -index 6dff29c1f6..fee530a579 100644 ---- a/util/iova-tree.c -+++ b/util/iova-tree.c -@@ -164,15 +164,13 @@ void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator) - g_tree_foreach(tree->tree, iova_tree_traverse, iterator); - } - --int iova_tree_remove(IOVATree *tree, const DMAMap *map) -+void iova_tree_remove(IOVATree *tree, const DMAMap *map) - { - const DMAMap *overlap; - - while ((overlap = iova_tree_find(tree, map))) { - g_tree_remove(tree->tree, overlap); - } -- -- return IOVA_OK; - } - - /** --- -2.31.1 - diff --git a/SOURCES/kvm-util-accept-iova_tree_remove_parameter-by-value.patch b/SOURCES/kvm-util-accept-iova_tree_remove_parameter-by-value.patch deleted file mode 100644 index cd073da..0000000 --- a/SOURCES/kvm-util-accept-iova_tree_remove_parameter-by-value.patch +++ /dev/null @@ -1,182 +0,0 @@ -From 90697579eaf598614293d75f684d6e8c55f8ab9b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:04 +0200 -Subject: [PATCH 06/23] util: accept iova_tree_remove_parameter by value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/21] ddaf052789e7ab3c67a77c038347113301587ffb (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -It's convenient to call iova_tree_remove from a map returned from -iova_tree_find or iova_tree_find_iova. With the current code this is not -possible, since we will free it, and then we will try to search for it -again. 
- -Fix it making accepting the map by value, forcing a copy of the -argument. Not applying a fixes tag, since there is no use like that at -the moment. - -Signed-off-by: Eugenio Pérez -Signed-off-by: Jason Wang -(cherry picked from commit d69ba6677405de86b3b617fc7688b549f84cf013) ---- - hw/i386/intel_iommu.c | 6 +++--- - hw/virtio/vhost-iova-tree.c | 2 +- - hw/virtio/vhost-iova-tree.h | 2 +- - hw/virtio/vhost-vdpa.c | 6 +++--- - include/qemu/iova-tree.h | 2 +- - net/vhost-vdpa.c | 4 ++-- - util/iova-tree.c | 4 ++-- - 7 files changed, 13 insertions(+), 13 deletions(-) - -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index c64aa81a83..6738cf0929 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -1157,7 +1157,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) - return ret; - } - /* Drop any existing mapping */ -- iova_tree_remove(as->iova_tree, &target); -+ iova_tree_remove(as->iova_tree, target); - /* Recover the correct type */ - event->type = IOMMU_NOTIFIER_MAP; - entry->perm = cache_perm; -@@ -1170,7 +1170,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) - trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask); - return 0; - } -- iova_tree_remove(as->iova_tree, &target); -+ iova_tree_remove(as->iova_tree, target); - } - - trace_vtd_page_walk_one(info->domain_id, entry->iova, -@@ -3532,7 +3532,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) - - map.iova = n->start; - map.size = size; -- iova_tree_remove(as->iova_tree, &map); -+ iova_tree_remove(as->iova_tree, map); - } - - static void vtd_address_space_unmap_all(IntelIOMMUState *s) -diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c -index 55fed1fefb..1339a4de8b 100644 ---- a/hw/virtio/vhost-iova-tree.c -+++ b/hw/virtio/vhost-iova-tree.c -@@ -104,7 +104,7 @@ int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map) - * @iova_tree: The vhost iova tree - 
* @map: The map to remove - */ --void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map) -+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map) - { - iova_tree_remove(iova_tree->iova_taddr_map, map); - } -diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h -index 6a4f24e0f9..4adfd79ff0 100644 ---- a/hw/virtio/vhost-iova-tree.h -+++ b/hw/virtio/vhost-iova-tree.h -@@ -22,6 +22,6 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete); - const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree, - const DMAMap *map); - int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map); --void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map); -+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map); - - #endif -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index cc15b7d8ee..39aa70f52d 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -238,7 +238,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - - fail_map: - if (v->shadow_vqs_enabled) { -- vhost_iova_tree_remove(v->iova_tree, &mem_region); -+ vhost_iova_tree_remove(v->iova_tree, mem_region); - } - - fail: -@@ -298,7 +298,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - return; - } - iova = result->iova; -- vhost_iova_tree_remove(v->iova_tree, result); -+ vhost_iova_tree_remove(v->iova_tree, *result); - } - vhost_vdpa_iotlb_batch_begin_once(v); - ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); -@@ -942,7 +942,7 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, - needle->perm == IOMMU_RO); - if (unlikely(r != 0)) { - error_setg_errno(errp, -r, "Cannot map region to device"); -- vhost_iova_tree_remove(v->iova_tree, needle); -+ vhost_iova_tree_remove(v->iova_tree, *needle); - } - - return r == 0; -diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h -index 
16bbfdf5f8..8528e5c98f 100644 ---- a/include/qemu/iova-tree.h -+++ b/include/qemu/iova-tree.h -@@ -73,7 +73,7 @@ int iova_tree_insert(IOVATree *tree, const DMAMap *map); - * all the mappings that are included in the provided range will be - * removed from the tree. Here map->translated_addr is meaningless. - */ --void iova_tree_remove(IOVATree *tree, const DMAMap *map); -+void iova_tree_remove(IOVATree *tree, DMAMap map); - - /** - * iova_tree_find: -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 411e71e6c2..ba65736f83 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -244,7 +244,7 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) - error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); - } - -- vhost_iova_tree_remove(tree, map); -+ vhost_iova_tree_remove(tree, *map); - } - - static size_t vhost_vdpa_net_cvq_cmd_len(void) -@@ -297,7 +297,7 @@ static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, - return true; - - dma_map_err: -- vhost_iova_tree_remove(v->iova_tree, &map); -+ vhost_iova_tree_remove(v->iova_tree, map); - return false; - } - -diff --git a/util/iova-tree.c b/util/iova-tree.c -index fee530a579..536789797e 100644 ---- a/util/iova-tree.c -+++ b/util/iova-tree.c -@@ -164,11 +164,11 @@ void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator) - g_tree_foreach(tree->tree, iova_tree_traverse, iterator); - } - --void iova_tree_remove(IOVATree *tree, const DMAMap *map) -+void iova_tree_remove(IOVATree *tree, DMAMap map) - { - const DMAMap *overlap; - -- while ((overlap = iova_tree_find(tree, map))) { -+ while ((overlap = iova_tree_find(tree, &map))) { - g_tree_remove(tree->tree, overlap); - } - } --- -2.31.1 - diff --git a/SOURCES/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch b/SOURCES/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch deleted file mode 100644 index 77929a6..0000000 --- a/SOURCES/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch +++ 
/dev/null @@ -1,385 +0,0 @@ -From 7a6fa42d4a4263c94b9bf18290f9e7680ea9e7f4 Mon Sep 17 00:00:00 2001 -From: Nicolas Saenz Julienne -Date: Mon, 25 Apr 2022 09:57:23 +0200 -Subject: [PATCH 03/16] util/event-loop-base: Introduce options to set the - thread pool size - -RH-Author: Nicolas Saenz Julienne -RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size -RH-Commit: [3/3] af78a88ff3c69701cbb5f9e980c3d6ebbd13ff98 -RH-Bugzilla: 2031024 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -The thread pool regulates itself: when idle, it kills threads until -empty, when in demand, it creates new threads until full. This behaviour -doesn't play well with latency sensitive workloads where the price of -creating a new thread is too high. For example, when paired with qemu's -'-mlock', or using safety features like SafeStack, creating a new thread -has been measured take multiple milliseconds. - -In order to mitigate this let's introduce a new 'EventLoopBase' -property to set the thread pool size. The threads will be created during -the pool's initialization or upon updating the property's value, remain -available during its lifetime regardless of demand, and destroyed upon -freeing it. A properly characterized workload will then be able to -configure the pool to avoid any latency spikes. 
- -Signed-off-by: Nicolas Saenz Julienne -Reviewed-by: Stefan Hajnoczi -Acked-by: Markus Armbruster -Message-id: 20220425075723.20019-4-nsaenzju@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 71ad4713cc1d7fca24388b828ef31ae6cb38a31c) ---- - event-loop-base.c | 23 +++++++++++++ - include/block/aio.h | 10 ++++++ - include/block/thread-pool.h | 3 ++ - include/sysemu/event-loop-base.h | 4 +++ - iothread.c | 3 ++ - qapi/qom.json | 10 +++++- - util/aio-posix.c | 1 + - util/async.c | 20 ++++++++++++ - util/main-loop.c | 9 ++++++ - util/thread-pool.c | 55 +++++++++++++++++++++++++++++--- - 10 files changed, 133 insertions(+), 5 deletions(-) - -diff --git a/event-loop-base.c b/event-loop-base.c -index e7f99a6ec8..d5be4dc6fc 100644 ---- a/event-loop-base.c -+++ b/event-loop-base.c -@@ -14,6 +14,7 @@ - #include "qemu/osdep.h" - #include "qom/object_interfaces.h" - #include "qapi/error.h" -+#include "block/thread-pool.h" - #include "sysemu/event-loop-base.h" - - typedef struct { -@@ -21,9 +22,22 @@ typedef struct { - ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */ - } EventLoopBaseParamInfo; - -+static void event_loop_base_instance_init(Object *obj) -+{ -+ EventLoopBase *base = EVENT_LOOP_BASE(obj); -+ -+ base->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; -+} -+ - static EventLoopBaseParamInfo aio_max_batch_info = { - "aio-max-batch", offsetof(EventLoopBase, aio_max_batch), - }; -+static EventLoopBaseParamInfo thread_pool_min_info = { -+ "thread-pool-min", offsetof(EventLoopBase, thread_pool_min), -+}; -+static EventLoopBaseParamInfo thread_pool_max_info = { -+ "thread-pool-max", offsetof(EventLoopBase, thread_pool_max), -+}; - - static void event_loop_base_get_param(Object *obj, Visitor *v, - const char *name, void *opaque, Error **errp) -@@ -95,12 +109,21 @@ static void event_loop_base_class_init(ObjectClass *klass, void *class_data) - event_loop_base_get_param, - event_loop_base_set_param, - NULL, &aio_max_batch_info); -+ 
object_class_property_add(klass, "thread-pool-min", "int", -+ event_loop_base_get_param, -+ event_loop_base_set_param, -+ NULL, &thread_pool_min_info); -+ object_class_property_add(klass, "thread-pool-max", "int", -+ event_loop_base_get_param, -+ event_loop_base_set_param, -+ NULL, &thread_pool_max_info); - } - - static const TypeInfo event_loop_base_info = { - .name = TYPE_EVENT_LOOP_BASE, - .parent = TYPE_OBJECT, - .instance_size = sizeof(EventLoopBase), -+ .instance_init = event_loop_base_instance_init, - .class_size = sizeof(EventLoopBaseClass), - .class_init = event_loop_base_class_init, - .abstract = true, -diff --git a/include/block/aio.h b/include/block/aio.h -index 5634173b12..d128558f1d 100644 ---- a/include/block/aio.h -+++ b/include/block/aio.h -@@ -192,6 +192,8 @@ struct AioContext { - QSLIST_HEAD(, Coroutine) scheduled_coroutines; - QEMUBH *co_schedule_bh; - -+ int thread_pool_min; -+ int thread_pool_max; - /* Thread pool for performing work and receiving completion callbacks. - * Has its own locking. 
- */ -@@ -769,4 +771,12 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, - void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, - Error **errp); - -+/** -+ * aio_context_set_thread_pool_params: -+ * @ctx: the aio context -+ * @min: min number of threads to have readily available in the thread pool -+ * @min: max number of threads the thread pool can contain -+ */ -+void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min, -+ int64_t max, Error **errp); - #endif -diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h -index 7dd7d730a0..2020bcc92d 100644 ---- a/include/block/thread-pool.h -+++ b/include/block/thread-pool.h -@@ -20,6 +20,8 @@ - - #include "block/block.h" - -+#define THREAD_POOL_MAX_THREADS_DEFAULT 64 -+ - typedef int ThreadPoolFunc(void *opaque); - - typedef struct ThreadPool ThreadPool; -@@ -33,5 +35,6 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool, - int coroutine_fn thread_pool_submit_co(ThreadPool *pool, - ThreadPoolFunc *func, void *arg); - void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg); -+void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx); - - #endif -diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h -index fced4c9fea..2748bf6ae1 100644 ---- a/include/sysemu/event-loop-base.h -+++ b/include/sysemu/event-loop-base.h -@@ -33,5 +33,9 @@ struct EventLoopBase { - - /* AioContext AIO engine parameters */ - int64_t aio_max_batch; -+ -+ /* AioContext thread pool parameters */ -+ int64_t thread_pool_min; -+ int64_t thread_pool_max; - }; - #endif -diff --git a/iothread.c b/iothread.c -index 8fa2f3bfb8..529194a566 100644 ---- a/iothread.c -+++ b/iothread.c -@@ -174,6 +174,9 @@ static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp) - aio_context_set_aio_params(iothread->ctx, - iothread->parent_obj.aio_max_batch, - errp); -+ -+ aio_context_set_thread_pool_params(iothread->ctx, 
base->thread_pool_min, -+ base->thread_pool_max, errp); - } - - -diff --git a/qapi/qom.json b/qapi/qom.json -index 7d4a2ac1b9..6a653c6636 100644 ---- a/qapi/qom.json -+++ b/qapi/qom.json -@@ -508,10 +508,18 @@ - # 0 means that the engine will use its default. - # (default: 0) - # -+# @thread-pool-min: minimum number of threads reserved in the thread pool -+# (default:0) -+# -+# @thread-pool-max: maximum number of threads the thread pool can contain -+# (default:64) -+# - # Since: 7.1 - ## - { 'struct': 'EventLoopBaseProperties', -- 'data': { '*aio-max-batch': 'int' } } -+ 'data': { '*aio-max-batch': 'int', -+ '*thread-pool-min': 'int', -+ '*thread-pool-max': 'int' } } - - ## - # @IothreadProperties: -diff --git a/util/aio-posix.c b/util/aio-posix.c -index be0182a3c6..731f3826c0 100644 ---- a/util/aio-posix.c -+++ b/util/aio-posix.c -@@ -15,6 +15,7 @@ - - #include "qemu/osdep.h" - #include "block/block.h" -+#include "block/thread-pool.h" - #include "qemu/main-loop.h" - #include "qemu/rcu.h" - #include "qemu/rcu_queue.h" -diff --git a/util/async.c b/util/async.c -index 2ea1172f3e..554ba70cca 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -563,6 +563,9 @@ AioContext *aio_context_new(Error **errp) - - ctx->aio_max_batch = 0; - -+ ctx->thread_pool_min = 0; -+ ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; -+ - return ctx; - fail: - g_source_destroy(&ctx->source); -@@ -696,3 +699,20 @@ void qemu_set_current_aio_context(AioContext *ctx) - assert(!get_my_aiocontext()); - set_my_aiocontext(ctx); - } -+ -+void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min, -+ int64_t max, Error **errp) -+{ -+ -+ if (min > max || !max || min > INT_MAX || max > INT_MAX) { -+ error_setg(errp, "bad thread-pool-min/thread-pool-max values"); -+ return; -+ } -+ -+ ctx->thread_pool_min = min; -+ ctx->thread_pool_max = max; -+ -+ if (ctx->thread_pool) { -+ thread_pool_update_params(ctx->thread_pool, ctx); -+ } -+} -diff --git a/util/main-loop.c b/util/main-loop.c -index 
5b13f456fa..a0f48186ab 100644 ---- a/util/main-loop.c -+++ b/util/main-loop.c -@@ -30,6 +30,7 @@ - #include "sysemu/replay.h" - #include "qemu/main-loop.h" - #include "block/aio.h" -+#include "block/thread-pool.h" - #include "qemu/error-report.h" - #include "qemu/queue.h" - #include "qemu/compiler.h" -@@ -187,12 +188,20 @@ int qemu_init_main_loop(Error **errp) - - static void main_loop_update_params(EventLoopBase *base, Error **errp) - { -+ ERRP_GUARD(); -+ - if (!qemu_aio_context) { - error_setg(errp, "qemu aio context not ready"); - return; - } - - aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp); -+ if (*errp) { -+ return; -+ } -+ -+ aio_context_set_thread_pool_params(qemu_aio_context, base->thread_pool_min, -+ base->thread_pool_max, errp); - } - - MainLoop *mloop; -diff --git a/util/thread-pool.c b/util/thread-pool.c -index d763cea505..196835b4d3 100644 ---- a/util/thread-pool.c -+++ b/util/thread-pool.c -@@ -58,7 +58,6 @@ struct ThreadPool { - QemuMutex lock; - QemuCond worker_stopped; - QemuSemaphore sem; -- int max_threads; - QEMUBH *new_thread_bh; - - /* The following variables are only accessed from one AioContext. */ -@@ -71,8 +70,27 @@ struct ThreadPool { - int new_threads; /* backlog of threads we need to create */ - int pending_threads; /* threads created but not running yet */ - bool stopping; -+ int min_threads; -+ int max_threads; - }; - -+static inline bool back_to_sleep(ThreadPool *pool, int ret) -+{ -+ /* -+ * The semaphore timed out, we should exit the loop except when: -+ * - There is work to do, we raced with the signal. -+ * - The max threads threshold just changed, we raced with the signal. -+ * - The thread pool forces a minimum number of readily available threads. 
-+ */ -+ if (ret == -1 && (!QTAILQ_EMPTY(&pool->request_list) || -+ pool->cur_threads > pool->max_threads || -+ pool->cur_threads <= pool->min_threads)) { -+ return true; -+ } -+ -+ return false; -+} -+ - static void *worker_thread(void *opaque) - { - ThreadPool *pool = opaque; -@@ -91,8 +109,9 @@ static void *worker_thread(void *opaque) - ret = qemu_sem_timedwait(&pool->sem, 10000); - qemu_mutex_lock(&pool->lock); - pool->idle_threads--; -- } while (ret == -1 && !QTAILQ_EMPTY(&pool->request_list)); -- if (ret == -1 || pool->stopping) { -+ } while (back_to_sleep(pool, ret)); -+ if (ret == -1 || pool->stopping || -+ pool->cur_threads > pool->max_threads) { - break; - } - -@@ -294,6 +313,33 @@ void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg) - thread_pool_submit_aio(pool, func, arg, NULL, NULL); - } - -+void thread_pool_update_params(ThreadPool *pool, AioContext *ctx) -+{ -+ qemu_mutex_lock(&pool->lock); -+ -+ pool->min_threads = ctx->thread_pool_min; -+ pool->max_threads = ctx->thread_pool_max; -+ -+ /* -+ * We either have to: -+ * - Increase the number available of threads until over the min_threads -+ * threshold. -+ * - Decrease the number of available threads until under the max_threads -+ * threshold. -+ * - Do nothing. The current number of threads fall in between the min and -+ * max thresholds. We'll let the pool manage itself. 
-+ */ -+ for (int i = pool->cur_threads; i < pool->min_threads; i++) { -+ spawn_thread(pool); -+ } -+ -+ for (int i = pool->cur_threads; i > pool->max_threads; i--) { -+ qemu_sem_post(&pool->sem); -+ } -+ -+ qemu_mutex_unlock(&pool->lock); -+} -+ - static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) - { - if (!ctx) { -@@ -306,11 +352,12 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) - qemu_mutex_init(&pool->lock); - qemu_cond_init(&pool->worker_stopped); - qemu_sem_init(&pool->sem, 0); -- pool->max_threads = 64; - pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool); - - QLIST_INIT(&pool->head); - QTAILQ_INIT(&pool->request_list); -+ -+ thread_pool_update_params(pool, ctx); - } - - ThreadPool *thread_pool_new(AioContext *ctx) --- -2.31.1 - diff --git a/SOURCES/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch b/SOURCES/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch deleted file mode 100644 index 2104424..0000000 --- a/SOURCES/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch +++ /dev/null @@ -1,233 +0,0 @@ -From b4969662de01848f887a3918e97e516efc213f71 Mon Sep 17 00:00:00 2001 -From: Nicolas Saenz Julienne -Date: Mon, 25 Apr 2022 09:57:22 +0200 -Subject: [PATCH 02/16] util/main-loop: Introduce the main loop into QOM - -RH-Author: Nicolas Saenz Julienne -RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size -RH-Commit: [2/3] a481b77e25ad50d13dcbe26b36c551b18c89bddd -RH-Bugzilla: 2031024 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -'event-loop-base' provides basic property handling for all 'AioContext' -based event loops. So let's define a new 'MainLoopClass' that inherits -from it. This will permit tweaking the main loop's properties through -qapi as well as through the command line using the '-object' keyword[1]. -Only one instance of 'MainLoopClass' might be created at any time. 
- -'EventLoopBaseClass' learns a new callback, 'can_be_deleted()' so as to -mark 'MainLoop' as non-deletable. - -[1] For example: - -object main-loop,id=main-loop,aio-max-batch= - -Signed-off-by: Nicolas Saenz Julienne -Reviewed-by: Stefan Hajnoczi -Acked-by: Markus Armbruster -Message-id: 20220425075723.20019-3-nsaenzju@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 70ac26b9e5ca8374bb3ef3f30b871726673c9f27) ---- - event-loop-base.c | 13 ++++++++ - include/qemu/main-loop.h | 10 ++++++ - include/sysemu/event-loop-base.h | 1 + - meson.build | 3 +- - qapi/qom.json | 13 ++++++++ - util/main-loop.c | 56 ++++++++++++++++++++++++++++++++ - 6 files changed, 95 insertions(+), 1 deletion(-) - -diff --git a/event-loop-base.c b/event-loop-base.c -index a924c73a7c..e7f99a6ec8 100644 ---- a/event-loop-base.c -+++ b/event-loop-base.c -@@ -73,10 +73,23 @@ static void event_loop_base_complete(UserCreatable *uc, Error **errp) - } - } - -+static bool event_loop_base_can_be_deleted(UserCreatable *uc) -+{ -+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc); -+ EventLoopBase *backend = EVENT_LOOP_BASE(uc); -+ -+ if (bc->can_be_deleted) { -+ return bc->can_be_deleted(backend); -+ } -+ -+ return true; -+} -+ - static void event_loop_base_class_init(ObjectClass *klass, void *class_data) - { - UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); - ucc->complete = event_loop_base_complete; -+ ucc->can_be_deleted = event_loop_base_can_be_deleted; - - object_class_property_add(klass, "aio-max-batch", "int", - event_loop_base_get_param, -diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h -index d3750c8e76..20c9387654 100644 ---- a/include/qemu/main-loop.h -+++ b/include/qemu/main-loop.h -@@ -26,9 +26,19 @@ - #define QEMU_MAIN_LOOP_H - - #include "block/aio.h" -+#include "qom/object.h" -+#include "sysemu/event-loop-base.h" - - #define SIG_IPI SIGUSR1 - -+#define TYPE_MAIN_LOOP "main-loop" -+OBJECT_DECLARE_TYPE(MainLoop, MainLoopClass, MAIN_LOOP) -+ 
-+struct MainLoop { -+ EventLoopBase parent_obj; -+}; -+typedef struct MainLoop MainLoop; -+ - /** - * qemu_init_main_loop: Set up the process so that it can run the main loop. - * -diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h -index 8e77d8b69f..fced4c9fea 100644 ---- a/include/sysemu/event-loop-base.h -+++ b/include/sysemu/event-loop-base.h -@@ -25,6 +25,7 @@ struct EventLoopBaseClass { - - void (*init)(EventLoopBase *base, Error **errp); - void (*update_params)(EventLoopBase *base, Error **errp); -+ bool (*can_be_deleted)(EventLoopBase *base); - }; - - struct EventLoopBase { -diff --git a/meson.build b/meson.build -index b9c919a55e..5a7c10e639 100644 ---- a/meson.build -+++ b/meson.build -@@ -2832,7 +2832,8 @@ libqemuutil = static_library('qemuutil', - sources: util_ss.sources() + stub_ss.sources() + genh, - dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc, pixman]) - qemuutil = declare_dependency(link_with: libqemuutil, -- sources: genh + version_res) -+ sources: genh + version_res, -+ dependencies: [event_loop_base]) - - if have_system or have_user - decodetree = generator(find_program('scripts/decodetree.py'), -diff --git a/qapi/qom.json b/qapi/qom.json -index a2439533c5..7d4a2ac1b9 100644 ---- a/qapi/qom.json -+++ b/qapi/qom.json -@@ -540,6 +540,17 @@ - '*poll-grow': 'int', - '*poll-shrink': 'int' } } - -+## -+# @MainLoopProperties: -+# -+# Properties for the main-loop object. 
-+# -+# Since: 7.1 -+## -+{ 'struct': 'MainLoopProperties', -+ 'base': 'EventLoopBaseProperties', -+ 'data': {} } -+ - ## - # @MemoryBackendProperties: - # -@@ -830,6 +841,7 @@ - { 'name': 'input-linux', - 'if': 'CONFIG_LINUX' }, - 'iothread', -+ 'main-loop', - { 'name': 'memory-backend-epc', - 'if': 'CONFIG_LINUX' }, - 'memory-backend-file', -@@ -895,6 +907,7 @@ - 'input-linux': { 'type': 'InputLinuxProperties', - 'if': 'CONFIG_LINUX' }, - 'iothread': 'IothreadProperties', -+ 'main-loop': 'MainLoopProperties', - 'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties', - 'if': 'CONFIG_LINUX' }, - 'memory-backend-file': 'MemoryBackendFileProperties', -diff --git a/util/main-loop.c b/util/main-loop.c -index b7b0ce4ca0..5b13f456fa 100644 ---- a/util/main-loop.c -+++ b/util/main-loop.c -@@ -33,6 +33,7 @@ - #include "qemu/error-report.h" - #include "qemu/queue.h" - #include "qemu/compiler.h" -+#include "qom/object.h" - - #ifndef _WIN32 - #include -@@ -184,6 +185,61 @@ int qemu_init_main_loop(Error **errp) - return 0; - } - -+static void main_loop_update_params(EventLoopBase *base, Error **errp) -+{ -+ if (!qemu_aio_context) { -+ error_setg(errp, "qemu aio context not ready"); -+ return; -+ } -+ -+ aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp); -+} -+ -+MainLoop *mloop; -+ -+static void main_loop_init(EventLoopBase *base, Error **errp) -+{ -+ MainLoop *m = MAIN_LOOP(base); -+ -+ if (mloop) { -+ error_setg(errp, "only one main-loop instance allowed"); -+ return; -+ } -+ -+ main_loop_update_params(base, errp); -+ -+ mloop = m; -+ return; -+} -+ -+static bool main_loop_can_be_deleted(EventLoopBase *base) -+{ -+ return false; -+} -+ -+static void main_loop_class_init(ObjectClass *oc, void *class_data) -+{ -+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc); -+ -+ bc->init = main_loop_init; -+ bc->update_params = main_loop_update_params; -+ bc->can_be_deleted = main_loop_can_be_deleted; -+} -+ -+static const TypeInfo main_loop_info = { -+ 
.name = TYPE_MAIN_LOOP, -+ .parent = TYPE_EVENT_LOOP_BASE, -+ .class_init = main_loop_class_init, -+ .instance_size = sizeof(MainLoop), -+}; -+ -+static void main_loop_register_types(void) -+{ -+ type_register_static(&main_loop_info); -+} -+ -+type_init(main_loop_register_types) -+ - static int max_priority; - - #ifndef _WIN32 --- -2.31.1 - diff --git a/SOURCES/kvm-util-userfaultfd-Add-uffd_open.patch b/SOURCES/kvm-util-userfaultfd-Add-uffd_open.patch new file mode 100644 index 0000000..5a5f90c --- /dev/null +++ b/SOURCES/kvm-util-userfaultfd-Add-uffd_open.patch @@ -0,0 +1,169 @@ +From 80445fed73a7d1a87e8ce96f6cb7d505e437f845 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 1 Feb 2023 16:10:54 -0500 +Subject: [PATCH 4/8] util/userfaultfd: Add uffd_open() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 149: Support /dev/userfaultfd +RH-Bugzilla: 2158704 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: quintela1 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/3] 4c81696314ab26db47c3415fa2c2501c6a572b5c (peterx/qemu-kvm) + +Add a helper to create the uffd handle. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Juan Quintela +Signed-off-by: Peter Xu +Signed-off-by: Juan Quintela +(cherry picked from commit d5890ea0722831eea76a0efd23a496b3e8815fe8) +Signed-off-by: Peter Xu +--- + include/qemu/userfaultfd.h | 12 ++++++++++++ + migration/postcopy-ram.c | 11 +++++------ + tests/qtest/migration-test.c | 4 ++-- + util/userfaultfd.c | 13 +++++++++++-- + 4 files changed, 30 insertions(+), 10 deletions(-) + +diff --git a/include/qemu/userfaultfd.h b/include/qemu/userfaultfd.h +index 6b74f92792..d764496f0b 100644 +--- a/include/qemu/userfaultfd.h ++++ b/include/qemu/userfaultfd.h +@@ -13,10 +13,20 @@ + #ifndef USERFAULTFD_H + #define USERFAULTFD_H + ++#ifdef CONFIG_LINUX ++ + #include "qemu/osdep.h" + #include "exec/hwaddr.h" + #include + ++/** ++ * uffd_open(): Open an userfaultfd handle for current context. ++ * ++ * @flags: The flags we want to pass in when creating the handle. ++ * ++ * Returns: the uffd handle if >=0, or <0 if error happens. ++ */ ++int uffd_open(int flags); + int uffd_query_features(uint64_t *features); + int uffd_create_fd(uint64_t features, bool non_blocking); + void uffd_close_fd(int uffd_fd); +@@ -32,4 +42,6 @@ int uffd_wakeup(int uffd_fd, void *addr, uint64_t length); + int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count); + bool uffd_poll_events(int uffd_fd, int tmo); + ++#endif /* CONFIG_LINUX */ ++ + #endif /* USERFAULTFD_H */ +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index b9a37ef255..0c55df0e52 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -37,6 +37,7 @@ + #include "qemu-file.h" + #include "yank_functions.h" + #include "tls.h" ++#include "qemu/userfaultfd.h" + + /* Arbitrary limit on size of each discard command, + * keeps them around ~200 bytes +@@ -226,11 +227,9 @@ static bool receive_ufd_features(uint64_t *features) + int ufd; + bool ret = true; + +- /* if we are here __NR_userfaultfd should exists */ +- ufd = 
syscall(__NR_userfaultfd, O_CLOEXEC); ++ ufd = uffd_open(O_CLOEXEC); + if (ufd == -1) { +- error_report("%s: syscall __NR_userfaultfd failed: %s", __func__, +- strerror(errno)); ++ error_report("%s: uffd_open() failed: %s", __func__, strerror(errno)); + return false; + } + +@@ -375,7 +374,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + goto out; + } + +- ufd = syscall(__NR_userfaultfd, O_CLOEXEC); ++ ufd = uffd_open(O_CLOEXEC); + if (ufd == -1) { + error_report("%s: userfaultfd not available: %s", __func__, + strerror(errno)); +@@ -1160,7 +1159,7 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis) + int postcopy_ram_incoming_setup(MigrationIncomingState *mis) + { + /* Open the fd for the kernel to give us userfaults */ +- mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); ++ mis->userfault_fd = uffd_open(O_CLOEXEC | O_NONBLOCK); + if (mis->userfault_fd == -1) { + error_report("%s: Failed to open userfault fd: %s", __func__, + strerror(errno)); +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index dbde726adf..0100e1bdbc 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -61,14 +61,14 @@ static bool uffd_feature_thread_id; + #if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD) + #include + #include +-#include ++#include "qemu/userfaultfd.h" + + static bool ufd_version_check(void) + { + struct uffdio_api api_struct; + uint64_t ioctl_mask; + +- int ufd = syscall(__NR_userfaultfd, O_CLOEXEC); ++ int ufd = uffd_open(O_CLOEXEC); + + if (ufd == -1) { + g_test_message("Skipping test: userfaultfd not available"); +diff --git a/util/userfaultfd.c b/util/userfaultfd.c +index f1cd6af2b1..4953b3137d 100644 +--- a/util/userfaultfd.c ++++ b/util/userfaultfd.c +@@ -19,6 +19,15 @@ + #include + #include + ++int uffd_open(int flags) ++{ ++#if defined(__NR_userfaultfd) ++ return syscall(__NR_userfaultfd, flags); ++#else ++ return -EINVAL; 
++#endif ++} ++ + /** + * uffd_query_features: query UFFD features + * +@@ -32,7 +41,7 @@ int uffd_query_features(uint64_t *features) + struct uffdio_api api_struct = { 0 }; + int ret = -1; + +- uffd_fd = syscall(__NR_userfaultfd, O_CLOEXEC); ++ uffd_fd = uffd_open(O_CLOEXEC); + if (uffd_fd < 0) { + trace_uffd_query_features_nosys(errno); + return -1; +@@ -69,7 +78,7 @@ int uffd_create_fd(uint64_t features, bool non_blocking) + uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER); + + flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0); +- uffd_fd = syscall(__NR_userfaultfd, flags); ++ uffd_fd = uffd_open(flags); + if (uffd_fd < 0) { + trace_uffd_create_fd_nosys(errno); + return -1; +-- +2.31.1 + diff --git a/SOURCES/kvm-util-userfaultfd-Support-dev-userfaultfd.patch b/SOURCES/kvm-util-userfaultfd-Support-dev-userfaultfd.patch new file mode 100644 index 0000000..b0a22eb --- /dev/null +++ b/SOURCES/kvm-util-userfaultfd-Support-dev-userfaultfd.patch @@ -0,0 +1,94 @@ +From a91da7741464dadeb306a741b4fb562e49ffea57 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 7 Feb 2023 15:57:11 -0500 +Subject: [PATCH 5/8] util/userfaultfd: Support /dev/userfaultfd + +RH-Author: Peter Xu +RH-MergeRequest: 149: Support /dev/userfaultfd +RH-Bugzilla: 2158704 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: quintela1 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/3] 5f427d8c18c210ff8f66724c9e358a7120619e69 (peterx/qemu-kvm) + +Teach QEMU to use /dev/userfaultfd when it existed and fallback to the +system call if either it's not there or doesn't have enough permission. + +Firstly, as long as the app has permission to access /dev/userfaultfd, it +always have the ability to trap kernel faults which QEMU mostly wants. +Meanwhile, in some context (e.g. containers) the userfaultfd syscall can be +forbidden, so it can be the major way to use postcopy in a restricted +environment with strict seccomp setup. 
+ +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit c40c0463413b941c13fe5f99a90c02d7d6584828) +Signed-off-by: Peter Xu +--- + util/trace-events | 1 + + util/userfaultfd.c | 32 ++++++++++++++++++++++++++++++++ + 2 files changed, 33 insertions(+) + +diff --git a/util/trace-events b/util/trace-events +index c8f53d7d9f..16f78d8fe5 100644 +--- a/util/trace-events ++++ b/util/trace-events +@@ -93,6 +93,7 @@ qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_siz + qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p" + + #userfaultfd.c ++uffd_detect_open_mode(int mode) "%d" + uffd_query_features_nosys(int err) "errno: %i" + uffd_query_features_api_failed(int err) "errno: %i" + uffd_create_fd_nosys(int err) "errno: %i" +diff --git a/util/userfaultfd.c b/util/userfaultfd.c +index 4953b3137d..fdff4867e8 100644 +--- a/util/userfaultfd.c ++++ b/util/userfaultfd.c +@@ -18,10 +18,42 @@ + #include + #include + #include ++#include ++ ++typedef enum { ++ UFFD_UNINITIALIZED = 0, ++ UFFD_USE_DEV_PATH, ++ UFFD_USE_SYSCALL, ++} uffd_open_mode; + + int uffd_open(int flags) + { + #if defined(__NR_userfaultfd) ++ static uffd_open_mode open_mode; ++ static int uffd_dev; ++ ++ /* Detect how to generate uffd desc when run the 1st time */ ++ if (open_mode == UFFD_UNINITIALIZED) { ++ /* ++ * Make /dev/userfaultfd the default approach because it has better ++ * permission controls, meanwhile allows kernel faults without any ++ * privilege requirement (e.g. SYS_CAP_PTRACE). 
++ */ ++ uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); ++ if (uffd_dev >= 0) { ++ open_mode = UFFD_USE_DEV_PATH; ++ } else { ++ /* Fallback to the system call */ ++ open_mode = UFFD_USE_SYSCALL; ++ } ++ trace_uffd_detect_open_mode(open_mode); ++ } ++ ++ if (open_mode == UFFD_USE_DEV_PATH) { ++ assert(uffd_dev >= 0); ++ return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags); ++ } ++ + return syscall(__NR_userfaultfd, flags); + #else + return -EINVAL; +-- +2.31.1 + diff --git a/SOURCES/kvm-vdpa-Add-device-migration-blocker.patch b/SOURCES/kvm-vdpa-Add-device-migration-blocker.patch deleted file mode 100644 index 1b83c98..0000000 --- a/SOURCES/kvm-vdpa-Add-device-migration-blocker.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 8e0fdce814af4cfc84dce5e5920da989b1f1a86d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 16:06:05 +0200 -Subject: [PATCH 26/32] vdpa: Add device migration blocker -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [26/27] 53d94d45b5e5e88f12b95f9b0f243696cfcbd7ce (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit c156d5bf2b142dcc06808ccee06882144f230aec -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:45 2022 +0200 - - vdpa: Add device migration blocker - - Since the vhost-vdpa device is exposing _F_LOG, adding a migration blocker if - it uses CVQ. - - However, qemu is able to migrate simple devices with no CVQ as long as - they use SVQ. To allow it, add a placeholder error to vhost_vdpa, and - only add to vhost_dev when used. vhost_dev machinery place the migration - blocker if needed. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-vdpa.c | 15 +++++++++++++++ - include/hw/virtio/vhost-vdpa.h | 1 + - 2 files changed, 16 insertions(+) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 49effe5462..e3e5bce4bb 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -20,6 +20,7 @@ - #include "hw/virtio/vhost-shadow-virtqueue.h" - #include "hw/virtio/vhost-vdpa.h" - #include "exec/address-spaces.h" -+#include "migration/blocker.h" - #include "qemu/main-loop.h" - #include "cpu.h" - #include "trace.h" -@@ -1020,6 +1021,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) - return true; - } - -+ if (v->migration_blocker) { -+ int r = migrate_add_blocker(v->migration_blocker, &err); -+ if (unlikely(r < 0)) { -+ return false; -+ } -+ } -+ - for (i = 0; i < v->shadow_vqs->len; ++i) { - VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); -@@ -1062,6 +1070,10 @@ err: - vhost_svq_stop(svq); - } - -+ if (v->migration_blocker) { -+ migrate_del_blocker(v->migration_blocker); -+ } -+ - return false; - } - -@@ -1081,6 +1093,9 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev) - } - } - -+ if (v->migration_blocker) { -+ migrate_del_blocker(v->migration_blocker); -+ } - return true; - } - -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index 1111d85643..d10a89303e 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -35,6 +35,7 @@ typedef struct vhost_vdpa { - bool shadow_vqs_enabled; - /* IOVA mapping used by the Shadow Virtqueue */ - VhostIOVATree *iova_tree; -+ Error *migration_blocker; - GPtrArray *shadow_vqs; - const VhostShadowVirtqueueOps *shadow_vq_ops; - void *shadow_vq_ops_opaque; --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch 
b/SOURCES/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch deleted file mode 100644 index 4dede70..0000000 --- a/SOURCES/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch +++ /dev/null @@ -1,87 +0,0 @@ -From e1f9986cf77e4b2f16aca7b2523bc75bae0c4d3c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:36 +0200 -Subject: [PATCH 21/23] vdpa: Add virtio-net mac address via CVQ at start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [20/21] a7920816d5faf7a0cfbb7c2731a48ddfc456b8d4 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -This is needed so the destination vdpa device see the same state a the -guest set in the source. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit f34cd09b13855657a0d49c5ea6a1e37ba9dc2334) ---- - net/vhost-vdpa.c | 40 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 40 insertions(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index f09f044ec1..79ebda7de1 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -363,11 +363,51 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, - return vhost_svq_poll(svq); - } - -+static int vhost_vdpa_net_load(NetClientState *nc) -+{ -+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ const struct vhost_vdpa *v = &s->vhost_vdpa; -+ const VirtIONet *n; -+ uint64_t features; -+ -+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); -+ -+ if (!v->shadow_vqs_enabled) { -+ return 0; -+ } -+ -+ n = VIRTIO_NET(v->dev->vdev); -+ features = n->parent_obj.guest_features; -+ if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) { -+ const struct virtio_net_ctrl_hdr ctrl = { -+ .class = VIRTIO_NET_CTRL_MAC, -+ .cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET, -+ }; -+ char *cursor = s->cvq_cmd_out_buffer; -+ ssize_t dev_written; -+ -+ memcpy(cursor, &ctrl, sizeof(ctrl)); -+ cursor += sizeof(ctrl); -+ memcpy(cursor, n->mac, sizeof(n->mac)); -+ -+ dev_written = vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + sizeof(n->mac), -+ sizeof(virtio_net_ctrl_ack)); -+ if (unlikely(dev_written < 0)) { -+ return dev_written; -+ } -+ -+ return *((virtio_net_ctrl_ack *)s->cvq_cmd_in_buffer) != VIRTIO_NET_OK; -+ } -+ -+ return 0; -+} -+ - static NetClientInfo net_vhost_vdpa_cvq_info = { - .type = NET_CLIENT_DRIVER_VHOST_VDPA, - .size = sizeof(VhostVDPAState), - .receive = vhost_vdpa_receive, - .start = vhost_vdpa_net_cvq_start, -+ .load = vhost_vdpa_net_load, - .stop = vhost_vdpa_net_cvq_stop, - .cleanup = vhost_vdpa_cleanup, - .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch 
b/SOURCES/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch deleted file mode 100644 index 8a7b600..0000000 --- a/SOURCES/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch +++ /dev/null @@ -1,223 +0,0 @@ -From 0b27781f9984c67625c49a516c3e38fbf5fa1b1b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 16:06:16 +0200 -Subject: [PATCH 27/32] vdpa: Add x-svq to NetdevVhostVDPAOptions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [27/27] bd85496c2a8c1ebf34f908fca2be2ab9852fd0e9 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 1576dbb5bbc49344c606e969ec749be70c0fd94e -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:46 2022 +0200 - - vdpa: Add x-svq to NetdevVhostVDPAOptions - - Finally offering the possibility to enable SVQ from the command line. - - Signed-off-by: Eugenio Pérez - Acked-by: Markus Armbruster - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - net/vhost-vdpa.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++-- - qapi/net.json | 9 +++++- - 2 files changed, 77 insertions(+), 4 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 8b76dac966..50672bcd66 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -75,6 +75,28 @@ const int vdpa_feature_bits[] = { - VHOST_INVALID_FEATURE_BIT - }; - -+/** Supported device specific feature bits with SVQ */ -+static const uint64_t vdpa_svq_device_features = -+ BIT_ULL(VIRTIO_NET_F_CSUM) | -+ BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | -+ BIT_ULL(VIRTIO_NET_F_MTU) | -+ BIT_ULL(VIRTIO_NET_F_MAC) | -+ BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | -+ BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | -+ BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | -+ BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | -+ BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | -+ BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | -+ BIT_ULL(VIRTIO_NET_F_HOST_ECN) | -+ BIT_ULL(VIRTIO_NET_F_HOST_UFO) | -+ BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | -+ BIT_ULL(VIRTIO_NET_F_STATUS) | -+ BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | -+ BIT_ULL(VIRTIO_F_ANY_LAYOUT) | -+ BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | -+ BIT_ULL(VIRTIO_NET_F_RSC_EXT) | -+ BIT_ULL(VIRTIO_NET_F_STANDBY); -+ - VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -@@ -133,9 +155,13 @@ err_init: - static void vhost_vdpa_cleanup(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ struct vhost_dev *dev = &s->vhost_net->dev; - - qemu_vfree(s->cvq_cmd_out_buffer); - qemu_vfree(s->cvq_cmd_in_buffer); -+ if (dev->vq_index + dev->nvqs == dev->vq_index_end) { -+ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); -+ } - if (s->vhost_net) { - vhost_net_cleanup(s->vhost_net); - g_free(s->vhost_net); -@@ -437,7 +463,9 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - int vdpa_device_fd, - int queue_pair_index, - int nvqs, -- bool 
is_datapath) -+ bool is_datapath, -+ bool svq, -+ VhostIOVATree *iova_tree) - { - NetClientState *nc = NULL; - VhostVDPAState *s; -@@ -455,6 +483,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.device_fd = vdpa_device_fd; - s->vhost_vdpa.index = queue_pair_index; -+ s->vhost_vdpa.shadow_vqs_enabled = svq; -+ s->vhost_vdpa.iova_tree = iova_tree; - if (!is_datapath) { - s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size, - vhost_vdpa_net_cvq_cmd_page_len()); -@@ -465,6 +495,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; -+ error_setg(&s->vhost_vdpa.migration_blocker, -+ "Migration disabled: vhost-vdpa uses CVQ."); - } - ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); - if (ret) { -@@ -474,6 +506,14 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - return nc; - } - -+static int vhost_vdpa_get_iova_range(int fd, -+ struct vhost_vdpa_iova_range *iova_range) -+{ -+ int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range); -+ -+ return ret < 0 ? 
-errno : 0; -+} -+ - static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp) - { - int ret = ioctl(fd, VHOST_GET_FEATURES, features); -@@ -524,6 +564,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - uint64_t features; - int vdpa_device_fd; - g_autofree NetClientState **ncs = NULL; -+ g_autoptr(VhostIOVATree) iova_tree = NULL; - NetClientState *nc; - int queue_pairs, r, i, has_cvq = 0; - -@@ -551,22 +592,45 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - return queue_pairs; - } - -+ if (opts->x_svq) { -+ struct vhost_vdpa_iova_range iova_range; -+ -+ uint64_t invalid_dev_features = -+ features & ~vdpa_svq_device_features & -+ /* Transport are all accepted at this point */ -+ ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, -+ VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); -+ -+ if (invalid_dev_features) { -+ error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, -+ invalid_dev_features); -+ goto err_svq; -+ } -+ -+ vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); -+ iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); -+ } -+ - ncs = g_malloc0(sizeof(*ncs) * queue_pairs); - - for (i = 0; i < queue_pairs; i++) { - ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, -- vdpa_device_fd, i, 2, true); -+ vdpa_device_fd, i, 2, true, opts->x_svq, -+ iova_tree); - if (!ncs[i]) - goto err; - } - - if (has_cvq) { - nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, -- vdpa_device_fd, i, 1, false); -+ vdpa_device_fd, i, 1, false, -+ opts->x_svq, iova_tree); - if (!nc) - goto err; - } - -+ /* iova_tree ownership belongs to last NetClientState */ -+ g_steal_pointer(&iova_tree); - return 0; - - err: -@@ -575,6 +639,8 @@ err: - qemu_del_net_client(ncs[i]); - } - } -+ -+err_svq: - qemu_close(vdpa_device_fd); - - return -1; -diff --git a/qapi/net.json b/qapi/net.json -index b92f3f5fb4..92848e4362 100644 ---- a/qapi/net.json -+++ b/qapi/net.json -@@ -445,12 +445,19 @@ - # 
@queues: number of queues to be created for multiqueue vhost-vdpa - # (default: 1) - # -+# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1) -+# (default: false) -+# -+# Features: -+# @unstable: Member @x-svq is experimental. -+# - # Since: 5.1 - ## - { 'struct': 'NetdevVhostVDPAOptions', - 'data': { - '*vhostdev': 'str', -- '*queues': 'int' } } -+ '*queues': 'int', -+ '*x-svq': {'type': 'bool', 'features' : [ 'unstable'] } } } - - ## - # @NetClientDriver: --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch b/SOURCES/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch deleted file mode 100644 index acd45e0..0000000 --- a/SOURCES/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch +++ /dev/null @@ -1,65 +0,0 @@ -From df06ce560ddfefde98bef822ec2020382059921f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 10/32] vdpa: Avoid compiler to squash reads to used idx -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [10/27] b28789302d4f64749da26f413763f918161d9b70 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit c381abc37f0aba42ed2e3b41cdace8f8438829e4 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:29 2022 +0200 - - vdpa: Avoid compiler to squash reads to used idx - - In the next patch we will allow busypolling of this value. The compiler - have a running path where shadow_used_idx, last_used_idx, and vring used - idx are not modified within the same thread busypolling. 
- - This was not an issue before since we always cleared device event - notifier before checking it, and that could act as memory barrier. - However, the busypoll needs something similar to kernel READ_ONCE. - - Let's add it here, sepparated from the polling. - - Signed-off-by: Eugenio Pérez - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 3fbda1e3d4..9c46c3a8fa 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -327,11 +327,12 @@ static void vhost_handle_guest_kick_notifier(EventNotifier *n) - - static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) - { -+ uint16_t *used_idx = &svq->vring.used->idx; - if (svq->last_used_idx != svq->shadow_used_idx) { - return true; - } - -- svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx); -+ svq->shadow_used_idx = cpu_to_le16(*(volatile uint16_t *)used_idx); - - return svq->last_used_idx != svq->shadow_used_idx; - } --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch b/SOURCES/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch deleted file mode 100644 index 243aec8..0000000 --- a/SOURCES/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch +++ /dev/null @@ -1,323 +0,0 @@ -From 881945094c0e4d33614d40959bfc20e395f5a478 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 16:05:40 +0200 -Subject: [PATCH 24/32] vdpa: Buffer CVQ support on shadow virtqueue -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [24/27] 5486f80141a3ad968a32e782bdcdead32f417352 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu 
-RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 2df4dd31e194c94da7d28c02e92449f4a989fca9 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:43 2022 +0200 - - vdpa: Buffer CVQ support on shadow virtqueue - - Introduce the control virtqueue support for vDPA shadow virtqueue. This - is needed for advanced networking features like rx filtering. - - Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid - TOCTOU with the guest's or device's memory every time there is a device - model change. Otherwise, the guest could change the memory content in - the time between qemu and the device read it. - - To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is - implemented. If the virtio-net driver changes MAC the virtio-net device - model will be updated with the new one, and a rx filtering change event - will be raised. - - More cvq commands could be added here straightforwardly but they have - not been tested. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - net/vhost-vdpa.c | 213 +++++++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 205 insertions(+), 8 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 2e3b6b10d8..df42822463 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -33,6 +33,9 @@ typedef struct VhostVDPAState { - NetClientState nc; - struct vhost_vdpa vhost_vdpa; - VHostNetState *vhost_net; -+ -+ /* Control commands shadow buffers */ -+ void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer; - bool started; - } VhostVDPAState; - -@@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - -+ qemu_vfree(s->cvq_cmd_out_buffer); -+ qemu_vfree(s->cvq_cmd_in_buffer); - if (s->vhost_net) { - vhost_net_cleanup(s->vhost_net); - g_free(s->vhost_net); -@@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = { - .check_peer_type = vhost_vdpa_check_peer_type, - }; - -+static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) -+{ -+ VhostIOVATree *tree = v->iova_tree; -+ DMAMap needle = { -+ /* -+ * No need to specify size or to look for more translations since -+ * this contiguous chunk was allocated by us. -+ */ -+ .translated_addr = (hwaddr)(uintptr_t)addr, -+ }; -+ const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle); -+ int r; -+ -+ if (unlikely(!map)) { -+ error_report("Cannot locate expected map"); -+ return; -+ } -+ -+ r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1); -+ if (unlikely(r != 0)) { -+ error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); -+ } -+ -+ vhost_iova_tree_remove(tree, map); -+} -+ -+static size_t vhost_vdpa_net_cvq_cmd_len(void) -+{ -+ /* -+ * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. 
-+ * In buffer is always 1 byte, so it should fit here -+ */ -+ return sizeof(struct virtio_net_ctrl_hdr) + -+ 2 * sizeof(struct virtio_net_ctrl_mac) + -+ MAC_TABLE_ENTRIES * ETH_ALEN; -+} -+ -+static size_t vhost_vdpa_net_cvq_cmd_page_len(void) -+{ -+ return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size); -+} -+ -+/** Copy and map a guest buffer. */ -+static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, -+ const struct iovec *out_data, -+ size_t out_num, size_t data_len, void *buf, -+ size_t *written, bool write) -+{ -+ DMAMap map = {}; -+ int r; -+ -+ if (unlikely(!data_len)) { -+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n", -+ __func__, write ? "in" : "out"); -+ return false; -+ } -+ -+ *written = iov_to_buf(out_data, out_num, 0, buf, data_len); -+ map.translated_addr = (hwaddr)(uintptr_t)buf; -+ map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1; -+ map.perm = write ? IOMMU_RW : IOMMU_RO, -+ r = vhost_iova_tree_map_alloc(v->iova_tree, &map); -+ if (unlikely(r != IOVA_OK)) { -+ error_report("Cannot map injected element"); -+ return false; -+ } -+ -+ r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, -+ !write); -+ if (unlikely(r < 0)) { -+ goto dma_map_err; -+ } -+ -+ return true; -+ -+dma_map_err: -+ vhost_iova_tree_remove(v->iova_tree, &map); -+ return false; -+} -+ - /** -- * Forward buffer for the moment. 
-+ * Copy the guest element into a dedicated buffer suitable to be sent to NIC -+ * -+ * @iov: [0] is the out buffer, [1] is the in one -+ */ -+static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, -+ VirtQueueElement *elem, -+ struct iovec *iov) -+{ -+ size_t in_copied; -+ bool ok; -+ -+ iov[0].iov_base = s->cvq_cmd_out_buffer; -+ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num, -+ vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base, -+ &iov[0].iov_len, false); -+ if (unlikely(!ok)) { -+ return false; -+ } -+ -+ iov[1].iov_base = s->cvq_cmd_in_buffer; -+ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0, -+ sizeof(virtio_net_ctrl_ack), iov[1].iov_base, -+ &in_copied, true); -+ if (unlikely(!ok)) { -+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); -+ return false; -+ } -+ -+ iov[1].iov_len = sizeof(virtio_net_ctrl_ack); -+ return true; -+} -+ -+/** -+ * Do not forward commands not supported by SVQ. Otherwise, the device could -+ * accept it and qemu would not know how to update the device model. -+ */ -+static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out, -+ size_t out_num) -+{ -+ struct virtio_net_ctrl_hdr ctrl; -+ size_t n; -+ -+ n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl)); -+ if (unlikely(n < sizeof(ctrl))) { -+ qemu_log_mask(LOG_GUEST_ERROR, -+ "%s: invalid legnth of out buffer %zu\n", __func__, n); -+ return false; -+ } -+ -+ switch (ctrl.class) { -+ case VIRTIO_NET_CTRL_MAC: -+ switch (ctrl.cmd) { -+ case VIRTIO_NET_CTRL_MAC_ADDR_SET: -+ return true; -+ default: -+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n", -+ __func__, ctrl.cmd); -+ }; -+ break; -+ default: -+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n", -+ __func__, ctrl.class); -+ }; -+ -+ return false; -+} -+ -+/** -+ * Validate and copy control virtqueue commands. -+ * -+ * Following QEMU guidelines, we offer a copy of the buffers to the device to -+ * prevent TOCTOU bugs. 
- */ - static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - VirtQueueElement *elem, - void *opaque) - { -- unsigned int n = elem->out_num + elem->in_num; -- g_autofree struct iovec *dev_buffers = g_new(struct iovec, n); -+ VhostVDPAState *s = opaque; - size_t in_len, dev_written; - virtio_net_ctrl_ack status = VIRTIO_NET_ERR; -- int r; -+ /* out and in buffers sent to the device */ -+ struct iovec dev_buffers[2] = { -+ { .iov_base = s->cvq_cmd_out_buffer }, -+ { .iov_base = s->cvq_cmd_in_buffer }, -+ }; -+ /* in buffer used for device model */ -+ const struct iovec in = { -+ .iov_base = &status, -+ .iov_len = sizeof(status), -+ }; -+ int r = -EINVAL; -+ bool ok; -+ -+ ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers); -+ if (unlikely(!ok)) { -+ goto out; -+ } - -- memcpy(dev_buffers, elem->out_sg, elem->out_num); -- memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num); -+ ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1); -+ if (unlikely(!ok)) { -+ goto out; -+ } - -- r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1], -- elem->in_num, elem); -+ r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem); - if (unlikely(r != 0)) { - if (unlikely(r == -ENOSPC)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", -@@ -224,6 +396,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - dev_written = vhost_svq_poll(svq); - if (unlikely(dev_written < sizeof(status))) { - error_report("Insufficient written data (%zu)", dev_written); -+ goto out; -+ } -+ -+ memcpy(&status, dev_buffers[1].iov_base, sizeof(status)); -+ if (status != VIRTIO_NET_OK) { -+ goto out; -+ } -+ -+ status = VIRTIO_NET_ERR; -+ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1); -+ if (status != VIRTIO_NET_OK) { -+ error_report("Bad CVQ processing in model"); - } - - out: -@@ -234,6 +418,12 @@ out: - } - vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); - g_free(elem); -+ 
if (dev_buffers[0].iov_base) { -+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base); -+ } -+ if (dev_buffers[1].iov_base) { -+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base); -+ } - return r; - } - -@@ -266,6 +456,13 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.device_fd = vdpa_device_fd; - s->vhost_vdpa.index = queue_pair_index; - if (!is_datapath) { -+ s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size, -+ vhost_vdpa_net_cvq_cmd_page_len()); -+ memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); -+ s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size, -+ vhost_vdpa_net_cvq_cmd_page_len()); -+ memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); -+ - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; - } --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Delete-CVQ-migration-blocker.patch b/SOURCES/kvm-vdpa-Delete-CVQ-migration-blocker.patch deleted file mode 100644 index 87dfb5a..0000000 --- a/SOURCES/kvm-vdpa-Delete-CVQ-migration-blocker.patch +++ /dev/null @@ -1,98 +0,0 @@ -From 896f7749c72afe988ab28ac6af77b9c53b685c03 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:37 +0200 -Subject: [PATCH 22/23] vdpa: Delete CVQ migration blocker -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [21/21] 286f55177a132a8845c2912fb28cb4add472005a (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -We can restore the device state in the destination via CVQ now. Remove -the migration blocker. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit fe2b0cd71cddbec4eaf6e325eaf357a4e72a469d) ---- - hw/virtio/vhost-vdpa.c | 15 --------------- - include/hw/virtio/vhost-vdpa.h | 1 - - net/vhost-vdpa.c | 2 -- - 3 files changed, 18 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 0bea1e1eb9..b61e313953 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1031,13 +1031,6 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) - return true; - } - -- if (v->migration_blocker) { -- int r = migrate_add_blocker(v->migration_blocker, &err); -- if (unlikely(r < 0)) { -- return false; -- } -- } -- - for (i = 0; i < v->shadow_vqs->len; ++i) { - VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); -@@ -1080,10 +1073,6 @@ err: - vhost_svq_stop(svq); - } - -- if (v->migration_blocker) { -- migrate_del_blocker(v->migration_blocker); -- } -- - return false; - } - -@@ -1099,10 +1088,6 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); - vhost_vdpa_svq_unmap_rings(dev, svq); - } -- -- if (v->migration_blocker) { -- migrate_del_blocker(v->migration_blocker); -- } - } - - static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index d10a89303e..1111d85643 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -35,7 +35,6 @@ typedef struct vhost_vdpa { - bool shadow_vqs_enabled; - /* IOVA mapping used by the Shadow Virtqueue */ - VhostIOVATree *iova_tree; -- Error *migration_blocker; - GPtrArray *shadow_vqs; - const VhostShadowVirtqueueOps *shadow_vq_ops; - void *shadow_vq_ops_opaque; -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 79ebda7de1..f4f16583e4 100644 ---- a/net/vhost-vdpa.c -+++ 
b/net/vhost-vdpa.c -@@ -555,8 +555,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; -- error_setg(&s->vhost_vdpa.migration_blocker, -- "Migration disabled: vhost-vdpa uses CVQ."); - } - ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); - if (ret) { --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch b/SOURCES/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch deleted file mode 100644 index d6e72ac..0000000 --- a/SOURCES/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 3a5d325fcb2958318262efac31d5fd25fb062523 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 21/32] vdpa: Export vhost_vdpa_dma_map and unmap calls -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [21/27] 97e7a583bbd3c12a0786d53132812ec41702c190 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 463ba1e3b8cf080812895c5f26d95d8d7db2e692 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:40 2022 +0200 - - vdpa: Export vhost_vdpa_dma_map and unmap calls - - Shadow CVQ will copy buffers on qemu VA, so we avoid TOCTOU attacks from - the guest that could set a different state in qemu device model and vdpa - device. - - To do so, it needs to be able to map these new buffers to the device. - - Signed-off-by: Eugenio Pérez - Acked-by: Jason Wang - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-vdpa.c | 7 +++---- - include/hw/virtio/vhost-vdpa.h | 4 ++++ - 2 files changed, 7 insertions(+), 4 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 28df57b12e..14b02fe079 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -71,8 +71,8 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, - return false; - } - --static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, -- void *vaddr, bool readonly) -+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, -+ void *vaddr, bool readonly) - { - struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; -@@ -97,8 +97,7 @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, - return ret; - } - --static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, -- hwaddr size) -+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size) - { - struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index a29dbb3f53..7214eb47dc 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -39,4 +39,8 @@ typedef struct vhost_vdpa { - VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; - } VhostVDPA; - -+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, -+ void *vaddr, bool readonly); -+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size); -+ - #endif --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch b/SOURCES/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch deleted file mode 100644 index 44e97af..0000000 --- a/SOURCES/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 9a290bd74f983f3a65aa9ec5df2da9aa94bfdecd Mon Sep 17 00:00:00 2001 
-From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 16:05:42 +0200 -Subject: [PATCH 25/32] vdpa: Extract get features part from - vhost_vdpa_get_max_queue_pairs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [25/27] 654ad68e10a4df84cced923c64e72d500721ad67 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 8170ab3f43989680491d00f1017f60b25d346114 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:44 2022 +0200 - - vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs - - To know the device features is needed for CVQ SVQ, so SVQ knows if it - can handle all commands or not. Extract from - vhost_vdpa_get_max_queue_pairs so we can reuse it. - - Signed-off-by: Eugenio Pérez - Acked-by: Jason Wang - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - net/vhost-vdpa.c | 30 ++++++++++++++++++++---------- - 1 file changed, 20 insertions(+), 10 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index df42822463..8b76dac966 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -474,20 +474,24 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - return nc; - } - --static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp) -+static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp) -+{ -+ int ret = ioctl(fd, VHOST_GET_FEATURES, features); -+ if (unlikely(ret < 0)) { -+ error_setg_errno(errp, errno, -+ "Fail to query features from vhost-vDPA device"); -+ } -+ return ret; -+} -+ -+static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features, -+ int *has_cvq, Error **errp) - { - unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); - g_autofree struct vhost_vdpa_config *config = NULL; - __virtio16 *max_queue_pairs; -- uint64_t features; - int ret; - -- ret = ioctl(fd, VHOST_GET_FEATURES, &features); -- if (ret) { -- error_setg(errp, "Fail to query features from vhost-vDPA device"); -- return ret; -- } -- - if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) { - *has_cvq = 1; - } else { -@@ -517,10 +521,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - NetClientState *peer, Error **errp) - { - const NetdevVhostVDPAOptions *opts; -+ uint64_t features; - int vdpa_device_fd; - g_autofree NetClientState **ncs = NULL; - NetClientState *nc; -- int queue_pairs, i, has_cvq = 0; -+ int queue_pairs, r, i, has_cvq = 0; - - assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); - opts = &netdev->u.vhost_vdpa; -@@ -534,7 +539,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - return -errno; - } - -- queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, -+ r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp); -+ if 
(unlikely(r < 0)) { -+ return r; -+ } -+ -+ queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features, - &has_cvq, errp); - if (queue_pairs < 0) { - qemu_close(vdpa_device_fd); --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch b/SOURCES/kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch deleted file mode 100644 index 2d0d55f..0000000 --- a/SOURCES/kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch +++ /dev/null @@ -1,50 +0,0 @@ -From e19adb058502e24580dbc4f6f944cd951ca288ed Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 12 May 2022 19:57:44 +0200 -Subject: [PATCH 08/11] vdpa: Fix bad index calculus at - vhost_vdpa_get_vring_base -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 114: vdpa: Fix memory listener deletions of iova tree -RH-Commit: [1/4] 754fb8960684fa7a91bddb18c8df58c3b947ee75 (eperezmartin/qemu-kvm) -RH-Bugzilla: 2116876 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Fixes: 6d0b222666 ("vdpa: Adapt vhost_vdpa_get_vring_base to SVQ") - -Acked-by: Jason Wang -Signed-off-by: Eugenio Pérez -Message-Id: <20220512175747.142058-4-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 639036477ef890958415967e753ca2cbb348c16c) ---- - hw/virtio/vhost-vdpa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index e3e5bce4bb..a7dfac530f 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1193,11 +1193,11 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, - struct vhost_vring_state *ring) - { - struct vhost_vdpa *v = dev->opaque; -+ int vdpa_idx = ring->index - dev->vq_index; - int ret; - - if (v->shadow_vqs_enabled) { -- VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, -- ring->index); -+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); - - /* - * Setting base as last used idx, so destination will see as available --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch b/SOURCES/kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch deleted file mode 100644 index 1757d3f..0000000 --- a/SOURCES/kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 71857062b7aea29fc418e107244cf4083cd78cd7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 2 Aug 2022 13:24:46 +0200 -Subject: [PATCH 11/11] vdpa: Fix file descriptor leak on get features error -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 114: vdpa: Fix memory listener deletions of iova tree -RH-Commit: [4/4] bdfe6ed4539ecf68dc8bc4519755f9d5c096447d (eperezmartin/qemu-kvm) -RH-Bugzilla: 2116876 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -File descriptor vdpa_device_fd is not free in the case of returning -error from vhost_vdpa_get_features. Fixing it by making all errors go to -the same error path. 
- -Resolves: Coverity CID 1490785 -Fixes: 8170ab3f43 ("vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs") - -Signed-off-by: Eugenio Pérez -Reviewed-by: Laurent Vivier -Reviewed-by: Michael S. Tsirkin -Message-Id: <20220802112447.249436-2-eperezma@redhat.com> -Signed-off-by: Laurent Vivier -(cherry picked from commit aed5da45daf734ddc543c0791e877dac75e16f61) ---- - net/vhost-vdpa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 50672bcd66..411e71e6c2 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -566,7 +566,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - g_autofree NetClientState **ncs = NULL; - g_autoptr(VhostIOVATree) iova_tree = NULL; - NetClientState *nc; -- int queue_pairs, r, i, has_cvq = 0; -+ int queue_pairs, r, i = 0, has_cvq = 0; - - assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); - opts = &netdev->u.vhost_vdpa; -@@ -582,7 +582,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - - r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp); - if (unlikely(r < 0)) { -- return r; -+ goto err; - } - - queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features, --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch b/SOURCES/kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch deleted file mode 100644 index 8125cb2..0000000 --- a/SOURCES/kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 6335431b70dd55c1d52152d726fa462db2e10eb8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 12 May 2022 19:57:45 +0200 -Subject: [PATCH 09/11] vdpa: Fix index calculus at vhost_vdpa_svqs_start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 114: vdpa: Fix memory listener deletions of iova tree -RH-Commit: [2/4] 
9ce732e6bba426f8e00020ee6ad77f972f3e75b5 (eperezmartin/qemu-kvm) -RH-Bugzilla: 2116876 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -With the introduction of MQ the index of the vq needs to be calculated -with the device model vq_index. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20220512175747.142058-5-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 1c82fdfef8a227518ffecae9d419bcada995c202) ---- - hw/virtio/vhost-vdpa.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index a7dfac530f..f877b354fa 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1032,7 +1032,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) - VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); - struct vhost_vring_addr addr = { -- .index = i, -+ .index = dev->vq_index + i, - }; - int r; - bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err); --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch b/SOURCES/kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch deleted file mode 100644 index e6f1d39..0000000 --- a/SOURCES/kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch +++ /dev/null @@ -1,61 +0,0 @@ -From b212edc97a471c75f8b8b44ee2a3a2cf82ef14d9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 22 Jul 2022 10:26:30 +0200 -Subject: [PATCH 10/11] vdpa: Fix memory listener deletions of iova tree -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 114: vdpa: Fix memory listener deletions of iova tree -RH-Commit: [3/4] ad71f098b3fa8654962ac7872b5393c37c9825f2 (eperezmartin/qemu-kvm) -RH-Bugzilla: 2116876 -RH-Acked-by: Jason Wang 
-RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -vhost_vdpa_listener_region_del is always deleting the first iova entry -of the tree, since it's using the needle iova instead of the result's -one. - -This was detected using a vga virtual device in the VM using vdpa SVQ. -It makes some extra memory adding and deleting, so the wrong one was -mapped / unmapped. This was undetected before since all the memory was -mappend and unmapped totally without that device, but other conditions -could trigger it too: - -* mem_region was with .iova = 0, .translated_addr = (correct GPA). -* iova_tree_find_iova returned right result, but does not update - mem_region. -* iova_tree_remove always removed region with .iova = 0. Right iova were - sent to the device. -* Next map will fill the first region with .iova = 0, causing a mapping - with the same iova and device complains, if the next action is a map. -* Next unmap will cause to try to unmap again iova = 0, causing the - device to complain that no region was mapped at iova = 0. 
- -Fixes: 34e3c94edaef ("vdpa: Add custom IOTLB translations to SVQ") -Reported-by: Lei Yang -Signed-off-by: Eugenio Pérez -Signed-off-by: Jason Wang -(cherry picked from commit 75a8ce64f6e37513698857fb4284170da163ed06) ---- - hw/virtio/vhost-vdpa.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index f877b354fa..03dc6014b0 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -288,7 +288,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - - result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); - iova = result->iova; -- vhost_iova_tree_remove(v->iova_tree, &mem_region); -+ vhost_iova_tree_remove(v->iova_tree, result); - } - vhost_vdpa_iotlb_batch_begin_once(v); - ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch b/SOURCES/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch deleted file mode 100644 index e45a198..0000000 --- a/SOURCES/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch +++ /dev/null @@ -1,133 +0,0 @@ -From 8e36feb4d3480b7c09d9dcbde18c9db1e8063f18 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:06 +0200 -Subject: [PATCH 08/23] vdpa: Make SVQ vring unmapping return void -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/21] 3366340dc7ae65f83894f5d0da0d1e0f64713751 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -Nothing actually reads the return value, but an error in cleaning some -entries could cause device stop to abort, making a restart impossible. 
-Better ignore explicitely the return value. - -Reported-by: Lei Yang -Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit bb5cf89ef2338ab6be946ede6821c3f61347eb1b) ---- - hw/virtio/vhost-vdpa.c | 32 ++++++++++---------------------- - 1 file changed, 10 insertions(+), 22 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index e5c264fb29..8eddf39f2a 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -882,7 +882,7 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, - /** - * Unmap a SVQ area in the device - */ --static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, -+static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, - const DMAMap *needle) - { - const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle); -@@ -891,38 +891,33 @@ static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, - - if (unlikely(!result)) { - error_report("Unable to find SVQ address to unmap"); -- return false; -+ return; - } - - size = ROUND_UP(result->size, qemu_real_host_page_size); - r = vhost_vdpa_dma_unmap(v, result->iova, size); - if (unlikely(r < 0)) { - error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); -- return false; -+ return; - } - - vhost_iova_tree_remove(v->iova_tree, *result); -- return r == 0; - } - --static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, -+static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, - const VhostShadowVirtqueue *svq) - { - DMAMap needle = {}; - struct vhost_vdpa *v = dev->opaque; - struct vhost_vring_addr svq_addr; -- bool ok; - - vhost_svq_get_vring_addr(svq, &svq_addr); - - needle.translated_addr = svq_addr.desc_user_addr; -- ok = vhost_vdpa_svq_unmap_ring(v, &needle); -- if (unlikely(!ok)) { -- return false; -- } -+ vhost_vdpa_svq_unmap_ring(v, &needle); - - needle.translated_addr = svq_addr.used_user_addr; -- 
return vhost_vdpa_svq_unmap_ring(v, &needle); -+ vhost_vdpa_svq_unmap_ring(v, &needle); - } - - /** -@@ -1093,26 +1088,22 @@ err: - return false; - } - --static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev) -+static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) - { - struct vhost_vdpa *v = dev->opaque; - - if (!v->shadow_vqs) { -- return true; -+ return; - } - - for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); -- bool ok = vhost_vdpa_svq_unmap_rings(dev, svq); -- if (unlikely(!ok)) { -- return false; -- } -+ vhost_vdpa_svq_unmap_rings(dev, svq); - } - - if (v->migration_blocker) { - migrate_del_blocker(v->migration_blocker); - } -- return true; - } - - static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) -@@ -1129,10 +1120,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) - } - vhost_vdpa_set_vring_ready(dev); - } else { -- ok = vhost_vdpa_svqs_stop(dev); -- if (unlikely(!ok)) { -- return -1; -- } -+ vhost_vdpa_svqs_stop(dev); - vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); - } - --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch b/SOURCES/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch deleted file mode 100644 index 7cdf05c..0000000 --- a/SOURCES/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch +++ /dev/null @@ -1,251 +0,0 @@ -From 70c72316c26e95cd18b4d46b83e78ba3a148212c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:33 +0200 -Subject: [PATCH 18/23] vdpa: Move command buffers map to start of net device -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [17/21] 7a9824fa618f5c2904648b50e3078474cd3987aa 
(eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -As this series will reuse them to restore the device state at the end of -a migration (or a device start), let's allocate only once at the device -start so we don't duplicate their map and unmap. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit d7d73dec14cebcebd8de774424795aeb821236c1) ---- - net/vhost-vdpa.c | 123 ++++++++++++++++++++++------------------------- - 1 file changed, 58 insertions(+), 65 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 03e4cf1abc..17626feb8d 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -263,29 +263,20 @@ static size_t vhost_vdpa_net_cvq_cmd_page_len(void) - return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size); - } - --/** Copy and map a guest buffer. */ --static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, -- const struct iovec *out_data, -- size_t out_num, size_t data_len, void *buf, -- size_t *written, bool write) -+/** Map CVQ buffer. */ -+static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, -+ bool write) - { - DMAMap map = {}; - int r; - -- if (unlikely(!data_len)) { -- qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n", -- __func__, write ? "in" : "out"); -- return false; -- } -- -- *written = iov_to_buf(out_data, out_num, 0, buf, data_len); - map.translated_addr = (hwaddr)(uintptr_t)buf; -- map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1; -+ map.size = size - 1; - map.perm = write ? 
IOMMU_RW : IOMMU_RO, - r = vhost_iova_tree_map_alloc(v->iova_tree, &map); - if (unlikely(r != IOVA_OK)) { - error_report("Cannot map injected element"); -- return false; -+ return r; - } - - r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, -@@ -294,50 +285,58 @@ static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, - goto dma_map_err; - } - -- return true; -+ return 0; - - dma_map_err: - vhost_iova_tree_remove(v->iova_tree, map); -- return false; -+ return r; - } - --/** -- * Copy the guest element into a dedicated buffer suitable to be sent to NIC -- * -- * @iov: [0] is the out buffer, [1] is the in one -- */ --static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, -- VirtQueueElement *elem, -- struct iovec *iov) -+static int vhost_vdpa_net_cvq_start(NetClientState *nc) - { -- size_t in_copied; -- bool ok; -+ VhostVDPAState *s; -+ int r; - -- iov[0].iov_base = s->cvq_cmd_out_buffer; -- ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num, -- vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base, -- &iov[0].iov_len, false); -- if (unlikely(!ok)) { -- return false; -+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); -+ -+ s = DO_UPCAST(VhostVDPAState, nc, nc); -+ if (!s->vhost_vdpa.shadow_vqs_enabled) { -+ return 0; - } - -- iov[1].iov_base = s->cvq_cmd_in_buffer; -- ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0, -- sizeof(virtio_net_ctrl_ack), iov[1].iov_base, -- &in_copied, true); -- if (unlikely(!ok)) { -+ r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, -+ vhost_vdpa_net_cvq_cmd_page_len(), false); -+ if (unlikely(r < 0)) { -+ return r; -+ } -+ -+ r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer, -+ vhost_vdpa_net_cvq_cmd_page_len(), true); -+ if (unlikely(r < 0)) { - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); -- return false; - } - -- iov[1].iov_len = sizeof(virtio_net_ctrl_ack); -- return true; -+ return r; -+} -+ -+static void 
vhost_vdpa_net_cvq_stop(NetClientState *nc) -+{ -+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ -+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); -+ -+ if (s->vhost_vdpa.shadow_vqs_enabled) { -+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); -+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer); -+ } - } - - static NetClientInfo net_vhost_vdpa_cvq_info = { - .type = NET_CLIENT_DRIVER_VHOST_VDPA, - .size = sizeof(VhostVDPAState), - .receive = vhost_vdpa_receive, -+ .start = vhost_vdpa_net_cvq_start, -+ .stop = vhost_vdpa_net_cvq_stop, - .cleanup = vhost_vdpa_cleanup, - .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, - .has_ufo = vhost_vdpa_has_ufo, -@@ -348,19 +347,17 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { - * Do not forward commands not supported by SVQ. Otherwise, the device could - * accept it and qemu would not know how to update the device model. - */ --static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out, -- size_t out_num) -+static bool vhost_vdpa_net_cvq_validate_cmd(const void *out_buf, size_t len) - { - struct virtio_net_ctrl_hdr ctrl; -- size_t n; - -- n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl)); -- if (unlikely(n < sizeof(ctrl))) { -+ if (unlikely(len < sizeof(ctrl))) { - qemu_log_mask(LOG_GUEST_ERROR, -- "%s: invalid legnth of out buffer %zu\n", __func__, n); -+ "%s: invalid legnth of out buffer %zu\n", __func__, len); - return false; - } - -+ memcpy(&ctrl, out_buf, sizeof(ctrl)); - switch (ctrl.class) { - case VIRTIO_NET_CTRL_MAC: - switch (ctrl.cmd) { -@@ -392,10 +389,14 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - VhostVDPAState *s = opaque; - size_t in_len, dev_written; - virtio_net_ctrl_ack status = VIRTIO_NET_ERR; -- /* out and in buffers sent to the device */ -- struct iovec dev_buffers[2] = { -- { .iov_base = s->cvq_cmd_out_buffer }, -- { .iov_base = s->cvq_cmd_in_buffer }, -+ /* Out buffer sent to both the vdpa device and the device 
model */ -+ struct iovec out = { -+ .iov_base = s->cvq_cmd_out_buffer, -+ }; -+ /* In buffer sent to the device */ -+ const struct iovec dev_in = { -+ .iov_base = s->cvq_cmd_in_buffer, -+ .iov_len = sizeof(virtio_net_ctrl_ack), - }; - /* in buffer used for device model */ - const struct iovec in = { -@@ -405,17 +406,15 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - int r = -EINVAL; - bool ok; - -- ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers); -- if (unlikely(!ok)) { -- goto out; -- } -- -- ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1); -+ out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, -+ s->cvq_cmd_out_buffer, -+ vhost_vdpa_net_cvq_cmd_len()); -+ ok = vhost_vdpa_net_cvq_validate_cmd(s->cvq_cmd_out_buffer, out.iov_len); - if (unlikely(!ok)) { - goto out; - } - -- r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem); -+ r = vhost_svq_add(svq, &out, 1, &dev_in, 1, elem); - if (unlikely(r != 0)) { - if (unlikely(r == -ENOSPC)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", -@@ -435,13 +434,13 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - goto out; - } - -- memcpy(&status, dev_buffers[1].iov_base, sizeof(status)); -+ memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status)); - if (status != VIRTIO_NET_OK) { - goto out; - } - - status = VIRTIO_NET_ERR; -- virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1); -+ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1); - if (status != VIRTIO_NET_OK) { - error_report("Bad CVQ processing in model"); - } -@@ -454,12 +453,6 @@ out: - } - vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); - g_free(elem); -- if (dev_buffers[0].iov_base) { -- vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base); -- } -- if (dev_buffers[1].iov_base) { -- vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base); -- } - return r; - } - --- -2.31.1 - diff --git 
a/SOURCES/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch b/SOURCES/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch deleted file mode 100644 index b23d64f..0000000 --- a/SOURCES/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 51c1e9cf1612727ec4c6e795576ae8fa0c0b2d4c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:05 +0200 -Subject: [PATCH 07/23] vdpa: Remove SVQ vring from iova_tree at shutdown -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/21] f72e67b9c90103151cbf86bff53e8f14b30f0e5b (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -Although the device will be reset before usage, the right thing to do is -to clean it. 
- -Reported-by: Lei Yang -Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") -Signed-off-by: Eugenio Pérez -Signed-off-by: Jason Wang -(cherry picked from commit 0c45fa6c420ec3a1dd9ea9c40fa11bd943bb3be9) ---- - hw/virtio/vhost-vdpa.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 39aa70f52d..e5c264fb29 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -896,6 +896,12 @@ static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, - - size = ROUND_UP(result->size, qemu_real_host_page_size); - r = vhost_vdpa_dma_unmap(v, result->iova, size); -+ if (unlikely(r < 0)) { -+ error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); -+ return false; -+ } -+ -+ vhost_iova_tree_remove(v->iova_tree, *result); - return r == 0; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch b/SOURCES/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch deleted file mode 100644 index 98697cb..0000000 --- a/SOURCES/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch +++ /dev/null @@ -1,48 +0,0 @@ -From edde0b6a805085255bccc0ccdc3b9b6f81cef37b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:02 +0200 -Subject: [PATCH 03/23] vdpa: Skip the maps not in the iova tree -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/21] 73acd16375a17cdf4c58830386541dd3a1b18bf7 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -Next patch will skip the registering of dma maps that the vdpa device -rejects in the iova tree. We need to consider that here or we cause a -SIGSEGV accessing result. 
- -Reported-by: Lei Yang -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit a92ca0ffee5858636432a6059eb2790df1c9c77f) ---- - hw/virtio/vhost-vdpa.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 96334ab5b6..aa7765c6bc 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -287,6 +287,10 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - }; - - result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); -+ if (!result) { -+ /* The memory listener map wasn't mapped */ -+ return; -+ } - iova = result->iova; - vhost_iova_tree_remove(v->iova_tree, result); - } --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch b/SOURCES/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch deleted file mode 100644 index 8398415..0000000 --- a/SOURCES/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 89a67e0ce3e4c7b9f9b2d4cfb9fc5eeebc5643ac Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:08 +0200 -Subject: [PATCH 10/23] vdpa: Use ring hwaddr at vhost_vdpa_svq_unmap_ring -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/21] 4420134d7be60fa8b04dc9a56566524bf8daddd4 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -Reduce code duplication. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 5a92452da95b2edfbffdd42ddc2612a7d09a5db0) ---- - hw/virtio/vhost-vdpa.c | 17 ++++++++--------- - 1 file changed, 8 insertions(+), 9 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 8eddf39f2a..0bea1e1eb9 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -882,10 +882,12 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, - /** - * Unmap a SVQ area in the device - */ --static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, -- const DMAMap *needle) -+static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) - { -- const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle); -+ const DMAMap needle = { -+ .translated_addr = addr, -+ }; -+ const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, &needle); - hwaddr size; - int r; - -@@ -907,17 +909,14 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, - static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, - const VhostShadowVirtqueue *svq) - { -- DMAMap needle = {}; - struct vhost_vdpa *v = dev->opaque; - struct vhost_vring_addr svq_addr; - - vhost_svq_get_vring_addr(svq, &svq_addr); - -- needle.translated_addr = svq_addr.desc_user_addr; -- vhost_vdpa_svq_unmap_ring(v, &needle); -+ vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr); - -- needle.translated_addr = svq_addr.used_user_addr; -- vhost_vdpa_svq_unmap_ring(v, &needle); -+ vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr); - } - - /** -@@ -995,7 +994,7 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev, - ok = vhost_vdpa_svq_map_ring(v, &device_region, errp); - if (unlikely(!ok)) { - error_prepend(errp, "Cannot create vq device region: "); -- vhost_vdpa_svq_unmap_ring(v, &driver_region); -+ vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr); - } - addr->used_user_addr = device_region.iova; - --- -2.31.1 - diff 
--git a/SOURCES/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch b/SOURCES/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch new file mode 100644 index 0000000..a56c6eb --- /dev/null +++ b/SOURCES/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch @@ -0,0 +1,221 @@ +From d0e7f24a8d941ab142f2a1973ae18ed1bfdc074f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:41 +0100 +Subject: [PATCH 09/14] vdpa: add asid parameter to vhost_vdpa_dma_map/unmap +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [9/13] 3e7f89e57f73661017ccf0206f2ea77a72ca46bb (eperezmartin/qemu-kvm) + +So the caller can choose which ASID is destined. + +No need to update the batch functions as they will always be called from +memory listener updates at the moment. Memory listener updates will +always update ASID 0, as it's the passthrough ASID. + +All vhost devices's ASID are 0 at this moment. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-10-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit cd831ed5c4add8ed6ee980c3645b241cbef5130f) +--- + hw/virtio/trace-events | 4 ++-- + hw/virtio/vhost-vdpa.c | 36 +++++++++++++++++++++++----------- + include/hw/virtio/vhost-vdpa.h | 14 ++++++++++--- + net/vhost-vdpa.c | 6 +++--- + 4 files changed, 41 insertions(+), 19 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 46f2faf04e..a87c5f39a2 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -30,8 +30,8 @@ vhost_user_write(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32"" + vhost_user_create_notifier(int idx, void *n) "idx:%d n:%p" + + # vhost-vdpa.c +-vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 +-vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 ++vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 ++vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 + vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 + vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 + vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 
0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d" +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index dd2768634b..0ecf2bbaa0 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -72,22 +72,28 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, + return false; + } + +-int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, +- void *vaddr, bool readonly) ++/* ++ * The caller must set asid = 0 if the device does not support asid. ++ * This is not an ABI break since it is set to 0 by the initializer anyway. ++ */ ++int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, ++ hwaddr size, void *vaddr, bool readonly) + { + struct vhost_msg_v2 msg = {}; + int fd = v->device_fd; + int ret = 0; + + msg.type = v->msg_type; ++ msg.asid = asid; + msg.iotlb.iova = iova; + msg.iotlb.size = size; + msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr; + msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW; + msg.iotlb.type = VHOST_IOTLB_UPDATE; + +- trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size, +- msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type); ++ trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.asid, msg.iotlb.iova, ++ msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm, ++ msg.iotlb.type); + + if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { + error_report("failed to write, fd=%d, errno=%d (%s)", +@@ -98,18 +104,24 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, + return ret; + } + +-int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size) ++/* ++ * The caller must set asid = 0 if the device does not support asid. ++ * This is not an ABI break since it is set to 0 by the initializer anyway. 
++ */ ++int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, ++ hwaddr size) + { + struct vhost_msg_v2 msg = {}; + int fd = v->device_fd; + int ret = 0; + + msg.type = v->msg_type; ++ msg.asid = asid; + msg.iotlb.iova = iova; + msg.iotlb.size = size; + msg.iotlb.type = VHOST_IOTLB_INVALIDATE; + +- trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova, ++ trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.asid, msg.iotlb.iova, + msg.iotlb.size, msg.iotlb.type); + + if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { +@@ -229,8 +241,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + } + + vhost_vdpa_iotlb_batch_begin_once(v); +- ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize), +- vaddr, section->readonly); ++ ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova, ++ int128_get64(llsize), vaddr, section->readonly); + if (ret) { + error_report("vhost vdpa map fail!"); + goto fail_map; +@@ -303,7 +315,8 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, + vhost_iova_tree_remove(v->iova_tree, *result); + } + vhost_vdpa_iotlb_batch_begin_once(v); +- ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); ++ ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, ++ int128_get64(llsize)); + if (ret) { + error_report("vhost_vdpa dma unmap error!"); + } +@@ -876,7 +889,7 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) + } + + size = ROUND_UP(result->size, qemu_real_host_page_size()); +- r = vhost_vdpa_dma_unmap(v, result->iova, size); ++ r = vhost_vdpa_dma_unmap(v, v->address_space_id, result->iova, size); + if (unlikely(r < 0)) { + error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); + return; +@@ -916,7 +929,8 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, + return false; + } + +- r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1, ++ r = vhost_vdpa_dma_map(v, v->address_space_id, needle->iova, ++ 
needle->size + 1, + (void *)(uintptr_t)needle->translated_addr, + needle->perm == IOMMU_RO); + if (unlikely(r != 0)) { +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index 1111d85643..e57dfa1fd1 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -19,6 +19,12 @@ + #include "hw/virtio/virtio.h" + #include "standard-headers/linux/vhost_types.h" + ++/* ++ * ASID dedicated to map guest's addresses. If SVQ is disabled it maps GPA to ++ * qemu's IOVA. If SVQ is enabled it maps also the SVQ vring here ++ */ ++#define VHOST_VDPA_GUEST_PA_ASID 0 ++ + typedef struct VhostVDPAHostNotifier { + MemoryRegion mr; + void *addr; +@@ -29,6 +35,7 @@ typedef struct vhost_vdpa { + int index; + uint32_t msg_type; + bool iotlb_batch_begin_sent; ++ uint32_t address_space_id; + MemoryListener listener; + struct vhost_vdpa_iova_range iova_range; + uint64_t acked_features; +@@ -42,8 +49,9 @@ typedef struct vhost_vdpa { + VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; + } VhostVDPA; + +-int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, +- void *vaddr, bool readonly); +-int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size); ++int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, ++ hwaddr size, void *vaddr, bool readonly); ++int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, ++ hwaddr size); + + #endif +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 85aa0da39a..c2f319eb88 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -258,7 +258,7 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) + return; + } + +- r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1); ++ r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1); + if (unlikely(r != 0)) { + error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); + } +@@ -298,8 +298,8 @@ static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, 
void *buf, size_t size, + return r; + } + +- r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, +- !write); ++ r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova, ++ vhost_vdpa_net_cvq_cmd_page_len(), buf, !write); + if (unlikely(r < 0)) { + goto dma_map_err; + } +-- +2.31.1 + diff --git a/SOURCES/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch b/SOURCES/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch deleted file mode 100644 index e1da31d..0000000 --- a/SOURCES/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch +++ /dev/null @@ -1,62 +0,0 @@ -From f92b0ef80b4889ae0beb0b2a026ec3892d576d79 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:32 +0200 -Subject: [PATCH 17/23] vdpa: add net_vhost_vdpa_cvq_info NetClientInfo -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [16/21] c80c9fd89e81fc389e7d02e9d764331ab9fc7a0a (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -Next patches will add a new info callback to restore NIC status through -CVQ. Since only the CVQ vhost device is needed, create it with a new -NetClientInfo. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 9d379453404303069f93f9b8163ae3805bcd8c2e) ---- - net/vhost-vdpa.c | 12 +++++++++++- - 1 file changed, 11 insertions(+), 1 deletion(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index ba65736f83..03e4cf1abc 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -334,6 +334,16 @@ static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, - return true; - } - -+static NetClientInfo net_vhost_vdpa_cvq_info = { -+ .type = NET_CLIENT_DRIVER_VHOST_VDPA, -+ .size = sizeof(VhostVDPAState), -+ .receive = vhost_vdpa_receive, -+ .cleanup = vhost_vdpa_cleanup, -+ .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, -+ .has_ufo = vhost_vdpa_has_ufo, -+ .check_peer_type = vhost_vdpa_check_peer_type, -+}; -+ - /** - * Do not forward commands not supported by SVQ. Otherwise, the device could - * accept it and qemu would not know how to update the device model. -@@ -475,7 +485,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, - name); - } else { -- nc = qemu_new_net_control_client(&net_vhost_vdpa_info, peer, -+ nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, - device, name); - } - snprintf(nc->info_str, sizeof(nc->info_str), TYPE_VHOST_VDPA); --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch b/SOURCES/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch new file mode 100644 index 0000000..57c38d1 --- /dev/null +++ b/SOURCES/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch @@ -0,0 +1,94 @@ +From 6282a83619f274ca45a52d61577c10a05a0714dc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:43 +0100 +Subject: [PATCH 11/14] vdpa: add shadow_data to vhost_vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in 
Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [11/13] 9d317add1318b555ba06e19e4c67849069e047b9 (eperezmartin/qemu-kvm) + +The memory listener that thells the device how to convert GPA to qemu's +va is registered against CVQ vhost_vdpa. memory listener translations +are always ASID 0, CVQ ones are ASID 1 if supported. + +Let's tell the listener if it needs to register them on iova tree or +not. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-12-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 6188d78a19894ac8f2bf9484d48a5235a529d3b7) +--- + hw/virtio/vhost-vdpa.c | 6 +++--- + include/hw/virtio/vhost-vdpa.h | 2 ++ + net/vhost-vdpa.c | 1 + + 3 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 0ecf2bbaa0..dc3498e995 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -224,7 +224,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + vaddr, section->readonly); + + llsize = int128_sub(llend, int128_make64(iova)); +- if (v->shadow_vqs_enabled) { ++ if (v->shadow_data) { + int r; + + mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, +@@ -251,7 +251,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + return; + + fail_map: +- if (v->shadow_vqs_enabled) { ++ if (v->shadow_data) { + vhost_iova_tree_remove(v->iova_tree, mem_region); + } + +@@ -296,7 +296,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, + + llsize = int128_sub(llend, int128_make64(iova)); + +- if (v->shadow_vqs_enabled) { ++ if (v->shadow_data) { + const DMAMap *result; + const void *vaddr = memory_region_get_ram_ptr(section->mr) + + section->offset_within_region + +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index 
e57dfa1fd1..45b969a311 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -40,6 +40,8 @@ typedef struct vhost_vdpa { + struct vhost_vdpa_iova_range iova_range; + uint64_t acked_features; + bool shadow_vqs_enabled; ++ /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ ++ bool shadow_data; + /* IOVA mapping used by the Shadow Virtqueue */ + VhostIOVATree *iova_tree; + GPtrArray *shadow_vqs; +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 1757f1d028..eea7a0df12 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -581,6 +581,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + s->always_svq = svq; + s->vhost_vdpa.shadow_vqs_enabled = svq; + s->vhost_vdpa.iova_range = iova_range; ++ s->vhost_vdpa.shadow_data = svq; + s->vhost_vdpa.iova_tree = iova_tree; + if (!is_datapath) { + s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), +-- +2.31.1 + diff --git a/SOURCES/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch b/SOURCES/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch new file mode 100644 index 0000000..c54a831 --- /dev/null +++ b/SOURCES/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch @@ -0,0 +1,76 @@ +From 0f3a28e1e128754184c4af6a578f27e16c6a61d5 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:37 +0100 +Subject: [PATCH 05/14] vdpa: add vhost_vdpa_net_valid_svq_features +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/13] 0b27e04f178ec73cb800f4fb05c17a92576142e4 (eperezmartin/qemu-kvm) + +It will be reused at vdpa device start so let's extract in its own +function. 
+ +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-6-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 36e4647247f200b6fa4d2f656133f567036e8a85) +--- + net/vhost-vdpa.c | 26 +++++++++++++++++--------- + 1 file changed, 17 insertions(+), 9 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index b06540ac89..16a5ebe2dd 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -106,6 +106,22 @@ VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) + return s->vhost_net; + } + ++static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) ++{ ++ uint64_t invalid_dev_features = ++ features & ~vdpa_svq_device_features & ++ /* Transport are all accepted at this point */ ++ ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, ++ VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); ++ ++ if (invalid_dev_features) { ++ error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, ++ invalid_dev_features); ++ } ++ ++ return !invalid_dev_features; ++} ++ + static int vhost_vdpa_net_check_device_id(struct vhost_net *net) + { + uint32_t device_id; +@@ -684,15 +700,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + if (opts->x_svq) { + struct vhost_vdpa_iova_range iova_range; + +- uint64_t invalid_dev_features = +- features & ~vdpa_svq_device_features & +- /* Transport are all accepted at this point */ +- ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, +- VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); +- +- if (invalid_dev_features) { +- error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, +- invalid_dev_features); ++ if (!vhost_vdpa_net_valid_svq_features(features, errp)) { + goto err_svq; + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-vdpa-allocate-SVQ-array-unconditionally.patch b/SOURCES/kvm-vdpa-allocate-SVQ-array-unconditionally.patch new file mode 100644 index 0000000..22c5955 --- /dev/null +++ 
b/SOURCES/kvm-vdpa-allocate-SVQ-array-unconditionally.patch @@ -0,0 +1,50 @@ +From 72f296870805750df8dfe5eaad77dd7d435a8f41 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:40 +0100 +Subject: [PATCH 08/14] vdpa: allocate SVQ array unconditionally +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/13] 08cd86d0859f82d768794e29241cfeff25df667c (eperezmartin/qemu-kvm) + +SVQ may run or not in a device depending on runtime conditions (for +example, if the device can move CVQ to its own group or not). + +Allocate the SVQ array unconditionally at startup, since its hard to +move this allocation elsewhere. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-9-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 273e0003f0005cc17292dedae01e5edb0064b69c) +--- + hw/virtio/vhost-vdpa.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 84218ce078..dd2768634b 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -532,10 +532,6 @@ static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev) + struct vhost_vdpa *v = dev->opaque; + size_t idx; + +- if (!v->shadow_vqs) { +- return; +- } +- + for (idx = 0; idx < v->shadow_vqs->len; ++idx) { + vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx)); + } +-- +2.31.1 + diff --git a/SOURCES/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch b/SOURCES/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch new file mode 100644 index 0000000..9b78b5c --- /dev/null +++ b/SOURCES/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch @@ -0,0 +1,193 @@ +From 84c203faa570b85eec006215768c83371c9f0399 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:44 +0100 +Subject: [PATCH 12/14] vdpa: always start CVQ in SVQ mode if possible +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [12/13] 83f94b3e163ca38d08dbf7c111a4cfa7a44e3dc2 (eperezmartin/qemu-kvm) + +Isolate control virtqueue in its own group, allowing to intercept control +commands but letting dataplane run totally passthrough to the guest. + +Signed-off-by: Eugenio Pérez +Message-Id: <20221215113144.322011-13-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +(cherry picked from commit c1a1008685af0327d9d03f03d43bdb77e7af5bea) +--- + hw/virtio/vhost-vdpa.c | 3 +- + net/vhost-vdpa.c | 110 ++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 111 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index dc3498e995..72ff06673c 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -638,7 +638,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + { + uint64_t features; + uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | +- 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH; ++ 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | ++ 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID; + int r; + + if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index eea7a0df12..07d33dae26 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -101,6 +101,8 @@ static const uint64_t vdpa_svq_device_features = + BIT_ULL(VIRTIO_NET_F_RSC_EXT) | + BIT_ULL(VIRTIO_NET_F_STANDBY); + ++#define VHOST_VDPA_NET_CVQ_ASID 1 ++ + VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); +@@ -242,6 +244,40 @@ static NetClientInfo net_vhost_vdpa_info = { + .check_peer_type = vhost_vdpa_check_peer_type, + }; + ++static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) ++{ ++ struct vhost_vring_state state = { ++ .index = vq_index, ++ }; ++ int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state); ++ ++ if (unlikely(r < 0)) { ++ error_report("Cannot get VQ %u group: %s", vq_index, ++ g_strerror(errno)); ++ return r; ++ } ++ ++ return state.num; ++} ++ ++static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v, ++ unsigned vq_group, ++ unsigned asid_num) ++{ ++ struct vhost_vring_state asid = { ++ .index = vq_group, ++ .num = asid_num, ++ }; ++ int r; ++ ++ r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid); ++ if 
(unlikely(r < 0)) { ++ error_report("Can't set vq group %u asid %u, errno=%d (%s)", ++ asid.index, asid.num, errno, g_strerror(errno)); ++ } ++ return r; ++} ++ + static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) + { + VhostIOVATree *tree = v->iova_tree; +@@ -316,11 +352,75 @@ dma_map_err: + static int vhost_vdpa_net_cvq_start(NetClientState *nc) + { + VhostVDPAState *s; +- int r; ++ struct vhost_vdpa *v; ++ uint64_t backend_features; ++ int64_t cvq_group; ++ int cvq_index, r; + + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + + s = DO_UPCAST(VhostVDPAState, nc, nc); ++ v = &s->vhost_vdpa; ++ ++ v->shadow_data = s->always_svq; ++ v->shadow_vqs_enabled = s->always_svq; ++ s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; ++ ++ if (s->always_svq) { ++ /* SVQ is already configured for all virtqueues */ ++ goto out; ++ } ++ ++ /* ++ * If we early return in these cases SVQ will not be enabled. The migration ++ * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. ++ * ++ * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev ++ * yet. ++ */ ++ r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); ++ if (unlikely(r < 0)) { ++ error_report("Cannot get vdpa backend_features: %s(%d)", ++ g_strerror(errno), errno); ++ return -1; ++ } ++ if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) || ++ !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { ++ return 0; ++ } ++ ++ /* ++ * Check if all the virtqueues of the virtio device are in a different vq ++ * than the last vq. VQ group of last group passed in cvq_group. 
++ */ ++ cvq_index = v->dev->vq_index_end - 1; ++ cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); ++ if (unlikely(cvq_group < 0)) { ++ return cvq_group; ++ } ++ for (int i = 0; i < cvq_index; ++i) { ++ int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); ++ ++ if (unlikely(group < 0)) { ++ return group; ++ } ++ ++ if (group == cvq_group) { ++ return 0; ++ } ++ } ++ ++ r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); ++ if (unlikely(r < 0)) { ++ return r; ++ } ++ ++ v->iova_tree = vhost_iova_tree_new(v->iova_range.first, ++ v->iova_range.last); ++ v->shadow_vqs_enabled = true; ++ s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID; ++ ++out: + if (!s->vhost_vdpa.shadow_vqs_enabled) { + return 0; + } +@@ -349,6 +449,14 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) + if (s->vhost_vdpa.shadow_vqs_enabled) { + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status); ++ if (!s->always_svq) { ++ /* ++ * If only the CVQ is shadowed we can delete this safely. ++ * If all the VQs are shadows this will be needed by the time the ++ * device is started again to register SVQ vrings and similar. 
++ */ ++ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); ++ } + } + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch b/SOURCES/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch new file mode 100644 index 0000000..d800258 --- /dev/null +++ b/SOURCES/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch @@ -0,0 +1,44 @@ +From fbb177ad84d562a20e51e71c73257d2ef85be2d9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Wed, 21 Dec 2022 12:50:15 +0100 +Subject: [PATCH 4/9] vdpa: do not handle VIRTIO_NET_F_GUEST_ANNOUNCE in + vhost-vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 137: vDPA net SVQ guest announce support +RH-Bugzilla: 2141088 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Jason Wang +RH-Commit: [4/4] b3960a8b3e4ca569b1b1e6ceccf2051d8c4b1079 (eperezmartin/qemu-kvm) + +So qemu emulates it even in case the device does not support it. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221221115015.1400889-5-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 980003debddd18306ea2e1364b96598383c0e257) +--- + net/vhost-vdpa.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 52ef9cb3a2..b06540ac89 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -72,7 +72,6 @@ const int vdpa_feature_bits[] = { + VIRTIO_F_RING_RESET, + VIRTIO_NET_F_RSS, + VIRTIO_NET_F_HASH_REPORT, +- VIRTIO_NET_F_GUEST_ANNOUNCE, + VIRTIO_NET_F_STATUS, + VHOST_INVALID_FEATURE_BIT + }; +-- +2.31.1 + diff --git a/SOURCES/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch b/SOURCES/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch deleted file mode 100644 index 8c66f19..0000000 --- a/SOURCES/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 6d16102aca24bab16c846fe6457071f4466b8e35 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:03 +0200 -Subject: [PATCH 04/23] vdpa: do not save failed dma maps in SVQ iova tree -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/21] f9bea39f7fa14c5ef0f85774cbad0ca3b52c4498 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -If a map fails for whatever reason, it must not be saved in the tree. -Otherwise, qemu will try to unmap it in cleanup, leaving to more errors. 
- -Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") -Reported-by: Lei Yang -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 6cc2ec65382fde205511ac00a324995ce6ee8f28) ---- - hw/virtio/vhost-vdpa.c | 20 +++++++++++++------- - 1 file changed, 13 insertions(+), 7 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index aa7765c6bc..cc15b7d8ee 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -174,6 +174,7 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) - static void vhost_vdpa_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { -+ DMAMap mem_region = {}; - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); - hwaddr iova; - Int128 llend, llsize; -@@ -210,13 +211,13 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - - llsize = int128_sub(llend, int128_make64(iova)); - if (v->shadow_vqs_enabled) { -- DMAMap mem_region = { -- .translated_addr = (hwaddr)(uintptr_t)vaddr, -- .size = int128_get64(llsize) - 1, -- .perm = IOMMU_ACCESS_FLAG(true, section->readonly), -- }; -+ int r; - -- int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); -+ mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, -+ mem_region.size = int128_get64(llsize) - 1, -+ mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly), -+ -+ r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); - if (unlikely(r != IOVA_OK)) { - error_report("Can't allocate a mapping (%d)", r); - goto fail; -@@ -230,11 +231,16 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - vaddr, section->readonly); - if (ret) { - error_report("vhost vdpa map fail!"); -- goto fail; -+ goto fail_map; - } - - return; - -+fail_map: -+ if (v->shadow_vqs_enabled) { -+ vhost_iova_tree_remove(v->iova_tree, &mem_region); -+ } -+ - fail: - /* - * On the initfn path, store the first error in the 
container so we --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch b/SOURCES/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch deleted file mode 100644 index 3cc011f..0000000 --- a/SOURCES/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 56f4bebc591893e590481617da7cd7ecffeb166d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:34 +0200 -Subject: [PATCH 19/23] vdpa: extract vhost_vdpa_net_cvq_add from - vhost_vdpa_net_handle_ctrl_avail -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [18/21] 08ab71dbf050f5c2e97c622d1915f71a56c135b8 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -So we can reuse it to inject state messages. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang --- -v7: -* Remove double free error - -v6: -* Do not assume in buffer sent to the device is sizeof(virtio_net_ctrl_ack) - -v5: -* Do not use an artificial !NULL VirtQueueElement -* Use only out size instead of iovec dev_buffers for these functions. 
- -Signed-off-by: Jason Wang -(cherry picked from commit d9afb1f0ee4d662ed67d3bc1220b943f7e4cfa6f) ---- - net/vhost-vdpa.c | 59 +++++++++++++++++++++++++++++++----------------- - 1 file changed, 38 insertions(+), 21 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 17626feb8d..f09f044ec1 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -331,6 +331,38 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) - } - } - -+static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, -+ size_t in_len) -+{ -+ /* Buffers for the device */ -+ const struct iovec out = { -+ .iov_base = s->cvq_cmd_out_buffer, -+ .iov_len = out_len, -+ }; -+ const struct iovec in = { -+ .iov_base = s->cvq_cmd_in_buffer, -+ .iov_len = sizeof(virtio_net_ctrl_ack), -+ }; -+ VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); -+ int r; -+ -+ r = vhost_svq_add(svq, &out, 1, &in, 1, NULL); -+ if (unlikely(r != 0)) { -+ if (unlikely(r == -ENOSPC)) { -+ qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", -+ __func__); -+ } -+ return r; -+ } -+ -+ /* -+ * We can poll here since we've had BQL from the time we sent the -+ * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, -+ * when BQL is released -+ */ -+ return vhost_svq_poll(svq); -+} -+ - static NetClientInfo net_vhost_vdpa_cvq_info = { - .type = NET_CLIENT_DRIVER_VHOST_VDPA, - .size = sizeof(VhostVDPAState), -@@ -387,23 +419,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - void *opaque) - { - VhostVDPAState *s = opaque; -- size_t in_len, dev_written; -+ size_t in_len; - virtio_net_ctrl_ack status = VIRTIO_NET_ERR; - /* Out buffer sent to both the vdpa device and the device model */ - struct iovec out = { - .iov_base = s->cvq_cmd_out_buffer, - }; -- /* In buffer sent to the device */ -- const struct iovec dev_in = { -- .iov_base = s->cvq_cmd_in_buffer, -- .iov_len = sizeof(virtio_net_ctrl_ack), -- }; - /* in buffer used for device model */ - const struct iovec in = { - .iov_base = &status, - .iov_len = sizeof(status), - }; -- int r = -EINVAL; -+ ssize_t dev_written = -EINVAL; - bool ok; - - out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, -@@ -414,21 +441,11 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - goto out; - } - -- r = vhost_svq_add(svq, &out, 1, &dev_in, 1, elem); -- if (unlikely(r != 0)) { -- if (unlikely(r == -ENOSPC)) { -- qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", -- __func__); -- } -+ dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); -+ if (unlikely(dev_written < 0)) { - goto out; - } - -- /* -- * We can poll here since we've had BQL from the time we sent the -- * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, -- * when BQL is released -- */ -- dev_written = vhost_svq_poll(svq); - if (unlikely(dev_written < sizeof(status))) { - error_report("Insufficient written data (%zu)", dev_written); - goto out; -@@ -436,7 +453,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - - memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status)); - if (status != VIRTIO_NET_OK) { -- goto out; -+ return VIRTIO_NET_ERR; - } - - status = VIRTIO_NET_ERR; -@@ -453,7 +470,7 @@ out: - } - vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); - g_free(elem); -- return r; -+ return dev_written < 0 ? dev_written : 0; - } - - static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch b/SOURCES/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch new file mode 100644 index 0000000..bb55256 --- /dev/null +++ b/SOURCES/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch @@ -0,0 +1,48 @@ +From 46e80a9350a02fdb5689638df96bc7389e953cf8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 17 Jan 2023 11:53:08 +0100 +Subject: [PATCH 13/14] vdpa: fix VHOST_BACKEND_F_IOTLB_ASID flag check +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [13/13] b7fb4b8e9ea26b6664a9179ed0a88376acf5115f (eperezmartin/qemu-kvm) + +VHOST_BACKEND_F_IOTLB_ASID is the feature bit, not the bitmask. Since +the device under test also provided VHOST_BACKEND_F_IOTLB_MSG_V2 and +VHOST_BACKEND_F_IOTLB_BATCH, this went unnoticed. + +Fixes: c1a1008685 ("vdpa: always start CVQ in SVQ mode if possible") +Signed-off-by: Eugenio Pérez +Reviewed-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +Signed-off-by: Jason Wang + +Upstream status: git@github.com:jasowang/qemu.git +(cherry picked from commit 2bd492bca521ee8594f1d5db8dc9aac126fc4f85) +--- + net/vhost-vdpa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 07d33dae26..7d9c4ea09d 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -384,7 +384,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) + g_strerror(errno), errno); + return -1; + } +- if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) || ++ if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || + !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { + return 0; + } +-- +2.31.1 + diff --git a/SOURCES/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch b/SOURCES/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch new file mode 100644 index 0000000..ebb7f38 --- /dev/null +++ b/SOURCES/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch @@ -0,0 +1,59 @@ +From b71724e94c94acd6e09fed2b47be2901799c2353 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Wed, 21 Dec 2022 12:50:14 +0100 +Subject: [PATCH 3/9] vdpa: handle VIRTIO_NET_CTRL_ANNOUNCE in + vhost_vdpa_net_handle_ctrl_avail +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 137: vDPA net SVQ guest announce support +RH-Bugzilla: 2141088 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Jason Wang +RH-Commit: [3/4] c4ef5b62a5d41911565b8960a88bb48d746ff6c7 (eperezmartin/qemu-kvm) + +Since this capability is emulated by qemu shadowed CVQ cannot forward it +to the device. Process all that command within qemu. + +Signed-off-by: Eugenio Pérez +Message-Id: <20221221115015.1400889-4-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +(cherry picked from commit 3f9a3eeb7ca6acd899e2205a9118928b4cd94e47) +--- + net/vhost-vdpa.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 2b4b85d8f8..52ef9cb3a2 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -489,9 +489,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, + s->cvq_cmd_out_buffer, + vhost_vdpa_net_cvq_cmd_len()); +- dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); +- if (unlikely(dev_written < 0)) { +- goto out; ++ if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) { ++ /* ++ * Guest announce capability is emulated by qemu, so don't forward to ++ * the device. ++ */ ++ dev_written = sizeof(status); ++ *s->status = VIRTIO_NET_OK; ++ } else { ++ dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); ++ if (unlikely(dev_written < 0)) { ++ goto out; ++ } + } + + if (unlikely(dev_written < sizeof(status))) { +-- +2.31.1 + diff --git a/SOURCES/kvm-vdpa-manual-forward-CVQ-buffers.patch b/SOURCES/kvm-vdpa-manual-forward-CVQ-buffers.patch deleted file mode 100644 index 61909ff..0000000 --- a/SOURCES/kvm-vdpa-manual-forward-CVQ-buffers.patch +++ /dev/null @@ -1,166 +0,0 @@ -From c33bc0b7f2b5cfa330a6d89d60ee94de129c65c1 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 16:05:38 +0200 -Subject: [PATCH 23/32] vdpa: manual forward CVQ buffers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [23/27] ce128d5152be7eebf87e186eb8b58c2ed95aff6d (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - 
-Upstream Status: git://git.qemu.org/qemu.git - -commit bd907ae4b00ebedad5e586af05ea3d6490318d45 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:42 2022 +0200 - - vdpa: manual forward CVQ buffers - - Do a simple forwarding of CVQ buffers, the same work SVQ could do but - through callbacks. No functional change intended. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-vdpa.c | 3 +- - include/hw/virtio/vhost-vdpa.h | 3 ++ - net/vhost-vdpa.c | 58 ++++++++++++++++++++++++++++++++++ - 3 files changed, 63 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 14b02fe079..49effe5462 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -417,7 +417,8 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, - for (unsigned n = 0; n < hdev->nvqs; ++n) { - g_autoptr(VhostShadowVirtqueue) svq; - -- svq = vhost_svq_new(v->iova_tree, NULL, NULL); -+ svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, -+ v->shadow_vq_ops_opaque); - if (unlikely(!svq)) { - error_setg(errp, "Cannot create svq %u", n); - return -1; -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index 7214eb47dc..1111d85643 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -15,6 +15,7 @@ - #include - - #include "hw/virtio/vhost-iova-tree.h" -+#include "hw/virtio/vhost-shadow-virtqueue.h" - #include "hw/virtio/virtio.h" - #include "standard-headers/linux/vhost_types.h" - -@@ -35,6 +36,8 @@ typedef struct vhost_vdpa { - /* IOVA mapping used by the Shadow Virtqueue */ - VhostIOVATree *iova_tree; - GPtrArray *shadow_vqs; -+ const VhostShadowVirtqueueOps *shadow_vq_ops; -+ void *shadow_vq_ops_opaque; - struct vhost_dev *dev; - VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; - } VhostVDPA; -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index df1e69ee72..2e3b6b10d8 
100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -11,11 +11,14 @@ - - #include "qemu/osdep.h" - #include "clients.h" -+#include "hw/virtio/virtio-net.h" - #include "net/vhost_net.h" - #include "net/vhost-vdpa.h" - #include "hw/virtio/vhost-vdpa.h" - #include "qemu/config-file.h" - #include "qemu/error-report.h" -+#include "qemu/log.h" -+#include "qemu/memalign.h" - #include "qemu/option.h" - #include "qapi/error.h" - #include -@@ -187,6 +190,57 @@ static NetClientInfo net_vhost_vdpa_info = { - .check_peer_type = vhost_vdpa_check_peer_type, - }; - -+/** -+ * Forward buffer for the moment. -+ */ -+static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, -+ VirtQueueElement *elem, -+ void *opaque) -+{ -+ unsigned int n = elem->out_num + elem->in_num; -+ g_autofree struct iovec *dev_buffers = g_new(struct iovec, n); -+ size_t in_len, dev_written; -+ virtio_net_ctrl_ack status = VIRTIO_NET_ERR; -+ int r; -+ -+ memcpy(dev_buffers, elem->out_sg, elem->out_num); -+ memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num); -+ -+ r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1], -+ elem->in_num, elem); -+ if (unlikely(r != 0)) { -+ if (unlikely(r == -ENOSPC)) { -+ qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", -+ __func__); -+ } -+ goto out; -+ } -+ -+ /* -+ * We can poll here since we've had BQL from the time we sent the -+ * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, -+ * when BQL is released -+ */ -+ dev_written = vhost_svq_poll(svq); -+ if (unlikely(dev_written < sizeof(status))) { -+ error_report("Insufficient written data (%zu)", dev_written); -+ } -+ -+out: -+ in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, -+ sizeof(status)); -+ if (unlikely(in_len < sizeof(status))) { -+ error_report("Bad device CVQ written length"); -+ } -+ vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); -+ g_free(elem); -+ return r; -+} -+ -+static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { -+ .avail_handler = vhost_vdpa_net_handle_ctrl_avail, -+}; -+ - static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - const char *device, - const char *name, -@@ -211,6 +265,10 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.device_fd = vdpa_device_fd; - s->vhost_vdpa.index = queue_pair_index; -+ if (!is_datapath) { -+ s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; -+ s->vhost_vdpa.shadow_vq_ops_opaque = s; -+ } - ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); - if (ret) { - qemu_del_net_client(nc); --- -2.31.1 - diff --git a/SOURCES/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch b/SOURCES/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch new file mode 100644 index 0000000..7cda847 --- /dev/null +++ b/SOURCES/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch @@ -0,0 +1,118 @@ +From 63a45add7c9f7bb2b7775ae4cb2d7df22f7f2033 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:39 +0100 +Subject: [PATCH 07/14] vdpa: move SVQ vring features check to net/ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: 
[7/13] a24189aea4dbde3ed4486f685d0d88aeee1a0ee7 (eperezmartin/qemu-kvm) + +The next patches will start control SVQ if possible. However, we don't +know if that will be possible at qemu boot anymore. + +Since the moved checks will be already evaluated at net/ to know if it +is ok to shadow CVQ, move them. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-8-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 258a03941fd23108a322d09abc9c55341e09688d) +--- + hw/virtio/vhost-vdpa.c | 32 ++------------------------------ + net/vhost-vdpa.c | 3 ++- + 2 files changed, 4 insertions(+), 31 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 9e7cbf1776..84218ce078 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -389,29 +389,9 @@ static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, + return ret; + } + +-static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, +- Error **errp) ++static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v) + { + g_autoptr(GPtrArray) shadow_vqs = NULL; +- uint64_t dev_features, svq_features; +- int r; +- bool ok; +- +- if (!v->shadow_vqs_enabled) { +- return 0; +- } +- +- r = vhost_vdpa_get_dev_features(hdev, &dev_features); +- if (r != 0) { +- error_setg_errno(errp, -r, "Can't get vdpa device features"); +- return r; +- } +- +- svq_features = dev_features; +- ok = vhost_svq_valid_features(svq_features, errp); +- if (unlikely(!ok)) { +- return -1; +- } + + shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); + for (unsigned n = 0; n < hdev->nvqs; ++n) { +@@ -422,7 +402,6 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, + } + + v->shadow_vqs = g_steal_pointer(&shadow_vqs); +- return 0; + } + + static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) +@@ -447,10 +426,7 @@ static int 
vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) + dev->opaque = opaque ; + v->listener = vhost_vdpa_memory_listener; + v->msg_type = VHOST_IOTLB_MSG_V2; +- ret = vhost_vdpa_init_svq(dev, v, errp); +- if (ret) { +- goto err; +- } ++ vhost_vdpa_init_svq(dev, v); + + if (!vhost_vdpa_first_dev(dev)) { + return 0; +@@ -460,10 +436,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) + VIRTIO_CONFIG_S_DRIVER); + + return 0; +- +-err: +- ram_block_discard_disable(false); +- return ret; + } + + static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev, +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 8d3ed095d0..85aa0da39a 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -117,9 +117,10 @@ static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) + if (invalid_dev_features) { + error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, + invalid_dev_features); ++ return false; + } + +- return !invalid_dev_features; ++ return vhost_svq_valid_features(features, errp); + } + + static int vhost_vdpa_net_check_device_id(struct vhost_net *net) +-- +2.31.1 + diff --git a/SOURCES/kvm-vdpa-request-iova_range-only-once.patch b/SOURCES/kvm-vdpa-request-iova_range-only-once.patch new file mode 100644 index 0000000..041e8f7 --- /dev/null +++ b/SOURCES/kvm-vdpa-request-iova_range-only-once.patch @@ -0,0 +1,145 @@ +From 760169d538a4e6ba61006f6796cd55af967a7f1e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:38 +0100 +Subject: [PATCH 06/14] vdpa: request iova_range only once +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/13] 2a8ae2f46ae88f01c5535038f38cb7895098b610 (eperezmartin/qemu-kvm) + +Currently 
iova range is requested once per queue pair in the case of +net. Reduce the number of ioctls asking it once at initialization and +reusing that value for each vhost_vdpa. + +Signed-off-by: Eugenio Pérez +Message-Id: <20221215113144.322011-7-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Acked-by: Jason Wang +(cherry picked from commit a585fad26b2e6ccca156d9e65158ad1c5efd268d) +--- + hw/virtio/vhost-vdpa.c | 15 --------------- + net/vhost-vdpa.c | 27 ++++++++++++++------------- + 2 files changed, 14 insertions(+), 28 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index e65603022f..9e7cbf1776 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -365,19 +365,6 @@ static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) + return 0; + } + +-static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) +-{ +- int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE, +- &v->iova_range); +- if (ret != 0) { +- v->iova_range.first = 0; +- v->iova_range.last = UINT64_MAX; +- } +- +- trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first, +- v->iova_range.last); +-} +- + /* + * The use of this function is for requests that only need to be + * applied once. 
Typically such request occurs at the beginning +@@ -465,8 +452,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) + goto err; + } + +- vhost_vdpa_get_iova_range(v); +- + if (!vhost_vdpa_first_dev(dev)) { + return 0; + } +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 16a5ebe2dd..8d3ed095d0 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -549,14 +549,15 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { + }; + + static NetClientState *net_vhost_vdpa_init(NetClientState *peer, +- const char *device, +- const char *name, +- int vdpa_device_fd, +- int queue_pair_index, +- int nvqs, +- bool is_datapath, +- bool svq, +- VhostIOVATree *iova_tree) ++ const char *device, ++ const char *name, ++ int vdpa_device_fd, ++ int queue_pair_index, ++ int nvqs, ++ bool is_datapath, ++ bool svq, ++ struct vhost_vdpa_iova_range iova_range, ++ VhostIOVATree *iova_tree) + { + NetClientState *nc = NULL; + VhostVDPAState *s; +@@ -575,6 +576,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + s->vhost_vdpa.device_fd = vdpa_device_fd; + s->vhost_vdpa.index = queue_pair_index; + s->vhost_vdpa.shadow_vqs_enabled = svq; ++ s->vhost_vdpa.iova_range = iova_range; + s->vhost_vdpa.iova_tree = iova_tree; + if (!is_datapath) { + s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), +@@ -654,6 +656,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + int vdpa_device_fd; + g_autofree NetClientState **ncs = NULL; + g_autoptr(VhostIOVATree) iova_tree = NULL; ++ struct vhost_vdpa_iova_range iova_range; + NetClientState *nc; + int queue_pairs, r, i = 0, has_cvq = 0; + +@@ -697,14 +700,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + return queue_pairs; + } + ++ vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); + if (opts->x_svq) { +- struct vhost_vdpa_iova_range iova_range; +- + if (!vhost_vdpa_net_valid_svq_features(features, errp)) { + goto err_svq; + } + +- 
vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); + iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); + } + +@@ -713,7 +714,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + for (i = 0; i < queue_pairs; i++) { + ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, + vdpa_device_fd, i, 2, true, opts->x_svq, +- iova_tree); ++ iova_range, iova_tree); + if (!ncs[i]) + goto err; + } +@@ -721,7 +722,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + if (has_cvq) { + nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, + vdpa_device_fd, i, 1, false, +- opts->x_svq, iova_tree); ++ opts->x_svq, iova_range, iova_tree); + if (!nc) + goto err; + } +-- +2.31.1 + diff --git a/SOURCES/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch b/SOURCES/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch new file mode 100644 index 0000000..68c0c86 --- /dev/null +++ b/SOURCES/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch @@ -0,0 +1,62 @@ +From 28163d7d61b6b0b8312b78d57dabc8f44bf39c46 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:42 +0100 +Subject: [PATCH 10/14] vdpa: store x-svq parameter in VhostVDPAState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [10/13] 53f3b2698b4a5caca434f55e4300103a78778548 (eperezmartin/qemu-kvm) + +CVQ can be shadowed two ways: +- Device has x-svq=on parameter (current way) +- The device can isolate CVQ in its own vq group + +QEMU needs to check for the second condition dynamically, because CVQ +index is not known before the driver ack the features. 
Since this is +dynamic, the CVQ isolation could vary with different conditions, making +it possible to go from "not isolated group" to "isolated". + +Saving the cmdline parameter in an extra field so we never disable CVQ +SVQ in case the device was started with x-svq cmdline. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-11-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 7f211a28fd5482f76583988beecd8ee61588d45e) +--- + net/vhost-vdpa.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index c2f319eb88..1757f1d028 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -38,6 +38,8 @@ typedef struct VhostVDPAState { + void *cvq_cmd_out_buffer; + virtio_net_ctrl_ack *status; + ++ /* The device always have SVQ enabled */ ++ bool always_svq; + bool started; + } VhostVDPAState; + +@@ -576,6 +578,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.device_fd = vdpa_device_fd; + s->vhost_vdpa.index = queue_pair_index; ++ s->always_svq = svq; + s->vhost_vdpa.shadow_vqs_enabled = svq; + s->vhost_vdpa.iova_range = iova_range; + s->vhost_vdpa.iova_tree = iova_tree; +-- +2.31.1 + diff --git a/SOURCES/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch b/SOURCES/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch new file mode 100644 index 0000000..3d11438 --- /dev/null +++ b/SOURCES/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch @@ -0,0 +1,58 @@ +From cb974f2f9a0c5b9520b6ac80bd1d1e4a6b12bbdc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:33 +0100 +Subject: [PATCH 01/14] vdpa: use v->shadow_vqs_enabled in + vhost_vdpa_svqs_start & stop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu 
+RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/13] f0db50a95f87dd011418617be7b80aa6813a1146 (eperezmartin/qemu-kvm) + +This function used to trust in v->shadow_vqs != NULL to know if it must +start svq or not. + +This is not going to be valid anymore, as qemu is going to allocate svq +array unconditionally (but it will only start them conditionally). + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-2-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 712c1a3171cf62d501dac5af58f77d5fea70350d) +--- + hw/virtio/vhost-vdpa.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index c5be2645b0..44e6a9b7b3 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1036,7 +1036,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) + Error *err = NULL; + unsigned i; + +- if (!v->shadow_vqs) { ++ if (!v->shadow_vqs_enabled) { + return true; + } + +@@ -1089,7 +1089,7 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) + { + struct vhost_vdpa *v = dev->opaque; + +- if (!v->shadow_vqs) { ++ if (!v->shadow_vqs_enabled) { + return; + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch b/SOURCES/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch deleted file mode 100644 index 26083c1..0000000 --- a/SOURCES/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch +++ /dev/null @@ -1,114 +0,0 @@ -From b90a5878355bd549200ed1eff52ea084325bfc8a Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Fri, 6 May 2022 15:25:10 +0200 -Subject: [PATCH 5/5] vfio/common: remove spurious tpm-crb-cmd misalignment - warning -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 
84: vfio/common: Remove spurious tpm-crb-cmd misalignment warning -RH-Commit: [2/2] 9b73a9aec59cb50d5e3468cc553464bf4a73d0a1 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2037612 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2037612 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45166961 -Upstream Status: YES -Tested: With TPM-CRB and VFIO - -The CRB command buffer currently is a RAM MemoryRegion and given -its base address alignment, it causes an error report on -vfio_listener_region_add(). This region could have been a RAM device -region, easing the detection of such safe situation but this option -was not well received. So let's add a helper function that uses the -memory region owner type to detect the situation is safe wrt -the assignment. Other device types can be checked here if such kind -of problem occurs again. - -Conflicts in hw/vfio/common.c -We don't have 8e3b0cbb721 ("Replace qemu_real_host_page variables with inlined functions") - -Signed-off-by: Eric Auger -Reviewed-by: Philippe Mathieu-Daudé -Acked-by: Stefan Berger -Reviewed-by: Cornelia Huck -Link: https://lore.kernel.org/r/20220506132510.1847942-3-eric.auger@redhat.com -Signed-off-by: Alex Williamson -(cherry picked from commit 851d6d1a0ff29a87ec588205842edf6b86d99b5c) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 27 ++++++++++++++++++++++++++- - hw/vfio/trace-events | 1 + - 2 files changed, 27 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 080046e3f5..0fbe0d47af 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -40,6 +40,7 @@ - #include "trace.h" - #include "qapi/error.h" - #include "migration/migration.h" -+#include "sysemu/tpm.h" - - VFIOGroupList vfio_group_list = - QLIST_HEAD_INITIALIZER(vfio_group_list); -@@ -861,6 +862,22 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container, - g_free(vrdl); - } - 
-+static bool vfio_known_safe_misalignment(MemoryRegionSection *section) -+{ -+ MemoryRegion *mr = section->mr; -+ -+ if (!TPM_IS_CRB(mr->owner)) { -+ return false; -+ } -+ -+ /* this is a known safe misaligned region, just trace for debug purpose */ -+ trace_vfio_known_safe_misalignment(memory_region_name(mr), -+ section->offset_within_address_space, -+ section->offset_within_region, -+ qemu_real_host_page_size); -+ return true; -+} -+ - static void vfio_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { -@@ -884,7 +901,15 @@ static void vfio_listener_region_add(MemoryListener *listener, - if (unlikely((section->offset_within_address_space & - ~qemu_real_host_page_mask) != - (section->offset_within_region & ~qemu_real_host_page_mask))) { -- error_report("%s received unaligned region", __func__); -+ if (!vfio_known_safe_misalignment(section)) { -+ error_report("%s received unaligned region %s iova=0x%"PRIx64 -+ " offset_within_region=0x%"PRIx64 -+ " qemu_real_host_page_size=0x%"PRIxPTR, -+ __func__, memory_region_name(section->mr), -+ section->offset_within_address_space, -+ section->offset_within_region, -+ qemu_real_host_page_size); -+ } - return; - } - -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 0ef1b5f4a6..582882db91 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -100,6 +100,7 @@ vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add - vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" - vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64 - vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" -+vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" 
qemu_real_host_page_size=0x%"PRIxPTR ": cannot be mapped for DMA" - vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" - vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 - vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 --- -2.31.1 - diff --git a/SOURCES/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch b/SOURCES/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch deleted file mode 100644 index 7e644c5..0000000 --- a/SOURCES/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 3de8fb9f3dba18d04efa10b70bcec641035effc5 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 24 May 2022 05:14:05 -0400 -Subject: [PATCH 16/16] vfio/common: remove spurious warning on - vfio_listener_region_del - -RH-Author: Eric Auger -RH-MergeRequest: 101: vfio/common: remove spurious warning on vfio_listener_region_del -RH-Commit: [1/1] dac688b8a981ebb964fea79ea198c329b9cdb551 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2086262 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck -RH-Acked-by: Alex Williamson - - Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2086262 - Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45876133 - Upstream Status: YES - Tested: With TPM-CRB and VFIO - -851d6d1a0f ("vfio/common: remove spurious tpm-crb-cmd misalignment -warning") removed the warning on vfio_listener_region_add() path. - -However the same warning also hits on region_del path. Let's remove -it and reword the dynamic trace as this can be called on both -map and unmap path. 
- -Contextual Conflict in hw/vfio/common.c -We don't have 8e3b0cbb721 ("Replace qemu_real_host_page variables with inlined functions") - -Signed-off-by: Eric Auger -Reviewed-by: Cornelia Huck -Link: https://lore.kernel.org/r/20220524091405.416256-1-eric.auger@redhat.com -Fixes: 851d6d1a0ff2 ("vfio/common: remove spurious tpm-crb-cmd misalignment warning") -Signed-off-by: Alex Williamson -(cherry picked from commit ec6600be0dc16982181c7ad80d94c143c0807dd2) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 10 +++++++++- - hw/vfio/trace-events | 2 +- - 2 files changed, 10 insertions(+), 2 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 0fbe0d47af..637981f9a1 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1145,7 +1145,15 @@ static void vfio_listener_region_del(MemoryListener *listener, - if (unlikely((section->offset_within_address_space & - ~qemu_real_host_page_mask) != - (section->offset_within_region & ~qemu_real_host_page_mask))) { -- error_report("%s received unaligned region", __func__); -+ if (!vfio_known_safe_misalignment(section)) { -+ error_report("%s received unaligned region %s iova=0x%"PRIx64 -+ " offset_within_region=0x%"PRIx64 -+ " qemu_real_host_page_size=0x%"PRIxPTR, -+ __func__, memory_region_name(section->mr), -+ section->offset_within_address_space, -+ section->offset_within_region, -+ qemu_real_host_page_size); -+ } - return; - } - -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 582882db91..73dffe9e00 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -100,7 +100,7 @@ vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add - vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" - vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64 - vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" 
--vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR ": cannot be mapped for DMA" -+vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR - vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" - vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 - vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Add-SVQDescState.patch b/SOURCES/kvm-vhost-Add-SVQDescState.patch deleted file mode 100644 index b1ea4bb..0000000 --- a/SOURCES/kvm-vhost-Add-SVQDescState.patch +++ /dev/null @@ -1,135 +0,0 @@ -From 14200f493243f73152ea4a4b97274f0ec4fb36fa Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 15/32] vhost: Add SVQDescState -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [15/27] 2e2866f22e37cace8598ff44dfcdc07fcc915d6d (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 9e87868fcaf5785c8e1490c290505fa32305ff91 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:34 2022 +0200 - - vhost: Add SVQDescState - - This will allow SVQ to add context to the different queue elements. 
- - This patch only store the actual element, no functional change intended. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++-------- - hw/virtio/vhost-shadow-virtqueue.h | 8 ++++++-- - 2 files changed, 14 insertions(+), 10 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 3cec03d709..a08e3d4025 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -256,7 +256,7 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, - return -EINVAL; - } - -- svq->ring_id_maps[qemu_head] = elem; -+ svq->desc_state[qemu_head].elem = elem; - vhost_svq_kick(svq); - return 0; - } -@@ -411,21 +411,21 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - return NULL; - } - -- if (unlikely(!svq->ring_id_maps[used_elem.id])) { -+ if (unlikely(!svq->desc_state[used_elem.id].elem)) { - qemu_log_mask(LOG_GUEST_ERROR, - "Device %s says index %u is used, but it was not available", - svq->vdev->name, used_elem.id); - return NULL; - } - -- num = svq->ring_id_maps[used_elem.id]->in_num + -- svq->ring_id_maps[used_elem.id]->out_num; -+ num = svq->desc_state[used_elem.id].elem->in_num + -+ svq->desc_state[used_elem.id].elem->out_num; - last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); - svq->desc_next[last_used_chain] = svq->free_head; - svq->free_head = used_elem.id; - - *len = used_elem.len; -- return g_steal_pointer(&svq->ring_id_maps[used_elem.id]); -+ return g_steal_pointer(&svq->desc_state[used_elem.id].elem); - } - - static void vhost_svq_flush(VhostShadowVirtqueue *svq, -@@ -595,7 +595,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - memset(svq->vring.desc, 0, driver_size); - svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); - 
memset(svq->vring.used, 0, device_size); -- svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); -+ svq->desc_state = g_new0(SVQDescState, svq->vring.num); - svq->desc_next = g_new0(uint16_t, svq->vring.num); - for (unsigned i = 0; i < svq->vring.num - 1; i++) { - svq->desc_next[i] = cpu_to_le16(i + 1); -@@ -620,7 +620,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - - for (unsigned i = 0; i < svq->vring.num; ++i) { - g_autofree VirtQueueElement *elem = NULL; -- elem = g_steal_pointer(&svq->ring_id_maps[i]); -+ elem = g_steal_pointer(&svq->desc_state[i].elem); - if (elem) { - virtqueue_detach_element(svq->vq, elem, 0); - } -@@ -632,7 +632,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - } - svq->vq = NULL; - g_free(svq->desc_next); -- g_free(svq->ring_id_maps); -+ g_free(svq->desc_state); - qemu_vfree(svq->vring.desc); - qemu_vfree(svq->vring.used); - } -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index c132c994e9..d646c35054 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -15,6 +15,10 @@ - #include "standard-headers/linux/vhost_types.h" - #include "hw/virtio/vhost-iova-tree.h" - -+typedef struct SVQDescState { -+ VirtQueueElement *elem; -+} SVQDescState; -+ - /* Shadow virtqueue to relay notifications */ - typedef struct VhostShadowVirtqueue { - /* Shadow vring */ -@@ -47,8 +51,8 @@ typedef struct VhostShadowVirtqueue { - /* IOVA mapping */ - VhostIOVATree *iova_tree; - -- /* Map for use the guest's descriptors */ -- VirtQueueElement **ring_id_maps; -+ /* SVQ vring descriptors state */ -+ SVQDescState *desc_state; - - /* Next VirtQueue element that guest made available */ - VirtQueueElement *next_guest_avail_elem; --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Add-svq-avail_handler-callback.patch b/SOURCES/kvm-vhost-Add-svq-avail_handler-callback.patch deleted file mode 100644 index a8b585d..0000000 --- 
a/SOURCES/kvm-vhost-Add-svq-avail_handler-callback.patch +++ /dev/null @@ -1,164 +0,0 @@ -From 433106c286a1961737300ebaece6f10b2747e7d8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 20/32] vhost: Add svq avail_handler callback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [20/27] d228eb89d204f8be623bc870503bbf0078dfc9ae (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit e966c0b781aebabd2c0f5eef91678f08ce1d068c -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:39 2022 +0200 - - vhost: Add svq avail_handler callback - - This allows external handlers to be aware of new buffers that the guest - places in the virtqueue. - - When this callback is defined the ownership of the guest's virtqueue - element is transferred to the callback. This means that if the user - wants to forward the descriptor it needs to manually inject it. The - callback is also free to process the command by itself and use the - element with svq_push. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 14 ++++++++++++-- - hw/virtio/vhost-shadow-virtqueue.h | 31 +++++++++++++++++++++++++++++- - hw/virtio/vhost-vdpa.c | 3 ++- - 3 files changed, 44 insertions(+), 4 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 95d0d7a7ee..e53aac45f6 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -306,7 +306,11 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - break; - } - -- r = vhost_svq_add_element(svq, elem); -+ if (svq->ops) { -+ r = svq->ops->avail_handler(svq, elem, svq->ops_opaque); -+ } else { -+ r = vhost_svq_add_element(svq, elem); -+ } - if (unlikely(r != 0)) { - if (r == -ENOSPC) { - /* -@@ -685,12 +689,16 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - * shadow methods and file descriptors. - * - * @iova_tree: Tree to perform descriptors translations -+ * @ops: SVQ owner callbacks -+ * @ops_opaque: ops opaque pointer - * - * Returns the new virtqueue or NULL. - * - * In case of error, reason is reported through error_report. 
- */ --VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) -+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, -+ const VhostShadowVirtqueueOps *ops, -+ void *ops_opaque) - { - g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); - int r; -@@ -712,6 +720,8 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) - event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); - event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); - svq->iova_tree = iova_tree; -+ svq->ops = ops; -+ svq->ops_opaque = ops_opaque; - return g_steal_pointer(&svq); - - err_init_hdev_call: -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index cf442f7dea..d04c34a589 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -25,6 +25,27 @@ typedef struct SVQDescState { - unsigned int ndescs; - } SVQDescState; - -+typedef struct VhostShadowVirtqueue VhostShadowVirtqueue; -+ -+/** -+ * Callback to handle an avail buffer. -+ * -+ * @svq: Shadow virtqueue -+ * @elem: Element placed in the queue by the guest -+ * @vq_callback_opaque: Opaque -+ * -+ * Returns 0 if the vq is running as expected. -+ * -+ * Note that ownership of elem is transferred to the callback. 
-+ */ -+typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq, -+ VirtQueueElement *elem, -+ void *vq_callback_opaque); -+ -+typedef struct VhostShadowVirtqueueOps { -+ VirtQueueAvailCallback avail_handler; -+} VhostShadowVirtqueueOps; -+ - /* Shadow virtqueue to relay notifications */ - typedef struct VhostShadowVirtqueue { - /* Shadow vring */ -@@ -69,6 +90,12 @@ typedef struct VhostShadowVirtqueue { - */ - uint16_t *desc_next; - -+ /* Caller callbacks */ -+ const VhostShadowVirtqueueOps *ops; -+ -+ /* Caller callbacks opaque */ -+ void *ops_opaque; -+ - /* Next head to expose to the device */ - uint16_t shadow_avail_idx; - -@@ -102,7 +129,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - VirtQueue *vq); - void vhost_svq_stop(VhostShadowVirtqueue *svq); - --VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree); -+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, -+ const VhostShadowVirtqueueOps *ops, -+ void *ops_opaque); - - void vhost_svq_free(gpointer vq); - G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free); -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 33dcaa135e..28df57b12e 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -416,8 +416,9 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, - - shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); - for (unsigned n = 0; n < hdev->nvqs; ++n) { -- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree); -+ g_autoptr(VhostShadowVirtqueue) svq; - -+ svq = vhost_svq_new(v->iova_tree, NULL, NULL); - if (unlikely(!svq)) { - error_setg(errp, "Cannot create svq %u", n); - return -1; --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch b/SOURCES/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch deleted file mode 100644 index 9b6155b..0000000 --- 
a/SOURCES/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 6cde15c70c86819033337771eb522e94e3ea9e34 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:07 +0200 -Subject: [PATCH 09/23] vhost: Always store new kick fd on - vhost_svq_set_svq_kick_fd -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/21] a09b8851c39d7cea67414560f6d322e988b9d59a (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -We can unbind twice a file descriptor if we call twice -vhost_svq_set_svq_kick_fd because of this. Since it comes from vhost and -not from SVQ, that file descriptor could be a different thing that -guest's vhost notifier. - -Likewise, it can happens the same if a guest start and stop the device -multiple times. 
- -Reported-by: Lei Yang -Fixes: dff4426fa6 ("vhost: Add Shadow VirtQueue kick forwarding capabilities") -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 6867f29c1425add7e0e8d1d8d58cc0ffbb8df0e4) ---- - hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index e53aac45f6..f420311b89 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -602,13 +602,13 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) - event_notifier_set_handler(svq_kick, NULL); - } - -+ event_notifier_init_fd(svq_kick, svq_kick_fd); - /* - * event_notifier_set_handler already checks for guest's notifications if - * they arrive at the new file descriptor in the switch, so there is no - * need to explicitly check for them. - */ - if (poll_start) { -- event_notifier_init_fd(svq_kick, svq_kick_fd); - event_notifier_set(svq_kick); - event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier); - } -@@ -655,7 +655,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - */ - void vhost_svq_stop(VhostShadowVirtqueue *svq) - { -- event_notifier_set_handler(&svq->svq_kick, NULL); -+ vhost_svq_set_svq_kick_fd(svq, VHOST_FILE_UNBIND); - g_autofree VirtQueueElement *next_avail_elem = NULL; - - if (!svq->vq) { --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch b/SOURCES/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch deleted file mode 100644 index 9b09d42..0000000 --- a/SOURCES/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 893dffb820973361bcef33612a6b924554a856c1 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 13/32] vhost: Check for queue full at vhost_svq_add 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [13/27] d4bd8299fb7733a1e190618dfc92b4b53b7bbeb3 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit f20b70eb5a68cfd8fef74a13ccdd494ef1cb0221 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:32 2022 +0200 - - vhost: Check for queue full at vhost_svq_add - - The series need to expose vhost_svq_add with full functionality, - including checking for full queue. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 59 +++++++++++++++++------------- - 1 file changed, 33 insertions(+), 26 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index e3fc3c2658..1d2bab287b 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -233,21 +233,29 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) - * Add an element to a SVQ. - * - * The caller must check that there is enough slots for the new element. It -- * takes ownership of the element: In case of failure, it is free and the SVQ -- * is considered broken. -+ * takes ownership of the element: In case of failure not ENOSPC, it is free. 
-+ * -+ * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full - */ --static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) -+static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) - { - unsigned qemu_head; -- bool ok = vhost_svq_add_split(svq, elem, &qemu_head); -+ unsigned ndescs = elem->in_num + elem->out_num; -+ bool ok; -+ -+ if (unlikely(ndescs > vhost_svq_available_slots(svq))) { -+ return -ENOSPC; -+ } -+ -+ ok = vhost_svq_add_split(svq, elem, &qemu_head); - if (unlikely(!ok)) { - g_free(elem); -- return false; -+ return -EINVAL; - } - - svq->ring_id_maps[qemu_head] = elem; - vhost_svq_kick(svq); -- return true; -+ return 0; - } - - /** -@@ -274,7 +282,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - - while (true) { - VirtQueueElement *elem; -- bool ok; -+ int r; - - if (svq->next_guest_avail_elem) { - elem = g_steal_pointer(&svq->next_guest_avail_elem); -@@ -286,25 +294,24 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - break; - } - -- if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) { -- /* -- * This condition is possible since a contiguous buffer in GPA -- * does not imply a contiguous buffer in qemu's VA -- * scatter-gather segments. If that happens, the buffer exposed -- * to the device needs to be a chain of descriptors at this -- * moment. -- * -- * SVQ cannot hold more available buffers if we are here: -- * queue the current guest descriptor and ignore further kicks -- * until some elements are used. -- */ -- svq->next_guest_avail_elem = elem; -- return; -- } -- -- ok = vhost_svq_add(svq, elem); -- if (unlikely(!ok)) { -- /* VQ is broken, just return and ignore any other kicks */ -+ r = vhost_svq_add(svq, elem); -+ if (unlikely(r != 0)) { -+ if (r == -ENOSPC) { -+ /* -+ * This condition is possible since a contiguous buffer in -+ * GPA does not imply a contiguous buffer in qemu's VA -+ * scatter-gather segments. 
If that happens, the buffer -+ * exposed to the device needs to be a chain of descriptors -+ * at this moment. -+ * -+ * SVQ cannot hold more available buffers if we are here: -+ * queue the current guest descriptor and ignore kicks -+ * until some elements are used. -+ */ -+ svq->next_guest_avail_elem = elem; -+ } -+ -+ /* VQ is full or broken, just return and ignore kicks */ - return; - } - } --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch b/SOURCES/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch deleted file mode 100644 index 6755aad..0000000 --- a/SOURCES/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 5c8de23e185a1a1f0b19eac3c9fa03411c9f545c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 14/32] vhost: Decouple vhost_svq_add from VirtQueueElement -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [14/27] 463087dd316adc91b9c7a4e6634c6fc1745c1849 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 1f46ae65d85f677b660bda46685dd3e94885a7cb -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:33 2022 +0200 - - vhost: Decouple vhost_svq_add from VirtQueueElement - - VirtQueueElement comes from the guest, but we're heading SVQ to be able - to modify the element presented to the device without the guest's - knowledge. - - To do so, make SVQ accept sg buffers directly, instead of using - VirtQueueElement. - - Add vhost_svq_add_element to maintain element convenience. - - Signed-off-by: Eugenio Pérez - Acked-by: Jason Wang - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 33 ++++++++++++++++++++---------- - 1 file changed, 22 insertions(+), 11 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 1d2bab287b..3cec03d709 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -172,30 +172,31 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, - } - - static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, -- VirtQueueElement *elem, unsigned *head) -+ const struct iovec *out_sg, size_t out_num, -+ const struct iovec *in_sg, size_t in_num, -+ unsigned *head) - { - unsigned avail_idx; - vring_avail_t *avail = svq->vring.avail; - bool ok; -- g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num)); -+ g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num)); - - *head = svq->free_head; - - /* We need some descriptors here */ -- if (unlikely(!elem->out_num && !elem->in_num)) { -+ if (unlikely(!out_num && !in_num)) { - qemu_log_mask(LOG_GUEST_ERROR, - "Guest provided element with no descriptors"); - return false; - } - -- ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, -- elem->in_num > 0, false); -+ ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0, -+ false); - if (unlikely(!ok)) { - return false; - } - -- ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, -- true); -+ ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true); - if (unlikely(!ok)) { - return false; - } -@@ -237,17 +238,19 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) - * - * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full - */ --static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) -+static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, -+ size_t out_num, 
const struct iovec *in_sg, -+ size_t in_num, VirtQueueElement *elem) - { - unsigned qemu_head; -- unsigned ndescs = elem->in_num + elem->out_num; -+ unsigned ndescs = in_num + out_num; - bool ok; - - if (unlikely(ndescs > vhost_svq_available_slots(svq))) { - return -ENOSPC; - } - -- ok = vhost_svq_add_split(svq, elem, &qemu_head); -+ ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); - if (unlikely(!ok)) { - g_free(elem); - return -EINVAL; -@@ -258,6 +261,14 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) - return 0; - } - -+/* Convenience wrapper to add a guest's element to SVQ */ -+static int vhost_svq_add_element(VhostShadowVirtqueue *svq, -+ VirtQueueElement *elem) -+{ -+ return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg, -+ elem->in_num, elem); -+} -+ - /** - * Forward available buffers. - * -@@ -294,7 +305,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - break; - } - -- r = vhost_svq_add(svq, elem); -+ r = vhost_svq_add_element(svq, elem); - if (unlikely(r != 0)) { - if (r == -ENOSPC) { - /* --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Delete-useless-read-memory-barrier.patch b/SOURCES/kvm-vhost-Delete-useless-read-memory-barrier.patch deleted file mode 100644 index f5aad51..0000000 --- a/SOURCES/kvm-vhost-Delete-useless-read-memory-barrier.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 773d1bb4e9ea9ca704372e52569955937f91f15c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:28 +0200 -Subject: [PATCH 13/23] vhost: Delete useless read memory barrier -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [12/21] 0e238fe934b1fc2c7e10b6f693468bc25ea3243f (eperezmartin/qemu-kvm) -Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -As discussed in previous series [1], this memory barrier is useless with -the atomic read of used idx at vhost_svq_more_used. Deleting it. - -[1] https://lists.nongnu.org/archive/html/qemu-devel/2022-07/msg02616.html - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit cdfb1612ba0f9b76367c96ce26ba94fedc7a0e61) ---- - hw/virtio/vhost-shadow-virtqueue.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 7792f3db1d..d36afbc547 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -509,9 +509,6 @@ size_t vhost_svq_poll(VhostShadowVirtqueue *svq) - if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { - return 0; - } -- -- /* Make sure we read new used_idx */ -- smp_rmb(); - } while (true); - } - --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch b/SOURCES/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch deleted file mode 100644 index 81ed89e..0000000 --- a/SOURCES/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 2f134d800a7ac521a637a0da2116b2603b12c8c0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:29 +0200 -Subject: [PATCH 14/23] vhost: Do not depend on !NULL VirtQueueElement on - vhost_svq_flush -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [13/21] 93ec7baa2a29031db25d86b7dc1a949388623370 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream 
status: git@github.com:jasowang/qemu.git net-next - -Since QEMU will be able to inject new elements on CVQ to restore the -state, we need not to depend on a VirtQueueElement to know if a new -element has been used by the device or not. Instead of check that, check -if there are new elements only using used idx on vhost_svq_flush. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 7599f71c11c08b90f173c35ded1aaa1fdca86f1b) ---- - hw/virtio/vhost-shadow-virtqueue.c | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index d36afbc547..c0e3c92e96 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -499,17 +499,20 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, - size_t vhost_svq_poll(VhostShadowVirtqueue *svq) - { - int64_t start_us = g_get_monotonic_time(); -+ uint32_t len; -+ - do { -- uint32_t len; -- VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); -- if (elem) { -- return len; -+ if (vhost_svq_more_used(svq)) { -+ break; - } - - if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { - return 0; - } - } while (true); -+ -+ vhost_svq_get_buf(svq, &len); -+ return len; - } - - /** --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Expose-vhost_svq_add.patch b/SOURCES/kvm-vhost-Expose-vhost_svq_add.patch deleted file mode 100644 index 70dc774..0000000 --- a/SOURCES/kvm-vhost-Expose-vhost_svq_add.patch +++ /dev/null @@ -1,73 +0,0 @@ -From cefd6583a8483c7a80f9cde8f7ad4705983af9e7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 18/32] vhost: Expose vhost_svq_add -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [18/27] 
bfb44f597d350336113783bcc9b3c9d9d32ff8c0 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit d0291f3f284d3bc220cdb13b0d8ac8a44eb5fd4c -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:37 2022 +0200 - - vhost: Expose vhost_svq_add - - This allows external parts of SVQ to forward custom buffers to the - device. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 6 +++--- - hw/virtio/vhost-shadow-virtqueue.h | 3 +++ - 2 files changed, 6 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 1ce52d5b4a..cb879e7b88 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -238,9 +238,9 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) - * - * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full - */ --static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, -- size_t out_num, const struct iovec *in_sg, -- size_t in_num, VirtQueueElement *elem) -+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, -+ size_t out_num, const struct iovec *in_sg, size_t in_num, -+ VirtQueueElement *elem) - { - unsigned qemu_head; - unsigned ndescs = in_num + out_num; -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index d9fc1f1799..dd78f4bec2 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -86,6 +86,9 @@ bool vhost_svq_valid_features(uint64_t features, Error **errp); - - void vhost_svq_push_elem(VhostShadowVirtqueue *svq, - const VirtQueueElement *elem, uint32_t len); -+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, 
-+ size_t out_num, const struct iovec *in_sg, size_t in_num, -+ VirtQueueElement *elem); - - void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); - void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch b/SOURCES/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch deleted file mode 100644 index f149c05..0000000 --- a/SOURCES/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 793d6d56190397624efdcaf6e0112bd12e39c05d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:25:01 +0200 -Subject: [PATCH 02/32] vhost: Fix device's used descriptor dequeue -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [2/27] b92803a0681c94c65d243dd07424522387594760 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 81abfa5724c9a6502d7a1d3a67c55f2a303a1170 -Author: Eugenio Pérez -Date: Thu May 12 19:57:43 2022 +0200 - - vhost: Fix device's used descriptor dequeue - - Only the first one of them were properly enqueued back. - - Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding") - - Signed-off-by: Eugenio Pérez - Message-Id: <20220512175747.142058-3-eperezma@redhat.com> - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Michael S. 
Tsirkin - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 17 +++++++++++++++-- - 1 file changed, 15 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 3155801f50..31fc50907d 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -334,12 +334,22 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) - svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); - } - -+static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq, -+ uint16_t num, uint16_t i) -+{ -+ for (uint16_t j = 0; j < (num - 1); ++j) { -+ i = le16_to_cpu(svq->desc_next[i]); -+ } -+ -+ return i; -+} -+ - static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - uint32_t *len) - { - const vring_used_t *used = svq->vring.used; - vring_used_elem_t used_elem; -- uint16_t last_used; -+ uint16_t last_used, last_used_chain, num; - - if (!vhost_svq_more_used(svq)) { - return NULL; -@@ -365,7 +375,10 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - return NULL; - } - -- svq->desc_next[used_elem.id] = svq->free_head; -+ num = svq->ring_id_maps[used_elem.id]->in_num + -+ svq->ring_id_maps[used_elem.id]->out_num; -+ last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); -+ svq->desc_next[last_used_chain] = svq->free_head; - svq->free_head = used_elem.id; - - *len = used_elem.len; --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch b/SOURCES/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch deleted file mode 100644 index 51eb700..0000000 --- a/SOURCES/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch +++ /dev/null @@ -1,68 +0,0 @@ -From aa99cf129923e0203c0caeb3b4e94a0eb973746f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:36:38 +0200 -Subject: [PATCH 04/32] vhost: Fix element in 
vhost_svq_add failure -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [4/27] 96689c99a47dd49591c0d126cb1fbb975b2f79b4 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 5181db132b587754dda3a520eec923b87a65bbb7 -Author: Eugenio Pérez -Date: Thu May 12 19:57:47 2022 +0200 - - vhost: Fix element in vhost_svq_add failure - - Coverity rightly reports that is not free in that case. - - Fixes: Coverity CID 1487559 - Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding") - - Signed-off-by: Eugenio Pérez - Message-Id: <20220512175747.142058-7-eperezma@redhat.com> - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Michael S. Tsirkin - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 31fc50907d..06d0bb39d9 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -199,11 +199,19 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, - return true; - } - -+/** -+ * Add an element to a SVQ. -+ * -+ * The caller must check that there is enough slots for the new element. It -+ * takes ownership of the element: In case of failure, it is free and the SVQ -+ * is considered broken. 
-+ */ - static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) - { - unsigned qemu_head; - bool ok = vhost_svq_add_split(svq, elem, &qemu_head); - if (unlikely(!ok)) { -+ g_free(elem); - return false; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Get-vring-base-from-vq-not-svq.patch b/SOURCES/kvm-vhost-Get-vring-base-from-vq-not-svq.patch deleted file mode 100644 index 1c8e586..0000000 --- a/SOURCES/kvm-vhost-Get-vring-base-from-vq-not-svq.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 3f2ba7cce6b272a8b5c8953e8923e799e4aa7b88 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Mon, 18 Jul 2022 14:05:45 +0200 -Subject: [PATCH 02/23] vhost: Get vring base from vq, not svq -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/21] e7e0294bbc98f69ccdbc4af4715857e77b017f80 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: Merged - -The SVQ vring used idx usually match with the guest visible one, as long -as all the guest buffers (GPA) maps to exactly one buffer within qemu's -VA. However, as we can see in virtqueue_map_desc, a single guest buffer -could map to many buffers in SVQ vring. - -Also, its also a mistake to rewind them at the source of migration. -Since VirtQueue is able to migrate the inflight descriptors, its -responsability of the destination to perform the rewind just in case it -cannot report the inflight descriptors to the device. - -This makes easier to migrate between backends or to recover them in -vhost devices that support set in flight descriptors. 
- -Fixes: 6d0b22266633 ("vdpa: Adapt vhost_vdpa_get_vring_base to SVQ") -Signed-off-by: Eugenio Pérez -Signed-off-by: Jason Wang -(cherry picked from commit 2fdac348fd3d243bb964937236af3cc27ae7af2b) ---- - hw/virtio/vhost-vdpa.c | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 03dc6014b0..96334ab5b6 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1177,7 +1177,18 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev, - struct vhost_vring_state *ring) - { - struct vhost_vdpa *v = dev->opaque; -+ VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index); - -+ /* -+ * vhost-vdpa devices does not support in-flight requests. Set all of them -+ * as available. -+ * -+ * TODO: This is ok for networking, but other kinds of devices might -+ * have problems with these retransmissions. -+ */ -+ while (virtqueue_rewind(vq, 1)) { -+ continue; -+ } - if (v->shadow_vqs_enabled) { - /* - * Device vring base was set at device start. SVQ base is handled by -@@ -1193,21 +1204,10 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, - struct vhost_vring_state *ring) - { - struct vhost_vdpa *v = dev->opaque; -- int vdpa_idx = ring->index - dev->vq_index; - int ret; - - if (v->shadow_vqs_enabled) { -- VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); -- -- /* -- * Setting base as last used idx, so destination will see as available -- * all the entries that the device did not use, including the in-flight -- * processing ones. -- * -- * TODO: This is ok for networking, but other kinds of devices might -- * have problems with these retransmissions. 
-- */ -- ring->num = svq->last_used_idx; -+ ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index); - return 0; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch b/SOURCES/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch deleted file mode 100644 index 513d7b4..0000000 --- a/SOURCES/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 3a944d8cd3d35b2398ff68d9ed8ea51d27dfab3c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 12/32] vhost: Move vhost_svq_kick call to vhost_svq_add -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [12/27] 29a7e1fb4992c4beca1e9a3379bb4c8a0f567459 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 98b5adef8493a2bfad6655cfee84299e88bedbf7 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:31 2022 +0200 - - vhost: Move vhost_svq_kick call to vhost_svq_add - - The series needs to expose vhost_svq_add with full functionality, - including kick - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 05cd39d1eb..e3fc3c2658 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -246,6 +246,7 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) - } - - svq->ring_id_maps[qemu_head] = elem; -+ vhost_svq_kick(svq); - return true; - } - -@@ -306,7 +307,6 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - /* VQ is broken, just return and ignore any other kicks */ - return; - } -- vhost_svq_kick(svq); - } - - virtio_queue_set_notification(svq->vq, true); --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Reorder-vhost_svq_kick.patch b/SOURCES/kvm-vhost-Reorder-vhost_svq_kick.patch deleted file mode 100644 index f61f3c3..0000000 --- a/SOURCES/kvm-vhost-Reorder-vhost_svq_kick.patch +++ /dev/null @@ -1,88 +0,0 @@ -From fdbf66e4c70de16ab36d70ea591322b1b24df591 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 11/32] vhost: Reorder vhost_svq_kick -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [11/27] 1d08b97eb3960a0f85f2dd48c3331b803f7ea205 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit d93a2405ca6efa9dc1c420cee5a34bd8242818d0 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:30 2022 +0200 - - vhost: Reorder vhost_svq_kick - - Future code needs to call it from vhost_svq_add. - - No functional change intended. 
- - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 28 ++++++++++++++-------------- - 1 file changed, 14 insertions(+), 14 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 9c46c3a8fa..05cd39d1eb 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -215,6 +215,20 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, - return true; - } - -+static void vhost_svq_kick(VhostShadowVirtqueue *svq) -+{ -+ /* -+ * We need to expose the available array entries before checking the used -+ * flags -+ */ -+ smp_mb(); -+ if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { -+ return; -+ } -+ -+ event_notifier_set(&svq->hdev_kick); -+} -+ - /** - * Add an element to a SVQ. - * -@@ -235,20 +249,6 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) - return true; - } - --static void vhost_svq_kick(VhostShadowVirtqueue *svq) --{ -- /* -- * We need to expose the available array entries before checking the used -- * flags -- */ -- smp_mb(); -- if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { -- return; -- } -- -- event_notifier_set(&svq->hdev_kick); --} -- - /** - * Forward available buffers. 
- * --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch b/SOURCES/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch deleted file mode 100644 index 31bfccc..0000000 --- a/SOURCES/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 486647551223cc01f4dba87197030bbf4e674f0f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:24:48 +0200 -Subject: [PATCH 01/32] vhost: Track descriptor chain in private at SVQ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [1/27] 26d16dc383e3064ac6e4288d5c52b39fee0ad204 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 495fe3a78749c39c0e772c4e1a55d6cb8a7e5292 -Author: Eugenio Pérez -Date: Thu May 12 19:57:42 2022 +0200 - - vhost: Track descriptor chain in private at SVQ - - The device could have access to modify them, and it definitely have - access when we implement packed vq. Harden SVQ maintaining a private - copy of the descriptor chain. Other fields like buffer addresses are - already maintained sepparatedly. - - Signed-off-by: Eugenio Pérez - Message-Id: <20220512175747.142058-2-eperezma@redhat.com> - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Michael S. 
Tsirkin - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 12 +++++++----- - hw/virtio/vhost-shadow-virtqueue.h | 6 ++++++ - 2 files changed, 13 insertions(+), 5 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index b232803d1b..3155801f50 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -138,6 +138,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, - for (n = 0; n < num; n++) { - if (more_descs || (n + 1 < num)) { - descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT); -+ descs[i].next = cpu_to_le16(svq->desc_next[i]); - } else { - descs[i].flags = flags; - } -@@ -145,10 +146,10 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, - descs[i].len = cpu_to_le32(iovec[n].iov_len); - - last = i; -- i = cpu_to_le16(descs[i].next); -+ i = cpu_to_le16(svq->desc_next[i]); - } - -- svq->free_head = le16_to_cpu(descs[last].next); -+ svq->free_head = le16_to_cpu(svq->desc_next[last]); - } - - static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, -@@ -336,7 +337,6 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) - static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - uint32_t *len) - { -- vring_desc_t *descs = svq->vring.desc; - const vring_used_t *used = svq->vring.used; - vring_used_elem_t used_elem; - uint16_t last_used; -@@ -365,7 +365,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - return NULL; - } - -- descs[used_elem.id].next = svq->free_head; -+ svq->desc_next[used_elem.id] = svq->free_head; - svq->free_head = used_elem.id; - - *len = used_elem.len; -@@ -540,8 +540,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); - memset(svq->vring.used, 0, device_size); - svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); 
-+ svq->desc_next = g_new0(uint16_t, svq->vring.num); - for (unsigned i = 0; i < svq->vring.num - 1; i++) { -- svq->vring.desc[i].next = cpu_to_le16(i + 1); -+ svq->desc_next[i] = cpu_to_le16(i + 1); - } - } - -@@ -574,6 +575,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - virtqueue_detach_element(svq->vq, next_avail_elem, 0); - } - svq->vq = NULL; -+ g_free(svq->desc_next); - g_free(svq->ring_id_maps); - qemu_vfree(svq->vring.desc); - qemu_vfree(svq->vring.used); -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index e5e24c536d..c132c994e9 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -53,6 +53,12 @@ typedef struct VhostShadowVirtqueue { - /* Next VirtQueue element that guest made available */ - VirtQueueElement *next_guest_avail_elem; - -+ /* -+ * Backup next field for each descriptor so we can recover securely, not -+ * needing to trust the device access. -+ */ -+ uint16_t *desc_next; -+ - /* Next head to expose to the device */ - uint16_t shadow_avail_idx; - --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch b/SOURCES/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch deleted file mode 100644 index 6a2e147..0000000 --- a/SOURCES/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 24b8cf88f53f9fc7cb393c9cad908f759980bfee Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 16/32] vhost: Track number of descs in SVQDescState -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [16/27] 26f30cb6dd35c1eb1ddabe25113431bed3d744aa (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: 
https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit ac4cfdc6f39c06732d27554523f9d5f8a53b4ffa -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:35 2022 +0200 - - vhost: Track number of descs in SVQDescState - - A guest's buffer continuos on GPA may need multiple descriptors on - qemu's VA, so SVQ should track its length sepparatedly. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- - hw/virtio/vhost-shadow-virtqueue.h | 6 ++++++ - 2 files changed, 8 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index a08e3d4025..4d99075e73 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -257,6 +257,7 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, - } - - svq->desc_state[qemu_head].elem = elem; -+ svq->desc_state[qemu_head].ndescs = ndescs; - vhost_svq_kick(svq); - return 0; - } -@@ -418,8 +419,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - return NULL; - } - -- num = svq->desc_state[used_elem.id].elem->in_num + -- svq->desc_state[used_elem.id].elem->out_num; -+ num = svq->desc_state[used_elem.id].ndescs; - last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); - svq->desc_next[last_used_chain] = svq->free_head; - svq->free_head = used_elem.id; -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index d646c35054..5c7e7cbab6 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -17,6 +17,12 @@ - - typedef struct SVQDescState { - VirtQueueElement *elem; -+ -+ /* -+ * Number of descriptors exposed to the device. 
May or may not match -+ * guest's -+ */ -+ unsigned int ndescs; - } SVQDescState; - - /* Shadow virtqueue to relay notifications */ --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-add-support-for-configure-interrupt.patch b/SOURCES/kvm-vhost-add-support-for-configure-interrupt.patch new file mode 100644 index 0000000..a7cfb2f --- /dev/null +++ b/SOURCES/kvm-vhost-add-support-for-configure-interrupt.patch @@ -0,0 +1,185 @@ +From 42818e2bc6fa537fe52f7f0e6b094774a1eb00e1 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:48 +0800 +Subject: [PATCH 07/31] vhost: add support for configure interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/10] d58b439eb093f5dd3b7ca081af0ab75780e42917 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add functions to support configure interrupt. +The configure interrupt process will start in vhost_dev_start +and stop in vhost_dev_stop. + +Also add the functions to support vhost_config_pending and +vhost_config_mask. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-8-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit f9a09ca3ea69d108d828b7c82f1bd61b2df6fc96) +Signed-off-by: Cindy Lu +--- + hw/virtio/vhost.c | 78 ++++++++++++++++++++++++++++++++++++++- + include/hw/virtio/vhost.h | 4 ++ + 2 files changed, 81 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 7fb008bc9e..84dbb39e07 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -1596,7 +1596,68 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, + file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n); + r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file); + if (r < 0) { +- VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed"); ++ error_report("vhost_set_vring_call failed %d", -r); ++ } ++} ++ ++bool vhost_config_pending(struct vhost_dev *hdev) ++{ ++ assert(hdev->vhost_ops); ++ if ((hdev->started == false) || ++ (hdev->vhost_ops->vhost_set_config_call == NULL)) { ++ return false; ++ } ++ ++ EventNotifier *notifier = ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; ++ return event_notifier_test_and_clear(notifier); ++} ++ ++void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask) ++{ ++ int fd; ++ int r; ++ EventNotifier *notifier = ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; ++ EventNotifier *config_notifier = &vdev->config_notifier; ++ assert(hdev->vhost_ops); ++ ++ if ((hdev->started == false) || ++ (hdev->vhost_ops->vhost_set_config_call == NULL)) { ++ return; ++ } ++ if (mask) { ++ assert(vdev->use_guest_notifier_mask); ++ fd = event_notifier_get_fd(notifier); ++ } else { ++ fd = event_notifier_get_fd(config_notifier); ++ } ++ r = hdev->vhost_ops->vhost_set_config_call(hdev, fd); ++ if (r < 0) { ++ error_report("vhost_set_config_call failed %d", -r); ++ } ++} ++ ++static void vhost_stop_config_intr(struct vhost_dev *dev) ++{ ++ int fd = -1; ++ assert(dev->vhost_ops); ++ if (dev->vhost_ops->vhost_set_config_call) { ++ 
dev->vhost_ops->vhost_set_config_call(dev, fd); ++ } ++} ++ ++static void vhost_start_config_intr(struct vhost_dev *dev) ++{ ++ int r; ++ ++ assert(dev->vhost_ops); ++ int fd = event_notifier_get_fd(&dev->vdev->config_notifier); ++ if (dev->vhost_ops->vhost_set_config_call) { ++ r = dev->vhost_ops->vhost_set_config_call(dev, fd); ++ if (!r) { ++ event_notifier_set(&dev->vdev->config_notifier); ++ } + } + } + +@@ -1836,6 +1897,16 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + } + } + ++ r = event_notifier_init( ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier, 0); ++ if (r < 0) { ++ return r; ++ } ++ event_notifier_test_and_clear( ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); ++ if (!vdev->use_guest_notifier_mask) { ++ vhost_config_mask(hdev, vdev, true); ++ } + if (hdev->log_enabled) { + uint64_t log_base; + +@@ -1874,6 +1945,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + vhost_device_iotlb_miss(hdev, vq->used_phys, true); + } + } ++ vhost_start_config_intr(hdev); + return 0; + fail_start: + if (vrings) { +@@ -1903,6 +1975,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + + /* should only be called after backend is connected */ + assert(hdev->vhost_ops); ++ event_notifier_test_and_clear( ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); ++ event_notifier_test_and_clear(&vdev->config_notifier); + + trace_vhost_dev_stop(hdev, vdev->name, vrings); + +@@ -1925,6 +2000,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + } + memory_listener_unregister(&hdev->iommu_listener); + } ++ vhost_stop_config_intr(hdev); + vhost_log_put(hdev, true); + hdev->started = false; + vdev->vhost_started = false; +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 67a6807fac..05bedb2416 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -33,6 +33,7 @@ 
struct vhost_virtqueue { + unsigned used_size; + EventNotifier masked_notifier; + EventNotifier error_notifier; ++ EventNotifier masked_config_notifier; + struct vhost_dev *dev; + }; + +@@ -41,6 +42,7 @@ typedef unsigned long vhost_log_chunk_t; + #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t)) + #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) + #define VHOST_INVALID_FEATURE_BIT (0xff) ++#define VHOST_QUEUE_NUM_CONFIG_INR 0 + + struct vhost_log { + unsigned long long size; +@@ -168,6 +170,8 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); + * Disable direct notifications to vhost device. + */ + void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); ++bool vhost_config_pending(struct vhost_dev *hdev); ++void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask); + + /** + * vhost_dev_is_started() - report status of vhost device +-- +2.31.1 + diff --git a/SOURCES/kvm-vhost-add-vhost_svq_poll.patch b/SOURCES/kvm-vhost-add-vhost_svq_poll.patch deleted file mode 100644 index fa27e5e..0000000 --- a/SOURCES/kvm-vhost-add-vhost_svq_poll.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 0ab3da1092362470d256b433c546bd365d34f930 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 19/32] vhost: add vhost_svq_poll -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [19/27] 6807bb0bb6e5183b46a03b12b4027c7d767e8555 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 3f44d13dda83d390cc9563e56e7d337e4f6223f4 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:38 2022 +0200 - - vhost: add vhost_svq_poll - - It allows 
the Shadow Control VirtQueue to wait for the device to use the - available buffers. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 27 +++++++++++++++++++++++++++ - hw/virtio/vhost-shadow-virtqueue.h | 1 + - 2 files changed, 28 insertions(+) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index cb879e7b88..95d0d7a7ee 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -485,6 +485,33 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, - } while (!vhost_svq_enable_notification(svq)); - } - -+/** -+ * Poll the SVQ for one device used buffer. -+ * -+ * This function race with main event loop SVQ polling, so extra -+ * synchronization is needed. -+ * -+ * Return the length written by the device. -+ */ -+size_t vhost_svq_poll(VhostShadowVirtqueue *svq) -+{ -+ int64_t start_us = g_get_monotonic_time(); -+ do { -+ uint32_t len; -+ VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); -+ if (elem) { -+ return len; -+ } -+ -+ if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { -+ return 0; -+ } -+ -+ /* Make sure we read new used_idx */ -+ smp_rmb(); -+ } while (true); -+} -+ - /** - * Forward used buffers. 
- * -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index dd78f4bec2..cf442f7dea 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -89,6 +89,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq, - int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, - size_t out_num, const struct iovec *in_sg, size_t in_num, - VirtQueueElement *elem); -+size_t vhost_svq_poll(VhostShadowVirtqueue *svq); - - void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); - void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-add-vhost_svq_push_elem.patch b/SOURCES/kvm-vhost-add-vhost_svq_push_elem.patch deleted file mode 100644 index 2a9ec40..0000000 --- a/SOURCES/kvm-vhost-add-vhost_svq_push_elem.patch +++ /dev/null @@ -1,83 +0,0 @@ -From a26eb02b3a49c5d1163685ba5b83b67138c09047 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 17/32] vhost: add vhost_svq_push_elem -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [17/27] d064b40a262f2dfdc9f648d250aa8c8020c40385 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 432efd144e990b6e040862de25f8f0b6a6eeb03d -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:36 2022 +0200 - - vhost: add vhost_svq_push_elem - - This function allows external SVQ users to return guest's available - buffers. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++++++++++ - hw/virtio/vhost-shadow-virtqueue.h | 3 +++ - 2 files changed, 19 insertions(+) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 4d99075e73..1ce52d5b4a 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -428,6 +428,22 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - return g_steal_pointer(&svq->desc_state[used_elem.id].elem); - } - -+/** -+ * Push an element to SVQ, returning it to the guest. -+ */ -+void vhost_svq_push_elem(VhostShadowVirtqueue *svq, -+ const VirtQueueElement *elem, uint32_t len) -+{ -+ virtqueue_push(svq->vq, elem, len); -+ if (svq->next_guest_avail_elem) { -+ /* -+ * Avail ring was full when vhost_svq_flush was called, so it's a -+ * good moment to make more descriptors available if possible. -+ */ -+ vhost_handle_guest_kick(svq); -+ } -+} -+ - static void vhost_svq_flush(VhostShadowVirtqueue *svq, - bool check_for_avail_queue) - { -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index 5c7e7cbab6..d9fc1f1799 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -84,6 +84,9 @@ typedef struct VhostShadowVirtqueue { - - bool vhost_svq_valid_features(uint64_t features, Error **errp); - -+void vhost_svq_push_elem(VhostShadowVirtqueue *svq, -+ const VirtQueueElement *elem, uint32_t len); -+ - void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); - void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); - void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch b/SOURCES/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch new file mode 100644 index 0000000..940133b --- /dev/null 
+++ b/SOURCES/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch @@ -0,0 +1,171 @@ +From bffccbd59a2e2c641810cd7362c7b5ecf5989ed8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:35 +0100 +Subject: [PATCH 03/14] vhost: allocate SVQ device file descriptors at device + start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/13] bab2d43f0fc0d13a4917e706244b37e1a431b082 (eperezmartin/qemu-kvm) + +The next patches will start control SVQ if possible. However, we don't +know if that will be possible at qemu boot anymore. + +Delay device file descriptors until we know it at device start. This +will avoid to create them if the device does not support SVQ. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-4-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 3cfb4d069cd2977b707fb519c455d7d416e1f4b0) +--- + hw/virtio/vhost-shadow-virtqueue.c | 31 ++------------------------ + hw/virtio/vhost-vdpa.c | 35 ++++++++++++++++++++++++------ + 2 files changed, 30 insertions(+), 36 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 264ddc166d..3b05bab44d 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -715,43 +715,18 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + * @iova_tree: Tree to perform descriptors translations + * @ops: SVQ owner callbacks + * @ops_opaque: ops opaque pointer +- * +- * Returns the new virtqueue or NULL. +- * +- * In case of error, reason is reported through error_report. 
+ */ + VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, + const VhostShadowVirtqueueOps *ops, + void *ops_opaque) + { +- g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); +- int r; +- +- r = event_notifier_init(&svq->hdev_kick, 0); +- if (r != 0) { +- error_report("Couldn't create kick event notifier: %s (%d)", +- g_strerror(errno), errno); +- goto err_init_hdev_kick; +- } +- +- r = event_notifier_init(&svq->hdev_call, 0); +- if (r != 0) { +- error_report("Couldn't create call event notifier: %s (%d)", +- g_strerror(errno), errno); +- goto err_init_hdev_call; +- } ++ VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); + + event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); + svq->iova_tree = iova_tree; + svq->ops = ops; + svq->ops_opaque = ops_opaque; +- return g_steal_pointer(&svq); +- +-err_init_hdev_call: +- event_notifier_cleanup(&svq->hdev_kick); +- +-err_init_hdev_kick: +- return NULL; ++ return svq; + } + + /** +@@ -763,7 +738,5 @@ void vhost_svq_free(gpointer pvq) + { + VhostShadowVirtqueue *vq = pvq; + vhost_svq_stop(vq); +- event_notifier_cleanup(&vq->hdev_kick); +- event_notifier_cleanup(&vq->hdev_call); + g_free(vq); + } +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 44e6a9b7b3..530d2ca362 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -428,15 +428,11 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, + + shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); + for (unsigned n = 0; n < hdev->nvqs; ++n) { +- g_autoptr(VhostShadowVirtqueue) svq; ++ VhostShadowVirtqueue *svq; + + svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, + v->shadow_vq_ops_opaque); +- if (unlikely(!svq)) { +- error_setg(errp, "Cannot create svq %u", n); +- return -1; +- } +- g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq)); ++ g_ptr_array_add(shadow_vqs, svq); + } + + v->shadow_vqs = g_steal_pointer(&shadow_vqs); +@@ -871,11 +867,23 @@ static int 
vhost_vdpa_svq_set_fds(struct vhost_dev *dev, + const EventNotifier *event_notifier = &svq->hdev_kick; + int r; + ++ r = event_notifier_init(&svq->hdev_kick, 0); ++ if (r != 0) { ++ error_setg_errno(errp, -r, "Couldn't create kick event notifier"); ++ goto err_init_hdev_kick; ++ } ++ ++ r = event_notifier_init(&svq->hdev_call, 0); ++ if (r != 0) { ++ error_setg_errno(errp, -r, "Couldn't create call event notifier"); ++ goto err_init_hdev_call; ++ } ++ + file.fd = event_notifier_get_fd(event_notifier); + r = vhost_vdpa_set_vring_dev_kick(dev, &file); + if (unlikely(r != 0)) { + error_setg_errno(errp, -r, "Can't set device kick fd"); +- return r; ++ goto err_init_set_dev_fd; + } + + event_notifier = &svq->hdev_call; +@@ -883,8 +891,18 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, + r = vhost_vdpa_set_vring_dev_call(dev, &file); + if (unlikely(r != 0)) { + error_setg_errno(errp, -r, "Can't set device call fd"); ++ goto err_init_set_dev_fd; + } + ++ return 0; ++ ++err_init_set_dev_fd: ++ event_notifier_set_handler(&svq->hdev_call, NULL); ++ ++err_init_hdev_call: ++ event_notifier_cleanup(&svq->hdev_kick); ++ ++err_init_hdev_kick: + return r; + } + +@@ -1096,6 +1114,9 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) + for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); + vhost_vdpa_svq_unmap_rings(dev, svq); ++ ++ event_notifier_cleanup(&svq->hdev_kick); ++ event_notifier_cleanup(&svq->hdev_call); + } + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch b/SOURCES/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch new file mode 100644 index 0000000..ca93785 --- /dev/null +++ b/SOURCES/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch @@ -0,0 +1,157 @@ +From 55aad90e347599e88747888ddbefcba33427f386 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Fri, 16 Dec 2022 11:35:52 +0800 +Subject: [PATCH 12/31] 
vhost: fix vq dirty bitmap syncing when vIOMMU is + enabled + +RH-Author: Eric Auger +RH-MergeRequest: 134: vhost: fix vq dirty bitmap syncing when vIOMMU is enabled +RH-Bugzilla: 2124856 +RH-Acked-by: Peter Xu +RH-Acked-by: Jason Wang +RH-Acked-by: Laurent Vivier +RH-Commit: [1/1] 57ef499b63dc2cca6e64ee84d1dc127635868ca2 (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124856 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=49989924 +Upstream: yes + +When vIOMMU is enabled, the vq->used_phys is actually the IOVA not +GPA. So we need to translate it to GPA before the syncing otherwise we +may hit the following crash since IOVA could be out of the scope of +the GPA log size. This could be noted when using virtio-IOMMU with +vhost using 1G memory. + +Fixes: c471ad0e9bd46 ("vhost_net: device IOTLB support") +Cc: qemu-stable@nongnu.org +Tested-by: Lei Yang +Reported-by: Yalan Zhang +Signed-off-by: Jason Wang +Message-Id: <20221216033552.77087-1-jasowang@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 345cc1cbcbce2bab00abc2b88338d7d89c702d6b) +Signed-off-by: Eric Auger +--- + hw/virtio/vhost.c | 84 ++++++++++++++++++++++++++++++++++++----------- + 1 file changed, 64 insertions(+), 20 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 84dbb39e07..2c566dc539 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -20,6 +20,7 @@ + #include "qemu/range.h" + #include "qemu/error-report.h" + #include "qemu/memfd.h" ++#include "qemu/log.h" + #include "standard-headers/linux/vhost_types.h" + #include "hw/virtio/virtio-bus.h" + #include "hw/virtio/virtio-access.h" +@@ -106,6 +107,24 @@ static void vhost_dev_sync_region(struct vhost_dev *dev, + } + } + ++static bool vhost_dev_has_iommu(struct vhost_dev *dev) ++{ ++ VirtIODevice *vdev = dev->vdev; ++ ++ /* ++ * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support ++ * incremental memory mapping API via IOTLB API. For platform that ++ * does not have IOMMU, there's no need to enable this feature ++ * which may cause unnecessary IOTLB miss/update transactions. 
++ */ ++ if (vdev) { ++ return virtio_bus_device_iommu_enabled(vdev) && ++ virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); ++ } else { ++ return false; ++ } ++} ++ + static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, + MemoryRegionSection *section, + hwaddr first, +@@ -137,8 +156,51 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, + continue; + } + +- vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys, +- range_get_last(vq->used_phys, vq->used_size)); ++ if (vhost_dev_has_iommu(dev)) { ++ IOMMUTLBEntry iotlb; ++ hwaddr used_phys = vq->used_phys, used_size = vq->used_size; ++ hwaddr phys, s, offset; ++ ++ while (used_size) { ++ rcu_read_lock(); ++ iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as, ++ used_phys, ++ true, ++ MEMTXATTRS_UNSPECIFIED); ++ rcu_read_unlock(); ++ ++ if (!iotlb.target_as) { ++ qemu_log_mask(LOG_GUEST_ERROR, "translation " ++ "failure for used_iova %"PRIx64"\n", ++ used_phys); ++ return -EINVAL; ++ } ++ ++ offset = used_phys & iotlb.addr_mask; ++ phys = iotlb.translated_addr + offset; ++ ++ /* ++ * Distance from start of used ring until last byte of ++ * IOMMU page. ++ */ ++ s = iotlb.addr_mask - offset; ++ /* ++ * Size of used ring, or of the part of it until end ++ * of IOMMU page. To avoid zero result, do the adding ++ * outside of MIN(). 
++ */ ++ s = MIN(s, used_size - 1) + 1; ++ ++ vhost_dev_sync_region(dev, section, start_addr, end_addr, phys, ++ range_get_last(phys, s)); ++ used_size -= s; ++ used_phys += s; ++ } ++ } else { ++ vhost_dev_sync_region(dev, section, start_addr, ++ end_addr, vq->used_phys, ++ range_get_last(vq->used_phys, vq->used_size)); ++ } + } + return 0; + } +@@ -306,24 +368,6 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) + dev->log_size = size; + } + +-static bool vhost_dev_has_iommu(struct vhost_dev *dev) +-{ +- VirtIODevice *vdev = dev->vdev; +- +- /* +- * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support +- * incremental memory mapping API via IOTLB API. For platform that +- * does not have IOMMU, there's no need to enable this feature +- * which may cause unnecessary IOTLB miss/update transactions. +- */ +- if (vdev) { +- return virtio_bus_device_iommu_enabled(vdev) && +- virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); +- } else { +- return false; +- } +-} +- + static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, + hwaddr *plen, bool is_write) + { +-- +2.31.1 + diff --git a/SOURCES/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch b/SOURCES/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch new file mode 100644 index 0000000..1b48f5d --- /dev/null +++ b/SOURCES/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch @@ -0,0 +1,56 @@ +From d135303da1187d9f214e520a977fe7c47e5ce1f0 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:45 +0800 +Subject: [PATCH 04/31] vhost: introduce new VhostOps vhost_set_config_call +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/10] 
c2492838d9c1415e42d2507f2956d640a30325f2 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +This patch introduces new VhostOps vhost_set_config_call. +This function allows the qemu to set the config +event fd to kernel driver. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-5-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 9b30cdf9bbf9524a4f4f8a6eb551eb13cbbd3893) +Signed-off-by: Cindy Lu +--- + include/hw/virtio/vhost-backend.h | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h +index eab46d7f0b..c5ab49051e 100644 +--- a/include/hw/virtio/vhost-backend.h ++++ b/include/hw/virtio/vhost-backend.h +@@ -128,6 +128,8 @@ typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id); + + typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev); + ++typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev, ++ int fd); + typedef struct VhostOps { + VhostBackendType backend_type; + vhost_backend_init vhost_backend_init; +@@ -174,6 +176,7 @@ typedef struct VhostOps { + vhost_vq_get_addr_op vhost_vq_get_addr; + vhost_get_device_id_op vhost_get_device_id; + vhost_force_iommu_op vhost_force_iommu; ++ vhost_set_config_call_op vhost_set_config_call; + } VhostOps; + + int vhost_backend_update_device_iotlb(struct vhost_dev *dev, +-- +2.31.1 + diff --git a/SOURCES/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch b/SOURCES/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch deleted file mode 100644 index 08bcaf2..0000000 --- a/SOURCES/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 2bdea90bfbce3b8d5bfa86178a942a470b85b835 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 07/32] vhost: move descriptor translation to 
- vhost_svq_vring_write_descs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [7/27] 5533c72065e4ebf8ea7db966c976a3b29bdafb82 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 009c2549bb9dc7f7061009eb87f2a53d4b364983 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:26 2022 +0200 - - vhost: move descriptor translation to vhost_svq_vring_write_descs - - It's done for both in and out descriptors so it's better placed here. - - Acked-by: Jason Wang - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 38 +++++++++++++++++++++--------- - 1 file changed, 27 insertions(+), 11 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 06d0bb39d9..3fbda1e3d4 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -122,17 +122,35 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, - return true; - } - --static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, -- const struct iovec *iovec, size_t num, -- bool more_descs, bool write) -+/** -+ * Write descriptors to SVQ vring -+ * -+ * @svq: The shadow virtqueue -+ * @sg: Cache for hwaddr -+ * @iovec: The iovec from the guest -+ * @num: iovec length -+ * @more_descs: True if more descriptors come in the chain -+ * @write: True if they are writeable descriptors -+ * -+ * Return true if success, false otherwise and print error. 
-+ */ -+static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, -+ const struct iovec *iovec, size_t num, -+ bool more_descs, bool write) - { - uint16_t i = svq->free_head, last = svq->free_head; - unsigned n; - uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0; - vring_desc_t *descs = svq->vring.desc; -+ bool ok; - - if (num == 0) { -- return; -+ return true; -+ } -+ -+ ok = vhost_svq_translate_addr(svq, sg, iovec, num); -+ if (unlikely(!ok)) { -+ return false; - } - - for (n = 0; n < num; n++) { -@@ -150,6 +168,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, - } - - svq->free_head = le16_to_cpu(svq->desc_next[last]); -+ return true; - } - - static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, -@@ -169,21 +188,18 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, - return false; - } - -- ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num); -+ ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, -+ elem->in_num > 0, false); - if (unlikely(!ok)) { - return false; - } -- vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, -- elem->in_num > 0, false); -- - -- ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num); -+ ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, -+ true); - if (unlikely(!ok)) { - return false; - } - -- vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true); -- - /* - * Put the entry in the available array (but don't update avail->idx until - * they do sync). 
--- -2.31.1 - diff --git a/SOURCES/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch b/SOURCES/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch new file mode 100644 index 0000000..de005ba --- /dev/null +++ b/SOURCES/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch @@ -0,0 +1,122 @@ +From 6584478deca49d0ea20add588e4fdb51cdc26f1d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:36 +0100 +Subject: [PATCH 04/14] vhost: move iova_tree set to vhost_svq_start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/13] 200d8e9b58e258a6e301430debc73ef7d962b732 (eperezmartin/qemu-kvm) + +Since we don't know if we will use SVQ at qemu initialization, let's +allocate iova_tree only if needed. To do so, accept it at SVQ start, not +at initialization. + +This will avoid to create it if the device does not support SVQ. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-5-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 5fde952bbdd521c10fc018ee04f922a7dca5f663) +--- + hw/virtio/vhost-shadow-virtqueue.c | 9 ++++----- + hw/virtio/vhost-shadow-virtqueue.h | 5 ++--- + hw/virtio/vhost-vdpa.c | 5 ++--- + 3 files changed, 8 insertions(+), 11 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 3b05bab44d..4307296358 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -642,9 +642,10 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) + * @svq: Shadow Virtqueue + * @vdev: VirtIO device + * @vq: Virtqueue to shadow ++ * @iova_tree: Tree to perform descriptors translations + */ + void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, +- VirtQueue *vq) ++ VirtQueue *vq, VhostIOVATree *iova_tree) + { + size_t desc_size, driver_size, device_size; + +@@ -655,6 +656,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + svq->last_used_idx = 0; + svq->vdev = vdev; + svq->vq = vq; ++ svq->iova_tree = iova_tree; + + svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); + driver_size = vhost_svq_driver_area_size(svq); +@@ -712,18 +714,15 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + * Creates vhost shadow virtqueue, and instructs the vhost device to use the + * shadow methods and file descriptors. 
+ * +- * @iova_tree: Tree to perform descriptors translations + * @ops: SVQ owner callbacks + * @ops_opaque: ops opaque pointer + */ +-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, +- const VhostShadowVirtqueueOps *ops, ++VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops, + void *ops_opaque) + { + VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); + + event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); +- svq->iova_tree = iova_tree; + svq->ops = ops; + svq->ops_opaque = ops_opaque; + return svq; +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index d04c34a589..926a4897b1 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -126,11 +126,10 @@ size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq); + size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq); + + void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, +- VirtQueue *vq); ++ VirtQueue *vq, VhostIOVATree *iova_tree); + void vhost_svq_stop(VhostShadowVirtqueue *svq); + +-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, +- const VhostShadowVirtqueueOps *ops, ++VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops, + void *ops_opaque); + + void vhost_svq_free(gpointer vq); +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 530d2ca362..e65603022f 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -430,8 +430,7 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, + for (unsigned n = 0; n < hdev->nvqs; ++n) { + VhostShadowVirtqueue *svq; + +- svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, +- v->shadow_vq_ops_opaque); ++ svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque); + g_ptr_array_add(shadow_vqs, svq); + } + +@@ -1070,7 +1069,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) + goto err; + } + +- vhost_svq_start(svq, 
dev->vdev, vq); ++ vhost_svq_start(svq, dev->vdev, vq, v->iova_tree); + ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err); + if (unlikely(!ok)) { + goto err_map; +-- +2.31.1 + diff --git a/SOURCES/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch b/SOURCES/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch deleted file mode 100644 index 70e8f59..0000000 --- a/SOURCES/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch +++ /dev/null @@ -1,56 +0,0 @@ -From edb2bd99355f300b512c040e91f5870ea14a5d7e Mon Sep 17 00:00:00 2001 -From: Si-Wei Liu -Date: Fri, 6 May 2022 19:28:15 -0700 -Subject: [PATCH 11/16] vhost-net: fix improper cleanup in vhost_net_start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [4/7] e88e482dd4b344f0cc887a358268beaed4d62917 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu - -vhost_net_start() missed a corresponding stop_one() upon error from -vhost_set_vring_enable(). While at it, make the error handling for -err_start more robust. No real issue was found due to this though. - -Signed-off-by: Si-Wei Liu -Acked-by: Jason Wang -Message-Id: <1651890498-24478-5-git-send-email-si-wei.liu@oracle.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 6f3910b5eee00b8cc959e94659c0d524c482a418) -Signed-off-by: Jason Wang ---- - hw/net/vhost_net.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c -index 30379d2ca4..d6d7c51f62 100644 ---- a/hw/net/vhost_net.c -+++ b/hw/net/vhost_net.c -@@ -381,6 +381,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, - r = vhost_set_vring_enable(peer, peer->vring_enable); - - if (r < 0) { -+ vhost_net_stop_one(get_vhost_net(peer), dev); - goto err_start; - } - } -@@ -390,7 +391,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, - - err_start: - while (--i >= 0) { -- peer = qemu_get_peer(ncs , i); -+ peer = qemu_get_peer(ncs, i < data_queue_pairs ? -+ i : n->max_queue_pairs); - vhost_net_stop_one(get_vhost_net(peer), dev); - } - e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch b/SOURCES/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch deleted file mode 100644 index 31677fd..0000000 --- a/SOURCES/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch +++ /dev/null @@ -1,87 +0,0 @@ -From a9095850da8dd4ea3fdb725cb7f79118144e22fa Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:39:27 +0200 -Subject: [PATCH 22/32] vhost-net-vdpa: add stubs for when no virtio-net device - is present -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [22/27] a2b25a805bb06094a5fab27ce8f82bee12a9fcb5 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 
94c643732dc110d04bbdf0eb43c41bce23b3593e -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:41 2022 +0200 - - vhost-net-vdpa: add stubs for when no virtio-net device is present - - net/vhost-vdpa.c will need functions that are declared in - vhost-shadow-virtqueue.c, that needs functions of virtio-net.c. - - Copy the vhost-vdpa-stub.c code so - only the constructor net_init_vhost_vdpa needs to be defined. - - Signed-off-by: Eugenio Pérez - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - net/meson.build | 3 ++- - net/vhost-vdpa-stub.c | 21 +++++++++++++++++++++ - 2 files changed, 23 insertions(+), 1 deletion(-) - create mode 100644 net/vhost-vdpa-stub.c - -diff --git a/net/meson.build b/net/meson.build -index c965e83b26..116a9e7cbb 100644 ---- a/net/meson.build -+++ b/net/meson.build -@@ -41,7 +41,8 @@ endif - softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix)) - softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c')) - if have_vhost_net_vdpa -- softmmu_ss.add(files('vhost-vdpa.c')) -+ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-vdpa.c'), if_false: files('vhost-vdpa-stub.c')) -+ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-vdpa-stub.c')) - endif - - subdir('can') -diff --git a/net/vhost-vdpa-stub.c b/net/vhost-vdpa-stub.c -new file mode 100644 -index 0000000000..1732ed2443 ---- /dev/null -+++ b/net/vhost-vdpa-stub.c -@@ -0,0 +1,21 @@ -+/* -+ * vhost-vdpa-stub.c -+ * -+ * Copyright (c) 2022 Red Hat, Inc. -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. 
-+ * -+ */ -+ -+#include "qemu/osdep.h" -+#include "clients.h" -+#include "net/vhost-vdpa.h" -+#include "qapi/error.h" -+ -+int net_init_vhost_vdpa(const Netdev *netdev, const char *name, -+ NetClientState *peer, Error **errp) -+{ -+ error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*"); -+ return -1; -+} --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch b/SOURCES/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch new file mode 100644 index 0000000..099dd73 --- /dev/null +++ b/SOURCES/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch @@ -0,0 +1,73 @@ +From 2906f8df3c5e915a3dc05a705b87990211f114b5 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:34 +0100 +Subject: [PATCH 02/14] vhost: set SVQ device call handler at SVQ start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/13] ad90a6cc5c71b70d705904433d5a986e8fedb924 (eperezmartin/qemu-kvm) + +By the end of this series CVQ is shadowed as long as the features +support it. + +Since we don't know at the beginning of qemu running if this is +supported, move the event notifier handler setting to the start of the +SVQ, instead of the start of qemu run. This will avoid to create them if +the device does not support SVQ. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-3-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 20e7412bfd63c68f1798fbdb799aedb7e05fee88) +--- + hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 5bd14cad96..264ddc166d 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -648,6 +648,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + { + size_t desc_size, driver_size, device_size; + ++ event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); + svq->next_guest_avail_elem = NULL; + svq->shadow_avail_idx = 0; + svq->shadow_used_idx = 0; +@@ -704,6 +705,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + g_free(svq->desc_state); + qemu_vfree(svq->vring.desc); + qemu_vfree(svq->vring.used); ++ event_notifier_set_handler(&svq->hdev_call, NULL); + } + + /** +@@ -740,7 +742,6 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, + } + + event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); +- event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); + svq->iova_tree = iova_tree; + svq->ops = ops; + svq->ops_opaque = ops_opaque; +@@ -763,7 +764,6 @@ void vhost_svq_free(gpointer pvq) + VhostShadowVirtqueue *vq = pvq; + vhost_svq_stop(vq); + event_notifier_cleanup(&vq->hdev_kick); +- event_notifier_set_handler(&vq->hdev_call, NULL); + event_notifier_cleanup(&vq->hdev_call); + g_free(vq); + } +-- +2.31.1 + diff --git a/SOURCES/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch b/SOURCES/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch deleted file mode 100644 index 7125f6a..0000000 --- a/SOURCES/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 45305ab202fa2191962152e5a501a9a13e31a0b2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:26 +0200 -Subject: [PATCH 11/23] vhost: stop 
transfer elem ownership in - vhost_handle_guest_kick -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/21] 697a5c0ad59efe27abf447f7965091993bc39756 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -It was easier to allow vhost_svq_add to handle the memory. Now that we -will allow qemu to add elements to a SVQ without the guest's knowledge, -it's better to handle it in the caller. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit eb42df8bb2c92a7313343d97409cd99ccba25b25) ---- - hw/virtio/vhost-shadow-virtqueue.c | 10 ++++------ - 1 file changed, 4 insertions(+), 6 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index f420311b89..2ae47d90a1 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -233,9 +233,6 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) - /** - * Add an element to a SVQ. - * -- * The caller must check that there is enough slots for the new element. It -- * takes ownership of the element: In case of failure not ENOSPC, it is free. 
-- * - * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full - */ - int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, -@@ -252,7 +249,6 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, - - ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); - if (unlikely(!ok)) { -- g_free(elem); - return -EINVAL; - } - -@@ -293,7 +289,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - virtio_queue_set_notification(svq->vq, false); - - while (true) { -- VirtQueueElement *elem; -+ g_autofree VirtQueueElement *elem; - int r; - - if (svq->next_guest_avail_elem) { -@@ -324,12 +320,14 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - * queue the current guest descriptor and ignore kicks - * until some elements are used. - */ -- svq->next_guest_avail_elem = elem; -+ svq->next_guest_avail_elem = g_steal_pointer(&elem); - } - - /* VQ is full or broken, just return and ignore kicks */ - return; - } -+ /* elem belongs to SVQ or external caller now */ -+ elem = NULL; - } - - virtio_queue_set_notification(svq->vq, true); --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch b/SOURCES/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch deleted file mode 100644 index b908739..0000000 --- a/SOURCES/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 78b7d9af26ae802b3ca0d7b794b366ab4d515647 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:27 +0200 -Subject: [PATCH 12/23] vhost: use SVQ element ndescs instead of opaque data - for desc validation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [11/21] 
536ba65ff7241c4dc66362294ba8de4354260d6f (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -Since we're going to allow SVQ to add elements without the guest's -knowledge and without its own VirtQueueElement, it's easier to check if -an element is a valid head checking a different thing than the -VirtQueueElement. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 70e0841722deb363b53cdcd465af12a0d1461b60) ---- - hw/virtio/vhost-shadow-virtqueue.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 2ae47d90a1..7792f3db1d 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -414,7 +414,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - return NULL; - } - -- if (unlikely(!svq->desc_state[used_elem.id].elem)) { -+ if (unlikely(!svq->desc_state[used_elem.id].ndescs)) { - qemu_log_mask(LOG_GUEST_ERROR, - "Device %s says index %u is used, but it was not available", - svq->vdev->name, used_elem.id); -@@ -422,6 +422,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - } - - num = svq->desc_state[used_elem.id].ndescs; -+ svq->desc_state[used_elem.id].ndescs = 0; - last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); - svq->desc_next[last_used_chain] = svq->free_head; - svq->free_head = used_elem.id; --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-vdpa-add-support-for-config-interrupt.patch b/SOURCES/kvm-vhost-vdpa-add-support-for-config-interrupt.patch new file mode 100644 index 0000000..88d4df6 --- /dev/null +++ b/SOURCES/kvm-vhost-vdpa-add-support-for-config-interrupt.patch @@ -0,0 +1,73 @@ +From e01563a8de9a45937ffd8d4c1d74a6890ffb6eb6 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:46 +0800 
+Subject: [PATCH 05/31] vhost-vdpa: add support for config interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/10] 49bfd214a503f8e199ff93f4bbfcbd4c4f2405b5 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add new call back function in vhost-vdpa, The function +vhost_set_config_call can set the event fd to kernel. +This function will be called in the vhost_dev_start +and vhost_dev_stop + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-6-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 259f3acc1c675dd77ebbdb28a483f5d0220bdbf6) +Signed-off-by: Cindy Lu +--- + hw/virtio/trace-events | 1 + + hw/virtio/vhost-vdpa.c | 8 ++++++++ + 2 files changed, 9 insertions(+) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 14fc5b9bb2..46f2faf04e 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -62,6 +62,7 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI + vhost_vdpa_set_owner(void *dev) "dev: %p" + vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64 + vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p first: 0x%"PRIx64" last: 0x%"PRIx64 ++vhost_vdpa_set_config_call(void *dev, int fd)"dev: %p fd: %d" + + # virtio.c + virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 7468e44b87..c5be2645b0 
100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -754,6 +754,13 @@ static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) + return 0; + } + ++static int vhost_vdpa_set_config_call(struct vhost_dev *dev, ++ int fd) ++{ ++ trace_vhost_vdpa_set_config_call(dev, fd); ++ return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd); ++} ++ + static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config, + uint32_t config_len) + { +@@ -1310,4 +1317,5 @@ const VhostOps vdpa_ops = { + .vhost_get_device_id = vhost_vdpa_get_device_id, + .vhost_vq_get_addr = vhost_vdpa_vq_get_addr, + .vhost_force_iommu = vhost_vdpa_force_iommu, ++ .vhost_set_config_call = vhost_vdpa_set_config_call, + }; +-- +2.31.1 + diff --git a/SOURCES/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch b/SOURCES/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch deleted file mode 100644 index 747bf5f..0000000 --- a/SOURCES/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 46c5a35aa56cf0dd55376638dbf7d46e85f497e1 Mon Sep 17 00:00:00 2001 -From: Si-Wei Liu -Date: Fri, 6 May 2022 19:28:16 -0700 -Subject: [PATCH 12/16] vhost-vdpa: backend feature should set only once -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [5/7] 7531bb8da0c99b29997e8bfc6d1e811daf3cdd38 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu - -The vhost_vdpa_one_time_request() branch in -vhost_vdpa_set_backend_cap() incorrectly sends down -ioctls on vhost_dev with non-zero index. This may -end up with multiple VHOST_SET_BACKEND_FEATURES -ioctl calls sent down on the vhost-vdpa fd that is -shared between all these vhost_dev's. - -To fix it, send down ioctl only once via the first -vhost_dev with index 0. 
Toggle the polarity of the -vhost_vdpa_one_time_request() test should do the -trick. - -Fixes: 4d191cfdc7de ("vhost-vdpa: classify one time request") -Signed-off-by: Si-Wei Liu -Reviewed-by: Stefano Garzarella -Acked-by: Jason Wang -Acked-by: Eugenio Pérez -Message-Id: <1651890498-24478-6-git-send-email-si-wei.liu@oracle.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 6aee7e4233f6467f69531fcd352adff028f3f5ea) -Signed-off-by: Jason Wang ---- - hw/virtio/vhost-vdpa.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 8adf7c0b92..6e3dbd9e89 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -665,7 +665,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) - - features &= f; - -- if (vhost_vdpa_one_time_request(dev)) { -+ if (!vhost_vdpa_one_time_request(dev)) { - r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); - if (r) { - return -EFAULT; --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch b/SOURCES/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch deleted file mode 100644 index 2466557..0000000 --- a/SOURCES/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 58acdab17ec00ab76105ab92a51c5ba4dec3df5a Mon Sep 17 00:00:00 2001 -From: Si-Wei Liu -Date: Fri, 6 May 2022 19:28:17 -0700 -Subject: [PATCH 13/16] vhost-vdpa: change name and polarity for - vhost_vdpa_one_time_request() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [6/7] 7029778f463a136ff412c63b86b6953390e47bf8 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu - -The name vhost_vdpa_one_time_request() was confusing. 
No -matter whatever it returns, its typical occurrence had -always been at requests that only need to be applied once. -And the name didn't suggest what it actually checks for. -Change it to vhost_vdpa_first_dev() with polarity flipped -for better readibility of code. That way it is able to -reflect what the check is really about. - -This call is applicable to request which performs operation -only once, before queues are set up, and usually at the beginning -of the caller function. Document the requirement for it in place. - -Signed-off-by: Si-Wei Liu -Message-Id: <1651890498-24478-7-git-send-email-si-wei.liu@oracle.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Stefano Garzarella -Acked-by: Jason Wang -(cherry picked from commit d71b0609fc04217e28d17009f04d74b08be6f466) -Signed-off-by: Jason Wang ---- - hw/virtio/vhost-vdpa.c | 23 +++++++++++++++-------- - 1 file changed, 15 insertions(+), 8 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 6e3dbd9e89..33dcaa135e 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -366,11 +366,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) - v->iova_range.last); - } - --static bool vhost_vdpa_one_time_request(struct vhost_dev *dev) -+/* -+ * The use of this function is for requests that only need to be -+ * applied once. Typically such request occurs at the beginning -+ * of operation, and before setting up queues. It should not be -+ * used for request that performs operation until all queues are -+ * set, which would need to check dev->vq_index_end instead. 
-+ */ -+static bool vhost_vdpa_first_dev(struct vhost_dev *dev) - { - struct vhost_vdpa *v = dev->opaque; - -- return v->index != 0; -+ return v->index == 0; - } - - static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, -@@ -451,7 +458,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) - - vhost_vdpa_get_iova_range(v); - -- if (vhost_vdpa_one_time_request(dev)) { -+ if (!vhost_vdpa_first_dev(dev)) { - return 0; - } - -@@ -594,7 +601,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) - static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, - struct vhost_memory *mem) - { -- if (vhost_vdpa_one_time_request(dev)) { -+ if (!vhost_vdpa_first_dev(dev)) { - return 0; - } - -@@ -623,7 +630,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, - struct vhost_vdpa *v = dev->opaque; - int ret; - -- if (vhost_vdpa_one_time_request(dev)) { -+ if (!vhost_vdpa_first_dev(dev)) { - return 0; - } - -@@ -665,7 +672,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) - - features &= f; - -- if (!vhost_vdpa_one_time_request(dev)) { -+ if (vhost_vdpa_first_dev(dev)) { - r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); - if (r) { - return -EFAULT; -@@ -1118,7 +1125,7 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, - struct vhost_log *log) - { - struct vhost_vdpa *v = dev->opaque; -- if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) { -+ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { - return 0; - } - -@@ -1240,7 +1247,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, - - static int vhost_vdpa_set_owner(struct vhost_dev *dev) - { -- if (vhost_vdpa_one_time_request(dev)) { -+ if (!vhost_vdpa_first_dev(dev)) { - return 0; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch b/SOURCES/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch deleted file mode 100644 index 
7716cbf..0000000 --- a/SOURCES/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 3142102adb98f46518c0ac1773b0c48710c6bed6 Mon Sep 17 00:00:00 2001 -From: Si-Wei Liu -Date: Fri, 6 May 2022 19:28:14 -0700 -Subject: [PATCH 10/16] vhost-vdpa: fix improper cleanup in net_init_vhost_vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [3/7] c83ff6c97d34cfae3c3447edde934b42a9ace75f (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu - -... such that no memory leaks on dangling net clients in case of -error. - -Signed-off-by: Si-Wei Liu -Acked-by: Jason Wang -Message-Id: <1651890498-24478-4-git-send-email-si-wei.liu@oracle.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 9bd055073e375c8a0d7ebce925e05d914d69fc7f) -Signed-off-by: Jason Wang ---- - net/vhost-vdpa.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 1e9fe47c03..df1e69ee72 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -306,7 +306,9 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - - err: - if (i) { -- qemu_del_net_client(ncs[0]); -+ for (i--; i >= 0; i--) { -+ qemu_del_net_client(ncs[i]); -+ } - } - qemu_close(vdpa_device_fd); - --- -2.31.1 - diff --git a/SOURCES/kvm-vhost_net-Add-NetClientInfo-start-callback.patch b/SOURCES/kvm-vhost_net-Add-NetClientInfo-start-callback.patch deleted file mode 100644 index 40bf5f6..0000000 --- a/SOURCES/kvm-vhost_net-Add-NetClientInfo-start-callback.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 6a6999311742b6dccdfce09f30742a63d72d1bd7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:30 +0200 -Subject: [PATCH 15/23] vhost_net: Add 
NetClientInfo start callback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [14/21] df6a96ae3aec02ecae793bdbd8e9c2fcfac7871a (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -This is used by the backend to perform actions before the device is -started. - -In particular, vdpa net use it to map CVQ buffers to the device, so it -can send control commands using them. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 80bda0e674fd0b439ac627ab7ecdbd4a1b46d525) ---- - hw/net/vhost_net.c | 7 +++++++ - include/net/net.h | 2 ++ - 2 files changed, 9 insertions(+) - -diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c -index d6d7c51f62..1005f9d8e6 100644 ---- a/hw/net/vhost_net.c -+++ b/hw/net/vhost_net.c -@@ -244,6 +244,13 @@ static int vhost_net_start_one(struct vhost_net *net, - struct vhost_vring_file file = { }; - int r; - -+ if (net->nc->info->start) { -+ r = net->nc->info->start(net->nc); -+ if (r < 0) { -+ return r; -+ } -+ } -+ - r = vhost_dev_enable_notifiers(&net->dev, dev); - if (r < 0) { - goto fail_notifiers; -diff --git a/include/net/net.h b/include/net/net.h -index 523136c7ac..ad9e80083a 100644 ---- a/include/net/net.h -+++ b/include/net/net.h -@@ -44,6 +44,7 @@ typedef struct NICConf { - - typedef void (NetPoll)(NetClientState *, bool enable); - typedef bool (NetCanReceive)(NetClientState *); -+typedef int (NetStart)(NetClientState *); - typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); - typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); - typedef void (NetCleanup) (NetClientState *); -@@ -71,6 +72,7 @@ typedef struct 
NetClientInfo { - NetReceive *receive_raw; - NetReceiveIOV *receive_iov; - NetCanReceive *can_receive; -+ NetStart *start; - NetCleanup *cleanup; - LinkStatusChanged *link_status_changed; - QueryRxFilter *query_rx_filter; --- -2.31.1 - diff --git a/SOURCES/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch b/SOURCES/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch deleted file mode 100644 index c622824..0000000 --- a/SOURCES/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch +++ /dev/null @@ -1,68 +0,0 @@ -From effd0ed379deb43bb850f1aeff24fa85935d7f52 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:31 +0200 -Subject: [PATCH 16/23] vhost_net: Add NetClientInfo stop callback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [15/21] 9f8a3e9bfb0d21fa0479f54a7a17cb738aa46359 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -Used by the backend to perform actions after the device is stopped. - -In particular, vdpa net use it to unmap CVQ buffers to the device, -cleaning the actions performed in prepare(). 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit c6544e2331d721627fa7356da3592bcb46340f1b) ---- - hw/net/vhost_net.c | 3 +++ - include/net/net.h | 2 ++ - 2 files changed, 5 insertions(+) - -diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c -index 1005f9d8e6..275ece5324 100644 ---- a/hw/net/vhost_net.c -+++ b/hw/net/vhost_net.c -@@ -320,6 +320,9 @@ static void vhost_net_stop_one(struct vhost_net *net, - net->nc->info->poll(net->nc, true); - } - vhost_dev_stop(&net->dev, dev); -+ if (net->nc->info->stop) { -+ net->nc->info->stop(net->nc); -+ } - vhost_dev_disable_notifiers(&net->dev, dev); - } - -diff --git a/include/net/net.h b/include/net/net.h -index ad9e80083a..476ad45b9a 100644 ---- a/include/net/net.h -+++ b/include/net/net.h -@@ -45,6 +45,7 @@ typedef struct NICConf { - typedef void (NetPoll)(NetClientState *, bool enable); - typedef bool (NetCanReceive)(NetClientState *); - typedef int (NetStart)(NetClientState *); -+typedef void (NetStop)(NetClientState *); - typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); - typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); - typedef void (NetCleanup) (NetClientState *); -@@ -73,6 +74,7 @@ typedef struct NetClientInfo { - NetReceiveIOV *receive_iov; - NetCanReceive *can_receive; - NetStart *start; -+ NetStop *stop; - NetCleanup *cleanup; - LinkStatusChanged *link_status_changed; - QueryRxFilter *query_rx_filter; --- -2.31.1 - diff --git a/SOURCES/kvm-vhost_net-add-NetClientState-load-callback.patch b/SOURCES/kvm-vhost_net-add-NetClientState-load-callback.patch deleted file mode 100644 index 92a9078..0000000 --- a/SOURCES/kvm-vhost_net-add-NetClientState-load-callback.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 6a5c236b95ce475c556ccd92c2135ad48474e8fb Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:35 +0200 -Subject: [PATCH 20/23] vhost_net: add 
NetClientState->load() callback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [19/21] 439b4133a757b2f1c5f4a1441eca25329896491a (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -It allows per-net client operations right after device's successful -start. In particular, to load the device status. - -Vhost-vdpa net will use it to add the CVQ buffers to restore the device -status. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 302f3d20e68a8a120d431f7ff7cb02a75917f54c) ---- - hw/net/vhost_net.c | 7 +++++++ - include/net/net.h | 2 ++ - 2 files changed, 9 insertions(+) - -diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c -index 275ece5324..ea3a8be1c9 100644 ---- a/hw/net/vhost_net.c -+++ b/hw/net/vhost_net.c -@@ -281,6 +281,13 @@ static int vhost_net_start_one(struct vhost_net *net, - } - } - } -+ -+ if (net->nc->info->load) { -+ r = net->nc->info->load(net->nc); -+ if (r < 0) { -+ goto fail; -+ } -+ } - return 0; - fail: - file.fd = -1; -diff --git a/include/net/net.h b/include/net/net.h -index 476ad45b9a..81d0b21def 100644 ---- a/include/net/net.h -+++ b/include/net/net.h -@@ -45,6 +45,7 @@ typedef struct NICConf { - typedef void (NetPoll)(NetClientState *, bool enable); - typedef bool (NetCanReceive)(NetClientState *); - typedef int (NetStart)(NetClientState *); -+typedef int (NetLoad)(NetClientState *); - typedef void (NetStop)(NetClientState *); - typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); - typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); -@@ -74,6 +75,7 @@ typedef struct NetClientInfo { - NetReceiveIOV *receive_iov; - 
NetCanReceive *can_receive; - NetStart *start; -+ NetLoad *load; - NetStop *stop; - NetCleanup *cleanup; - LinkStatusChanged *link_status_changed; --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-add-support-for-configure-interrupt.patch b/SOURCES/kvm-virtio-add-support-for-configure-interrupt.patch new file mode 100644 index 0000000..02f4666 --- /dev/null +++ b/SOURCES/kvm-virtio-add-support-for-configure-interrupt.patch @@ -0,0 +1,115 @@ +From e04c76339580effae41617b690b58a6605e0f40b Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:47 +0800 +Subject: [PATCH 06/31] virtio: add support for configure interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/10] 7048eb488b732578686d451684babaf17b582b05 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add the functions to support the configure interrupt in virtio +The function virtio_config_guest_notifier_read will notify the +guest if there is an configure interrupt. +The function virtio_config_set_guest_notifier_fd_handler is +to set the fd hander for the notifier + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-7-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 7d847d0c9b93b91160f40d69a65c904d76f1edd8) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio.c | 29 +++++++++++++++++++++++++++++ + include/hw/virtio/virtio.h | 4 ++++ + 2 files changed, 33 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index eb6347ab5d..34e9c5d141 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -4012,7 +4012,14 @@ static void virtio_queue_guest_notifier_read(EventNotifier *n) + virtio_irq(vq); + } + } ++static void virtio_config_guest_notifier_read(EventNotifier *n) ++{ ++ VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier); + ++ if (event_notifier_test_and_clear(n)) { ++ virtio_notify_config(vdev); ++ } ++} + void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, + bool with_irqfd) + { +@@ -4029,6 +4036,23 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, + } + } + ++void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev, ++ bool assign, bool with_irqfd) ++{ ++ EventNotifier *n; ++ n = &vdev->config_notifier; ++ if (assign && !with_irqfd) { ++ event_notifier_set_handler(n, virtio_config_guest_notifier_read); ++ } else { ++ event_notifier_set_handler(n, NULL); ++ } ++ if (!assign) { ++ /* Test and clear notifier before closing it,*/ ++ /* in case poll callback didn't have time to run. 
*/ ++ virtio_config_guest_notifier_read(n); ++ } ++} ++ + EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) + { + return &vq->guest_notifier; +@@ -4109,6 +4133,11 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) + return &vq->host_notifier; + } + ++EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev) ++{ ++ return &vdev->config_notifier; ++} ++ + void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled) + { + vq->host_notifier_enabled = enabled; +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 1f4a41b958..9c3a4642f2 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -138,6 +138,7 @@ struct VirtIODevice + AddressSpace *dma_as; + QLIST_HEAD(, VirtQueue) *vector_queues; + QTAILQ_ENTRY(VirtIODevice) next; ++ EventNotifier config_notifier; + }; + + struct VirtioDeviceClass { +@@ -360,6 +361,9 @@ void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ct + void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx); + VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); + VirtQueue *virtio_vector_next_queue(VirtQueue *vq); ++EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev); ++void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev, ++ bool assign, bool with_irqfd); + + static inline void virtio_add_feature(uint64_t *features, unsigned int fbit) + { +-- +2.31.1 + diff --git a/SOURCES/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch b/SOURCES/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch new file mode 100644 index 0000000..ea2589a --- /dev/null +++ b/SOURCES/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch @@ -0,0 +1,262 @@ +From 34a267758cf016f34b327318500efdbf0f606033 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:42 +0800 +Subject: [PATCH 01/31] virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX +MIME-Version: 1.0 +Content-Type: 
text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/10] f374aaae221bc5a4c2521a267d21350b812e11ba (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +To support configure interrupt for vhost-vdpa +Introduce VIRTIO_CONFIG_IRQ_IDX -1 as configure interrupt's queue index, +Then we can reuse the functions guest_notifier_mask and guest_notifier_pending. +Add the check of queue index in these drivers, if the driver does not support +configure interrupt, the function will just return + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-2-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 544f0278afcab2bebab61b14e4c2c58e65911f5b) +Signed-off-by: Cindy Lu +--- + hw/display/vhost-user-gpu.c | 18 ++++++++++++++++++ + hw/net/virtio-net.c | 22 ++++++++++++++++++++-- + hw/virtio/vhost-user-fs.c | 18 ++++++++++++++++++ + hw/virtio/vhost-user-gpio.c | 10 ++++++++++ + hw/virtio/vhost-vsock-common.c | 18 ++++++++++++++++++ + hw/virtio/virtio-crypto.c | 18 ++++++++++++++++++ + include/hw/virtio/virtio.h | 3 +++ + 7 files changed, 105 insertions(+), 2 deletions(-) + +diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c +index 19c0e20103..4380a5e672 100644 +--- a/hw/display/vhost-user-gpu.c ++++ b/hw/display/vhost-user-gpu.c +@@ -486,6 +486,15 @@ vhost_user_gpu_guest_notifier_pending(VirtIODevice *vdev, int idx) + { + VhostUserGPU *g = VHOST_USER_GPU(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return 
vhost_virtqueue_pending(&g->vhost->dev, idx); + } + +@@ -494,6 +503,15 @@ vhost_user_gpu_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) + { + VhostUserGPU *g = VHOST_USER_GPU(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } + vhost_virtqueue_mask(&g->vhost->dev, vdev, idx, mask); + } + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index aba12759d5..bee35d6f9f 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3316,6 +3316,15 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) + } else { + nc = qemu_get_subqueue(n->nic, vq2q(idx)); + } ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return false ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); + } + +@@ -3339,8 +3348,17 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, + } else { + nc = qemu_get_subqueue(n->nic, vq2q(idx)); + } +- vhost_net_virtqueue_mask(get_vhost_net(nc->peer), +- vdev, idx, mask); ++ /* ++ *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } ++ ++ vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); + } + + static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index d97b179e6f..f5049735ac 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -159,6 +159,15 @@ 
static void vuf_guest_notifier_mask(VirtIODevice *vdev, int idx, + { + VHostUserFS *fs = VHOST_USER_FS(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } + vhost_virtqueue_mask(&fs->vhost_dev, vdev, idx, mask); + } + +@@ -166,6 +175,15 @@ static bool vuf_guest_notifier_pending(VirtIODevice *vdev, int idx) + { + VHostUserFS *fs = VHOST_USER_FS(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return vhost_virtqueue_pending(&fs->vhost_dev, idx); + } + +diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c +index b7b82a1099..fe3da32c74 100644 +--- a/hw/virtio/vhost-user-gpio.c ++++ b/hw/virtio/vhost-user-gpio.c +@@ -191,6 +191,16 @@ static void vu_gpio_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) + { + VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } ++ + vhost_virtqueue_mask(&gpio->vhost_dev, vdev, idx, mask); + } + +diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c +index d21c72b401..d2b5519d5a 100644 +--- a/hw/virtio/vhost-vsock-common.c ++++ b/hw/virtio/vhost-vsock-common.c +@@ -127,6 +127,15 @@ static void vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx, + { + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of 
configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } + vhost_virtqueue_mask(&vvc->vhost_dev, vdev, idx, mask); + } + +@@ -135,6 +144,15 @@ static bool vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev, + { + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return vhost_virtqueue_pending(&vvc->vhost_dev, idx); + } + +diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c +index 97da74e719..516425e26a 100644 +--- a/hw/virtio/virtio-crypto.c ++++ b/hw/virtio/virtio-crypto.c +@@ -1182,6 +1182,15 @@ static void virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx, + + assert(vcrypto->vhost_started); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } + cryptodev_vhost_virtqueue_mask(vdev, queue, idx, mask); + } + +@@ -1192,6 +1201,15 @@ static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx) + + assert(vcrypto->vhost_started); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return cryptodev_vhost_virtqueue_pending(vdev, queue, idx); + } + +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index acfd4df125..1f4a41b958 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -79,6 +79,9 @@ typedef struct 
VirtQueueElement + + #define VIRTIO_NO_VECTOR 0xffff + ++/* special index value used internally for config irqs */ ++#define VIRTIO_CONFIG_IRQ_IDX -1 ++ + #define TYPE_VIRTIO_DEVICE "virtio-device" + OBJECT_DECLARE_TYPE(VirtIODevice, VirtioDeviceClass, VIRTIO_DEVICE) + +-- +2.31.1 + diff --git a/SOURCES/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch b/SOURCES/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch deleted file mode 100644 index 2a72cc7..0000000 --- a/SOURCES/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 643d9c28ff8b15c333cc748c5e712659ad2a257c Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Mon, 13 Jun 2022 14:10:10 +0800 -Subject: [PATCH 03/17] virtio-iommu: Add an assert check in translate routine - -RH-Author: Eric Auger -RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices -RH-Commit: [3/5] 19f309fd0beda40d65f51c454e37936658ac9f38 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2100106 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 - -With address space switch supported, dma access translation only -happen after endpoint is attached to a non-bypass domain. - -Signed-off-by: Zhenzhong Duan -Message-Id: <20220613061010.2674054-4-zhenzhong.duan@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 23b5f0ff6d923d3bca11cf44eed3daf7a0a836a8) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 440a1c28a7..e970d4d5a6 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -866,6 +866,10 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, - qemu_rec_mutex_lock(&s->mutex); - - ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); -+ -+ if (bypass_allowed) -+ assert(ep && ep->domain && !ep->domain->bypass); -+ - if (!ep) { - if (!bypass_allowed) { - error_report_once("%s sid=%d is not known!!", __func__, sid); --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch b/SOURCES/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch deleted file mode 100644 index 3352666..0000000 --- a/SOURCES/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch +++ /dev/null @@ -1,250 +0,0 @@ -From d60774ee3168eefb21a4120a38107cd36ae17e07 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Mon, 13 Jun 2022 14:10:08 +0800 -Subject: [PATCH 01/17] virtio-iommu: Add bypass mode support to assigned - device - -RH-Author: Eric Auger -RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices -RH-Commit: [1/5] 4777815533b31c7f4f09af8902e378fd3fc1186a (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2100106 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 - -Currently assigned devices can not work in virtio-iommu bypass mode. -Guest driver fails to probe the device due to DMA failure. And the -reason is because of lacking GPA -> HPA mappings when VM is created. 
- -Add a root container memory region to hold both bypass memory region -and iommu memory region, so the switch between them is supported -just like the implementation in virtual VT-d. - -Signed-off-by: Zhenzhong Duan -Message-Id: <20220613061010.2674054-2-zhenzhong.duan@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 90519b90539b16258d1d52b908b199f44877dc18) -Signed-off-by: Eric Auger ---- - hw/virtio/trace-events | 1 + - hw/virtio/virtio-iommu.c | 115 ++++++++++++++++++++++++++++++- - include/hw/virtio/virtio-iommu.h | 2 + - 3 files changed, 116 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index a5102eac9e..2ab5881b88 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -114,6 +114,7 @@ virtio_iommu_remap(const char *name, uint64_t virt_start, uint64_t virt_end, uin - virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "mr=%s old_mask=0x%"PRIx64" new_mask=0x%"PRIx64 - virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" - virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" -+virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" - - # virtio-mem.c - virtio_mem_send_response(uint16_t type) "type=%" PRIu16 -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 6d5ea0bdf1..5e99e6c62b 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -70,6 +70,77 @@ static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) - return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); - } - -+static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) -+{ -+ uint32_t sid; -+ bool bypassed; -+ VirtIOIOMMU *s = sdev->viommu; -+ VirtIOIOMMUEndpoint *ep; -+ -+ sid = virtio_iommu_get_bdf(sdev); -+ -+ qemu_mutex_lock(&s->mutex); -+ /* need to check bypass before system 
reset */ -+ if (!s->endpoints) { -+ bypassed = s->config.bypass; -+ goto unlock; -+ } -+ -+ ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); -+ if (!ep || !ep->domain) { -+ bypassed = s->config.bypass; -+ } else { -+ bypassed = ep->domain->bypass; -+ } -+ -+unlock: -+ qemu_mutex_unlock(&s->mutex); -+ return bypassed; -+} -+ -+/* Return whether the device is using IOMMU translation. */ -+static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev) -+{ -+ bool use_remapping; -+ -+ assert(sdev); -+ -+ use_remapping = !virtio_iommu_device_bypassed(sdev); -+ -+ trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus), -+ PCI_SLOT(sdev->devfn), -+ PCI_FUNC(sdev->devfn), -+ use_remapping); -+ -+ /* Turn off first then on the other */ -+ if (use_remapping) { -+ memory_region_set_enabled(&sdev->bypass_mr, false); -+ memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true); -+ } else { -+ memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false); -+ memory_region_set_enabled(&sdev->bypass_mr, true); -+ } -+ -+ return use_remapping; -+} -+ -+static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s) -+{ -+ GHashTableIter iter; -+ IOMMUPciBus *iommu_pci_bus; -+ int i; -+ -+ g_hash_table_iter_init(&iter, s->as_by_busptr); -+ while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) { -+ for (i = 0; i < PCI_DEVFN_MAX; i++) { -+ if (!iommu_pci_bus->pbdev[i]) { -+ continue; -+ } -+ virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]); -+ } -+ } -+} -+ - /** - * The bus number is used for lookup when SID based operations occur. 
- * In that case we lazily populate the IOMMUPciBus array from the bus hash -@@ -214,6 +285,7 @@ static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value, - static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) - { - VirtIOIOMMUDomain *domain = ep->domain; -+ IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr); - - if (!ep->domain) { - return; -@@ -222,6 +294,7 @@ static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) - ep->iommu_mr); - QLIST_REMOVE(ep, next); - ep->domain = NULL; -+ virtio_iommu_switch_address_space(sdev); - } - - static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s, -@@ -324,12 +397,39 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, - - trace_virtio_iommu_init_iommu_mr(name); - -+ memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX); -+ address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU); -+ -+ /* -+ * Build the IOMMU disabled container with aliases to the -+ * shared MRs. Note that aliasing to a shared memory region -+ * could help the memory API to detect same FlatViews so we -+ * can have devices to share the same FlatView when in bypass -+ * mode. (either by not configuring virtio-iommu driver or with -+ * "iommu=pt"). It will greatly reduce the total number of -+ * FlatViews of the system hence VM runs faster. 
-+ */ -+ memory_region_init_alias(&sdev->bypass_mr, OBJECT(s), -+ "system", get_system_memory(), 0, -+ memory_region_size(get_system_memory())); -+ - memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr), - TYPE_VIRTIO_IOMMU_MEMORY_REGION, - OBJECT(s), name, - UINT64_MAX); -- address_space_init(&sdev->as, -- MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU); -+ -+ /* -+ * Hook both the containers under the root container, we -+ * switch between iommu & bypass MRs by enable/disable -+ * corresponding sub-containers -+ */ -+ memory_region_add_subregion_overlap(&sdev->root, 0, -+ MEMORY_REGION(&sdev->iommu_mr), -+ 0); -+ memory_region_add_subregion_overlap(&sdev->root, 0, -+ &sdev->bypass_mr, 0); -+ -+ virtio_iommu_switch_address_space(sdev); - g_free(name); - } - return &sdev->as; -@@ -343,6 +443,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s, - uint32_t flags = le32_to_cpu(req->flags); - VirtIOIOMMUDomain *domain; - VirtIOIOMMUEndpoint *ep; -+ IOMMUDevice *sdev; - - trace_virtio_iommu_attach(domain_id, ep_id); - -@@ -376,6 +477,8 @@ static int virtio_iommu_attach(VirtIOIOMMU *s, - QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next); - - ep->domain = domain; -+ sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr); -+ virtio_iommu_switch_address_space(sdev); - - /* Replay domain mappings on the associated memory region */ - g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb, -@@ -888,6 +991,7 @@ static void virtio_iommu_set_config(VirtIODevice *vdev, - return; - } - dev_config->bypass = in_config->bypass; -+ virtio_iommu_switch_address_space_all(dev); - } - - trace_virtio_iommu_set_config(in_config->bypass); -@@ -1027,6 +1131,8 @@ static void virtio_iommu_system_reset(void *opaque) - * system reset - */ - s->config.bypass = s->boot_bypass; -+ virtio_iommu_switch_address_space_all(s); -+ - } - - static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) -@@ -1043,6 +1149,11 @@ static void virtio_iommu_device_realize(DeviceState 
*dev, Error **errp) - virtio_iommu_handle_command); - s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL); - -+ /* -+ * config.bypass is needed to get initial address space early, such as -+ * in vfio realize -+ */ -+ s->config.bypass = s->boot_bypass; - s->config.page_size_mask = TARGET_PAGE_MASK; - s->config.input_range.end = UINT64_MAX; - s->config.domain_range.end = UINT32_MAX; -diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h -index 84391f8448..102eeefa73 100644 ---- a/include/hw/virtio/virtio-iommu.h -+++ b/include/hw/virtio/virtio-iommu.h -@@ -37,6 +37,8 @@ typedef struct IOMMUDevice { - int devfn; - IOMMUMemoryRegion iommu_mr; - AddressSpace as; -+ MemoryRegion root; /* The root container of the device */ -+ MemoryRegion bypass_mr; /* The alias of shared memory MR */ - } IOMMUDevice; - - typedef struct IOMMUPciBus { --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-iommu-Fix-migration-regression.patch b/SOURCES/kvm-virtio-iommu-Fix-migration-regression.patch deleted file mode 100644 index f5ae4d6..0000000 --- a/SOURCES/kvm-virtio-iommu-Fix-migration-regression.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 8d45902b4884315ec090e607e9f03606b21001cf Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Fri, 24 Jun 2022 17:37:40 +0800 -Subject: [PATCH 05/17] virtio-iommu: Fix migration regression - -RH-Author: Eric Auger -RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices -RH-Commit: [5/5] 9652c4aaaf88e24083fab1fbc3d1423260c93ca6 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2100106 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 - -We also need to switch to the right address space on dest side -after loading the device status. DMA to wrong address space is -destructive. 
- -Fixes: 3facd774962fd ("virtio-iommu: Add bypass mode support to assigned device") -Suggested-by: Eric Auger -Signed-off-by: Zhenzhong Duan -Message-Id: <20220624093740.3525267-1-zhenzhong.duan@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Eric Auger -(cherry picked from commit d355566bd958e24e7e384da6ea89a9fc88d7bfed) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 44a041dec9..2012835554 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -1324,6 +1324,14 @@ static int iommu_post_load(void *opaque, int version_id) - VirtIOIOMMU *s = opaque; - - g_tree_foreach(s->domains, reconstruct_endpoints, s); -+ -+ /* -+ * Memory regions are dynamically turned on/off depending on -+ * 'config.bypass' and attached domain type if there is. After -+ * migration, we need to make sure the memory regions are -+ * still correct. 
-+ */ -+ virtio_iommu_switch_address_space_all(s); - return 0; - } - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch b/SOURCES/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch deleted file mode 100644 index 7747bfe..0000000 --- a/SOURCES/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch +++ /dev/null @@ -1,67 +0,0 @@ -From b681247c29b59af40c86f8f0ae5709138ae9bf1a Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 23 Jun 2022 10:31:52 +0800 -Subject: [PATCH 04/17] virtio-iommu: Fix the partial copy of probe request - -RH-Author: Eric Auger -RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices -RH-Commit: [4/5] c402164414a8e69bbb6df20af3c2b6d2589d6f3e (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2100106 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 - -The structure of probe request doesn't include the tail, this leads -to a few field missed to be copied. Currently this isn't an issue as -those missed field belong to reserved field, just in case reserved -field will be used in the future. - -Changed 4th parameter of virtio_iommu_iov_to_req() to receive size -of device-readable part. - -Fixes: 1733eebb9e75b ("virtio-iommu: Implement RESV_MEM probe request") -Signed-off-by: Zhenzhong Duan -Message-Id: <20220623023152.3473231-1-zhenzhong.duan@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Reviewed-by: Jean-Philippe Brucker -Reviewed-by: Eric Auger -(cherry picked from commit 45461aace83d961e933b27519b81d17b4c690514) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index e970d4d5a6..44a041dec9 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -676,11 +676,10 @@ static int virtio_iommu_probe(VirtIOIOMMU *s, - - static int virtio_iommu_iov_to_req(struct iovec *iov, - unsigned int iov_cnt, -- void *req, size_t req_sz) -+ void *req, size_t payload_sz) - { -- size_t sz, payload_sz = req_sz - sizeof(struct virtio_iommu_req_tail); -+ size_t sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz); - -- sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz); - if (unlikely(sz != payload_sz)) { - return VIRTIO_IOMMU_S_INVAL; - } -@@ -693,7 +692,8 @@ static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s, \ - unsigned int iov_cnt) \ - { \ - struct virtio_iommu_req_ ## __req req; \ -- int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); \ -+ int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, \ -+ sizeof(req) - sizeof(struct virtio_iommu_req_tail));\ - \ - return ret ? 
ret : virtio_iommu_ ## __req(s, &req); \ - } --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch b/SOURCES/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch deleted file mode 100644 index df961b0..0000000 --- a/SOURCES/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 881c999e302e7ee1212b47c523a2cf442c549417 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Mon, 13 Jun 2022 14:10:09 +0800 -Subject: [PATCH 02/17] virtio-iommu: Use recursive lock to avoid deadlock - -RH-Author: Eric Auger -RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices -RH-Commit: [2/5] 67dce1eecb49555f728f119f8efac00417ff65bf (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2100106 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 - -When switching address space with mutex lock hold, mapping will be -replayed for assigned device. This will trigger relock deadlock. - -Also release the mutex resource in unrealize routine. - -Signed-off-by: Zhenzhong Duan -Message-Id: <20220613061010.2674054-3-zhenzhong.duan@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 08f2030a2e46f1e93d186b3a683e5caef1df562b) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 20 +++++++++++--------- - include/hw/virtio/virtio-iommu.h | 2 +- - 2 files changed, 12 insertions(+), 10 deletions(-) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 5e99e6c62b..440a1c28a7 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -79,7 +79,7 @@ static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) - - sid = virtio_iommu_get_bdf(sdev); - -- qemu_mutex_lock(&s->mutex); -+ qemu_rec_mutex_lock(&s->mutex); - /* need to check bypass before system reset */ - if (!s->endpoints) { - bypassed = s->config.bypass; -@@ -94,7 +94,7 @@ static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) - } - - unlock: -- qemu_mutex_unlock(&s->mutex); -+ qemu_rec_mutex_unlock(&s->mutex); - return bypassed; - } - -@@ -746,7 +746,7 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) - tail.status = VIRTIO_IOMMU_S_DEVERR; - goto out; - } -- qemu_mutex_lock(&s->mutex); -+ qemu_rec_mutex_lock(&s->mutex); - switch (head.type) { - case VIRTIO_IOMMU_T_ATTACH: - tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt); -@@ -775,7 +775,7 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) - default: - tail.status = VIRTIO_IOMMU_S_UNSUPP; - } -- qemu_mutex_unlock(&s->mutex); -+ qemu_rec_mutex_unlock(&s->mutex); - - out: - sz = iov_from_buf(elem->in_sg, elem->in_num, 0, -@@ -863,7 +863,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, - sid = virtio_iommu_get_bdf(sdev); - - trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag); -- qemu_mutex_lock(&s->mutex); -+ qemu_rec_mutex_lock(&s->mutex); - - ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); - if (!ep) { -@@ -947,7 +947,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, - 
trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid); - - unlock: -- qemu_mutex_unlock(&s->mutex); -+ qemu_rec_mutex_unlock(&s->mutex); - return entry; - } - -@@ -1036,7 +1036,7 @@ static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) - - sid = virtio_iommu_get_bdf(sdev); - -- qemu_mutex_lock(&s->mutex); -+ qemu_rec_mutex_lock(&s->mutex); - - if (!s->endpoints) { - goto unlock; -@@ -1050,7 +1050,7 @@ static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) - g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr); - - unlock: -- qemu_mutex_unlock(&s->mutex); -+ qemu_rec_mutex_unlock(&s->mutex); - } - - static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr, -@@ -1169,7 +1169,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) - virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE); - virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG); - -- qemu_mutex_init(&s->mutex); -+ qemu_rec_mutex_init(&s->mutex); - - s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); - -@@ -1197,6 +1197,8 @@ static void virtio_iommu_device_unrealize(DeviceState *dev) - g_tree_destroy(s->endpoints); - } - -+ qemu_rec_mutex_destroy(&s->mutex); -+ - virtio_delete_queue(s->req_vq); - virtio_delete_queue(s->event_vq); - virtio_cleanup(vdev); -diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h -index 102eeefa73..2ad5ee320b 100644 ---- a/include/hw/virtio/virtio-iommu.h -+++ b/include/hw/virtio/virtio-iommu.h -@@ -58,7 +58,7 @@ struct VirtIOIOMMU { - ReservedRegion *reserved_regions; - uint32_t nb_reserved_regions; - GTree *domains; -- QemuMutex mutex; -+ QemuRecMutex mutex; - GTree *endpoints; - bool boot_bypass; - }; --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-mmio-add-support-for-configure-interrupt.patch b/SOURCES/kvm-virtio-mmio-add-support-for-configure-interrupt.patch new file mode 100644 index 0000000..275b197 --- /dev/null +++ 
b/SOURCES/kvm-virtio-mmio-add-support-for-configure-interrupt.patch @@ -0,0 +1,80 @@ +From 181705090c9963c2da97811838ace5bb058737c6 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:50 +0800 +Subject: [PATCH 09/31] virtio-mmio: add support for configure interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [9/10] 742cc2b425ffd7bbd393772526e7481446ee131c (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add configure interrupt support in virtio-mmio bus. +add function to set configure guest notifier. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-10-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit cd336e834620ea78edef049c3567f312974e475b) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio-mmio.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c +index d240efef97..103260ec15 100644 +--- a/hw/virtio/virtio-mmio.c ++++ b/hw/virtio/virtio-mmio.c +@@ -670,7 +670,30 @@ static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign, + + return 0; + } ++static int virtio_mmio_set_config_guest_notifier(DeviceState *d, bool assign, ++ bool with_irqfd) ++{ ++ VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); ++ VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); ++ EventNotifier *notifier = virtio_config_get_guest_notifier(vdev); ++ int r = 0; + ++ if (assign) { ++ r = event_notifier_init(notifier, 0); ++ if (r < 0) { ++ return r; ++ } ++ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); ++ } else { 
++ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); ++ event_notifier_cleanup(notifier); ++ } ++ if (vdc->guest_notifier_mask && vdev->use_guest_notifier_mask) { ++ vdc->guest_notifier_mask(vdev, VIRTIO_CONFIG_IRQ_IDX, !assign); ++ } ++ return r; ++} + static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs, + bool assign) + { +@@ -692,6 +715,10 @@ static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs, + goto assign_error; + } + } ++ r = virtio_mmio_set_config_guest_notifier(d, assign, with_irqfd); ++ if (r < 0) { ++ goto assign_error; ++ } + + return 0; + +-- +2.31.1 + diff --git a/SOURCES/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch b/SOURCES/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch deleted file mode 100644 index 4ae4cc4..0000000 --- a/SOURCES/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch +++ /dev/null @@ -1,69 +0,0 @@ -From dffe24d5c1f5a4676e9d2a5bc032effd420b008f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 08/32] virtio-net: Expose MAC_TABLE_ENTRIES -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [8/27] 5c3b96215ddf853cafc594da47f57d7e157db4ee (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 6758c01f054c2a842d41d927d628b09f649d3254 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:27 2022 +0200 - - virtio-net: Expose MAC_TABLE_ENTRIES - - vhost-vdpa control virtqueue needs to know the maximum entries supported - by the virtio-net device, so we know if it is possible to apply the - filter. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/net/virtio-net.c | 1 - - include/hw/virtio/virtio-net.h | 3 +++ - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 633de61513..2a127f0a3b 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -49,7 +49,6 @@ - - #define VIRTIO_NET_VM_VERSION 11 - --#define MAC_TABLE_ENTRIES 64 - #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ - - /* previously fixed value */ -diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h -index eb87032627..cce1c554f7 100644 ---- a/include/hw/virtio/virtio-net.h -+++ b/include/hw/virtio/virtio-net.h -@@ -35,6 +35,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET) - * and latency. */ - #define TX_BURST 256 - -+/* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. */ -+#define MAC_TABLE_ENTRIES 64 -+ - typedef struct virtio_net_conf - { - uint32_t txtimer; --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch b/SOURCES/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch deleted file mode 100644 index b4b9012..0000000 --- a/SOURCES/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 49e91b34b62f5da147fa2fb80d203dd675c48f64 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 09/32] virtio-net: Expose ctrl virtqueue logic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [9/27] c4ab1e35f4ca728df82a687763c662369282c513 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 
640b8a1c588b56349b3307d88459ea1cd86181fb -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:28 2022 +0200 - - virtio-net: Expose ctrl virtqueue logic - - This allows external vhost-net devices to modify the state of the - VirtIO device model once the vhost-vdpa device has acknowledged the - control commands. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/net/virtio-net.c | 84 ++++++++++++++++++++-------------- - include/hw/virtio/virtio-net.h | 4 ++ - 2 files changed, 53 insertions(+), 35 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 2a127f0a3b..59bedba681 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -1433,57 +1433,71 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, - return VIRTIO_NET_OK; - } - --static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) -+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, -+ const struct iovec *in_sg, unsigned in_num, -+ const struct iovec *out_sg, -+ unsigned out_num) - { - VirtIONet *n = VIRTIO_NET(vdev); - struct virtio_net_ctrl_hdr ctrl; - virtio_net_ctrl_ack status = VIRTIO_NET_ERR; -- VirtQueueElement *elem; - size_t s; - struct iovec *iov, *iov2; -- unsigned int iov_cnt; -+ -+ if (iov_size(in_sg, in_num) < sizeof(status) || -+ iov_size(out_sg, out_num) < sizeof(ctrl)) { -+ virtio_error(vdev, "virtio-net ctrl missing headers"); -+ return 0; -+ } -+ -+ iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); -+ s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); -+ iov_discard_front(&iov, &out_num, sizeof(ctrl)); -+ if (s != sizeof(ctrl)) { -+ status = VIRTIO_NET_ERR; -+ } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { -+ status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); -+ } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { -+ status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); -+ } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { -+ status 
= virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num); -+ } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { -+ status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); -+ } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { -+ status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); -+ } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { -+ status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); -+ } -+ -+ s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); -+ assert(s == sizeof(status)); -+ -+ g_free(iov2); -+ return sizeof(status); -+} -+ -+static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) -+{ -+ VirtQueueElement *elem; - - for (;;) { -+ size_t written; - elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); - if (!elem) { - break; - } -- if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) || -- iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) { -- virtio_error(vdev, "virtio-net ctrl missing headers"); -+ -+ written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, -+ elem->out_sg, elem->out_num); -+ if (written > 0) { -+ virtqueue_push(vq, elem, written); -+ virtio_notify(vdev, vq); -+ g_free(elem); -+ } else { - virtqueue_detach_element(vq, elem, 0); - g_free(elem); - break; - } -- -- iov_cnt = elem->out_num; -- iov2 = iov = g_memdup2(elem->out_sg, -- sizeof(struct iovec) * elem->out_num); -- s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); -- iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); -- if (s != sizeof(ctrl)) { -- status = VIRTIO_NET_ERR; -- } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { -- status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt); -- } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { -- status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt); -- } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { -- status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt); -- } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { -- status = virtio_net_handle_announce(n, 
ctrl.cmd, iov, iov_cnt); -- } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { -- status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt); -- } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { -- status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt); -- } -- -- s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status)); -- assert(s == sizeof(status)); -- -- virtqueue_push(vq, elem, sizeof(status)); -- virtio_notify(vdev, vq); -- g_free(iov2); -- g_free(elem); - } - } - -diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h -index cce1c554f7..ef234ffe7e 100644 ---- a/include/hw/virtio/virtio-net.h -+++ b/include/hw/virtio/virtio-net.h -@@ -221,6 +221,10 @@ struct VirtIONet { - struct EBPFRSSContext ebpf_rss; - }; - -+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, -+ const struct iovec *in_sg, unsigned in_num, -+ const struct iovec *out_sg, -+ unsigned out_num); - void virtio_net_set_netclient_name(VirtIONet *n, const char *name, - const char *type); - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-net-add-support-for-configure-interrupt.patch b/SOURCES/kvm-virtio-net-add-support-for-configure-interrupt.patch new file mode 100644 index 0000000..74b956a --- /dev/null +++ b/SOURCES/kvm-virtio-net-add-support-for-configure-interrupt.patch @@ -0,0 +1,115 @@ +From 2b8e3409edb8a17d89c3829cfa3d92bdfdd43c53 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:49 +0800 +Subject: [PATCH 08/31] virtio-net: add support for configure interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/10] 1b125169bea6c81c508b154fa1bae68af153b312 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add functions to support configure 
interrupt in virtio_net +Add the functions to support vhost_net_config_pending +and vhost_net_config_mask. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-9-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 8aab0d1dbe90c7b5ac6672a1a09b0578178f5f4c) +Signed-off-by: Cindy Lu +--- + hw/net/vhost_net-stub.c | 9 +++++++++ + hw/net/vhost_net.c | 9 +++++++++ + hw/net/virtio-net.c | 4 ++-- + include/net/vhost_net.h | 2 ++ + 4 files changed, 22 insertions(+), 2 deletions(-) + +diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c +index 9f7daae99c..c36f258201 100644 +--- a/hw/net/vhost_net-stub.c ++++ b/hw/net/vhost_net-stub.c +@@ -82,6 +82,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + { + } + ++bool vhost_net_config_pending(VHostNetState *net) ++{ ++ return false; ++} ++ ++void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) ++{ ++} ++ + int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) + { + return -1; +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 043058ff43..6a55f5a473 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -478,6 +478,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + vhost_virtqueue_mask(&net->dev, dev, idx, mask); + } + ++bool vhost_net_config_pending(VHostNetState *net) ++{ ++ return vhost_config_pending(&net->dev); ++} ++ ++void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) ++{ ++ vhost_config_mask(&net->dev, dev, mask); ++} + VHostNetState *get_vhost_net(NetClientState *nc) + { + VHostNetState *vhost_net = 0; +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index bee35d6f9f..ec974f7a76 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3323,7 +3323,7 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) + */ + + if (idx == 
VIRTIO_CONFIG_IRQ_IDX) { +- return false; ++ return vhost_net_config_pending(get_vhost_net(nc->peer)); + } + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); + } +@@ -3355,9 +3355,9 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, + */ + + if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask); + return; + } +- + vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); + } + +diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h +index 40b9a40074..dbbd0dc04e 100644 +--- a/include/net/vhost_net.h ++++ b/include/net/vhost_net.h +@@ -39,6 +39,8 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, + bool vhost_net_virtqueue_pending(VHostNetState *net, int n); + void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + int idx, bool mask); ++bool vhost_net_config_pending(VHostNetState *net); ++void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask); + int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr); + VHostNetState *get_vhost_net(NetClientState *nc); + +-- +2.31.1 + diff --git a/SOURCES/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch b/SOURCES/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch deleted file mode 100644 index 9da7ea7..0000000 --- a/SOURCES/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch +++ /dev/null @@ -1,143 +0,0 @@ -From 316b73277de233c7a9b6917077c00d7012060944 Mon Sep 17 00:00:00 2001 -From: Si-Wei Liu -Date: Fri, 6 May 2022 19:28:13 -0700 -Subject: [PATCH 09/16] virtio-net: align ctrl_vq index for non-mq guest for - vhost_vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [2/7] 7f764bbb579c7b473ad67fc25b46e698d277e781 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 -RH-Acked-by: Eugenio Pérez 
-RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu - -With MQ enabled vdpa device and non-MQ supporting guest e.g. -booting vdpa with mq=on over OVMF of single vqp, below assert -failure is seen: - -../hw/virtio/vhost-vdpa.c:560: vhost_vdpa_get_vq_index: Assertion `idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs' failed. - -0 0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6 -1 0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6 -2 0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6 -3 0x00007f8ce3fec252 in () at /lib64/libc.so.6 -4 0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=, idx=) at ../hw/virtio/vhost-vdpa.c:563 -5 0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=, idx=) at ../hw/virtio/vhost-vdpa.c:558 -6 0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=) at ../hw/virtio/vhost.c:1557 -7 0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false) - at ../hw/virtio/virtio-pci.c:974 -8 0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019 -9 0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1) - at ../hw/net/vhost_net.c:361 -10 0x0000558f52d4e5e7 in virtio_net_set_status (status=, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289 -11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370 -12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945 -13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=, val=, size=) at ../hw/virtio/virtio-pci.c:1292 -14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=, size=1, shift=, mask=, attrs=...) 
- at ../softmmu/memory.c:492 -15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=, access_size_max=, access_fn=0x558f52d15cf0 , mr=0x558f568f19d0, attrs=...) at ../softmmu/memory.c:554 -16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=, op=, attrs=attrs@entry=...) - at ../softmmu/memory.c:1504 -17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=, l=, mr=0x558f568f19d0) at /home/opc/qemu-upstream/include/qemu/host-utils.h:165 -18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822 -19 0x0000558f52d0b36b in address_space_write (as=, addr=, attrs=..., buf=buf@entry=0x7f8ce6300028, len=) - at ../softmmu/physmem.c:2914 -20 0x0000558f52d0b3da in address_space_rw (as=, addr=, attrs=..., - attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=, is_write=) at ../softmmu/physmem.c:2924 -21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903 -22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49 -23 0x0000558f52f9f04a in qemu_thread_start (args=) at ../util/qemu-thread-posix.c:556 -24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0 -25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6 - -The cause for the assert failure is due to that the vhost_dev index -for the ctrl vq was not aligned with actual one in use by the guest. -Upon multiqueue feature negotiation in virtio_net_set_multiqueue(), -if guest doesn't support multiqueue, the guest vq layout would shrink -to a single queue pair, consisting of 3 vqs in total (rx, tx and ctrl). -This results in ctrl_vq taking a different vhost_dev group index than -the default. 
We can map vq to the correct vhost_dev group by checking -if MQ is supported by guest and successfully negotiated. Since the -MQ feature is only present along with CTRL_VQ, we ensure the index -2 is only meant for the control vq while MQ is not supported by guest. - -Fixes: 22288fe ("virtio-net: vhost control virtqueue support") -Suggested-by: Jason Wang -Signed-off-by: Si-Wei Liu -Acked-by: Jason Wang -Message-Id: <1651890498-24478-3-git-send-email-si-wei.liu@oracle.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 68b0a6395f36a8f48f56f46d05f30be2067598b0) -Signed-off-by: Jason Wang ---- - hw/net/virtio-net.c | 33 +++++++++++++++++++++++++++++++-- - 1 file changed, 31 insertions(+), 2 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index ffb3475201..f0bb29c741 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -14,6 +14,7 @@ - #include "qemu/osdep.h" - #include "qemu/atomic.h" - #include "qemu/iov.h" -+#include "qemu/log.h" - #include "qemu/main-loop.h" - #include "qemu/module.h" - #include "hw/virtio/virtio.h" -@@ -3171,8 +3172,22 @@ static NetClientInfo net_virtio_info = { - static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) - { - VirtIONet *n = VIRTIO_NET(vdev); -- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); -+ NetClientState *nc; - assert(n->vhost_started); -+ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { -+ /* Must guard against invalid features and bogus queue index -+ * from being set by malicious guest, or penetrated through -+ * buggy migration stream. 
-+ */ -+ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { -+ qemu_log_mask(LOG_GUEST_ERROR, -+ "%s: bogus vq index ignored\n", __func__); -+ return false; -+ } -+ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); -+ } else { -+ nc = qemu_get_subqueue(n->nic, vq2q(idx)); -+ } - return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); - } - -@@ -3180,8 +3195,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, - bool mask) - { - VirtIONet *n = VIRTIO_NET(vdev); -- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); -+ NetClientState *nc; - assert(n->vhost_started); -+ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { -+ /* Must guard against invalid features and bogus queue index -+ * from being set by malicious guest, or penetrated through -+ * buggy migration stream. -+ */ -+ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { -+ qemu_log_mask(LOG_GUEST_ERROR, -+ "%s: bogus vq index ignored\n", __func__); -+ return; -+ } -+ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); -+ } else { -+ nc = qemu_get_subqueue(n->nic, vq2q(idx)); -+ } - vhost_net_virtqueue_mask(get_vhost_net(nc->peer), - vdev, idx, mask); - } --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch b/SOURCES/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch deleted file mode 100644 index 3930cc2..0000000 --- a/SOURCES/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch +++ /dev/null @@ -1,109 +0,0 @@ -From 521a1953bc11ab6823dcbbee773bcf86e926a9e7 Mon Sep 17 00:00:00 2001 -From: Si-Wei Liu -Date: Fri, 6 May 2022 19:28:18 -0700 -Subject: [PATCH 14/16] virtio-net: don't handle mq request in userspace - handler for vhost-vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [7/7] 9781cab45448ae16a00fbf10cf7995df6b984a0a 
(jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu - -virtio_queue_host_notifier_read() tends to read pending event -left behind on ioeventfd in the vhost_net_stop() path, and -attempts to handle outstanding kicks from userspace vq handler. -However, in the ctrl_vq handler, virtio_net_handle_mq() has a -recursive call into virtio_net_set_status(), which may lead to -segmentation fault as shown in below stack trace: - -0 0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at ../hw/core/qdev.c:376 -1 0x000055f800c68ad8 in virtio_bus_device_iommu_enabled (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331 -2 0x000055f800d70d7f in vhost_memory_unmap (dev=) at ../hw/virtio/vhost.c:318 -3 0x000055f800d70d7f in vhost_memory_unmap (dev=, buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at ../hw/virtio/vhost.c:336 -4 0x000055f800d71867 in vhost_virtqueue_stop (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590, vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241 -5 0x000055f800d7406c in vhost_dev_stop (hdev=hdev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839 -6 0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315 -7 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) - at ../hw/net/vhost_net.c:423 -8 0x000055f800d4e628 in virtio_net_set_status (status=, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296 -9 0x000055f800d4e628 in virtio_net_set_status (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370 -10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=, iov=, cmd=0 '\000', n=0x55f8044ec590) at ../hw/net/virtio-net.c:1408 -11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590, vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452 -12 
0x000055f800d69f37 in virtio_queue_host_notifier_read (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331 -13 0x000055f800d69f37 in virtio_queue_host_notifier_read (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575 -14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier (bus=, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312 -15 0x000055f800d73106 in vhost_dev_disable_notifiers (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590) - at ../../../include/hw/virtio/virtio-bus.h:35 -16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316 -17 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) - at ../hw/net/vhost_net.c:423 -18 0x000055f800d4e628 in virtio_net_set_status (status=, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296 -19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370 -20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590, val=) at ../hw/virtio/virtio.c:1945 -21 0x000055f800d11d9d in vm_state_notify (running=running@entry=false, state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333 -22 0x000055f800d04e7a in do_vm_stop (state=state@entry=RUN_STATE_SHUTDOWN, send_stop=send_stop@entry=false) at ../softmmu/cpus.c:262 -23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280 -24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812 -25 0x000055f800ad5b13 in main (argc=, argv=, envp=) at ../softmmu/main.c:51 - -For now, temporarily disable handling MQ request from the ctrl_vq -userspace hanlder to avoid the recursive virtio_net_set_status() -call. Some rework is needed to allow changing the number of -queues without going through a full virtio_net_set_status cycle, -particularly for vhost-vdpa backend. 
- -This patch will need to be reverted as soon as future patches of -having the change of #queues handled in userspace is merged. - -Fixes: 402378407db ("vhost-vdpa: multiqueue support") -Signed-off-by: Si-Wei Liu -Acked-by: Jason Wang -Message-Id: <1651890498-24478-8-git-send-email-si-wei.liu@oracle.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 2a7888cc3aa31faee839fa5dddad354ff8941f4c) -Signed-off-by: Jason Wang ---- - hw/net/virtio-net.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index f0bb29c741..099e65036d 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -1381,6 +1381,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, - { - VirtIODevice *vdev = VIRTIO_DEVICE(n); - uint16_t queue_pairs; -+ NetClientState *nc = qemu_get_queue(n->nic); - - virtio_net_disable_rss(n); - if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { -@@ -1412,6 +1413,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, - return VIRTIO_NET_ERR; - } - -+ /* Avoid changing the number of queue_pairs for vdpa device in -+ * userspace handler. A future fix is needed to handle the mq -+ * change in userspace handler with vhost-vdpa. Let's disable -+ * the mq handling from userspace for now and only allow get -+ * done through the kernel. Ripples may be seen when falling -+ * back to userspace, but without doing it qemu process would -+ * crash on a recursive entry to virtio_net_set_status(). 
-+ */ -+ if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { -+ return VIRTIO_NET_ERR; -+ } -+ - n->curr_queue_pairs = queue_pairs; - /* stop the backend before changing the number of queue_pairs to avoid handling a - * disabled queue */ --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch b/SOURCES/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch deleted file mode 100644 index f6072d2..0000000 --- a/SOURCES/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 9e737aba614e94da4458f02d4ff97e95ffffd19f Mon Sep 17 00:00:00 2001 -From: Si-Wei Liu -Date: Fri, 6 May 2022 19:28:12 -0700 -Subject: [PATCH 08/16] virtio-net: setup vhost_dev and notifiers for cvq only - when feature is negotiated -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [1/7] a5c5a2862b2e4d15ef7c09da3e4234fdef37cc66 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu - -When the control virtqueue feature is absent or not negotiated, -vhost_net_start() still tries to set up vhost_dev and install -vhost notifiers for the control virtqueue, which results in -erroneous ioctl calls with incorrect queue index sending down -to driver. Do that only when needed. - -Fixes: 22288fe ("virtio-net: vhost control virtqueue support") -Signed-off-by: Si-Wei Liu -Acked-by: Jason Wang -Message-Id: <1651890498-24478-2-git-send-email-si-wei.liu@oracle.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit aa8581945a13712ff3eed0ad3ba7a9664fc1604b) -Signed-off-by: Jason Wang ---- - hw/net/virtio-net.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 1067e72b39..ffb3475201 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -245,7 +245,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) - VirtIODevice *vdev = VIRTIO_DEVICE(n); - NetClientState *nc = qemu_get_queue(n->nic); - int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; -- int cvq = n->max_ncs - n->max_queue_pairs; -+ int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? -+ n->max_ncs - n->max_queue_pairs : 0; - - if (!get_vhost_net(nc->peer)) { - return; --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-pci-add-support-for-configure-interrupt.patch b/SOURCES/kvm-virtio-pci-add-support-for-configure-interrupt.patch new file mode 100644 index 0000000..14070a4 --- /dev/null +++ b/SOURCES/kvm-virtio-pci-add-support-for-configure-interrupt.patch @@ -0,0 +1,274 @@ +From 61ac1476d3820c97e1cc103af422b17bc94c6ca5 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:51 +0800 +Subject: [PATCH 10/31] virtio-pci: add support for configure interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [10/10] ebd6a11d7699660d8ac5a4e44a790f823daea57c (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add process to handle the configure interrupt, The function's +logic is the same with vq interrupt.Add extra process to check +the configure interrupt + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-11-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. 
Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 1680542862edd963e6380dd4121a5e85df55581f) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio-pci.c | 118 +++++++++++++++++++++++++++------ + include/hw/virtio/virtio-pci.h | 4 +- + 2 files changed, 102 insertions(+), 20 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index ec816ea367..3f00e91718 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -751,7 +751,8 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, + VirtQueue *vq; + + if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { +- return -1; ++ *n = virtio_config_get_guest_notifier(vdev); ++ *vector = vdev->config_vector; + } else { + if (!virtio_queue_get_num(vdev, queue_no)) { + return -1; +@@ -811,7 +812,7 @@ undo: + } + return ret; + } +-static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) ++static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) + { + int queue_no; + int ret = 0; +@@ -826,6 +827,10 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) + return ret; + } + ++static int kvm_virtio_pci_vector_config_use(VirtIOPCIProxy *proxy) ++{ ++ return kvm_virtio_pci_vector_use_one(proxy, VIRTIO_CONFIG_IRQ_IDX); ++} + + static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, + int queue_no) +@@ -850,7 +855,7 @@ static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, + kvm_virtio_pci_vq_vector_release(proxy, vector); + } + +-static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) ++static void kvm_virtio_pci_vector_vq_release(VirtIOPCIProxy *proxy, int nvqs) + { + int queue_no; + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); +@@ -863,6 +868,11 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) + } + } + ++static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy) ++{ ++ kvm_virtio_pci_vector_release_one(proxy, 
VIRTIO_CONFIG_IRQ_IDX); ++} ++ + static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector, +@@ -944,9 +954,19 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, + } + vq = virtio_vector_next_queue(vq); + } +- ++ /* unmask config intr */ ++ if (vector == vdev->config_vector) { ++ n = virtio_config_get_guest_notifier(vdev); ++ ret = virtio_pci_one_vector_unmask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, ++ msg, n); ++ if (ret < 0) { ++ goto undo_config; ++ } ++ } + return 0; +- ++undo_config: ++ n = virtio_config_get_guest_notifier(vdev); ++ virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n); + undo: + vq = virtio_vector_first_queue(vdev, vector); + while (vq && unmasked >= 0) { +@@ -980,6 +1000,11 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) + } + vq = virtio_vector_next_queue(vq); + } ++ ++ if (vector == vdev->config_vector) { ++ n = virtio_config_get_guest_notifier(vdev); ++ virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n); ++ } + } + + static void virtio_pci_vector_poll(PCIDevice *dev, +@@ -1011,6 +1036,34 @@ static void virtio_pci_vector_poll(PCIDevice *dev, + msix_set_pending(dev, vector); + } + } ++ /* poll the config intr */ ++ ret = virtio_pci_get_notifier(proxy, VIRTIO_CONFIG_IRQ_IDX, &notifier, ++ &vector); ++ if (ret < 0) { ++ return; ++ } ++ if (vector < vector_start || vector >= vector_end || ++ !msix_is_masked(dev, vector)) { ++ return; ++ } ++ if (k->guest_notifier_pending) { ++ if (k->guest_notifier_pending(vdev, VIRTIO_CONFIG_IRQ_IDX)) { ++ msix_set_pending(dev, vector); ++ } ++ } else if (event_notifier_test_and_clear(notifier)) { ++ msix_set_pending(dev, vector); ++ } ++} ++ ++void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq, ++ int n, bool assign, ++ bool with_irqfd) ++{ ++ if (n == VIRTIO_CONFIG_IRQ_IDX) { ++ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); ++ } else
{ ++ virtio_queue_set_guest_notifier_fd_handler(vq, assign, with_irqfd); ++ } + } + + static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, +@@ -1019,17 +1072,25 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, + VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); +- VirtQueue *vq = virtio_get_queue(vdev, n); +- EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); ++ VirtQueue *vq = NULL; ++ EventNotifier *notifier = NULL; ++ ++ if (n == VIRTIO_CONFIG_IRQ_IDX) { ++ notifier = virtio_config_get_guest_notifier(vdev); ++ } else { ++ vq = virtio_get_queue(vdev, n); ++ notifier = virtio_queue_get_guest_notifier(vq); ++ } + + if (assign) { + int r = event_notifier_init(notifier, 0); + if (r < 0) { + return r; + } +- virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd); ++ virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, true, with_irqfd); + } else { +- virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd); ++ virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, false, ++ with_irqfd); + event_notifier_cleanup(notifier); + } + +@@ -1072,10 +1133,13 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) + proxy->nvqs_with_notifiers = nvqs; + + /* Must unset vector notifier while guest notifier is still assigned */ +- if ((proxy->vector_irqfd || k->guest_notifier_mask) && !assign) { ++ if ((proxy->vector_irqfd || ++ (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && ++ !assign) { + msix_unset_vector_notifiers(&proxy->pci_dev); + if (proxy->vector_irqfd) { +- kvm_virtio_pci_vector_release(proxy, nvqs); ++ kvm_virtio_pci_vector_vq_release(proxy, nvqs); ++ kvm_virtio_pci_vector_config_release(proxy); + g_free(proxy->vector_irqfd); + proxy->vector_irqfd = NULL; + } +@@ -1091,20 +1155,30 @@ static int 
virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) + goto assign_error; + } + } +- ++ r = virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, assign, ++ with_irqfd); ++ if (r < 0) { ++ goto config_assign_error; ++ } + /* Must set vector notifier after guest notifier has been assigned */ +- if ((with_irqfd || k->guest_notifier_mask) && assign) { ++ if ((with_irqfd || ++ (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && ++ assign) { + if (with_irqfd) { + proxy->vector_irqfd = + g_malloc0(sizeof(*proxy->vector_irqfd) * + msix_nr_vectors_allocated(&proxy->pci_dev)); +- r = kvm_virtio_pci_vector_use(proxy, nvqs); ++ r = kvm_virtio_pci_vector_vq_use(proxy, nvqs); ++ if (r < 0) { ++ goto config_assign_error; ++ } ++ r = kvm_virtio_pci_vector_config_use(proxy); + if (r < 0) { +- goto assign_error; ++ goto config_error; + } + } +- r = msix_set_vector_notifiers(&proxy->pci_dev, +- virtio_pci_vector_unmask, ++ ++ r = msix_set_vector_notifiers(&proxy->pci_dev, virtio_pci_vector_unmask, + virtio_pci_vector_mask, + virtio_pci_vector_poll); + if (r < 0) { +@@ -1117,9 +1191,15 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) + notifiers_error: + if (with_irqfd) { + assert(assign); +- kvm_virtio_pci_vector_release(proxy, nvqs); ++ kvm_virtio_pci_vector_vq_release(proxy, nvqs); + } +- ++config_error: ++ if (with_irqfd) { ++ kvm_virtio_pci_vector_config_release(proxy); ++ } ++config_assign_error: ++ virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, !assign, ++ with_irqfd); + assign_error: + /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */ + assert(assign); +diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h +index 938799e8f6..c02e278f46 100644 +--- a/include/hw/virtio/virtio-pci.h ++++ b/include/hw/virtio/virtio-pci.h +@@ -256,5 +256,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); + * @fixed_queues. 
+ */ + unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues); +- ++void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq, ++ int n, bool assign, ++ bool with_irqfd); + #endif +-- +2.31.1 + diff --git a/SOURCES/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch b/SOURCES/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch new file mode 100644 index 0000000..a8c32a2 --- /dev/null +++ b/SOURCES/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch @@ -0,0 +1,272 @@ +From 9a234f849273d3480e4a88042cb1ea06a37a626b Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:43 +0800 +Subject: [PATCH 02/31] virtio-pci: decouple notifier from interrupt process +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/10] a20f4c9ff38b239531d12cbcc7deaa649c86abc3 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +To reuse the notifier process. We add the virtio_pci_get_notifier +to get the notifier and vector. The INPUT for this function is IDX, +The OUTPUT is the notifier and the vector + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-3-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 2e07f69d0c828e21515b63dc22884d548540b382) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio-pci.c | 88 +++++++++++++++++++++++++++--------------- + 1 file changed, 57 insertions(+), 31 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index a1c9dfa7bb..52c7692fff 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -728,29 +728,41 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy, + } + + static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy, +- unsigned int queue_no, ++ EventNotifier *n, + unsigned int vector) + { + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; +- VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); +- VirtQueue *vq = virtio_get_queue(vdev, queue_no); +- EventNotifier *n = virtio_queue_get_guest_notifier(vq); + return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq); + } + + static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy, +- unsigned int queue_no, ++ EventNotifier *n , + unsigned int vector) + { +- VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); +- VirtQueue *vq = virtio_get_queue(vdev, queue_no); +- EventNotifier *n = virtio_queue_get_guest_notifier(vq); + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; + int ret; + + ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq); + assert(ret == 0); + } ++static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, ++ EventNotifier **n, unsigned int *vector) ++{ ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); ++ VirtQueue *vq; ++ ++ if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { ++ return -1; ++ } else { ++ if (!virtio_queue_get_num(vdev, queue_no)) { ++ return -1; ++ } ++ *vector = virtio_queue_vector(vdev, queue_no); ++ vq = virtio_get_queue(vdev, queue_no); ++ *n = virtio_queue_get_guest_notifier(vq); ++ } ++ return 0; ++} + + static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) + { +@@ 
-759,12 +771,15 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + unsigned int vector; + int ret, queue_no; +- ++ EventNotifier *n; + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } +- vector = virtio_queue_vector(vdev, queue_no); ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ break; ++ } + if (vector >= msix_nr_vectors_allocated(dev)) { + continue; + } +@@ -776,7 +791,7 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) + * Otherwise, delay until unmasked in the frontend. + */ + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); ++ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); + if (ret < 0) { + kvm_virtio_pci_vq_vector_release(proxy, vector); + goto undo; +@@ -792,7 +807,11 @@ undo: + continue; + } + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ break; ++ } ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); + } + kvm_virtio_pci_vq_vector_release(proxy, vector); + } +@@ -806,12 +825,16 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) + unsigned int vector; + int queue_no; + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); +- ++ EventNotifier *n; ++ int ret ; + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } +- vector = virtio_queue_vector(vdev, queue_no); ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ break; ++ } + if (vector >= msix_nr_vectors_allocated(dev)) { + continue; + } +@@ -819,21 +842,20 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) + * Otherwise, it was 
cleaned when masked in the frontend. + */ + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); + } + kvm_virtio_pci_vq_vector_release(proxy, vector); + } + } + +-static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, ++static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector, +- MSIMessage msg) ++ MSIMessage msg, ++ EventNotifier *n) + { + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); +- VirtQueue *vq = virtio_get_queue(vdev, queue_no); +- EventNotifier *n = virtio_queue_get_guest_notifier(vq); + VirtIOIRQFD *irqfd; + int ret = 0; + +@@ -860,14 +882,15 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, + event_notifier_set(n); + } + } else { +- ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); ++ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); + } + return ret; + } + +-static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy, ++static void virtio_pci_one_vector_mask(VirtIOPCIProxy *proxy, + unsigned int queue_no, +- unsigned int vector) ++ unsigned int vector, ++ EventNotifier *n) + { + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); +@@ -878,7 +901,7 @@ static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy, + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { + k->guest_notifier_mask(vdev, queue_no, true); + } else { +- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); + } + } + +@@ -888,6 +911,7 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, + VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtQueue *vq = virtio_vector_first_queue(vdev, vector); ++ 
EventNotifier *n; + int ret, index, unmasked = 0; + + while (vq) { +@@ -896,7 +920,8 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, + break; + } + if (index < proxy->nvqs_with_notifiers) { +- ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg); ++ n = virtio_queue_get_guest_notifier(vq); ++ ret = virtio_pci_one_vector_unmask(proxy, index, vector, msg, n); + if (ret < 0) { + goto undo; + } +@@ -912,7 +937,8 @@ undo: + while (vq && unmasked >= 0) { + index = virtio_get_queue_index(vq); + if (index < proxy->nvqs_with_notifiers) { +- virtio_pci_vq_vector_mask(proxy, index, vector); ++ n = virtio_queue_get_guest_notifier(vq); ++ virtio_pci_one_vector_mask(proxy, index, vector, n); + --unmasked; + } + vq = virtio_vector_next_queue(vq); +@@ -925,15 +951,17 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) + VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtQueue *vq = virtio_vector_first_queue(vdev, vector); ++ EventNotifier *n; + int index; + + while (vq) { + index = virtio_get_queue_index(vq); ++ n = virtio_queue_get_guest_notifier(vq); + if (!virtio_queue_get_num(vdev, index)) { + break; + } + if (index < proxy->nvqs_with_notifiers) { +- virtio_pci_vq_vector_mask(proxy, index, vector); ++ virtio_pci_one_vector_mask(proxy, index, vector, n); + } + vq = virtio_vector_next_queue(vq); + } +@@ -949,19 +977,17 @@ static void virtio_pci_vector_poll(PCIDevice *dev, + int queue_no; + unsigned int vector; + EventNotifier *notifier; +- VirtQueue *vq; ++ int ret; + + for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) { +- if (!virtio_queue_get_num(vdev, queue_no)) { ++ ret = virtio_pci_get_notifier(proxy, queue_no, &notifier, &vector); ++ if (ret < 0) { + break; + } +- vector = virtio_queue_vector(vdev, queue_no); + if (vector < vector_start || vector >= vector_end || + !msix_is_masked(dev, vector)) { + continue; + } +- vq =
virtio_get_queue(vdev, queue_no); +- notifier = virtio_queue_get_guest_notifier(vq); + if (k->guest_notifier_pending) { + if (k->guest_notifier_pending(vdev, queue_no)) { + msix_set_pending(dev, vector); +-- +2.31.1 + diff --git a/SOURCES/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch b/SOURCES/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch new file mode 100644 index 0000000..be9b3c7 --- /dev/null +++ b/SOURCES/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch @@ -0,0 +1,212 @@ +From 58cd577ff157cfaf7506bba135db58e75c330ff0 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:44 +0800 +Subject: [PATCH 03/31] virtio-pci: decouple the single vector from the + interrupt process +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/10] 2c79cb678f005fb2f53b2db0f237347634ab3422 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 + +To reuse the interrupt process in configure interrupt +Need to decouple the single vector from the interrupt process. +We add new function kvm_virtio_pci_vector_use_one and _release_one. +These functions are used for the single vector, the whole process will +finish in the loop with vq number. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-4-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit ee3b8dc6cc496ba7f4e27aed4493275c706a7942) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio-pci.c | 131 +++++++++++++++++++++++------------------ + 1 file changed, 73 insertions(+), 58 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 52c7692fff..ec816ea367 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -699,7 +699,6 @@ static uint32_t virtio_read_config(PCIDevice *pci_dev, + } + + static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, +- unsigned int queue_no, + unsigned int vector) + { + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; +@@ -764,87 +763,103 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, + return 0; + } + +-static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) ++static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no) + { ++ unsigned int vector; ++ int ret; ++ EventNotifier *n; + PCIDevice *dev = &proxy->pci_dev; + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); +- unsigned int vector; +- int ret, queue_no; +- EventNotifier *n; +- for (queue_no = 0; queue_no < nvqs; queue_no++) { +- if (!virtio_queue_get_num(vdev, queue_no)) { +- break; +- } +- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); +- if (ret < 0) { +- break; +- } +- if (vector >= msix_nr_vectors_allocated(dev)) { +- continue; +- } +- ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector); ++ ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ return ret; ++ } ++ if (vector >= msix_nr_vectors_allocated(dev)) { ++ return 0; ++ } ++ ret = kvm_virtio_pci_vq_vector_use(proxy, vector); ++ if (ret < 0) { ++ goto undo; ++ } ++ /* ++ * If guest supports masking, set up irqfd now. ++ * Otherwise, delay until unmasked in the frontend. 
++ */ ++ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { ++ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); + if (ret < 0) { ++ kvm_virtio_pci_vq_vector_release(proxy, vector); + goto undo; + } +- /* If guest supports masking, set up irqfd now. +- * Otherwise, delay until unmasked in the frontend. +- */ +- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); +- if (ret < 0) { +- kvm_virtio_pci_vq_vector_release(proxy, vector); +- goto undo; +- } +- } + } +- return 0; + ++ return 0; + undo: +- while (--queue_no >= 0) { +- vector = virtio_queue_vector(vdev, queue_no); +- if (vector >= msix_nr_vectors_allocated(dev)) { +- continue; ++ ++ vector = virtio_queue_vector(vdev, queue_no); ++ if (vector >= msix_nr_vectors_allocated(dev)) { ++ return ret; ++ } ++ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ return ret; + } +- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); +- if (ret < 0) { +- break; +- } +- kvm_virtio_pci_irqfd_release(proxy, n, vector); ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); ++ } ++ return ret; ++} ++static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) ++{ ++ int queue_no; ++ int ret = 0; ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); ++ ++ for (queue_no = 0; queue_no < nvqs; queue_no++) { ++ if (!virtio_queue_get_num(vdev, queue_no)) { ++ return -1; + } +- kvm_virtio_pci_vq_vector_release(proxy, vector); ++ ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); + } + return ret; + } + +-static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) ++ ++static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, ++ int queue_no) + { +- PCIDevice *dev = &proxy->pci_dev; + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + unsigned 
int vector; +- int queue_no; +- VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + EventNotifier *n; +- int ret ; ++ int ret; ++ VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); ++ PCIDevice *dev = &proxy->pci_dev; ++ ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ return; ++ } ++ if (vector >= msix_nr_vectors_allocated(dev)) { ++ return; ++ } ++ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); ++ } ++ kvm_virtio_pci_vq_vector_release(proxy, vector); ++} ++ ++static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) ++{ ++ int queue_no; ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); ++ + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } +- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); +- if (ret < 0) { +- break; +- } +- if (vector >= msix_nr_vectors_allocated(dev)) { +- continue; +- } +- /* If guest supports masking, clean up irqfd now. +- * Otherwise, it was cleaned when masked in the frontend. +- */ +- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- kvm_virtio_pci_irqfd_release(proxy, n, vector); +- } +- kvm_virtio_pci_vq_vector_release(proxy, vector); ++ kvm_virtio_pci_vector_release_one(proxy, queue_no); + } + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch b/SOURCES/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch new file mode 100644 index 0000000..0555a68 --- /dev/null +++ b/SOURCES/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch @@ -0,0 +1,53 @@ +From 35ffe28a91a2ef08dd181d1a22695050ccbb6995 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 9 Jan 2023 16:04:43 +0000 +Subject: [PATCH 1/2] virtio-rng-pci: fix migration compat for vectors + +RH-Author: Dr. 
David Alan Gilbert +RH-MergeRequest: 131: virtio-rng-pci: fix migration compat for vectors +RH-Bugzilla: 2155749 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Thomas Huth +RH-Commit: [1/1] 1a866491dd191b073d71ae1aa5f4d76ee885de6d (dagrh/c-9-s-qemu-kvm) + +Fixup the migration compatibility for existing machine types +so that they do not enable msi-x. + +Symptom: + +(qemu) qemu: get_pci_config_device: Bad config data: i=0x34 read: 84 device: 98 cmask: ff wmask: 0 w1cmask:0 +qemu: Failed to load PCIDevice:config +qemu: Failed to load virtio-rng:virtio +qemu: error while loading state for instance 0x0 of device '0000:00:03.0/virtio-rng' +qemu: load of migration failed: Invalid argument + +Note: This fix will break migration from 7.2->7.2-fixed with this patch + +bz: https://bugzilla.redhat.com/show_bug.cgi?id=2155749 +Fixes: 9ea02e8f1 ("virtio-rng-pci: Allow setting nvectors, so we can use MSI-X") + +This downstream fix is the equivalent of an upstream fix I've posted to +the 7.2 machine type compatibility. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/core/machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 3d851d34da..7adbac6f87 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -56,6 +56,8 @@ GlobalProperty hw_compat_rhel_9_1[] = { + { "nvme-ns", "eui64-default", "on"}, + /* hw_compat_rhel_9_1 from hw_compat_7_1 */ + { "virtio-device", "queue_reset", "false" }, ++ /* hw_compat_rhel_9_1 bz 2155749 */ ++ { "virtio-rng-pci", "vectors", "0" }, + }; + const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); + +-- +2.31.1 + diff --git a/SOURCES/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch b/SOURCES/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch new file mode 100644 index 0000000..e5288d6 --- /dev/null +++ b/SOURCES/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch @@ -0,0 +1,47 @@ +From 5413b8825db6eecc6f245854a6bce58e4dee3294 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 7 Feb 2023 17:57:39 +0000 +Subject: [PATCH 20/20] virtio-rng-pci: fix transitional migration compat for + vectors + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 147: virtio-rng-pci: fix transitional migration compat for vectors +RH-Bugzilla: 2162569 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: Gerd Hoffmann +RH-Commit: [1/1] 6e2bd111cd56808fccf2c0464a40f7784fd893a2 (dagrh/c-9-s-qemu-kvm) + +In upstream bad9c5a5166/downstream 46e08bafe9ed I fixed the virito-rng-pci +migration compatibility, but it was discovered that we also need to fix +the other aliases of the device for the transitional cases. + +I've sent upstream: +https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg01926.html +but downstream we need to change the downstream machine type anyway, +so it's not quite identical. + +Fixes: 9ea02e8f1 ('virtio-rng-pci: Allow setting nvectors, so we can use MSI-X') + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/core/machine.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 7adbac6f87..3ee638394b 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -58,6 +58,9 @@ GlobalProperty hw_compat_rhel_9_1[] = { + { "virtio-device", "queue_reset", "false" }, + /* hw_compat_rhel_9_1 bz 2155749 */ + { "virtio-rng-pci", "vectors", "0" }, ++ /* hw_compat_rhel_9_1 bz 2162569 */ ++ { "virtio-rng-pci-transitional", "vectors", "0" }, ++ { "virtio-rng-pci-non-transitional", "vectors", "0" }, + }; + const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); + +-- +2.31.1 + diff --git a/SOURCES/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch b/SOURCES/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch deleted file mode 100644 index 897e04c..0000000 --- a/SOURCES/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 975af1b9f1811e113e1babd928ae70f8e4ebefb5 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 09:28:19 +0100 -Subject: [PATCH 13/16] virtio-scsi: clean up virtio_scsi_handle_cmd_vq() - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads -RH-Commit: [5/6] 27b0225783fa9bbb8fe5ee692bd3f0a888d49d07 (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2079347 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -virtio_scsi_handle_cmd_vq() is only called from hw/scsi/virtio-scsi.c -now and its return value is no longer used. Remove the function -prototype from virtio-scsi.h and drop the return value. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Paolo Bonzini -Message-id: 20220427143541.119567-6-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit ad482b57ef841b2d4883c5079d20ba44ff5e4b3e) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi.c | 5 +---- - include/hw/virtio/virtio-scsi.h | 1 - - 2 files changed, 1 insertion(+), 5 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index a47033d91d..df5ff8bab7 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -685,12 +685,11 @@ static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req) - scsi_req_unref(sreq); - } - --bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) -+static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) - { - VirtIOSCSIReq *req, *next; - int ret = 0; - bool suppress_notifications = virtio_queue_get_notification(vq); -- bool progress = false; - - QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); - -@@ -700,7 +699,6 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) - } - - while ((req = virtio_scsi_pop_req(s, vq))) { -- progress = true; - ret = virtio_scsi_handle_cmd_req_prepare(s, req); - if (!ret) { - QTAILQ_INSERT_TAIL(&reqs, req, next); -@@ -725,7 +723,6 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) - QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { - virtio_scsi_handle_cmd_req_submit(s, req); - } -- return progress; - } - - static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) -diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index 44dc3b81ec..2497530064 100644 ---- a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -151,7 +151,6 @@ void virtio_scsi_common_realize(DeviceState *dev, - Error **errp); - - void virtio_scsi_common_unrealize(DeviceState *dev); --bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); - void 
virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); - void virtio_scsi_free_req(VirtIOSCSIReq *req); - void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch b/SOURCES/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch deleted file mode 100644 index 30f012f..0000000 --- a/SOURCES/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch +++ /dev/null @@ -1,65 +0,0 @@ -From c6e16a7a5a18ec2bc4f8a6f5cc1c887e18b16cdf Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 09:28:12 +0100 -Subject: [PATCH 12/16] virtio-scsi: clean up virtio_scsi_handle_ctrl_vq() - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads -RH-Commit: [4/6] ca3751b7bfad5163c5b1c81b8525936a848d42ea (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2079347 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -virtio_scsi_handle_ctrl_vq() is only called from hw/scsi/virtio-scsi.c -now and its return value is no longer used. Remove the function -prototype from virtio-scsi.h and drop the return value. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Paolo Bonzini -Message-id: 20220427143541.119567-5-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 73b3b49f1880f236b4d0ffd7efb00280c05a5fab) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi.c | 5 +---- - include/hw/virtio/virtio-scsi.h | 1 - - 2 files changed, 1 insertion(+), 5 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index dd2185b943..a47033d91d 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -460,16 +460,13 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req) - } - } - --bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) -+static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) - { - VirtIOSCSIReq *req; -- bool progress = false; - - while ((req = virtio_scsi_pop_req(s, vq))) { -- progress = true; - virtio_scsi_handle_ctrl_req(s, req); - } -- return progress; - } - - /* -diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index 5957597825..44dc3b81ec 100644 ---- a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -152,7 +152,6 @@ void virtio_scsi_common_realize(DeviceState *dev, - - void virtio_scsi_common_unrealize(DeviceState *dev); - bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); --bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq); - void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); - void virtio_scsi_free_req(VirtIOSCSIReq *req); - void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch b/SOURCES/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch deleted file mode 100644 index bfdd39b..0000000 --- a/SOURCES/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 019d5a0ca5d13f837a59b9e2815e2fd7ac120807 Mon Sep 17 
00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 09:28:06 +0100 -Subject: [PATCH 11/16] virtio-scsi: clean up virtio_scsi_handle_event_vq() - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads -RH-Commit: [3/6] f8dbc4c1991c61e4cf8dea50942c3cd509c9c4bd (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2079347 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -virtio_scsi_handle_event_vq() is only called from hw/scsi/virtio-scsi.c -now and its return value is no longer used. Remove the function -prototype from virtio-scsi.h and drop the return value. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Paolo Bonzini -Message-id: 20220427143541.119567-4-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 37ce2de95169dacab3fb53d11bd4509b9c2e3a4c) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi.c | 4 +--- - include/hw/virtio/virtio-scsi.h | 1 - - 2 files changed, 1 insertion(+), 4 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 7b69eeed64..dd2185b943 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -856,13 +856,11 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, - virtio_scsi_complete_req(req); - } - --bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq) -+static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq) - { - if (s->events_dropped) { - virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0); -- return true; - } -- return false; - } - - static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) -diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index 543681bc18..5957597825 100644 ---- a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -151,7 +151,6 @@ void virtio_scsi_common_realize(DeviceState *dev, - Error **errp); - - void virtio_scsi_common_unrealize(DeviceState 
*dev); --bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq); - bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); - bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq); - void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch b/SOURCES/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch deleted file mode 100644 index 5ba11a2..0000000 --- a/SOURCES/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 1b609b2af303fb6498b2ef94ac4f2e900dc8c1b2 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 09:27:45 +0100 -Subject: [PATCH 10/16] virtio-scsi: don't waste CPU polling the event - virtqueue - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads -RH-Commit: [2/6] 7e613d9b9fa8ceb668c78cb3ce7ebe1d73a004b5 (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2079347 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -The virtio-scsi event virtqueue is not emptied by its handler function. -This is typical for rx virtqueues where the device uses buffers when -some event occurs (e.g. a packet is received, an error condition -happens, etc). - -Polling non-empty virtqueues wastes CPU cycles. We are not waiting for -new buffers to become available, we are waiting for an event to occur, -so it's a misuse of CPU resources to poll for buffers. - -Introduce the new virtio_queue_aio_attach_host_notifier_no_poll() API, -which is identical to virtio_queue_aio_attach_host_notifier() except -that it does not poll the virtqueue. 
- -Before this patch the following command-line consumed 100% CPU in the -IOThread polling and calling virtio_scsi_handle_event(): - - $ qemu-system-x86_64 -M accel=kvm -m 1G -cpu host \ - --object iothread,id=iothread0 \ - --device virtio-scsi-pci,iothread=iothread0 \ - --blockdev file,filename=test.img,aio=native,cache.direct=on,node-name=drive0 \ - --device scsi-hd,drive=drive0 - -After this patch CPU is no longer wasted. - -Reported-by: Nir Soffer -Signed-off-by: Stefan Hajnoczi -Tested-by: Nir Soffer -Message-id: 20220427143541.119567-3-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 38738f7dbbda90fbc161757b7f4be35b52205552) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi-dataplane.c | 2 +- - hw/virtio/virtio.c | 13 +++++++++++++ - include/hw/virtio/virtio.h | 1 + - 3 files changed, 15 insertions(+), 1 deletion(-) - -diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c -index 29575cbaf6..8bb6e6acfc 100644 ---- a/hw/scsi/virtio-scsi-dataplane.c -+++ b/hw/scsi/virtio-scsi-dataplane.c -@@ -138,7 +138,7 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) - - aio_context_acquire(s->ctx); - virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); -- virtio_queue_aio_attach_host_notifier(vs->event_vq, s->ctx); -+ virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); - - for (i = 0; i < vs->conf.num_queues; i++) { - virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 9d637e043e..67a873f54a 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -3534,6 +3534,19 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) - virtio_queue_host_notifier_aio_poll_end); - } - -+/* -+ * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use -+ * this for rx virtqueues and similar cases where the virtqueue handler -+ * function does not pop all elements. 
When the virtqueue is left non-empty -+ * polling consumes CPU cycles and should not be used. -+ */ -+void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) -+{ -+ aio_set_event_notifier(ctx, &vq->host_notifier, true, -+ virtio_queue_host_notifier_read, -+ NULL, NULL); -+} -+ - void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) - { - aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL); -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index b31c4507f5..b62a35fdca 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -317,6 +317,7 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); - void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled); - void virtio_queue_host_notifier_read(EventNotifier *n); - void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx); -+void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx); - void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx); - VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); - VirtQueue *virtio_vector_next_queue(VirtQueue *vq); --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch b/SOURCES/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch deleted file mode 100644 index 1f22ba0..0000000 --- a/SOURCES/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch +++ /dev/null @@ -1,119 +0,0 @@ -From 5aaf33dbbbc89d58a52337985641723b9ee13541 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 27 Apr 2022 15:35:36 +0100 -Subject: [PATCH 09/16] virtio-scsi: fix ctrl and event handler functions in - dataplane mode - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads -RH-Commit: [1/6] 3087889041b960f14a6b3893243f78523a78f637 (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 
2079347 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -Commit f34e8d8b8d48d73f36a67b6d5e492ef9784b5012 ("virtio-scsi: prepare -virtio_scsi_handle_cmd for dataplane") prepared the virtio-scsi cmd -virtqueue handler function to be used in both the dataplane and -non-datpalane code paths. - -It failed to convert the ctrl and event virtqueue handler functions, -which are not designed to be called from the dataplane code path but -will be since the ioeventfd is set up for those virtqueues when -dataplane starts. - -Convert the ctrl and event virtqueue handler functions now so they -operate correctly when called from the dataplane code path. Avoid code -duplication by extracting this code into a helper function. - -Fixes: f34e8d8b8d48d73f36a67b6d5e492ef9784b5012 ("virtio-scsi: prepare virtio_scsi_handle_cmd for dataplane") -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Paolo Bonzini -Message-id: 20220427143541.119567-2-stefanha@redhat.com -[Fixed s/by used/be used/ typo pointed out by Michael Tokarev -. ---Stefan] -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 2f743ef6366c2df4ef51ef3ae318138cdc0125ab) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi.c | 42 +++++++++++++++++++++++++++--------------- - 1 file changed, 27 insertions(+), 15 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 7f6da33a8a..7b69eeed64 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -472,16 +472,32 @@ bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) - return progress; - } - -+/* -+ * If dataplane is configured but not yet started, do so now and return true on -+ * success. -+ * -+ * Dataplane is started by the core virtio code but virtqueue handler functions -+ * can also be invoked when a guest kicks before DRIVER_OK, so this helper -+ * function helps us deal with manually starting ioeventfd in that case. 
-+ */ -+static bool virtio_scsi_defer_to_dataplane(VirtIOSCSI *s) -+{ -+ if (!s->ctx || s->dataplane_started) { -+ return false; -+ } -+ -+ virtio_device_start_ioeventfd(&s->parent_obj.parent_obj); -+ return !s->dataplane_fenced; -+} -+ - static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) - { - VirtIOSCSI *s = (VirtIOSCSI *)vdev; - -- if (s->ctx) { -- virtio_device_start_ioeventfd(vdev); -- if (!s->dataplane_fenced) { -- return; -- } -+ if (virtio_scsi_defer_to_dataplane(s)) { -+ return; - } -+ - virtio_scsi_acquire(s); - virtio_scsi_handle_ctrl_vq(s, vq); - virtio_scsi_release(s); -@@ -720,12 +736,10 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) - /* use non-QOM casts in the data path */ - VirtIOSCSI *s = (VirtIOSCSI *)vdev; - -- if (s->ctx && !s->dataplane_started) { -- virtio_device_start_ioeventfd(vdev); -- if (!s->dataplane_fenced) { -- return; -- } -+ if (virtio_scsi_defer_to_dataplane(s)) { -+ return; - } -+ - virtio_scsi_acquire(s); - virtio_scsi_handle_cmd_vq(s, vq); - virtio_scsi_release(s); -@@ -855,12 +869,10 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) - { - VirtIOSCSI *s = VIRTIO_SCSI(vdev); - -- if (s->ctx) { -- virtio_device_start_ioeventfd(vdev); -- if (!s->dataplane_fenced) { -- return; -- } -+ if (virtio_scsi_defer_to_dataplane(s)) { -+ return; - } -+ - virtio_scsi_acquire(s); - virtio_scsi_handle_event_vq(s, vq); - virtio_scsi_release(s); --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch b/SOURCES/kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch deleted file mode 100644 index 8f1fb3e..0000000 --- a/SOURCES/kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch +++ /dev/null @@ -1,117 +0,0 @@ -From cbcab5ed1686fddeb2c6adb3a3f6ed0678a36e71 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 8 Aug 2022 12:21:34 -0400 -Subject: [PATCH 23/23] virtio-scsi: fix race in virtio_scsi_dataplane_start() - 
-RH-Author: Stefan Hajnoczi -RH-MergeRequest: 211: virtio-scsi: fix race in virtio_scsi_dataplane_start() (RHEL src-git) -RH-Commit: [1/1] 2d4964d8863e259326a73fb918fa2f5f63b4a60a -RH-Bugzilla: 2099541 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Reitz -RH-Acked-by: Paolo Bonzini - -As soon as virtio_scsi_data_plane_start() attaches host notifiers the -IOThread may start virtqueue processing. There is a race between -IOThread virtqueue processing and virtio_scsi_data_plane_start() because -it only assigns s->dataplane_started after attaching host notifiers. - -When a virtqueue handler function in the IOThread calls -virtio_scsi_defer_to_dataplane() it may see !s->dataplane_started and -attempt to start dataplane even though we're already in the IOThread: - - #0 0x00007f67b360857c __pthread_kill_implementation (libc.so.6 + 0xa257c) - #1 0x00007f67b35bbd56 raise (libc.so.6 + 0x55d56) - #2 0x00007f67b358e833 abort (libc.so.6 + 0x28833) - #3 0x00007f67b358e75b __assert_fail_base.cold (libc.so.6 + 0x2875b) - #4 0x00007f67b35b4cd6 __assert_fail (libc.so.6 + 0x4ecd6) - #5 0x000055ca87fd411b memory_region_transaction_commit (qemu-kvm + 0x67511b) - #6 0x000055ca87e17811 virtio_pci_ioeventfd_assign (qemu-kvm + 0x4b8811) - #7 0x000055ca87e14836 virtio_bus_set_host_notifier (qemu-kvm + 0x4b5836) - #8 0x000055ca87f8e14e virtio_scsi_set_host_notifier (qemu-kvm + 0x62f14e) - #9 0x000055ca87f8dd62 virtio_scsi_dataplane_start (qemu-kvm + 0x62ed62) - #10 0x000055ca87e14610 virtio_bus_start_ioeventfd (qemu-kvm + 0x4b5610) - #11 0x000055ca87f8c29a virtio_scsi_handle_ctrl (qemu-kvm + 0x62d29a) - #12 0x000055ca87fa5902 virtio_queue_host_notifier_read (qemu-kvm + 0x646902) - #13 0x000055ca882c099e aio_dispatch_handler (qemu-kvm + 0x96199e) - #14 0x000055ca882c1761 aio_poll (qemu-kvm + 0x962761) - #15 0x000055ca880e1052 iothread_run (qemu-kvm + 0x782052) - #16 0x000055ca882c562a qemu_thread_start (qemu-kvm + 0x96662a) - -This patch assigns 
s->dataplane_started before attaching host notifiers -so that virtqueue handler functions that run in the IOThread before -virtio_scsi_data_plane_start() returns correctly identify that dataplane -does not need to be started. This fix is taken from the virtio-blk -dataplane code and it's worth adding a comment in virtio-blk as well to -explain why it works. - -Note that s->dataplane_started does not need the AioContext lock because -it is set before attaching host notifiers and cleared after detaching -host notifiers. In other words, the IOThread always sees the value true -and the main loop thread does not modify it while the IOThread is -active. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2099541 -Reported-by: Qing Wang -Signed-off-by: Stefan Hajnoczi -Message-Id: <20220808162134.240405-1-stefanha@redhat.com> -Reviewed-by: Emanuele Giuseppe Esposito -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 9a4b6a63aee885931622549c85669dcca03bed39) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Miroslav Rezanina ---- - hw/block/dataplane/virtio-blk.c | 5 +++++ - hw/scsi/virtio-scsi-dataplane.c | 11 ++++++++--- - 2 files changed, 13 insertions(+), 3 deletions(-) - -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index 49276e46f2..26f965cabc 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -219,6 +219,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - - memory_region_transaction_commit(); - -+ /* -+ * These fields are visible to the IOThread so we rely on implicit barriers -+ * in aio_context_acquire() on the write side and aio_notify_accept() on -+ * the read side. 
-+ */ - s->starting = false; - vblk->dataplane_started = true; - trace_virtio_blk_data_plane_start(s); -diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c -index 8bb6e6acfc..20bb91766e 100644 ---- a/hw/scsi/virtio-scsi-dataplane.c -+++ b/hw/scsi/virtio-scsi-dataplane.c -@@ -136,6 +136,14 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) - - memory_region_transaction_commit(); - -+ /* -+ * These fields are visible to the IOThread so we rely on implicit barriers -+ * in aio_context_acquire() on the write side and aio_notify_accept() on -+ * the read side. -+ */ -+ s->dataplane_starting = false; -+ s->dataplane_started = true; -+ - aio_context_acquire(s->ctx); - virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); - virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); -@@ -143,9 +151,6 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) - for (i = 0; i < vs->conf.num_queues; i++) { - virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); - } -- -- s->dataplane_starting = false; -- s->dataplane_started = true; - aio_context_release(s->ctx); - return 0; - --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch b/SOURCES/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch deleted file mode 100644 index 8487f5c..0000000 --- a/SOURCES/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch +++ /dev/null @@ -1,168 +0,0 @@ -From 6603f216dbc07a1d221b1665409cfec6cc9960e2 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 09:28:26 +0100 -Subject: [PATCH 14/16] virtio-scsi: move request-related items from .h to .c - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads -RH-Commit: [6/6] ecdf5289abd04062c85c5ed8e577a5249684a3b0 (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2079347 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -There is no 
longer a need to expose the request and related APIs in -virtio-scsi.h since there are no callers outside virtio-scsi.c. - -Note the block comment in VirtIOSCSIReq has been adjusted to meet the -coding style. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Paolo Bonzini -Message-id: 20220427143541.119567-7-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 3dc584abeef0e1277c2de8c1c1974cb49444eb0a) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi.c | 45 ++++++++++++++++++++++++++++++--- - include/hw/virtio/virtio-scsi.h | 40 ----------------------------- - 2 files changed, 41 insertions(+), 44 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index df5ff8bab7..2450c9438c 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -29,6 +29,43 @@ - #include "hw/virtio/virtio-access.h" - #include "trace.h" - -+typedef struct VirtIOSCSIReq { -+ /* -+ * Note: -+ * - fields up to resp_iov are initialized by virtio_scsi_init_req; -+ * - fields starting at vring are zeroed by virtio_scsi_init_req. 
-+ */ -+ VirtQueueElement elem; -+ -+ VirtIOSCSI *dev; -+ VirtQueue *vq; -+ QEMUSGList qsgl; -+ QEMUIOVector resp_iov; -+ -+ union { -+ /* Used for two-stage request submission */ -+ QTAILQ_ENTRY(VirtIOSCSIReq) next; -+ -+ /* Used for cancellation of request during TMFs */ -+ int remaining; -+ }; -+ -+ SCSIRequest *sreq; -+ size_t resp_size; -+ enum SCSIXferMode mode; -+ union { -+ VirtIOSCSICmdResp cmd; -+ VirtIOSCSICtrlTMFResp tmf; -+ VirtIOSCSICtrlANResp an; -+ VirtIOSCSIEvent event; -+ } resp; -+ union { -+ VirtIOSCSICmdReq cmd; -+ VirtIOSCSICtrlTMFReq tmf; -+ VirtIOSCSICtrlANReq an; -+ } req; -+} VirtIOSCSIReq; -+ - static inline int virtio_scsi_get_lun(uint8_t *lun) - { - return ((lun[2] << 8) | lun[3]) & 0x3FFF; -@@ -45,7 +82,7 @@ static inline SCSIDevice *virtio_scsi_device_get(VirtIOSCSI *s, uint8_t *lun) - return scsi_device_get(&s->bus, 0, lun[1], virtio_scsi_get_lun(lun)); - } - --void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) -+static void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) - { - VirtIODevice *vdev = VIRTIO_DEVICE(s); - const size_t zero_skip = -@@ -58,7 +95,7 @@ void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) - memset((uint8_t *)req + zero_skip, 0, sizeof(*req) - zero_skip); - } - --void virtio_scsi_free_req(VirtIOSCSIReq *req) -+static void virtio_scsi_free_req(VirtIOSCSIReq *req) - { - qemu_iovec_destroy(&req->resp_iov); - qemu_sglist_destroy(&req->qsgl); -@@ -801,8 +838,8 @@ static void virtio_scsi_reset(VirtIODevice *vdev) - s->events_dropped = false; - } - --void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, -- uint32_t event, uint32_t reason) -+static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, -+ uint32_t event, uint32_t reason) - { - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); - VirtIOSCSIReq *req; -diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index 2497530064..abdda2cbd0 100644 ---- 
a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -94,42 +94,6 @@ struct VirtIOSCSI { - uint32_t host_features; - }; - --typedef struct VirtIOSCSIReq { -- /* Note: -- * - fields up to resp_iov are initialized by virtio_scsi_init_req; -- * - fields starting at vring are zeroed by virtio_scsi_init_req. -- * */ -- VirtQueueElement elem; -- -- VirtIOSCSI *dev; -- VirtQueue *vq; -- QEMUSGList qsgl; -- QEMUIOVector resp_iov; -- -- union { -- /* Used for two-stage request submission */ -- QTAILQ_ENTRY(VirtIOSCSIReq) next; -- -- /* Used for cancellation of request during TMFs */ -- int remaining; -- }; -- -- SCSIRequest *sreq; -- size_t resp_size; -- enum SCSIXferMode mode; -- union { -- VirtIOSCSICmdResp cmd; -- VirtIOSCSICtrlTMFResp tmf; -- VirtIOSCSICtrlANResp an; -- VirtIOSCSIEvent event; -- } resp; -- union { -- VirtIOSCSICmdReq cmd; -- VirtIOSCSICtrlTMFReq tmf; -- VirtIOSCSICtrlANReq an; -- } req; --} VirtIOSCSIReq; -- - static inline void virtio_scsi_acquire(VirtIOSCSI *s) - { - if (s->ctx) { -@@ -151,10 +115,6 @@ void virtio_scsi_common_realize(DeviceState *dev, - Error **errp); - - void virtio_scsi_common_unrealize(DeviceState *dev); --void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); --void virtio_scsi_free_req(VirtIOSCSIReq *req); --void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, -- uint32_t event, uint32_t reason); - - void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp); - int virtio_scsi_dataplane_start(VirtIODevice *s); --- -2.31.1 - diff --git a/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch b/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch new file mode 100644 index 0000000..c951897 --- /dev/null +++ b/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch @@ -0,0 +1,325 @@ +From c64027b1ff9856031c01009f4b5c3560d92cc998 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 21 Feb 2023 16:22:18 -0500 +Subject: 
[PATCH 03/12] virtio-scsi: reset SCSI devices from main loop thread + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread +RH-Bugzilla: 2155748 +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf +RH-Acked-by: Laszlo Ersek +RH-Commit: [3/3] 2a29cb9600709a799daadb4addb58a747ed2e3a3 (stefanha/centos-stream-qemu-kvm) + +When an IOThread is configured, the ctrl virtqueue is processed in the +IOThread. TMFs that reset SCSI devices are currently called directly +from the IOThread and trigger an assertion failure in blk_drain() from +the following call stack: + +virtio_scsi_handle_ctrl_req -> virtio_scsi_do_tmf -> device_code_reset +-> scsi_disk_reset -> scsi_device_purge_requests -> blk_drain + + ../block/block-backend.c:1780: void blk_drain(BlockBackend *): Assertion `qemu_in_main_thread()' failed. + +The blk_drain() function is not designed to be called from an IOThread +because it needs the Big QEMU Lock (BQL). + +This patch defers TMFs that reset SCSI devices to a Bottom Half (BH) +that runs in the main loop thread under the BQL. This way it's safe to +call blk_drain() and the assertion failure is avoided. + +Introduce s->tmf_bh_list for tracking TMF requests that have been +deferred to the BH. When the BH runs it will grab the entire list and +process all requests. Care must be taken to clear the list when the +virtio-scsi device is reset or unrealized. Otherwise deferred TMF +requests could execute later and lead to use-after-free or other +undefined behavior. + +The s->resetting counter that's used by TMFs that reset SCSI devices is +accessed from multiple threads. This patch makes that explicit by using +atomic accessor functions. With this patch applied the counter is only +modified by the main loop thread under the BQL but can be read by any +thread. 
+ +Reported-by: Qing Wang +Cc: Paolo Bonzini +Reviewed-by: Eric Blake +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230221212218.1378734-4-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit be2c42b97c3a3a395b2f05bad1b6c7de20ecf2a5) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 169 +++++++++++++++++++++++++------- + include/hw/virtio/virtio-scsi.h | 11 ++- + 2 files changed, 143 insertions(+), 37 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 6f6e2e32ba..7d27e4c2a1 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -42,13 +42,11 @@ typedef struct VirtIOSCSIReq { + QEMUSGList qsgl; + QEMUIOVector resp_iov; + +- union { +- /* Used for two-stage request submission */ +- QTAILQ_ENTRY(VirtIOSCSIReq) next; ++ /* Used for two-stage request submission and TMFs deferred to BH */ ++ QTAILQ_ENTRY(VirtIOSCSIReq) next; + +- /* Used for cancellation of request during TMFs */ +- int remaining; +- }; ++ /* Used for cancellation of request during TMFs */ ++ int remaining; + + SCSIRequest *sreq; + size_t resp_size; +@@ -293,6 +291,122 @@ static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d) + } + } + ++static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req) ++{ ++ VirtIOSCSI *s = req->dev; ++ SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun); ++ BusChild *kid; ++ int target; ++ ++ switch (req->req.tmf.subtype) { ++ case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: ++ if (!d) { ++ req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET; ++ goto out; ++ } ++ if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { ++ req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN; ++ goto out; ++ } ++ qatomic_inc(&s->resetting); ++ device_cold_reset(&d->qdev); ++ qatomic_dec(&s->resetting); ++ break; ++ ++ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: ++ target = req->req.tmf.lun[1]; ++ qatomic_inc(&s->resetting); ++ ++ rcu_read_lock(); ++ QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, 
sibling) { ++ SCSIDevice *d1 = SCSI_DEVICE(kid->child); ++ if (d1->channel == 0 && d1->id == target) { ++ device_cold_reset(&d1->qdev); ++ } ++ } ++ rcu_read_unlock(); ++ ++ qatomic_dec(&s->resetting); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ ++out: ++ object_unref(OBJECT(d)); ++ ++ virtio_scsi_acquire(s); ++ virtio_scsi_complete_req(req); ++ virtio_scsi_release(s); ++} ++ ++/* Some TMFs must be processed from the main loop thread */ ++static void virtio_scsi_do_tmf_bh(void *opaque) ++{ ++ VirtIOSCSI *s = opaque; ++ QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); ++ VirtIOSCSIReq *req; ++ VirtIOSCSIReq *tmp; ++ ++ GLOBAL_STATE_CODE(); ++ ++ virtio_scsi_acquire(s); ++ ++ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { ++ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); ++ QTAILQ_INSERT_TAIL(&reqs, req, next); ++ } ++ ++ qemu_bh_delete(s->tmf_bh); ++ s->tmf_bh = NULL; ++ ++ virtio_scsi_release(s); ++ ++ QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) { ++ QTAILQ_REMOVE(&reqs, req, next); ++ virtio_scsi_do_one_tmf_bh(req); ++ } ++} ++ ++static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) ++{ ++ VirtIOSCSIReq *req; ++ VirtIOSCSIReq *tmp; ++ ++ GLOBAL_STATE_CODE(); ++ ++ virtio_scsi_acquire(s); ++ ++ if (s->tmf_bh) { ++ qemu_bh_delete(s->tmf_bh); ++ s->tmf_bh = NULL; ++ } ++ ++ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { ++ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); ++ ++ /* SAM-6 6.3.2 Hard reset */ ++ req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE; ++ virtio_scsi_complete_req(req); ++ } ++ ++ virtio_scsi_release(s); ++} ++ ++static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req) ++{ ++ VirtIOSCSI *s = req->dev; ++ ++ QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); ++ ++ if (!s->tmf_bh) { ++ s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); ++ qemu_bh_schedule(s->tmf_bh); ++ } ++} ++ + /* Return 0 if the request is ready to be completed and return to guest; + * -EINPROGRESS if the request is submitted 
and will be completed later, in the + * case of async cancellation. */ +@@ -300,8 +414,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) + { + SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun); + SCSIRequest *r, *next; +- BusChild *kid; +- int target; + int ret = 0; + + virtio_scsi_ctx_check(s, d); +@@ -358,15 +470,9 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) + break; + + case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: +- if (!d) { +- goto fail; +- } +- if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { +- goto incorrect_lun; +- } +- s->resetting++; +- device_cold_reset(&d->qdev); +- s->resetting--; ++ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: ++ virtio_scsi_defer_tmf_to_bh(req); ++ ret = -EINPROGRESS; + break; + + case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET: +@@ -409,22 +515,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) + } + break; + +- case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: +- target = req->req.tmf.lun[1]; +- s->resetting++; +- +- rcu_read_lock(); +- QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) { +- SCSIDevice *d1 = SCSI_DEVICE(kid->child); +- if (d1->channel == 0 && d1->id == target) { +- device_cold_reset(&d1->qdev); +- } +- } +- rcu_read_unlock(); +- +- s->resetting--; +- break; +- + case VIRTIO_SCSI_T_TMF_CLEAR_ACA: + default: + req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_REJECTED; +@@ -654,7 +744,7 @@ static void virtio_scsi_request_cancelled(SCSIRequest *r) + if (!req) { + return; + } +- if (req->dev->resetting) { ++ if (qatomic_read(&req->dev->resetting)) { + req->resp.cmd.response = VIRTIO_SCSI_S_RESET; + } else { + req->resp.cmd.response = VIRTIO_SCSI_S_ABORTED; +@@ -830,9 +920,12 @@ static void virtio_scsi_reset(VirtIODevice *vdev) + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + + assert(!s->dataplane_started); +- s->resetting++; ++ ++ virtio_scsi_reset_tmf_bh(s); ++ ++ qatomic_inc(&s->resetting); + bus_cold_reset(BUS(&s->bus)); +- s->resetting--; ++ 
qatomic_dec(&s->resetting); + + vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE; + vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE; +@@ -1052,6 +1145,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) + VirtIOSCSI *s = VIRTIO_SCSI(dev); + Error *err = NULL; + ++ QTAILQ_INIT(&s->tmf_bh_list); ++ + virtio_scsi_common_realize(dev, + virtio_scsi_handle_ctrl, + virtio_scsi_handle_event, +@@ -1089,6 +1184,8 @@ static void virtio_scsi_device_unrealize(DeviceState *dev) + { + VirtIOSCSI *s = VIRTIO_SCSI(dev); + ++ virtio_scsi_reset_tmf_bh(s); ++ + qbus_set_hotplug_handler(BUS(&s->bus), NULL); + virtio_scsi_common_unrealize(dev); + } +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index a36aad9c86..1c1cd77d6e 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -75,13 +75,22 @@ struct VirtIOSCSICommon { + VirtQueue **cmd_vqs; + }; + ++struct VirtIOSCSIReq; ++ + struct VirtIOSCSI { + VirtIOSCSICommon parent_obj; + + SCSIBus bus; +- int resetting; ++ int resetting; /* written from main loop thread, read from any thread */ + bool events_dropped; + ++ /* ++ * TMFs deferred to main loop BH. These fields are protected by ++ * virtio_scsi_acquire(). 
++ */ ++ QEMUBH *tmf_bh; ++ QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list; ++ + /* Fields for dataplane below */ + AioContext *ctx; /* one iothread per virtio-scsi-pci for now */ + +-- +2.39.1 + diff --git a/SOURCES/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch b/SOURCES/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch new file mode 100644 index 0000000..d797023 --- /dev/null +++ b/SOURCES/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch @@ -0,0 +1,74 @@ +From 3f55d12df35552ae948587a62d6f9015664adc13 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Wed, 21 Dec 2022 12:50:12 +0100 +Subject: [PATCH 1/9] virtio_net: Modify virtio_net_get_config to early return +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 137: vDPA net SVQ guest announce support +RH-Bugzilla: 2141088 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Jason Wang +RH-Commit: [1/4] 4f5e79afd54e157f32e6fff56ae33e2b71492525 (eperezmartin/qemu-kvm) + +Next patches introduce more code on vhost-vdpa branch, with already have +too much indentation. + +Signed-off-by: Eugenio Pérez +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Michael S. Tsirkin +Acked-by: Jason Wang +Message-Id: <20221221115015.1400889-2-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit ebc141a62508dc91901373c1a19fe7e2cf560dfb) +--- + hw/net/virtio-net.c | 28 +++++++++++++++------------- + 1 file changed, 15 insertions(+), 13 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index ec974f7a76..5935e55653 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -168,20 +168,22 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) + if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { + ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, + n->config_size); +- if (ret != -1) { +- /* +- * Some NIC/kernel combinations present 0 as the mac address. As +- * that is not a legal address, try to proceed with the +- * address from the QEMU command line in the hope that the +- * address has been configured correctly elsewhere - just not +- * reported by the device. +- */ +- if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { +- info_report("Zero hardware mac address detected. Ignoring."); +- memcpy(netcfg.mac, n->mac, ETH_ALEN); +- } +- memcpy(config, &netcfg, n->config_size); ++ if (ret == -1) { ++ return; + } ++ ++ /* ++ * Some NIC/kernel combinations present 0 as the mac address. As that ++ * is not a legal address, try to proceed with the address from the ++ * QEMU command line in the hope that the address has been configured ++ * correctly elsewhere - just not reported by the device. ++ */ ++ if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { ++ info_report("Zero hardware mac address detected. 
Ignoring."); ++ memcpy(netcfg.mac, n->mac, ETH_ALEN); ++ } ++ ++ memcpy(config, &netcfg, n->config_size); + } + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch b/SOURCES/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch new file mode 100644 index 0000000..866957c --- /dev/null +++ b/SOURCES/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch @@ -0,0 +1,46 @@ +From b3d728b53abaae0c9884dfb5e9c216b1088196e3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Wed, 21 Dec 2022 12:50:13 +0100 +Subject: [PATCH 2/9] virtio_net: copy VIRTIO_NET_S_ANNOUNCE if device model + has it +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 137: vDPA net SVQ guest announce support +RH-Bugzilla: 2141088 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Jason Wang +RH-Commit: [2/4] fb04186829eb93bab3c9ececf90fa5b035ffa2ec (eperezmartin/qemu-kvm) + +Status part of the emulated feature. It will follow device model, so we +must copy it as long as NIC device model has it set. + +Signed-off-by: Eugenio Pérez +Message-Id: <20221221115015.1400889-3-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +(cherry picked from commit 4f93aafc8f9d731c6588f5dc5594c6a1dd1fbe66) +--- + hw/net/virtio-net.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 5935e55653..948bcf33cf 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -183,6 +183,8 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) + memcpy(netcfg.mac, n->mac, ETH_ALEN); + } + ++ netcfg.status |= virtio_tswap16(vdev, ++ n->status & VIRTIO_NET_S_ANNOUNCE); + memcpy(config, &netcfg, n->config_size); + } + } +-- +2.31.1 + diff --git a/SOURCES/qemu-ga.sysconfig b/SOURCES/qemu-ga.sysconfig index 67bad0c..a78b428 100644 --- a/SOURCES/qemu-ga.sysconfig +++ b/SOURCES/qemu-ga.sysconfig @@ -1,11 +1,11 @@ # This is a systemd environment file, not a shell script. # It provides settings for "/lib/systemd/system/qemu-guest-agent.service". -# Comma-separated blacklist of RPCs to disable, or empty list to enable all. +# Comma-separated blocked RPCs to disable, or empty list to enable all. # -# You can get the list of RPC commands using "qemu-ga --blacklist='?'". -# There should be no spaces between commas and commands in the blacklist. -BLACKLIST_RPC=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status +# You can get the list of RPC commands using "qemu-ga --block-rpcs='?'". +# There should be no spaces between commas and commands in the block list. +BLOCK_RPCS=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status # Fsfreeze hook script specification. 
# diff --git a/SOURCES/qemu-guest-agent.service b/SOURCES/qemu-guest-agent.service index b3157d5..244da02 100644 --- a/SOURCES/qemu-guest-agent.service +++ b/SOURCES/qemu-guest-agent.service @@ -10,7 +10,7 @@ EnvironmentFile=/etc/sysconfig/qemu-ga ExecStart=/usr/bin/qemu-ga \ --method=virtio-serial \ --path=/dev/virtio-ports/org.qemu.guest_agent.0 \ - --blacklist=${BLACKLIST_RPC} \ + --block-rpcs=${BLOCK_RPCS} \ -F${FSFREEZE_HOOK_PATHNAME} Restart=always RestartSec=0 diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index 2e2997c..7b64c6d 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -1,7 +1,7 @@ %global libfdt_version 1.6.0 %global libseccomp_version 2.4.0 %global libusbx_version 1.0.23 -%global meson_version 0.58.2 +%global meson_version 0.61.3 %global usbredir_version 0.7.1 %global ipxe_version 20200823-5.git4bd064de @@ -113,16 +113,13 @@ Requires: %{name}-ui-opengl = %{epoch}:%{version}-%{release} \ Requires: %{name}-ui-egl-headless = %{epoch}:%{version}-%{release} \ %endif \ Requires: %{name}-device-display-virtio-gpu = %{epoch}:%{version}-%{release} \ -Requires: %{name}-device-display-virtio-gpu-gl = %{epoch}:%{version}-%{release} \ %ifarch s390x \ Requires: %{name}-device-display-virtio-gpu-ccw = %{epoch}:%{version}-%{release} \ %else \ Requires: %{name}-device-display-virtio-gpu-pci = %{epoch}:%{version}-%{release} \ -Requires: %{name}-device-display-virtio-gpu-pci-gl = %{epoch}:%{version}-%{release} \ %endif \ %ifarch x86_64 %{power64} \ Requires: %{name}-device-display-virtio-vga = %{epoch}:%{version}-%{release} \ -Requires: %{name}-device-display-virtio-vga-gl = %{epoch}:%{version}-%{release} \ %endif \ Requires: %{name}-device-usb-host = %{epoch}:%{version}-%{release} \ %if %{have_usbredir} \ @@ -150,8 +147,8 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 7.0.0 -Release: 13%{?rcrel}%{?dist}%{?cc_suffix}.2 +Version: 7.2.0 +Release: 
14%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -186,320 +183,239 @@ Patch0010: 0010-Add-x86_64-machine-types.patch Patch0011: 0011-Enable-make-check.patch Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch Patch0013: 0013-Add-support-statement-to-help-output.patch -Patch0014: 0014-globally-limit-the-maximum-number-of-CPUs.patch -Patch0015: 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0016: 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch -Patch0017: 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0018: 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -Patch0019: 0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch -Patch0020: 0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch -Patch0021: 0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch -# For bz#2044162 - [RHEL9.1] Enable virtio-mem as tech-preview on ARM64 QEMU -Patch22: kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch -# For bz#2081022 - Build regression on ppc64le with c9s qemu-kvm 7.0.0-1 changes -Patch23: kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch -# For bz#2046029 - [WRB] New machine type property - dtb-kaslr-seed -Patch24: kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch -# For bz#2046029 - [WRB] New machine type property - dtb-kaslr-seed -Patch25: kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch -# For bz#1477099 - virtio-iommu (including ACPI, VHOST/VFIO integration, migration support) -Patch26: kvm-Enable-virtio-iommu-pci-on-aarch64.patch -# For bz#2037612 - [Win11][tpm][QL41112 PF] vfio_listener_region_add received unaligned region -Patch27: kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch -# For bz#2037612 - [Win11][tpm][QL41112 PF] vfio_listener_region_add 
received unaligned region -Patch28: kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch -# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' -Patch29: kvm-qapi-machine.json-Add-cluster-id.patch -# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' -Patch30: kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch -# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' -Patch31: kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch -# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' -Patch32: kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch -# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' -Patch33: kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch -# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' -Patch34: kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch -# For bz#2079938 - qemu coredump when boot with multi disks (qemu) failed to set up stack guard page: Cannot allocate memory -Patch35: kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch -# For bz#2079938 - qemu coredump when boot with multi disks (qemu) failed to set up stack guard page: Cannot allocate memory -Patch36: kvm-coroutine-Revert-to-constant-batch-size.patch -# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption -Patch37: kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch -# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption -Patch38: kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch -# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% 
CPU consumption -Patch39: kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch -# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption -Patch40: kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch -# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption -Patch41: kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch -# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption -Patch42: kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch -# For bz#1995710 - RFE: Allow virtio-scsi CD-ROM media change with IOThreads -Patch43: kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch -# For bz#2064530 - Rebuild qemu-kvm with clang-14 -Patch44: kvm-migration-Fix-operator-type.patch -# For bz#1708300 - RFE: qemu-nbd vs NBD_FLAG_CAN_MULTI_CONN -Patch45: kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch -# For bz#1708300 - RFE: qemu-nbd vs NBD_FLAG_CAN_MULTI_CONN -Patch46: kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch -# For bz#2031024 - Add support for fixing thread pool size [QEMU] -Patch47: kvm-Introduce-event-loop-base-abstract-class.patch -# For bz#2031024 - Add support for fixing thread pool size [QEMU] -Patch48: kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch -# For bz#2031024 - Add support for fixing thread pool size [QEMU] -Patch49: kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch -# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) -Patch50: kvm-qcow2-Improve-refcount-structure-rebuilding.patch -# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) -Patch51: kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch -# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. 
LVs) -Patch52: kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch -# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) -Patch53: kvm-iotests-108-Fix-when-missing-user_allow_other.patch -# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA -Patch54: kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch -# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA -Patch55: kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch -# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA -Patch56: kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch -# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA -Patch57: kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch -# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA -Patch58: kvm-vhost-vdpa-backend-feature-should-set-only-once.patch -# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA -Patch59: kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch -# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA -Patch60: kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch -# For bz#2094270 - Do not set the hard vCPU limit to the soft vCPU limit in downstream qemu-kvm anymore -Patch61: kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch -# For bz#2086262 - [Win11][tpm]vfio_listener_region_del received unaligned region -Patch62: kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch -# For bz#1952483 - RFE: QEMU's coroutines fail with CFLAGS=-flto on non-x86_64 architectures -Patch63: kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch -# For bz#1952483 - RFE: QEMU's coroutines fail with CFLAGS=-flto on non-x86_64 architectures -Patch64: kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch -# For bz#1952483 - RFE: QEMU's coroutines fail with CFLAGS=-flto on non-x86_64 architectures -Patch65: 
kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch -# For bz#2094252 - Compile the virtio-iommu device on x86_64 -Patch66: kvm-Enable-virtio-iommu-pci-on-x86_64.patch -# For bz#2092788 - Stalled IO Operations in VM -Patch67: kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch -# For bz#2092788 - Stalled IO Operations in VM -Patch68: kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch -# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 -Patch69: kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch -# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 -Patch70: kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch -# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 -Patch71: kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch -# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 -Patch72: kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch -# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 -Patch73: kvm-target-s390x-deprecate-CPUs-older-than-z14.patch -# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 -Patch74: kvm-target-arm-deprecate-named-CPU-models.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch75: kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch76: kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch77: kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch78: kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch79: kvm-migration-Add-migrate_use_tls-helper.patch -# For 
bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch80: kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch81: kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch82: kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch83: kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch84: kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch85: kvm-migration-Change-zero_copy_send-from-migration-param.patch -# For bz#2096143 - The migration port is not released if use it again for recovering postcopy migration -Patch86: kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch -# For bz#2100106 - Fix virtio-iommu/vfio bypass -Patch87: kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch -# For bz#2100106 - Fix virtio-iommu/vfio bypass -Patch88: kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch -# For bz#2100106 - Fix virtio-iommu/vfio bypass -Patch89: kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch -# For bz#2100106 - Fix virtio-iommu/vfio bypass -Patch90: kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch -# For bz#2100106 - Fix virtio-iommu/vfio bypass -Patch91: kvm-virtio-iommu-Fix-migration-regression.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch92: kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch93: kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch -# For bz#2098077 - virtio-blk: Can't boot fresh 
installation from used virtio-blk dasd disk under certain conditions -Patch94: kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch95: kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch96: kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch97: kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch98: kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch99: kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch100: kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch101: kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch -# For bz#1951522 - CVE-2021-3507 qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-9.0] -Patch102: kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch -# For bz#1951522 - CVE-2021-3507 qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-9.0] -Patch103: kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch104: kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch -# For 
bz#1939363 - vDPA control virtqueue support in Qemu -Patch105: kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch106: kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch107: kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch108: kvm-meson-create-have_vhost_-variables.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch109: kvm-meson-use-have_vhost_-variables-to-pick-sources.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch110: kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch111: kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch112: kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch113: kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch114: kvm-vhost-Reorder-vhost_svq_kick.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch115: kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch116: kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch117: kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch118: kvm-vhost-Add-SVQDescState.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch119: kvm-vhost-Track-number-of-descs-in-SVQDescState.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch120: kvm-vhost-add-vhost_svq_push_elem.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch121: 
kvm-vhost-Expose-vhost_svq_add.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch122: kvm-vhost-add-vhost_svq_poll.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch123: kvm-vhost-Add-svq-avail_handler-callback.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch124: kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch125: kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch126: kvm-vdpa-manual-forward-CVQ-buffers.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch127: kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch128: kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch129: kvm-vdpa-Add-device-migration-blocker.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch130: kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch -# For bz#2111994 - RHEL9: skey test in kvm_unit_test got failed -Patch131: kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch -# For bz#2111994 - RHEL9: skey test in kvm_unit_test got failed -Patch132: kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch -# For bz#2095608 - Please correct the error message when try to start qemu with "-M kernel-irqchip=split" -Patch133: kvm-kvm-don-t-use-perror-without-useful-errno.patch -# For bz#2099934 - Guest reboot on destination host after postcopy migration completed -Patch134: kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch -# For bz#2099934 - Guest reboot on destination host after postcopy migration completed -Patch135: kvm-Revert-migration-Simplify-unqueue_page.patch -# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle 
together -Patch136: kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch -# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together -Patch137: kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch -# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together -Patch138: kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch -# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together -Patch139: kvm-migration-Avoid-false-positive-on-non-supported-scen.patch -# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together -Patch140: kvm-migration-add-remaining-params-has_-true-in-migratio.patch -# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together -Patch141: kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch -# For bz#2112303 - virtio-blk: Can't boot fresh installation from used 512 cluster_size image under certain conditions -Patch142: kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch -# For bz#2116876 - Fixes for vDPA control virtqueue support in Qemu -Patch143: kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch -# For bz#2116876 - Fixes for vDPA control virtqueue support in Qemu -Patch144: kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch -# For bz#2116876 - Fixes for vDPA control virtqueue support in Qemu -Patch145: kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch -# For bz#2116876 - Fixes for vDPA control virtqueue support in Qemu -Patch146: kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch -# For bz#2120275 - Wrong max_sectors_kb and Maximum transfer length on the pass-through device [rhel-9.1] -Patch147: 
kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch148: kvm-vhost-Get-vring-base-from-vq-not-svq.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch149: kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch150: kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch151: kvm-util-Return-void-on-iova_tree_remove.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch152: kvm-util-accept-iova_tree_remove_parameter-by-value.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch153: kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch154: kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch155: kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch156: kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch157: kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch158: kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch159: kvm-vhost-Delete-useless-read-memory-barrier.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch160: kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch -# For 
bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch161: kvm-vhost_net-Add-NetClientInfo-start-callback.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch162: kvm-vhost_net-Add-NetClientInfo-stop-callback.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch163: kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch164: kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch165: kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch166: kvm-vhost_net-add-NetClientState-load-callback.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch167: kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch168: kvm-vdpa-Delete-CVQ-migration-blocker.patch -# For bz#2099541 - qemu coredump with error Assertion `qemu_mutex_iothread_locked()' failed when repeatly hotplug/unplug disks in pause status -Patch169: kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch -# For bz#2117546 - [RHEL9.1] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command -Patch170: kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch -# For bz#2117546 - [RHEL9.1] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command -Patch171: kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch -# For bz#2134896 - Windows guest reboot after migration with wsl2 installed inside [rhel-9.1.0.z] -Patch172: kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch -# For bz#2168221 - while live-migrating many instances concurrently, libvirt 
sometimes return internal error: migration was active, but no RAM info was set [rhel-9.1.0.z] -Patch173: kvm-migration-Read-state-once.patch - -# Source-git patches +Patch0014: 0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0015: 0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0016: 0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +Patch0018: 0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch +Patch0019: 0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch +Patch0020: 0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch +Patch0021: 0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch +Patch0022: 0022-x86-rhel-9.2.0-machine-type.patch +Patch23: kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch24: kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch25: kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch26: kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch27: kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch28: kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch29: kvm-hw-arm-virt-Add-compact-highmem-property.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch30: kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch31: kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch +# For bz#2155749 - [regression][stable 
guest abi][qemu-kvm7.2]Migration failed due to virtio-rng device between RHEL8.8 and RHEL9.2/MSI-X +Patch32: kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch33: kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch34: kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch35: kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch36: kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch37: kvm-vhost-vdpa-add-support-for-config-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch38: kvm-virtio-add-support-for-configure-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch39: kvm-vhost-add-support-for-configure-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch40: kvm-virtio-net-add-support-for-configure-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch41: kvm-virtio-mmio-add-support-for-configure-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch42: kvm-virtio-pci-add-support-for-configure-interrupt.patch +# For bz#2159408 - [s390x] VMs with ISM passthrough don't autostart after leapp upgrade from RHEL 8 +Patch43: kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch +# For bz#2124856 - VM with virtio interface and iommu=on will crash when try to migrate +Patch44: kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch45: kvm-block-drop-bdrv_remove_filter_or_cow_child.patch 
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch46: kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch47: kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch48: kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch49: kvm-block-Remove-drained_end_counter.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch50: kvm-block-Inline-bdrv_drain_invoke.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch51: kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch52: kvm-block-Drain-individual-nodes-during-reopen.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch53: kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch54: kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch55: kvm-block-Remove-subtree-drains.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch56: 
kvm-block-Call-drain-callbacks-only-once.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch57: kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch58: kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch59: kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch60: kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch +# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on +Patch61: kvm-accel-introduce-accelerator-blocker-API.patch +# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on +Patch62: kvm-KVM-keep-track-of-running-ioctls.patch +# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on +Patch63: kvm-kvm-Atomic-memslot-updates.patch +# For bz#2141088 - vDPA SVQ guest announce support +Patch64: kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch +# For bz#2141088 - vDPA SVQ guest announce support +Patch65: kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch +# For bz#2141088 - vDPA SVQ guest announce support +Patch66: kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch +# For bz#2141088 - vDPA SVQ guest announce support +Patch67: kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch +# For bz#2122523 - Secure guest can't boot with maximal number of vcpus (248) +Patch68: kvm-s390x-pv-Implement-a-CGS-check-helper.patch +# For bz#2163701 - [s390x] VM fails to start with ISM passed through +Patch69: 
kvm-s390x-pci-coalesce-unmap-operations.patch +# For bz#2163701 - [s390x] VM fails to start with ISM passed through +Patch70: kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch +# For bz#2163701 - [s390x] VM fails to start with ISM passed through +Patch71: kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch +# For bz#2149191 - [RFE][guest-agent] - USB bus type support +Patch72: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch73: kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch74: kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch75: kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch76: kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch77: kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch78: kvm-vdpa-request-iova_range-only-once.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch79: kvm-vdpa-move-SVQ-vring-features-check-to-net.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch80: kvm-vdpa-allocate-SVQ-array-unconditionally.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch81: kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch82: kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch83: kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch84: kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch85: kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch +# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps 
operations +Patch86: kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch +# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch87: kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch +# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch88: kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch +# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch89: kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch +# For bz#2165280 - [kvm-unit-tests] debug-wp-migration fails +Patch90: kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch91: kvm-block-Improve-empty-format-specific-info-dump.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch92: kvm-block-file-Add-file-specific-image-info.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch93: kvm-block-vmdk-Change-extent-info-type.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch94: kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch95: kvm-qemu-img-Use-BlockNodeInfo.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch96: kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch97: kvm-block-qapi-Introduce-BlockGraphInfo.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch98: kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch99: kvm-iotests-Filter-child-node-information.patch +# For bz#1860292 - RFE: add 
extent_size_hint information to qemu-img info +Patch100: kvm-iotests-106-214-308-Read-only-one-size-line.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch101: kvm-qemu-img-Let-info-print-block-graph.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch102: kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch +# For bz#2155173 - [vhost-user] unable to start vhost net: 71: falling back on userspace +Patch103: kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch +# For bz#2155173 - [vhost-user] unable to start vhost net: 71: falling back on userspace +Patch104: kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch +# For bz#2162569 - [transitional device][virtio-rng-pci-transitional]Stable Guest ABI failed between RHEL 8.6 to RHEL 9.2 +Patch105: kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch +# For bz#2169232 - RFE: reconnect option for stream socket back-end +Patch106: kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch +# For bz#2169232 - RFE: reconnect option for stream socket back-end +Patch107: kvm-net-stream-add-a-new-option-to-automatically-reconne.patch +# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall +Patch108: kvm-linux-headers-Update-to-v6.1.patch +# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall +Patch109: kvm-util-userfaultfd-Add-uffd_open.patch +# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall +Patch110: kvm-util-userfaultfd-Support-dev-userfaultfd.patch +# For bz#2169732 - Multifd migration fails under a weak network/socket ordering race +Patch111: kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch +# For bz#2169732 - Multifd migration fails under a weak network/socket ordering race +Patch112: kvm-migration-check-magic-value-for-deciding-the-mapping.patch +# For bz#2168172 - [s390x] qemu-kvm coredumps when SE crashes +Patch113: 
kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch +# For bz#2168209 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch114: kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch +# For bz#2169904 - [SVVP] job 'Check SMBIOS Table Specific Requirements' failed on win2022 +Patch115: kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch +# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed +Patch116: kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch +# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed +Patch117: kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch +# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed +Patch118: kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch119: kvm-qatomic-add-smp_mb__before-after_rmw.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch120: kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch121: kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch122: kvm-edu-add-smp_mb__after_rmw.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch123: kvm-aio-wait-switch-to-smp_mb__after_rmw.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch124: kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch125: kvm-physmem-add-missing-memory-barrier.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch126: kvm-async-update-documentation-of-the-memory-barriers.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch127: 
kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch +# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) +Patch128: kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch +# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) +Patch129: kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch +# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) +Patch130: kvm-target-i386-Fix-BEXTR-instruction.patch +# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) +Patch131: kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch +# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) +Patch132: kvm-target-i386-fix-ADOX-followed-by-ADCX.patch +# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) +Patch133: kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch +# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) +Patch134: kvm-target-i386-Fix-BZHI-instruction.patch +# For bz#2156876 - [virtual network][rhel7.9_guest] qemu-kvm: vhost vring error in virtqueue 1: Invalid argument (22) +Patch135: kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch %if %{have_clang} BuildRequires: clang @@ -570,13 +486,14 @@ BuildRequires: perl-Test-Harness BuildRequires: libslirp-devel BuildRequires: pulseaudio-libs-devel BuildRequires: spice-protocol +BuildRequires: capstone-devel # Requires for qemu-kvm package Requires: %{name}-core = %{epoch}:%{version}-%{release} Requires: %{name}-docs = %{epoch}:%{version}-%{release} Requires: %{name}-tools = %{epoch}:%{version}-%{release} Requires: qemu-pr-helper = %{epoch}:%{version}-%{release} -Requires: virtiofsd = %{epoch}:%{version}-%{release} +Requires: virtiofsd >= 1.5.0 %{requires_all_modules} %description @@ -600,6 +517,7 @@ Requires: edk2-aarch64 Requires: libseccomp >= 
%{libseccomp_version} Requires: libusbx >= %{libusbx_version} +Requires: capstone %if %{have_fdt} Requires: libfdt >= %{libfdt_version} %endif @@ -634,6 +552,10 @@ Requires: seabios-bin >= 1.10.2-1 Requires: seavgabios-bin >= 1.12.0-3 Requires: ipxe-roms-qemu >= %{ipxe_version} %endif +# Removal -gl modules as they do not provide any functionality - see bz#2149022 +Obsoletes: %{name}-device-display-virtio-gpu-gl <= %{epoch}:%{version} +Obsoletes: %{name}-device-display-virtio-gpu-pci-gl <= %{epoch}:%{version} +Obsoletes: %{name}-device-display-virtio-vga-gl <= %{epoch}:%{version} %description common %{name} is an open source virtualizer that provides hardware emulation for @@ -750,30 +672,20 @@ Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} %description device-display-virtio-gpu This package provides the virtio-gpu display device for QEMU. -%package device-display-virtio-gpu-gl -Summary: QEMU virtio-gpu-gl display device -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%description device-display-virtio-gpu-gl -This package provides the virtio-gpu-gl display device for QEMU. - %ifarch s390x %package device-display-virtio-gpu-ccw Summary: QEMU virtio-gpu-ccw display device Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: %{name}-device-display-virtio-gpu = %{epoch}:%{version}-%{release} %description device-display-virtio-gpu-ccw This package provides the virtio-gpu-ccw display device for QEMU. %else %package device-display-virtio-gpu-pci Summary: QEMU virtio-gpu-pci display device Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: %{name}-device-display-virtio-gpu = %{epoch}:%{version}-%{release} %description device-display-virtio-gpu-pci This package provides the virtio-gpu-pci display device for QEMU. 
- -%package device-display-virtio-gpu-pci-gl -Summary: QEMU virtio-gpu-pci-gl display device -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%description device-display-virtio-gpu-pci-gl -This package provides the virtio-gpu-pci-gl display device for QEMU. %endif %ifarch x86_64 %{power64} @@ -782,12 +694,6 @@ Summary: QEMU virtio-vga display device Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} %description device-display-virtio-vga This package provides the virtio-vga display device for QEMU. - -%package device-display-virtio-vga-gl -Summary: QEMU virtio-vga-gl display device -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%description device-display-virtio-vga-gl -This package provides the virtio-vga-gl display device for QEMU. %endif %package device-usb-host @@ -817,6 +723,10 @@ mkdir -p %{qemu_kvm_build} %build + +# Necessary hack for ZUUL CI +ulimit -n 10240 + %define disable_everything \\\ --audio-drv-list= \\\ --disable-alsa \\\ @@ -824,6 +734,7 @@ mkdir -p %{qemu_kvm_build} --disable-auth-pam \\\ --disable-avx2 \\\ --disable-avx512f \\\ + --disable-blkio \\\ --disable-block-drv-whitelist-in-tools \\\ --disable-bochs \\\ --disable-bpf \\\ @@ -872,6 +783,7 @@ mkdir -p %{qemu_kvm_build} --disable-libssh \\\ --disable-libudev \\\ --disable-libusb \\\ + --disable-libvduse \\\ --disable-linux-aio \\\ --disable-linux-io-uring \\\ --disable-linux-user \\\ @@ -913,6 +825,7 @@ mkdir -p %{qemu_kvm_build} --disable-slirp-smbd \\\ --disable-smartcard \\\ --disable-snappy \\\ + --disable-sndio \\\ --disable-sparse \\\ --disable-spice \\\ --disable-spice-protocol \\\ @@ -926,20 +839,19 @@ mkdir -p %{qemu_kvm_build} --disable-user \\\ --disable-vde \\\ --disable-vdi \\\ + --disable-vduse-blk-export \\\ --disable-vhost-crypto \\\ --disable-vhost-kernel \\\ --disable-vhost-net \\\ - --disable-vhost-scsi \\\ --disable-vhost-user \\\ --disable-vhost-user-blk-server \\\ --disable-vhost-vdpa \\\ - --disable-vhost-vsock 
\\\ --disable-virglrenderer \\\ --disable-virtfs \\\ --disable-virtiofsd \\\ --disable-vnc \\\ --disable-vnc-jpeg \\\ - --disable-vnc-png \\\ + --disable-png \\\ --disable-vnc-sasl \\\ --disable-vte \\\ --disable-vvfat \\\ @@ -970,7 +882,7 @@ run_configure() { --with-pkgversion="%{name}-%{version}-%{release}" \ --with-suffix="%{name}" \ --firmwarepath=%{firmwaredirs} \ - --meson="internal" \ + --meson="%{__meson}" \ --enable-trace-backend=dtrace \ --with-coroutine=ucontext \ --with-git=git \ @@ -999,7 +911,7 @@ run_configure \ %endif --enable-attr \ --enable-cap-ng \ - --enable-capstone=internal \ + --enable-capstone \ --enable-coroutine-pool \ --enable-curl \ --enable-debug-info \ @@ -1037,7 +949,7 @@ run_configure \ %endif --enable-seccomp \ --enable-selinux \ - --enable-slirp=system \ + --enable-slirp \ --enable-snappy \ --enable-spice-protocol \ --enable-system \ @@ -1054,9 +966,8 @@ run_configure \ --enable-vhost-user \ --enable-vhost-user-blk-server \ --enable-vhost-vdpa \ - --enable-vhost-vsock \ --enable-vnc \ - --enable-vnc-png \ + --enable-png \ --enable-vnc-sasl \ %if %{enable_werror} --enable-werror \ @@ -1323,6 +1234,16 @@ install -D -m 0644 %{_sourcedir}/bridge.conf %{buildroot}%{_sysconfdir}/%{name}/ install -m 0644 contrib/systemd/qemu-pr-helper.service %{buildroot}%{_unitdir} install -m 0644 contrib/systemd/qemu-pr-helper.socket %{buildroot}%{_unitdir} +# We do not support gl display devices so we can remove their modules as they +# do not have expected functionality included. +# +# https://gitlab.com/qemu-project/qemu/-/issues/1352 was filed to stop building these +# modules in case all dependencies are not satisfied. 
+ +rm -rf %{buildroot}%{_libdir}/%{name}/hw-display-virtio-gpu-gl.so +rm -rf %{buildroot}%{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so +rm -rf %{buildroot}%{_libdir}/%{name}/hw-display-virtio-vga-gl.so + # We need to make the block device modules and other qemu SO files executable # otherwise RPM won't pick up their dependencies. chmod +x %{buildroot}%{_libdir}/%{name}/*.so @@ -1480,25 +1401,17 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %files device-display-virtio-gpu %{_libdir}/%{name}/hw-display-virtio-gpu.so -%files device-display-virtio-gpu-gl -%{_libdir}/%{name}/hw-display-virtio-gpu-gl.so - %ifarch s390x %files device-display-virtio-gpu-ccw %{_libdir}/%{name}/hw-s390x-virtio-gpu-ccw.so %else %files device-display-virtio-gpu-pci %{_libdir}/%{name}/hw-display-virtio-gpu-pci.so - -%files device-display-virtio-gpu-pci-gl - %{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so %endif %ifarch x86_64 %{power64} %files device-display-virtio-vga %{_libdir}/%{name}/hw-display-virtio-vga.so -%files device-display-virtio-vga-gl - %{_libdir}/%{name}/hw-display-virtio-vga-gl.so %endif %files tests @@ -1533,52 +1446,287 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog -* Mon Feb 13 2023 Miroslav Rezanina - 7.0.0-13.el9_1.2 -- kvm-migration-Read-state-once.patch [bz#2168221] -- Resolves: bz#2168221 - (while live-migrating many instances concurrently, libvirt sometimes return internal error: migration was active, but no RAM info was set [rhel-9.1.0.z]) - -* Thu Feb 02 2023 Miroslav Rezanina - 7.0.0-13.el9_1.1 -- kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch [bz#2134896] -- Resolves: bz#2134896 - (Windows guest reboot after migration with wsl2 installed inside [rhel-9.1.0.z]) - -* Tue Sep 13 2022 Miroslav Rezanina - 7.0.0-13 -- kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch [bz#2117546] -- kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch [bz#2117546] -- Resolves: bz#2117546 - ([RHEL9.1] 
Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command) - -* Fri Aug 26 2022 Miroslav Rezanina - 7.0.0-12 -- kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch [bz#2120275] -- kvm-vhost-Get-vring-base-from-vq-not-svq.patch [bz#2114060] -- kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch [bz#2114060] -- kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch [bz#2114060] -- kvm-util-Return-void-on-iova_tree_remove.patch [bz#2114060] -- kvm-util-accept-iova_tree_remove_parameter-by-value.patch [bz#2114060] -- kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch [bz#2114060] -- kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch [bz#2114060] -- kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch [bz#2114060] -- kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch [bz#2114060] -- kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch [bz#2114060] -- kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch [bz#2114060] -- kvm-vhost-Delete-useless-read-memory-barrier.patch [bz#2114060] -- kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch [bz#2114060] -- kvm-vhost_net-Add-NetClientInfo-start-callback.patch [bz#2114060] -- kvm-vhost_net-Add-NetClientInfo-stop-callback.patch [bz#2114060] -- kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch [bz#2114060] -- kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch [bz#2114060] -- kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch [bz#2114060] -- kvm-vhost_net-add-NetClientState-load-callback.patch [bz#2114060] -- kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch [bz#2114060] -- kvm-vdpa-Delete-CVQ-migration-blocker.patch [bz#2114060] -- kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch [bz#2099541] -- Resolves: bz#2120275 - (Wrong max_sectors_kb and Maximum transfer length on the pass-through device [rhel-9.1]) -- Resolves: bz#2114060 - (vDPA state restore support through control virtqueue in 
Qemu) -- Resolves: bz#2099541 - (qemu coredump with error Assertion `qemu_mutex_iothread_locked()' failed when repeatly hotplug/unplug disks in pause status) +* Mon Mar 20 2023 Miroslav Rezanina - 7.2.0-14 +- Rebuild for 9.2 release +- Resolves: bz#2173590 + (bugs in emulation of BMI instructions (for libguestfs without KVM)) +- Resolves: bz#2156876 + ([virtual network][rhel7.9_guest] qemu-kvm: vhost vring error in virtqueue 1: Invalid argument (22)) + +* Mon Mar 20 2023 Miroslav Rezanina - 7.2.0-13 +- kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch [bz#2173590] +- kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch [bz#2173590] +- kvm-target-i386-Fix-BEXTR-instruction.patch [bz#2173590] +- kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch [bz#2173590] +- kvm-target-i386-fix-ADOX-followed-by-ADCX.patch [bz#2173590] +- kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch [bz#2173590] +- kvm-target-i386-Fix-BZHI-instruction.patch [bz#2173590] +- kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch [bz#2156876] +- Resolves: bz#2173590 + (bugs in emulation of BMI instructions (for libguestfs without KVM)) +- Resolves: bz#2156876 + ([virtual network][rhel7.9_guest] qemu-kvm: vhost vring error in virtqueue 1: Invalid argument (22)) + +* Sun Mar 12 2023 Miroslav Rezanina - 7.2.0-12 +- kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch [bz#2155748] +- kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch [bz#2155748] +- kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch [bz#2155748] +- kvm-qatomic-add-smp_mb__before-after_rmw.patch [bz#2175660] +- kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch [bz#2175660] +- kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch [bz#2175660] +- kvm-edu-add-smp_mb__after_rmw.patch [bz#2175660] +- kvm-aio-wait-switch-to-smp_mb__after_rmw.patch [bz#2175660] +- kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch [bz#2175660] +- 
kvm-physmem-add-missing-memory-barrier.patch [bz#2175660] +- kvm-async-update-documentation-of-the-memory-barriers.patch [bz#2175660] +- kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch [bz#2175660] +- Resolves: bz#2155748 + (qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed) +- Resolves: bz#2175660 + (Guest hangs when starting or rebooting) + +* Mon Mar 06 2023 Miroslav Rezanina - 7.2.0-11 +- kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch [bz#2169904] +- Resolves: bz#2169904 + ([SVVP] job 'Check SMBIOS Table Specific Requirements' failed on win2022) + +* Tue Feb 21 2023 Miroslav Rezanina - 7.2.0-10 +- kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch [bz#2168209] +- Resolves: bz#2168209 + (Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)) + +* Fri Feb 17 2023 Miroslav Rezanina - 7.2.0-9 +- kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch [bz#2169232] +- kvm-net-stream-add-a-new-option-to-automatically-reconne.patch [bz#2169232] +- kvm-linux-headers-Update-to-v6.1.patch [bz#2158704] +- kvm-util-userfaultfd-Add-uffd_open.patch [bz#2158704] +- kvm-util-userfaultfd-Support-dev-userfaultfd.patch [bz#2158704] +- kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch [bz#2169732] +- kvm-migration-check-magic-value-for-deciding-the-mapping.patch [bz#2169732] +- kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch [bz#2168172] +- Resolves: bz#2169232 + (RFE: reconnect option for stream socket back-end) +- Resolves: bz#2158704 + (RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall) +- Resolves: bz#2169732 + (Multifd migration fails under a weak network/socket ordering race) +- Resolves: bz#2168172 + ([s390x] qemu-kvm coredumps when SE crashes) + +* Thu Feb 09 2023 Miroslav Rezanina - 7.2.0-8 +- kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch [bz#2150180] +- 
kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch [bz#2150180] +- kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch [bz#2150180] +- kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch [bz#2150180] +- kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch [bz#2165280] +- kvm-block-Improve-empty-format-specific-info-dump.patch [bz#1860292] +- kvm-block-file-Add-file-specific-image-info.patch [bz#1860292] +- kvm-block-vmdk-Change-extent-info-type.patch [bz#1860292] +- kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch [bz#1860292] +- kvm-qemu-img-Use-BlockNodeInfo.patch [bz#1860292] +- kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch [bz#1860292] +- kvm-block-qapi-Introduce-BlockGraphInfo.patch [bz#1860292] +- kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch [bz#1860292] +- kvm-iotests-Filter-child-node-information.patch [bz#1860292] +- kvm-iotests-106-214-308-Read-only-one-size-line.patch [bz#1860292] +- kvm-qemu-img-Let-info-print-block-graph.patch [bz#1860292] +- kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch [bz#1860292] +- kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch [bz#2155173] +- kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch [bz#2155173] +- kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch [bz#2162569] +- Resolves: bz#2150180 + (qemu-img finishes successfully while having errors in commit or bitmaps operations) +- Resolves: bz#2165280 + ([kvm-unit-tests] debug-wp-migration fails) +- Resolves: bz#1860292 + (RFE: add extent_size_hint information to qemu-img info) +- Resolves: bz#2155173 + ([vhost-user] unable to start vhost net: 71: falling back on userspace) +- Resolves: bz#2162569 + ([transitional device][virtio-rng-pci-transitional]Stable Guest ABI failed between RHEL 8.6 to RHEL 9.2) + +* Mon Feb 06 2023 Miroslav Rezanina - 7.2.0-7 +- kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch [bz#2104412] +- 
kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch [bz#2104412] +- kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch [bz#2104412] +- kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch [bz#2104412] +- kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch [bz#2104412] +- kvm-vdpa-request-iova_range-only-once.patch [bz#2104412] +- kvm-vdpa-move-SVQ-vring-features-check-to-net.patch [bz#2104412] +- kvm-vdpa-allocate-SVQ-array-unconditionally.patch [bz#2104412] +- kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch [bz#2104412] +- kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch [bz#2104412] +- kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch [bz#2104412] +- kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch [bz#2104412] +- kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch [bz#2104412] +- kvm-spec-Disable-VDUSE.patch [bz#2128222] +- Resolves: bz#2104412 + (vDPA ASID support in Qemu) +- Resolves: bz#2128222 + (VDUSE block export should be disabled in builds for now) + +* Mon Jan 30 2023 Miroslav Rezanina - 7.2.0-6 +- kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch [bz#2141088] +- kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch [bz#2141088] +- kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch [bz#2141088] +- kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch [bz#2141088] +- kvm-s390x-pv-Implement-a-CGS-check-helper.patch [bz#2122523] +- kvm-s390x-pci-coalesce-unmap-operations.patch [bz#2163701] +- kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch [bz#2163701] +- kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch [bz#2163701] +- kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch [bz#2149191] +- Resolves: bz#2141088 + (vDPA SVQ guest announce support) +- Resolves: bz#2122523 + (Secure guest can't boot with maximal number of vcpus (248)) +- Resolves: bz#2163701 + ([s390x] VM fails to start with ISM passed through) +- Resolves: bz#2149191 + 
([RFE][guest-agent] - USB bus type support) + +* Tue Jan 17 2023 Miroslav Rezanina - 7.2.0-5 +- kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch [bz#1905805] +- kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch [bz#1905805] +- kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch [bz#1905805] +- kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch [bz#1905805] +- kvm-vhost-vdpa-add-support-for-config-interrupt.patch [bz#1905805] +- kvm-virtio-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-vhost-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-virtio-net-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-virtio-mmio-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-virtio-pci-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch [bz#2159408] +- kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch [bz#2124856] +- kvm-block-drop-bdrv_remove_filter_or_cow_child.patch [bz#2155112] +- kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch [bz#2155112] +- kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch [bz#2155112] +- kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch [bz#2155112] +- kvm-block-Remove-drained_end_counter.patch [bz#2155112] +- kvm-block-Inline-bdrv_drain_invoke.patch [bz#2155112] +- kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch [bz#2155112] +- kvm-block-Drain-individual-nodes-during-reopen.patch [bz#2155112] +- kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch [bz#2155112] +- kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch [bz#2155112] +- kvm-block-Remove-subtree-drains.patch [bz#2155112] +- kvm-block-Call-drain-callbacks-only-once.patch [bz#2155112] +- kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch [bz#2155112] +- kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch [bz#2155112] +- 
kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch [bz#2155112] +- kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch [bz#2155112] +- kvm-accel-introduce-accelerator-blocker-API.patch [bz#1979276] +- kvm-KVM-keep-track-of-running-ioctls.patch [bz#1979276] +- kvm-kvm-Atomic-memslot-updates.patch [bz#1979276] +- Resolves: bz#1905805 + (support config interrupt in vhost-vdpa qemu) +- Resolves: bz#2159408 + ([s390x] VMs with ISM passthrough don't autostart after leapp upgrade from RHEL 8) +- Resolves: bz#2124856 + (VM with virtio interface and iommu=on will crash when try to migrate) +- Resolves: bz#2155112 + (Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)) +- Resolves: bz#1979276 + (SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on) + +* Thu Jan 12 2023 Miroslav Rezanina - 7.2.0-4 +- kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch [bz#2155749] +- kvm-Update-QGA-service-for-new-command-line.patch [bz#2156515] +- Resolves: bz#2155749 + ([regression][stable guest abi][qemu-kvm7.2]Migration failed due to virtio-rng device between RHEL8.8 and RHEL9.2/MSI-X) +- Resolves: bz#2156515 + ([guest-agent] Replace '-blacklist' to '-block-rpcs' in qemu-ga config file) + +* Wed Jan 04 2023 Miroslav Rezanina - 7.2.0-3 +- kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch [bz#2113840] +- kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch [bz#2113840] +- kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch [bz#2113840] +- kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch [bz#2113840] +- kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch [bz#2113840] +- kvm-hw-arm-virt-Add-compact-highmem-property.patch [bz#2113840] +- kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch [bz#2113840] +- kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch [bz#2113840] +- Resolves: bz#2113840 + ([RHEL9.2] Memory mapping 
optimization for virt machine) + +* Tue Dec 20 2022 Miroslav Rezanina - 7.2.0-2 +- Fix updating from 7.1.0 +- kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch [bz#2154640] +- Resolves: bz#2154640 + ([aarch64] qemu fails to load "efi-virtio.rom" romfile when creating virtio-net-pci) + +* Thu Dec 15 2022 Miroslav Rezanina - 7.2.0-1 +- Rebase to QEMU 7.2.0 [bz#2135806] +- Resolves: bz#2135806 + (Rebase to QEMU 7.2 for RHEL 9.2.0) + +* Wed Dec 14 2022 Jon Maloy - 7.1.0-7 +- kvm-hw-acpi-erst.c-Fix-memory-handling-issues.patch [bz#2149108] +- Resolves: bz#2149108 + (CVE-2022-4172 qemu-kvm: QEMU: ACPI ERST: memory corruption issues in read_erst_record and write_erst_record [rhel-9]) + +* Fri Dec 02 2022 Miroslav Rezanina - 7.1.0-6 +- kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch [bz#2143170] +- kvm-block-use-the-request-length-for-iov-alignment.patch [bz#2143170] +- Resolves: bz#2143170 + (The installation can not start when install files (iso) locate on a 4k disk) + +* Mon Nov 14 2022 Miroslav Rezanina - 7.1.0-5 +- kvm-rtl8139-Remove-unused-variable.patch [bz#2141218] +- kvm-qemu-img-remove-unused-variable.patch [bz#2141218] +- kvm-host-libusb-Remove-unused-variable.patch [bz#2141218] +- Resolves: bz#2141218 + (qemu-kvm build fails with clang 15.0.1 due to false unused variable error) + +* Tue Nov 01 2022 Miroslav Rezanina - 7.1.0-4 +- kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch [bz#2126095] +- Resolves: bz#2126095 + ([rhel9.2][intel_iommu]Booting guest with "-device intel-iommu,intremap=on,device-iotlb=on,caching-mode=on" causes kernel call trace) + +* Thu Oct 13 2022 Jon Maloy - 7.1.0-3 +- kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch [bz#2108531] +- Resolves: bz#2108531 + (Windows guest reboot after migration with wsl2 installed inside) + +* Thu Sep 29 2022 Miroslav Rezanina - 7.1.0-2 +- kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch [RHELX-57] +- kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch [RHELX-57] +-
kvm-util-accept-iova_tree_remove_parameter-by-value.patch [RHELX-57] +- kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch [RHELX-57] +- kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch [RHELX-57] +- kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch [RHELX-57] +- kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch [RHELX-57] +- kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch [RHELX-57] +- kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch [RHELX-57] +- kvm-vhost-Delete-useless-read-memory-barrier.patch [RHELX-57] +- kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch [RHELX-57] +- kvm-vhost_net-Add-NetClientInfo-start-callback.patch [RHELX-57] +- kvm-vhost_net-Add-NetClientInfo-stop-callback.patch [RHELX-57] +- kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch [RHELX-57] +- kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch [RHELX-57] +- kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch [RHELX-57] +- kvm-vhost_net-add-NetClientState-load-callback.patch [RHELX-57] +- kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch [RHELX-57] +- kvm-vdpa-Delete-CVQ-migration-blocker.patch [RHELX-57] +- kvm-vdpa-Make-VhostVDPAState-cvq_cmd_in_buffer-control-a.patch [RHELX-57] +- kvm-vdpa-extract-vhost_vdpa_net_load_mac-from-vhost_vdpa.patch [RHELX-57] +- kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch [RHELX-57] +- kvm-vdpa-validate-MQ-CVQ-commands.patch [RHELX-57] +- kvm-virtio-net-Update-virtio-net-curr_queue_pairs-in-vdp.patch [RHELX-57] +- kvm-vdpa-Allow-MQ-feature-in-SVQ.patch [RHELX-57] +- kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch [bz#2125281] +- kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch [bz#2125281] +- kvm-Revert-Re-enable-capstone-internal-build.patch [bz#2127825] +- kvm-spec-Use-capstone-package.patch [bz#2127825] +- Resolves: RHELX-57 + (vDPA SVQ Multiqueue support ) +- Resolves: bz#2125281 + ([RHEL9.1] Guests in VMX root operation fail 
to reboot with QEMU's 'system_reset' command [rhel-9.2.0]) +- Resolves: bz#2127825 + (Use capstone for qemu-kvm build) + +* Mon Sep 05 2022 Miroslav Rezanina - 7.1.0-1 +- Rebase to QEMU 7.1.0 [bz#2111769] +- Resolves: bz#2111769 + (Rebase to QEMU 7.1.0) * Mon Aug 15 2022 Miroslav Rezanina - 7.0.0-11 - kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch [bz#2107466]