From 22c213d007f569c2ebabcc7c9bfc505b7c198da7 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Jun 09 2020 20:50:32 +0000 Subject: import qemu-kvm-4.2.0-19.module+el8.3.0+6473+93e27135 --- diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..39356a4 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +SOURCES/qemu-4.2.0.tar.xz diff --git a/.qemu-kvm.metadata b/.qemu-kvm.metadata new file mode 100644 index 0000000..f479eb3 --- /dev/null +++ b/.qemu-kvm.metadata @@ -0,0 +1 @@ +b27aa828a8457bd8551ae3c81b80cc365e1f6bfe SOURCES/qemu-4.2.0.tar.xz diff --git a/SOURCES/0005-Initial-redhat-build.patch b/SOURCES/0005-Initial-redhat-build.patch new file mode 100644 index 0000000..cde66a1 --- /dev/null +++ b/SOURCES/0005-Initial-redhat-build.patch @@ -0,0 +1,167 @@ +From 4df157781801c50224373be57fa3c8c3741c0535 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 12 Oct 2018 07:31:11 +0200 +Subject: Initial redhat build + +This patch introduces redhat build structure in redhat subdirectory. 
In addition, +several issues are fixed in QEMU tree: + + - Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm + - As we use qemu-kvm as name in all places, this is updated to be consistent + - Man page renamed from qemu to qemu-kvm + - man page is installed using make install so we have to fix it in qemu tree + - Use "/share/qemu-kvm" as SHARE_SUFFIX + - We reconfigured our share to qemu-kvm to be consistent with used name + +This rebase includes changes up to qemu-kvm-4.1.0-18.el8 + +Rebase notes (3.1.0): +- added new configure options + +Rebase notes (4.0.0): +- Added dependency to perl-Test-Harness (upstream) +- Added dependency to python3-sphinx (upstream) +- Change location of icons (upstream) +- Remove .desktop file (added upstream) +- Added qemu-trace-stap (added upstream) +- Removed elf2dmp (added upstream) +- Remove .buildinfo +- Added pvh.bin rom (added upstream) +- Added interop documentation files +- Use python module instead of qemu.py (upstream) + +Rebase notes (4.1.0): +- Remove edk2 files generated by build +- Switch to rhel-8.1-candidate build target +- Remove specs documentation +- Switched from libssh2 to libssh +- Add rc0 tarball usage hacks +- Added BuildRequires for wget, rpm-build and python3-sphinx +- Removed new unpacked files +- Update configure line to use new options + +Rebase notes (4.2.0): +- Disable iotest run during make check +- README renamed to README.rst (upstream) +- Removed ui-spice-app.so +- Added relevant changes from "505f7f4 redhat: Adding slirp to the exploded tree" +- Removed qemu-ga.8 install from spec file - installed by make +- Removed spapr-rtas.bin (upstream) +- Require newer SLOF (20191022) + +Merged patches (3.1.0): +- 01f0c9f RHEL8: Add disable configure options to qemu spec file +- Spec file cleanups + +Merged patches (4.0.0): +- aa4297c Add edk2 Requires to qemu-kvm +- d124ff5779 Fixing brew build target +- eb204b5 Introduce the qemu-kvm-tests rpm +- 223cf0c Load kvm module during boot 
(partial) + +Merged patches (4.1.0): +- ebb6e97 redhat: Fix LOCALVERSION creation +- b0ab0cc redhat: enable tpmdev passthrough (not disabling tests) +- 7cb3c4a Enable libpmem to support nvdimm +- 8943607 qemu-kvm.spec: bump libseccomp >= 2.4.0 +- 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) +- e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) + +Merged patches (4.2.0): +- 69e1fb2 enable virgla +- d4f6115 enable virgl, for real this time ... + +Signed-off-by: Danilo C. L. de Paula +--- + .gitignore | 1 + + Makefile | 3 +- + configure | 1 + + os-posix.c | 2 +- + redhat/Makefile | 82 + + redhat/Makefile.common | 51 + + redhat/README.tests | 39 + + redhat/qemu-kvm.spec.template | 2434 +++++++++++++++++++++++++++++ + redhat/scripts/process-patches.sh | 7 +- + tests/Makefile.include | 2 +- + ui/vnc.c | 2 +- + 11 files changed, 2615 insertions(+), 9 deletions(-) + create mode 100644 redhat/Makefile + create mode 100644 redhat/Makefile.common + create mode 100644 redhat/README.tests + create mode 100644 redhat/qemu-kvm.spec.template + +diff --git a/Makefile b/Makefile +index b437a346d7..086727dbb9 100644 +--- a/Makefile ++++ b/Makefile +@@ -512,6 +512,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM + CAP_CFLAGS += -DCAPSTONE_HAS_ARM64 + CAP_CFLAGS += -DCAPSTONE_HAS_POWERPC + CAP_CFLAGS += -DCAPSTONE_HAS_X86 ++CAP_CFLAGS += -Wp,-D_GLIBCXX_ASSERTIONS + + .PHONY: capstone/all + capstone/all: .git-submodule-status +@@ -826,7 +827,7 @@ install-doc: $(DOCS) install-sphinxdocs + $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" + ifdef CONFIG_POSIX + $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" +- $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1" ++ $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1/qemu-kvm.1" + $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7" + $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" + $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" +diff --git a/configure b/configure +index 
6099be1d84..16564f8ccc 100755 +--- a/configure ++++ b/configure +@@ -2424,6 +2424,7 @@ if test "$seccomp" != "no" ; then + seccomp="no" + fi + fi ++ + ########################################## + # xen probe + +diff --git a/os-posix.c b/os-posix.c +index 86cffd2c7d..1c9f86768d 100644 +--- a/os-posix.c ++++ b/os-posix.c +@@ -83,7 +83,7 @@ void os_setup_signal_handling(void) + /* Find a likely location for support files using the location of the binary. + For installed binaries this will be "$bindir/../share/qemu". When + running from the build tree this will be "$bindir/../pc-bios". */ +-#define SHARE_SUFFIX "/share/qemu" ++#define SHARE_SUFFIX "/share/qemu-kvm" + #define BUILD_SUFFIX "/pc-bios" + char *os_find_datadir(void) + { +diff --git a/tests/Makefile.include b/tests/Makefile.include +index 8566f5f119..b483790cf3 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -1194,7 +1194,7 @@ check-acceptance: check-venv $(TESTS_RESULTS_DIR) + check-qapi-schema: check-tests/qapi-schema/frontend check-tests/qapi-schema/doc-good.texi + check-qtest: $(patsubst %,check-qtest-%, $(QTEST_TARGETS)) + check-block: $(patsubst %,check-%, $(check-block-y)) +-check: check-block check-qapi-schema check-unit check-softfloat check-qtest check-decodetree ++check: check-qapi-schema check-unit check-softfloat check-qtest check-decodetree + check-clean: + rm -rf $(check-unit-y) tests/*.o $(QEMU_IOTESTS_HELPERS-y) + rm -rf $(sort $(foreach target,$(SYSEMU_TARGET_LIST), $(check-qtest-$(target)-y)) $(check-qtest-generic-y)) +diff --git a/ui/vnc.c b/ui/vnc.c +index 87b8045afe..ecf6276f5b 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -3987,7 +3987,7 @@ void vnc_display_open(const char *id, Error **errp) + + #ifdef CONFIG_VNC_SASL + if (sasl) { +- int saslErr = sasl_server_init(NULL, "qemu"); ++ int saslErr = sasl_server_init(NULL, "qemu-kvm"); + + if (saslErr != SASL_OK) { + error_setg(errp, "Failed to initialize SASL auth: %s", +-- +2.21.0 + diff --git 
a/SOURCES/0006-Enable-disable-devices-for-RHEL.patch b/SOURCES/0006-Enable-disable-devices-for-RHEL.patch new file mode 100644 index 0000000..b14bb1b --- /dev/null +++ b/SOURCES/0006-Enable-disable-devices-for-RHEL.patch @@ -0,0 +1,994 @@ +From 67511676246cce57becbd2dcf5abccf08d9ef737 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 11 Jan 2016 11:53:33 +0100 +Subject: Enable/disable devices for RHEL + +This commit adds all changes related to changes in supported devices. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (qemu 3.1.0) +- spapr_rng disabled in default_config +- new hyperv.mak in default configs +- Move changes from x86_64-softmmu.mak to i386-softmmu.mak +- Added CONFIG_VIRTIO_MMIO to aarch64-softmmu.mak +- Removed config_vga_isa.c changes as no longer needed +- Removed new devices + +Rebase notes (4.0.0): +- Added CONFIG_PCI_EXPRESS_GENERIC_BRIDGE for aarch64-softmmu.mak +- Added CONFIG_ARM_VIRT for aarch64-softmmu.mak +- Switch to KConfig (upstream) + - Using device whitelist + without-default-devices option + +Rebase notes (4.1.0): +- Added CONFIG_USB_OHCI_PCI for ppc64 +- Added CONFIG_XIVE_KVM for ppc64 +- Added CONFIG_ACPI_PCI for x86_64 +- Added CONFIG_SEMIHOSTING for aarch64 +- Cleanup aarch64 devices +- Do not build a15mpcore.c +- Removed ide-isa.c stub file +- Use CONFIG_USB_EHCI_PCI on x86_64 (new upstream) + +Rebase notes (4.2.0-rc0): +- Use conditional build for isa-superio.c (upstream change) +- Rename PCI_PIIX to PCI_I440FX (upstream change) + +Rebase notes (4.2.0-rc3): +- Disabled ccid-card-emulated (patch 92566) +- Disabled vfio-pci-igd-lpc-bridge (patch 92565) + +Merged patches (qemu 3.1.0): +- d51e082 Re-enable CONFIG_HYPERV_TESTDEV +- 4b889f3 Declare cirrus-vga as deprecated +- b579d32 Do not build bluetooth support +- 3eef52a Disable CONFIG_IPMI and CONFIG_I2C for ppc64 +- 9caf292 Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 + +Merged patches (4.1.0): +- 20a51f6 fdc: Revert downstream disablement of device "floppy" 
+- f869cc0 fdc: Restrict floppy controllers to RHEL-7 machine types +- 5909721 aarch64: Compile out IOH3420 +- 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) +- 495a27d x86_64-rh-devices: add missing TPM passthrough +- e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) + +Merged patches (4.2.0): +- f7587dd RHEL: disable hostmem-memfd + +Signed-off-by: Danilo C. L. de Paula +--- + Makefile.objs | 4 +- + backends/Makefile.objs | 3 +- + default-configs/aarch64-rh-devices.mak | 20 +++++ + default-configs/aarch64-softmmu.mak | 10 ++- + default-configs/ppc64-rh-devices.mak | 32 ++++++++ + default-configs/ppc64-softmmu.mak | 8 +- + default-configs/rh-virtio.mak | 10 +++ + default-configs/s390x-rh-devices.mak | 15 ++++ + default-configs/s390x-softmmu.mak | 4 +- + default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++ + default-configs/x86_64-softmmu.mak | 4 +- + hw/acpi/ich9.c | 4 +- + hw/arm/Makefile.objs | 2 +- + hw/block/fdc.c | 10 +++ + hw/bt/Makefile.objs | 4 +- + hw/cpu/Makefile.objs | 5 +- + hw/display/Makefile.objs | 5 +- + hw/display/cirrus_vga.c | 3 + + hw/ide/piix.c | 5 +- + hw/input/pckbd.c | 2 + + hw/net/e1000.c | 2 + + hw/pci-host/i440fx.c | 4 + + hw/ppc/spapr_cpu_core.c | 2 + + hw/usb/Makefile.objs | 4 +- + hw/vfio/pci-quirks.c | 9 +++ + hw/vfio/pci.c | 5 ++ + qemu-options.hx | 7 +- + redhat/qemu-kvm.spec.template | 5 +- + target/arm/cpu.c | 4 +- + target/i386/cpu.c | 35 +++++++-- + target/ppc/cpu-models.c | 10 +++ + target/s390x/cpu_models.c | 3 + + target/s390x/kvm.c | 8 ++ + util/memfd.c | 2 +- + vl.c | 8 +- + 35 files changed, 317 insertions(+), 41 deletions(-) + create mode 100644 default-configs/aarch64-rh-devices.mak + create mode 100644 default-configs/ppc64-rh-devices.mak + create mode 100644 default-configs/rh-virtio.mak + create mode 100644 default-configs/s390x-rh-devices.mak + create mode 100644 default-configs/x86_64-rh-devices.mak + +diff --git a/Makefile.objs b/Makefile.objs +index 11ba1a36bd..fcf63e1096 100644 
+--- a/Makefile.objs ++++ b/Makefile.objs +@@ -65,8 +65,8 @@ common-obj-y += replay/ + + common-obj-y += ui/ + common-obj-m += ui/ +-common-obj-y += bt-host.o bt-vhci.o +-bt-host.o-cflags := $(BLUEZ_CFLAGS) ++#common-obj-y += bt-host.o bt-vhci.o ++#bt-host.o-cflags := $(BLUEZ_CFLAGS) + + common-obj-y += dma-helpers.o + common-obj-y += vl.o +diff --git a/backends/Makefile.objs b/backends/Makefile.objs +index f0691116e8..f328d404bf 100644 +--- a/backends/Makefile.objs ++++ b/backends/Makefile.objs +@@ -16,4 +16,5 @@ endif + + common-obj-$(call land,$(CONFIG_VHOST_USER),$(CONFIG_VIRTIO)) += vhost-user.o + +-common-obj-$(CONFIG_LINUX) += hostmem-memfd.o ++# RHEL: disable memfd ++# common-obj-$(CONFIG_LINUX) += hostmem-memfd.o +diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak +new file mode 100644 +index 0000000000..a1ed641174 +--- /dev/null ++++ b/default-configs/aarch64-rh-devices.mak +@@ -0,0 +1,20 @@ ++include rh-virtio.mak ++ ++CONFIG_ARM_GIC_KVM=y ++CONFIG_ARM_SMMUV3=y ++CONFIG_ARM_V7M=y ++CONFIG_ARM_VIRT=y ++CONFIG_EDID=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_SCSI=y ++CONFIG_SEMIHOSTING=y ++CONFIG_USB=y ++CONFIG_USB_XHCI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VIRTIO_MMIO=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_XIO3130=y +diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak +index 958b1e08e4..8f6867d48a 100644 +--- a/default-configs/aarch64-softmmu.mak ++++ b/default-configs/aarch64-softmmu.mak +@@ -1,8 +1,10 @@ + # Default configuration for aarch64-softmmu + + # We support all the 32 bit boards so need all their config +-include arm-softmmu.mak ++#include arm-softmmu.mak + +-CONFIG_XLNX_ZYNQMP_ARM=y +-CONFIG_XLNX_VERSAL=y +-CONFIG_SBSA_REF=y ++#CONFIG_XLNX_ZYNQMP_ARM=y ++#CONFIG_XLNX_VERSAL=y ++#CONFIG_SBSA_REF=y ++ ++include aarch64-rh-devices.mak +diff --git a/default-configs/ppc64-rh-devices.mak 
b/default-configs/ppc64-rh-devices.mak +new file mode 100644 +index 0000000000..35f2106d06 +--- /dev/null ++++ b/default-configs/ppc64-rh-devices.mak +@@ -0,0 +1,32 @@ ++include rh-virtio.mak ++ ++CONFIG_DIMM=y ++CONFIG_MEM_DEVICE=y ++CONFIG_PCI=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PSERIES=y ++CONFIG_SCSI=y ++CONFIG_SPAPR_VSCSI=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_OHCI=y ++CONFIG_USB_OHCI_PCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_BOT=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_VGA=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_XICS=y ++CONFIG_XICS_KVM=y ++CONFIG_XICS_SPAPR=y ++CONFIG_XIVE=y ++CONFIG_XIVE_SPAPR=y ++CONFIG_XIVE_KVM=y +diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak +index cca52665d9..fec354f327 100644 +--- a/default-configs/ppc64-softmmu.mak ++++ b/default-configs/ppc64-softmmu.mak +@@ -1,10 +1,12 @@ + # Default configuration for ppc64-softmmu + + # Include all 32-bit boards +-include ppc-softmmu.mak ++#include ppc-softmmu.mak + + # For PowerNV +-CONFIG_POWERNV=y ++#CONFIG_POWERNV=y + + # For pSeries +-CONFIG_PSERIES=y ++#CONFIG_PSERIES=y ++ ++include ppc64-rh-devices.mak +diff --git a/default-configs/rh-virtio.mak b/default-configs/rh-virtio.mak +new file mode 100644 +index 0000000000..94ede1b5f6 +--- /dev/null ++++ b/default-configs/rh-virtio.mak +@@ -0,0 +1,10 @@ ++CONFIG_VIRTIO=y ++CONFIG_VIRTIO_BALLOON=y ++CONFIG_VIRTIO_BLK=y ++CONFIG_VIRTIO_GPU=y ++CONFIG_VIRTIO_INPUT=y ++CONFIG_VIRTIO_INPUT_HOST=y ++CONFIG_VIRTIO_NET=y ++CONFIG_VIRTIO_RNG=y ++CONFIG_VIRTIO_SCSI=y ++CONFIG_VIRTIO_SERIAL=y +diff --git a/default-configs/s390x-rh-devices.mak b/default-configs/s390x-rh-devices.mak +new file mode 100644 +index 0000000000..c3c73fe752 +--- /dev/null ++++ b/default-configs/s390x-rh-devices.mak +@@ -0,0 +1,15 @@ ++include rh-virtio.mak ++ ++CONFIG_PCI=y 
++CONFIG_S390_CCW_VIRTIO=y ++CONFIG_S390_FLIC=y ++CONFIG_S390_FLIC_KVM=y ++CONFIG_SCLPCONSOLE=y ++CONFIG_SCSI=y ++CONFIG_TERMINAL3270=y ++CONFIG_VFIO=y ++CONFIG_VFIO_AP=y ++CONFIG_VFIO_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_CCW=y ++CONFIG_WDT_DIAG288=y +diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak +index f2287a133f..3e2e388e91 100644 +--- a/default-configs/s390x-softmmu.mak ++++ b/default-configs/s390x-softmmu.mak +@@ -10,4 +10,6 @@ + + # Boards: + # +-CONFIG_S390_CCW_VIRTIO=y ++#CONFIG_S390_CCW_VIRTIO=y ++ ++include s390x-rh-devices.mak +diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak +new file mode 100644 +index 0000000000..d59b6d9bb5 +--- /dev/null ++++ b/default-configs/x86_64-rh-devices.mak +@@ -0,0 +1,100 @@ ++include rh-virtio.mak ++ ++CONFIG_AC97=y ++CONFIG_ACPI=y ++CONFIG_ACPI_PCI=y ++CONFIG_ACPI_CPU_HOTPLUG=y ++CONFIG_ACPI_MEMORY_HOTPLUG=y ++CONFIG_ACPI_NVDIMM=y ++CONFIG_ACPI_SMBUS=y ++CONFIG_ACPI_VMGENID=y ++CONFIG_ACPI_X86=y ++CONFIG_ACPI_X86_ICH=y ++CONFIG_AHCI=y ++CONFIG_APIC=y ++CONFIG_APM=y ++CONFIG_BOCHS_DISPLAY=y ++CONFIG_DIMM=y ++CONFIG_E1000E_PCI_EXPRESS=y ++CONFIG_E1000_PCI=y ++CONFIG_EDU=y ++CONFIG_FDC=y ++CONFIG_FW_CFG_DMA=y ++CONFIG_HDA=y ++CONFIG_HYPERV=y ++CONFIG_HYPERV_TESTDEV=y ++CONFIG_I2C=y ++CONFIG_I440FX=y ++CONFIG_I8254=y ++CONFIG_I8257=y ++CONFIG_I8259=y ++CONFIG_I82801B11=y ++CONFIG_IDE_CORE=y ++CONFIG_IDE_PCI=y ++CONFIG_IDE_PIIX=y ++CONFIG_IDE_QDEV=y ++CONFIG_IOAPIC=y ++CONFIG_IOH3420=y ++CONFIG_ISA_BUS=y ++CONFIG_ISA_DEBUG=y ++CONFIG_ISA_TESTDEV=y ++CONFIG_LPC_ICH9=y ++CONFIG_MC146818RTC=y ++CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y ++CONFIG_OPENGL=y ++CONFIG_PAM=y ++CONFIG_PC=y ++CONFIG_PCI=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_EXPRESS=y ++CONFIG_PCI_EXPRESS_Q35=y ++CONFIG_PCI_I440FX=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PCKBD=y ++CONFIG_PCSPK=y ++CONFIG_PC_ACPI=y ++CONFIG_PC_PCI=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_PVPANIC=y 
++CONFIG_PXB=y ++CONFIG_Q35=y ++CONFIG_QXL=y ++CONFIG_RTL8139_PCI=y ++CONFIG_SCSI=y ++CONFIG_SERIAL=y ++CONFIG_SERIAL_ISA=y ++CONFIG_SERIAL_PCI=y ++CONFIG_SEV=y ++CONFIG_SGA=y ++CONFIG_SMBIOS=y ++CONFIG_SMBUS_EEPROM=y ++CONFIG_SPICE=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_EHCI=y ++CONFIG_USB_EHCI_PCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_BOT=y ++CONFIG_USB_UHCI=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_CIRRUS=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_VGA=y ++CONFIG_VMMOUSE=y ++CONFIG_VMPORT=y ++CONFIG_VTD=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_WDT_IB700=y ++CONFIG_XIO3130=y ++CONFIG_TPM_CRB=y ++CONFIG_TPM_TIS=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_PASSTHROUGH=y +diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak +index 64b2ee2960..b5de7e5279 100644 +--- a/default-configs/x86_64-softmmu.mak ++++ b/default-configs/x86_64-softmmu.mak +@@ -1,3 +1,5 @@ + # Default configuration for x86_64-softmmu + +-include i386-softmmu.mak ++#include i386-softmmu.mak ++ ++include x86_64-rh-devices.mak +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index 2034dd749e..ab203ad448 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -449,8 +449,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; + pm->acpi_memory_hotplug.is_enabled = true; + pm->cpu_hotplug_legacy = true; +- pm->disable_s3 = 0; +- pm->disable_s4 = 0; ++ pm->disable_s3 = 1; ++ pm->disable_s4 = 1; + pm->s4_val = 2; + + object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, +diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs +index fe749f65fd..2aa1a9efdd 100644 +--- a/hw/arm/Makefile.objs ++++ b/hw/arm/Makefile.objs +@@ -27,7 +27,7 @@ obj-$(CONFIG_VEXPRESS) += vexpress.o + obj-$(CONFIG_ZYNQ) += xilinx_zynq.o + obj-$(CONFIG_SABRELITE) += sabrelite.o + 
+-obj-$(CONFIG_ARM_V7M) += armv7m.o ++#obj-$(CONFIG_ARM_V7M) += armv7m.o + obj-$(CONFIG_EXYNOS4) += exynos4210.o + obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o + obj-$(CONFIG_DIGIC) += digic.o +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index ac5d31e8c1..e925bac002 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -46,6 +46,8 @@ + #include "qemu/module.h" + #include "trace.h" + ++#include "hw/boards.h" ++ + /********************************************************/ + /* debug Floppy devices */ + +@@ -2638,6 +2640,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, + int i, j; + static int command_tables_inited = 0; + ++ /* Restricted for Red Hat Enterprise Linux: */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (!strstr(mc->name, "-rhel7.")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { + error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); + } +diff --git a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs +index 867a7d2e8a..e678e9ee3c 100644 +--- a/hw/bt/Makefile.objs ++++ b/hw/bt/Makefile.objs +@@ -1,3 +1,3 @@ +-common-obj-y += core.o l2cap.o sdp.o hci.o hid.o +-common-obj-y += hci-csr.o ++#common-obj-y += core.o l2cap.o sdp.o hci.o hid.o ++#common-obj-y += hci-csr.o + +diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs +index 8db9e8a7b3..1601ea93c7 100644 +--- a/hw/cpu/Makefile.objs ++++ b/hw/cpu/Makefile.objs +@@ -1,5 +1,6 @@ + obj-$(CONFIG_ARM11MPCORE) += arm11mpcore.o + obj-$(CONFIG_REALVIEW) += realview_mpcore.o + obj-$(CONFIG_A9MPCORE) += a9mpcore.o +-obj-$(CONFIG_A15MPCORE) += a15mpcore.o +-common-obj-y += core.o cluster.o ++#obj-$(CONFIG_A15MPCORE) += a15mpcore.o ++common-obj-y += core.o ++# cluster.o +diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs +index f2182e3bef..3d0cda1b52 100644 +--- a/hw/display/Makefile.objs 
++++ b/hw/display/Makefile.objs +@@ -1,8 +1,9 @@ + common-obj-$(CONFIG_DDC) += i2c-ddc.o + common-obj-$(CONFIG_EDID) += edid-generate.o edid-region.o + +-common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o +-common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o ++# Disabled for Red Hat Enterprise Linux ++#common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o ++#common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o + + common-obj-$(CONFIG_ADS7846) += ads7846.o + common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o +diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c +index cd283e53b4..93afa26fda 100644 +--- a/hw/display/cirrus_vga.c ++++ b/hw/display/cirrus_vga.c +@@ -2975,6 +2975,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + int16_t device_id = pc->device_id; + ++ warn_report("'cirrus-vga' is deprecated, " ++ "please use a different VGA card instead"); ++ + /* follow real hardware, cirrus card emulated has 4 MB video memory. + Also accept 8 MB/16 MB for backward compatibility. 
*/ + if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && +diff --git a/hw/ide/piix.c b/hw/ide/piix.c +index db313dd3b1..e14858ca64 100644 +--- a/hw/ide/piix.c ++++ b/hw/ide/piix.c +@@ -251,7 +251,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) + k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +- dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix3_ide_info = { +@@ -279,6 +280,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix4_ide_info = { +diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c +index f0acfd86f7..390eb6579c 100644 +--- a/hw/input/pckbd.c ++++ b/hw/input/pckbd.c +@@ -571,6 +571,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) + dc->realize = i8042_realizefn; + dc->vmsd = &vmstate_kbd_isa; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo i8042_info = { +diff --git a/hw/net/e1000.c b/hw/net/e1000.c +index a73f8d404e..fc73fdd6fa 100644 +--- a/hw/net/e1000.c ++++ b/hw/net/e1000.c +@@ -1795,6 +1795,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux 7 */ + { + .name = "e1000-82544gc", + .device_id = E1000_DEV_ID_82544GC_COPPER, +@@ -1807,6 +1808,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#endif + }; + + static void e1000_register_types(void) +diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c 
+index f27131102d..17f10efae2 100644 +--- a/hw/pci-host/i440fx.c ++++ b/hw/pci-host/i440fx.c +@@ -386,6 +386,7 @@ static const TypeInfo i440fx_info = { + }, + }; + ++#if 0 /* Disabled in Red Hat Enterprise Linux */ + /* IGD Passthrough Host Bridge. */ + typedef struct { + uint8_t offset; +@@ -469,6 +470,7 @@ static const TypeInfo igd_passthrough_i440fx_info = { + .instance_size = sizeof(PCII440FXState), + .class_init = igd_passthrough_i440fx_class_init, + }; ++#endif + + static const char *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge, + PCIBus *rootbus) +@@ -514,7 +516,9 @@ static const TypeInfo i440fx_pcihost_info = { + static void i440fx_register_types(void) + { + type_register_static(&i440fx_info); ++#if 0 /* Disabled in Red Hat Enterprise Linux */ + type_register_static(&igd_passthrough_i440fx_info); ++#endif + type_register_static(&i440fx_pcihost_info); + } + +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index 8339c4c0f8..301cd7b4e4 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -403,10 +403,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { + .instance_size = sizeof(SpaprCpuCore), + .class_size = sizeof(SpaprCpuCoreClass), + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"), ++#endif + DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), +diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs +index 303ac084a0..700a91886e 100644 +--- a/hw/usb/Makefile.objs ++++ b/hw/usb/Makefile.objs +@@ -30,7 +30,9 @@ common-obj-$(CONFIG_USB_BLUETOOTH) += dev-bluetooth.o + ifeq ($(CONFIG_USB_SMARTCARD),y) + common-obj-y += dev-smartcard-reader.o + common-obj-$(CONFIG_SMARTCARD) += smartcard.mo +-smartcard.mo-objs := ccid-card-passthru.o 
ccid-card-emulated.o ++# Disabled for Red Hat Enterprise Linux: ++# smartcard.mo-objs := ccid-card-passthru.o ccid-card-emulated.o ++smartcard.mo-objs := ccid-card-passthru.o + smartcard.mo-cflags := $(SMARTCARD_CFLAGS) + smartcard.mo-libs := $(SMARTCARD_LIBS) + endif +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index 136f3a9ad6..4505ffe48a 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1166,6 +1166,7 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) + trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* + * Intel IGD support + * +@@ -1239,6 +1240,7 @@ static int igd_gen(VFIOPCIDevice *vdev) + + return 8; /* Assume newer is compatible */ + } ++#endif + + typedef struct VFIOIGDQuirk { + struct VFIOPCIDevice *vdev; +@@ -1311,6 +1313,7 @@ typedef struct { + uint8_t len; + } IGDHostInfo; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static const IGDHostInfo igd_host_bridge_infos[] = { + {PCI_REVISION_ID, 2}, + {PCI_SUBSYSTEM_VENDOR_ID, 2}, +@@ -1559,9 +1562,11 @@ static const MemoryRegionOps vfio_igd_index_quirk = { + .write = vfio_igd_quirk_index_write, + .endianness = DEVICE_LITTLE_ENDIAN, + }; ++#endif + + static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) + { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + struct vfio_region_info *rom = NULL, *opregion = NULL, + *host = NULL, *lpc = NULL; + VFIOQuirk *quirk; +@@ -1572,6 +1577,7 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) + uint32_t gmch; + uint16_t cmd_orig, cmd; + Error *err = NULL; ++#endif + + /* + * This must be an Intel VGA device at address 00:02.0 for us to even +@@ -1585,6 +1591,8 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) + return; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ ++ + /* + * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we + * can stuff host values into, 
so if there's already one there and it's not +@@ -1809,6 +1817,7 @@ out: + g_free(opregion); + g_free(host); + g_free(lpc); ++#endif + } + + /* +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 2d40b396f2..c8534d3035 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3220,6 +3220,7 @@ static const TypeInfo vfio_pci_dev_info = { + }, + }; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static Property vfio_pci_dev_nohotplug_properties[] = { + DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), + DEFINE_PROP_END_OF_LIST(), +@@ -3239,11 +3240,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { + .instance_size = sizeof(VFIOPCIDevice), + .class_init = vfio_pci_nohotplug_dev_class_init, + }; ++#endif + + static void register_vfio_pci_dev_type(void) + { + type_register_static(&vfio_pci_dev_info); ++ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + type_register_static(&vfio_pci_nohotplug_dev_info); ++#endif + } + + type_init(register_vfio_pci_dev_type) +diff --git a/qemu-options.hx b/qemu-options.hx +index 65c9473b73..fc17aca631 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2111,11 +2111,6 @@ ETEXI + + DEF("no-hpet", 0, QEMU_OPTION_no_hpet, + "-no-hpet disable HPET\n", QEMU_ARCH_I386) +-STEXI +-@item -no-hpet +-@findex -no-hpet +-Disable HPET support. 
+-ETEXI + + DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, + "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" +@@ -3125,6 +3120,7 @@ STEXI + ETEXI + DEFHEADING() + ++#if 0 + DEFHEADING(Bluetooth(R) options:) + STEXI + @table @option +@@ -3203,6 +3199,7 @@ STEXI + @end table + ETEXI + DEFHEADING() ++#endif + + #ifdef CONFIG_TPM + DEFHEADING(TPM device options:) +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 7a4ac9339b..3788fc3c4a 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2744,7 +2744,9 @@ static void arm_cpu_register_types(void) + type_register_static(&idau_interface_type_info); + + while (info->name) { +- cpu_register(info); ++ /* RHEL specific: Filter out unsupported cpu models */ ++ if (!strcmp(info->name, "cortex-a15")) ++ cpu_register(info); + info++; + } + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 69f518a21a..1b7880ae3a 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1835,14 +1835,14 @@ static X86CPUDefinition builtin_x86_defs[] = { + .family = 6, + .model = 6, + .stepping = 3, +- .features[FEAT_1_EDX] = +- PPRO_FEATURES | +- CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA | +- CPUID_PSE36, +- .features[FEAT_1_ECX] = +- CPUID_EXT_SSE3 | CPUID_EXT_CX16, +- .features[FEAT_8000_0001_EDX] = +- CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, ++ .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | ++ CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | ++ CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | ++ CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | ++ CPUID_PSE | CPUID_DE | CPUID_FP87, ++ .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, ++ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | ++ CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, + .xlevel = 0x8000000A, +@@ -2128,6 +2128,25 @@ static 
X86CPUDefinition builtin_x86_defs[] = { + .xlevel = 0x80000008, + .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", + }, ++ { ++ .name = "cpu64-rhel6", ++ .level = 4, ++ .vendor = CPUID_VENDOR_AMD, ++ .family = 6, ++ .model = 13, ++ .stepping = 3, ++ .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | ++ CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | ++ CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | ++ CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | ++ CPUID_PSE | CPUID_DE | CPUID_FP87, ++ .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, ++ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, ++ .features[FEAT_8000_0001_ECX] = CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | ++ CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, ++ .xlevel = 0x8000000A, ++ .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", ++ }, + { + .name = "Conroe", + .level = 10, +diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c +index 086548e9b9..1bbf378c18 100644 +--- a/target/ppc/cpu-models.c ++++ b/target/ppc/cpu-models.c +@@ -66,6 +66,7 @@ + #define POWERPC_DEF(_name, _pvr, _type, _desc) \ + POWERPC_DEF_SVR(_name, _desc, _pvr, POWERPC_SVR_NONE, _type) + ++#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ + /* Embedded PowerPC */ + /* PowerPC 401 family */ + POWERPC_DEF("401", CPU_POWERPC_401, 401, +@@ -740,8 +741,10 @@ + "PowerPC 7447A v1.2 (G4)") + POWERPC_DEF("7457a_v1.2", CPU_POWERPC_74x7A_v12, 7455, + "PowerPC 7457A v1.2 (G4)") ++#endif + /* 64 bits PowerPC */ + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970, + "PowerPC 970 v2.2") + POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, +@@ -760,6 +763,7 @@ + "PowerPC 970MP v1.1") + POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, + "POWER5+ v2.1") ++#endif + POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, + "POWER7 v2.3") 
+ POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, +@@ -780,6 +784,7 @@ + /* PowerPC CPU aliases */ + + PowerPCCPUAlias ppc_cpu_aliases[] = { ++#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ + { "403", "403gc" }, + { "405", "405d4" }, + { "405cr", "405crc" }, +@@ -938,12 +943,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "7447a", "7447a_v1.2" }, + { "7457a", "7457a_v1.2" }, + { "apollo7pm", "7457a_v1.0" }, ++#endif + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "970", "970_v2.2" }, + { "970fx", "970fx_v3.1" }, + { "970mp", "970mp_v1.1" }, + { "power5+", "power5+_v2.1" }, + { "power5gs", "power5+_v2.1" }, ++#endif + { "power7", "power7_v2.3" }, + { "power7+", "power7+_v2.1" }, + { "power8e", "power8e_v2.1" }, +@@ -952,6 +960,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "power9", "power9_v2.0" }, + #endif + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* Generic PowerPCs */ + #if defined(TARGET_PPC64) + { "ppc64", "970fx_v3.1" }, +@@ -959,5 +968,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "ppc32", "604" }, + { "ppc", "604" }, + { "default", "604" }, ++#endif + { NULL, NULL } + }; +diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c +index 7e92fb2e15..be718220d7 100644 +--- a/target/s390x/cpu_models.c ++++ b/target/s390x/cpu_models.c +@@ -404,6 +404,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, + (max_model->def->gen == model->def->gen && + max_model->def->ec_ga < model->def->ec_ga)) { + list_add_feat("type", unavailable); ++ } else if (model->def->gen < 11 && kvm_enabled()) { ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ list_add_feat("type", unavailable); + } + + /* detect missing features if any to properly report them */ +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index 0c9d14b4b1..a02d569537 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -2387,6 +2387,14 @@ void 
kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) + error_setg(errp, "KVM doesn't support CPU models"); + return; + } ++ ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ if (model->def->gen < 11) { ++ error_setg(errp, "KVM: Unsupported CPU type specified: %s", ++ MACHINE(qdev_get_machine())->cpu_type); ++ return; ++ } ++ + prop.cpuid = s390_cpuid_from_cpu_model(model); + prop.ibc = s390_ibc_from_cpu_model(model); + /* configure cpu features indicated via STFL(e) */ +diff --git a/util/memfd.c b/util/memfd.c +index 4a3c07e0be..3303ec9da4 100644 +--- a/util/memfd.c ++++ b/util/memfd.c +@@ -193,7 +193,7 @@ bool qemu_memfd_alloc_check(void) + */ + bool qemu_memfd_check(unsigned int flags) + { +-#ifdef CONFIG_LINUX ++#if 0 /* RHEL: memfd support disabled */ + int mfd = memfd_create("test", flags | MFD_CLOEXEC); + + if (mfd >= 0) { +diff --git a/vl.c b/vl.c +index 6a65a64bfd..668a34577e 100644 +--- a/vl.c ++++ b/vl.c +@@ -166,7 +166,7 @@ Chardev *parallel_hds[MAX_PARALLEL_PORTS]; + int win2k_install_hack = 0; + int singlestep = 0; + int acpi_enabled = 1; +-int no_hpet = 0; ++int no_hpet = 1; /* Always disabled for Red Hat Enterprise Linux */ + int fd_bootchk = 1; + static int no_reboot; + int no_shutdown = 0; +@@ -914,6 +914,7 @@ static void configure_rtc(QemuOpts *opts) + } + } + ++#if 0 // Disabled for Red Hat Enterprise Linux + /***********************************************************/ + /* Bluetooth support */ + static int nb_hcis; +@@ -1035,6 +1036,7 @@ static int bt_parse(const char *opt) + error_report("bad bluetooth parameter '%s'", opt); + return 1; + } ++#endif + + static int parse_name(void *opaque, QemuOpts *opts, Error **errp) + { +@@ -3128,6 +3130,7 @@ int main(int argc, char **argv, char **envp) + } + break; + #endif ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + case QEMU_OPTION_bt: + warn_report("The bluetooth subsystem is deprecated and will " + "be removed soon. 
If the bluetooth subsystem is " +@@ -3135,6 +3138,7 @@ int main(int argc, char **argv, char **envp) + "qemu-devel@nongnu.org with your usecase."); + add_device_config(DEV_BT, optarg); + break; ++#endif + case QEMU_OPTION_audio_help: + audio_legacy_help(); + exit (0); +@@ -4282,9 +4286,11 @@ int main(int argc, char **argv, char **envp) + + tpm_init(); + ++#if 0 // Disabled for Red Hat Enterprise Linux + /* init the bluetooth world */ + if (foreach_device_config(DEV_BT, bt_parse)) + exit(1); ++#endif + + if (!xen_enabled()) { + /* On 32-bit hosts, QEMU is limited by virtual address space */ +-- +2.21.0 + diff --git a/SOURCES/0007-Machine-type-related-general-changes.patch b/SOURCES/0007-Machine-type-related-general-changes.patch new file mode 100644 index 0000000..4ae3966 --- /dev/null +++ b/SOURCES/0007-Machine-type-related-general-changes.patch @@ -0,0 +1,675 @@ +From 113078b23a4747b07eb363719d7cbc0af403dd2a Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 11 Jan 2019 09:54:45 +0100 +Subject: Machine type related general changes + +This patch is first part of original "Add RHEL machine types" patch we +split to allow easier review. It contains changes not related to any +architecture. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- Remove e1000 device duplication changes to reflect upstream solution +- Rewrite machine compat properties to upstream solution + +Rebase changes (4.1.0): +- Removed optional flag for machine compat properties (upstream) +- Remove c3e002cb chunk from hw/net/e1000.c +- Reorder compat structures +- Use one format for compat structures +- Added compat for virtio-balloon-pci.any_layout for rhel71 + +Merged patches (4.0.0): +- d4c0957 compat: Generic HW_COMPAT_RHEL7_6 +- cbac773 virtio: Make disable-legacy/disable-modern compat properties optional + +Merged patches (4.1.0): +- 479ad30 redhat: fix cut'n'paste garbage in hw_compat comments +- f19738e compat: Generic hw_compat_rhel_8_0 + +Merged patches (4.2.0): +- 9f2bfaa machine types: Update hw_compat_rhel_8_0 from hw_compat_4_0 +- ca4a5e8 virtio: Make disable-legacy/disable-modern compat properties optional +- compat: Generic hw_compat_rhel_8_1 (patch 93040/92956) + +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/acpi/ich9.c | 16 ++++ + hw/acpi/piix4.c | 5 +- + hw/char/serial.c | 16 ++++ + hw/core/machine.c | 170 ++++++++++++++++++++++++++++++++++++++++ + hw/display/vga-isa.c | 2 +- + hw/net/e1000e.c | 21 +++++ + hw/net/rtl8139.c | 4 +- + hw/rtc/mc146818rtc.c | 6 ++ + hw/smbios/smbios.c | 1 + + hw/timer/i8254_common.c | 2 +- + hw/usb/hcd-uhci.c | 4 +- + hw/usb/hcd-xhci.c | 20 +++++ + hw/usb/hcd-xhci.h | 2 + + include/hw/acpi/ich9.h | 3 + + include/hw/boards.h | 24 ++++++ + include/hw/usb.h | 4 + + migration/migration.c | 2 + + migration/migration.h | 5 ++ + 18 files changed, 301 insertions(+), 6 deletions(-) + +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index ab203ad448..7ec26884e8 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -444,6 +444,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) + s->pm.enable_tco = value; + } + ++static bool ich9_pm_get_force_rev1_fadt(Object *obj, Error **errp) ++{ ++ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); ++ return s->pm.force_rev1_fadt; ++} ++ ++static void ich9_pm_set_force_rev1_fadt(Object *obj, bool value, Error **errp) ++{ ++ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); ++ s->pm.force_rev1_fadt = value; ++} ++ + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) + { + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; +@@ -468,6 +480,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) + ich9_pm_get_cpu_hotplug_legacy, + ich9_pm_set_cpu_hotplug_legacy, + NULL); ++ object_property_add_bool(obj, "__com.redhat_force-rev1-fadt", ++ ich9_pm_get_force_rev1_fadt, ++ ich9_pm_set_force_rev1_fadt, ++ NULL); + object_property_add(obj, ACPI_PM_PROP_S3_DISABLED, "uint8", + ich9_pm_get_disable_s3, + ich9_pm_set_disable_s3, +diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c +index 93aec2dd2c..3a26193cbe 100644 +--- a/hw/acpi/piix4.c ++++ b/hw/acpi/piix4.c +@@ -274,6 +274,7 @@ static const VMStateDescription vmstate_acpi = { + .name = "piix4_pm", + 
.version_id = 3, + .minimum_version_id = 3, ++ .minimum_version_id = 2, + .post_load = vmstate_acpi_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), +@@ -627,8 +628,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + + static Property piix4_pm_properties[] = { + DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), + DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, + use_acpi_pci_hotplug, true), +diff --git a/hw/char/serial.c b/hw/char/serial.c +index b4aa250950..0012f0e44d 100644 +--- a/hw/char/serial.c ++++ b/hw/char/serial.c +@@ -34,6 +34,7 @@ + #include "sysemu/runstate.h" + #include "qemu/error-report.h" + #include "trace.h" ++#include "migration/migration.h" + + //#define DEBUG_SERIAL + +@@ -703,6 +704,9 @@ static int serial_post_load(void *opaque, int version_id) + static bool serial_thr_ipending_needed(void *opaque) + { + SerialState *s = opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } + + if (s->ier & UART_IER_THRI) { + bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); +@@ -784,6 +788,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { + static bool serial_fifo_timeout_timer_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return timer_pending(s->fifo_timeout_timer); + } + +@@ -801,6 +809,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { + static bool serial_timeout_ipending_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if 
(migrate_pre_2_2) { ++ return false; ++ } ++ + return s->timeout_ipending != 0; + } + +@@ -818,6 +830,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { + static bool serial_poll_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->poll_msl >= 0; + } + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 1689ad3bf8..e0e0eec8bf 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -27,6 +27,176 @@ + #include "hw/pci/pci.h" + #include "hw/mem/nvdimm.h" + ++/* ++ * The same as hw_compat_4_1 ++ */ ++GlobalProperty hw_compat_rhel_8_1[] = { ++ /* hw_compat_rhel_8_1 from hw_compat_4_1 */ ++ { "virtio-pci", "x-pcie-flr-init", "off" }, ++}; ++const size_t hw_compat_rhel_8_1_len = G_N_ELEMENTS(hw_compat_rhel_8_1); ++ ++/* The same as hw_compat_3_1 ++ * format of array has been changed by: ++ * 6c36bddf5340 ("machine: Use shorter format for GlobalProperty arrays") ++ */ ++GlobalProperty hw_compat_rhel_8_0[] = { ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-speed", "2_5" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-width", "1" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-crb", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-tis", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-kbd", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-mouse", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-tablet", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "discard", "false" }, ++ /* hw_compat_rhel_8_0 from 
hw_compat_3_1 */ ++ { "virtio-blk-device", "write-zeroes", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "VGA", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "secondary-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "bochs-display", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-gpu-pci", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-device", "use-started", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ ++ { "pcie-root-port-base", "disable-acs", "true" }, ++}; ++const size_t hw_compat_rhel_8_0_len = G_N_ELEMENTS(hw_compat_rhel_8_0); ++ ++/* The same as hw_compat_3_0 + hw_compat_2_12 ++ * except that ++ * there's nothing in 3_0 ++ * migration.decompress-error-check=off was in 7.5 from bz 1584139 ++ */ ++GlobalProperty hw_compat_rhel_7_6[] = { ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "hda-audio", "use-timer", "false" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "cirrus-vga", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "VGA", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "vmware-svga", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "qxl-vga", "global-vmstate", "true" }, ++}; ++const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); ++ ++/* The same as hw_compat_2_11 + hw_compat_2_10 */ ++GlobalProperty hw_compat_rhel_7_5[] = { ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "hpet", "hpet-offset-saved", "false" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "virtio-blk-pci", "vectors", "2" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "vhost-user-blk-pci", "vectors", "2" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 ++ bz 1608778 modified for our 
naming */ ++ { "e1000-82540em", "migrate_tso_props", "off" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_10 */ ++ { "virtio-mouse-device", "wheel-axis", "false" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_10 */ ++ { "virtio-tablet-device", "wheel-axis", "false" }, ++ { "cirrus-vga", "vgamem_mb", "16" }, ++ { "migration", "decompress-error-check", "off" }, ++}; ++const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); ++ ++/* Mostly like hw_compat_2_9 except ++ * x-mtu-bypass-backend, x-migrate-msix has already been ++ * backported to RHEL7.4. shpc was already on in 7.4. ++ */ ++GlobalProperty hw_compat_rhel_7_4[] = { ++ { "intel-iommu", "pt", "off" }, ++}; ++ ++const size_t hw_compat_rhel_7_4_len = G_N_ELEMENTS(hw_compat_rhel_7_4); ++/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except ++ * disable-modern, disable-legacy, page-per-vq have already been ++ * backported to RHEL7.3 ++ */ ++GlobalProperty hw_compat_rhel_7_3[] = { ++ { "virtio-mmio", "format_transport_address", "off" }, ++ { "virtio-serial-device", "emergency-write", "off" }, ++ { "ioapic", "version", "0x11" }, ++ { "intel-iommu", "x-buggy-eim", "true" }, ++ { "virtio-pci", "x-ignore-backend-features", "on" }, ++ { "fw_cfg_mem", "x-file-slots", stringify(0x10) }, ++ { "fw_cfg_io", "x-file-slots", stringify(0x10) }, ++ { "pflash_cfi01", "old-multiple-chip-handling", "on" }, ++ { TYPE_PCI_DEVICE, "x-pcie-extcap-init", "off" }, ++ { "virtio-pci", "x-pcie-deverr-init", "off" }, ++ { "virtio-pci", "x-pcie-lnkctl-init", "off" }, ++ { "virtio-pci", "x-pcie-pm-init", "off" }, ++ { "virtio-net-device", "x-mtu-bypass-backend", "off" }, ++ { "e1000e", "__redhat_e1000e_7_3_intr_state", "on" }, ++}; ++const size_t hw_compat_rhel_7_3_len = G_N_ELEMENTS(hw_compat_rhel_7_3); ++ ++/* Mostly like hw_compat_2_4 + 2_3 but: ++ * we don't need "any_layout" as it has been backported to 7.2 ++ */ ++GlobalProperty hw_compat_rhel_7_2[] = { ++ { "virtio-blk-device", "scsi", "true" }, ++ { 
"e1000-82540em", "extra_mac_registers", "off" }, ++ { "virtio-pci", "x-disable-pcie", "on" }, ++ { "virtio-pci", "migrate-extra", "off" }, ++ { "fw_cfg_mem", "dma_enabled", "off" }, ++ { "fw_cfg_io", "dma_enabled", "off" }, ++ { "isa-fdc", "fallback", "144" }, ++ /* Optional because not all virtio-pci devices support legacy mode */ ++ { "virtio-pci", "disable-modern", "on", .optional = true }, ++ { "virtio-pci", "disable-legacy", "off", .optional = true }, ++ { TYPE_PCI_DEVICE, "x-pcie-lnksta-dllla", "off" }, ++ { "virtio-pci", "page-per-vq", "on" }, ++ /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ ++ { "migration", "send-section-footer", "off" }, ++ /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ ++ { "migration", "store-global-state", "off", ++ }, ++}; ++const size_t hw_compat_rhel_7_2_len = G_N_ELEMENTS(hw_compat_rhel_7_2); ++ ++/* Mostly like hw_compat_2_1 but: ++ * we don't need virtio-scsi-pci since 7.0 already had that on ++ * ++ * RH: Note, qemu-extended-regs should have been enabled in the 7.1 ++ * machine type, but was accidentally turned off in 7.2 onwards. 
++ */ ++GlobalProperty hw_compat_rhel_7_1[] = { ++ { "intel-hda-generic", "old_msi_addr", "on" }, ++ { "VGA", "qemu-extended-regs", "off" }, ++ { "secondary-vga", "qemu-extended-regs", "off" }, ++ { "usb-mouse", "usb_version", stringify(1) }, ++ { "usb-kbd", "usb_version", stringify(1) }, ++ { "virtio-pci", "virtio-pci-bus-master-bug-migration", "on" }, ++ { "virtio-blk-pci", "any_layout", "off" }, ++ { "virtio-balloon-pci", "any_layout", "off" }, ++ { "virtio-serial-pci", "any_layout", "off" }, ++ { "virtio-9p-pci", "any_layout", "off" }, ++ { "virtio-rng-pci", "any_layout", "off" }, ++ /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ ++ { "migration", "send-configuration", "off" }, ++}; ++const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); ++ + GlobalProperty hw_compat_4_1[] = { + { "virtio-pci", "x-pcie-flr-init", "off" }, + }; +diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c +index 873e5e9706..d1a2efe47e 100644 +--- a/hw/display/vga-isa.c ++++ b/hw/display/vga-isa.c +@@ -82,7 +82,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) + } + + static Property vga_isa_properties[] = { +- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8), ++ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c +index b69fd7d8ad..d8be50a1ce 100644 +--- a/hw/net/e1000e.c ++++ b/hw/net/e1000e.c +@@ -79,6 +79,11 @@ typedef struct E1000EState { + + E1000ECore core; + ++ /* 7.3 had the intr_state field that was in the original e1000e code ++ * but that was removed prior to 2.7's release ++ */ ++ bool redhat_7_3_intr_state_enable; ++ uint32_t redhat_7_3_intr_state; + } E1000EState; + + #define E1000E_MMIO_IDX 0 +@@ -94,6 +99,10 @@ typedef struct E1000EState { + #define E1000E_MSIX_TABLE (0x0000) + #define E1000E_MSIX_PBA (0x2000) + ++/* Values as in RHEL 7.3 build and original upstream */ ++#define RH_E1000E_USE_MSI BIT(0) 
++#define RH_E1000E_USE_MSIX BIT(1) ++ + static uint64_t + e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) + { +@@ -305,6 +314,8 @@ e1000e_init_msix(E1000EState *s) + } else { + if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { + msix_uninit(d, &s->msix, &s->msix); ++ } else { ++ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSIX; + } + } + } +@@ -476,6 +487,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) + ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); + if (ret) { + trace_e1000e_msi_init_fail(ret); ++ } else { ++ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSI; + } + + if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, +@@ -599,6 +612,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { + VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ + e1000e_vmstate_intr_timer, E1000IntrDelayTimer) + ++static bool rhel_7_3_check(void *opaque, int version_id) ++{ ++ return ((E1000EState *)opaque)->redhat_7_3_intr_state_enable; ++} ++ + static const VMStateDescription e1000e_vmstate = { + .name = "e1000e", + .version_id = 1, +@@ -610,6 +628,7 @@ static const VMStateDescription e1000e_vmstate = { + VMSTATE_MSIX(parent_obj, E1000EState), + + VMSTATE_UINT32(ioaddr, E1000EState), ++ VMSTATE_UINT32_TEST(redhat_7_3_intr_state, E1000EState, rhel_7_3_check), + VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), + VMSTATE_UINT8(core.rx_desc_len, E1000EState), + VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, +@@ -658,6 +677,8 @@ static PropertyInfo e1000e_prop_disable_vnet, + + static Property e1000e_properties[] = { + DEFINE_NIC_PROPERTIES(E1000EState, conf), ++ DEFINE_PROP_BOOL("__redhat_e1000e_7_3_intr_state", E1000EState, ++ redhat_7_3_intr_state_enable, false), + DEFINE_PROP_SIGNED("disable_vnet_hdr", E1000EState, disable_vnet, false, + e1000e_prop_disable_vnet, bool), + DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, +diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c +index 88a97d756d..21d80e96cf 100644 +--- 
a/hw/net/rtl8139.c ++++ b/hw/net/rtl8139.c +@@ -3177,7 +3177,7 @@ static int rtl8139_pre_save(void *opaque) + + static const VMStateDescription vmstate_rtl8139 = { + .name = "rtl8139", +- .version_id = 5, ++ .version_id = 4, + .minimum_version_id = 3, + .post_load = rtl8139_post_load, + .pre_save = rtl8139_pre_save, +@@ -3258,7 +3258,9 @@ static const VMStateDescription vmstate_rtl8139 = { + VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), ++#if 0 /* Disabled for Red Hat Enterprise Linux bz 1420195 */ + VMSTATE_UINT32_V(tally_counters.RxOkMul, RTL8139State, 5), ++#endif + VMSTATE_UINT16(tally_counters.TxAbt, RTL8139State), + VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), + +diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c +index 74ae74bc5c..73820517df 100644 +--- a/hw/rtc/mc146818rtc.c ++++ b/hw/rtc/mc146818rtc.c +@@ -42,6 +42,7 @@ + #include "qapi/visitor.h" + #include "exec/address-spaces.h" + #include "hw/rtc/mc146818rtc_regs.h" ++#include "migration/migration.h" + + #ifdef TARGET_I386 + #include "qapi/qapi-commands-misc-target.h" +@@ -820,6 +821,11 @@ static int rtc_post_load(void *opaque, int version_id) + static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) + { + RTCState *s = (RTCState *)opaque; ++ ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->irq_reinject_on_ack_count != 0; + } + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 11d476c4a2..e6e9355384 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -777,6 +777,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type1.product, product); + SMBIOS_SET_DEFAULT(type1.version, version); ++ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); + 
SMBIOS_SET_DEFAULT(type2.product, product); + SMBIOS_SET_DEFAULT(type2.version, version); +diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c +index 050875b497..32935da46c 100644 +--- a/hw/timer/i8254_common.c ++++ b/hw/timer/i8254_common.c +@@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { + .pre_save = pit_dispatch_pre_save, + .post_load = pit_dispatch_post_load, + .fields = (VMStateField[]) { +- VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), ++ VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ + VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, + vmstate_pit_channel, PITChannelState), + VMSTATE_INT64(channels[0].next_transition_time, +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index 23507ad3b5..9fd87a7ad9 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -1219,12 +1219,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) + UHCIState *s = UHCI(dev); + uint8_t *pci_conf = s->dev.config; + int i; ++ int irq_pin; + + pci_conf[PCI_CLASS_PROG] = 0x00; + /* TODO: reset value should be 0. 
*/ + pci_conf[USB_SBRN] = USB_RELEASE_1; // release number + +- pci_config_set_interrupt_pin(pci_conf, u->info.irq_pin + 1); ++ irq_pin = u->info.irq_pin; ++ pci_config_set_interrupt_pin(pci_conf, irq_pin + 1); + + if (s->masterbus) { + USBPort *ports[NB_PORTS]; +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index 80988bb305..8fed2eedd6 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -3590,9 +3590,27 @@ static const VMStateDescription vmstate_xhci_slot = { + } + }; + ++static int xhci_event_pre_save(void *opaque) ++{ ++ XHCIEvent *s = opaque; ++ ++ s->cve_2014_5263_a = ((uint8_t *)&s->type)[0]; ++ s->cve_2014_5263_b = ((uint8_t *)&s->type)[1]; ++ ++ return 0; ++} ++ ++bool migrate_cve_2014_5263_xhci_fields; ++ ++static bool xhci_event_cve_2014_5263(void *opaque, int version_id) ++{ ++ return migrate_cve_2014_5263_xhci_fields; ++} ++ + static const VMStateDescription vmstate_xhci_event = { + .name = "xhci-event", + .version_id = 1, ++ .pre_save = xhci_event_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT32(type, XHCIEvent), + VMSTATE_UINT32(ccode, XHCIEvent), +@@ -3601,6 +3619,8 @@ static const VMStateDescription vmstate_xhci_event = { + VMSTATE_UINT32(flags, XHCIEvent), + VMSTATE_UINT8(slotid, XHCIEvent), + VMSTATE_UINT8(epid, XHCIEvent), ++ VMSTATE_UINT8_TEST(cve_2014_5263_a, XHCIEvent, xhci_event_cve_2014_5263), ++ VMSTATE_UINT8_TEST(cve_2014_5263_b, XHCIEvent, xhci_event_cve_2014_5263), + VMSTATE_END_OF_LIST() + } + }; +diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h +index 2fad4df2a7..f554b671e3 100644 +--- a/hw/usb/hcd-xhci.h ++++ b/hw/usb/hcd-xhci.h +@@ -157,6 +157,8 @@ typedef struct XHCIEvent { + uint32_t flags; + uint8_t slotid; + uint8_t epid; ++ uint8_t cve_2014_5263_a; ++ uint8_t cve_2014_5263_b; + } XHCIEvent; + + typedef struct XHCIInterrupter { +diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h +index 41568d1837..1a23ccc412 100644 +--- a/include/hw/acpi/ich9.h ++++ b/include/hw/acpi/ich9.h +@@ -61,6 +61,9 @@ 
typedef struct ICH9LPCPMRegs { + uint8_t smm_enabled; + bool enable_tco; + TCOIORegs tco_regs; ++ ++ /* RH addition, see bz 1489800 */ ++ bool force_rev1_fadt; + } ICH9LPCPMRegs; + + #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" +diff --git a/include/hw/boards.h b/include/hw/boards.h +index de45087f34..6f85a0e032 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -377,4 +377,28 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_1[]; ++extern const size_t hw_compat_rhel_8_1_len; ++ ++extern GlobalProperty hw_compat_rhel_8_0[]; ++extern const size_t hw_compat_rhel_8_0_len; ++ ++extern GlobalProperty hw_compat_rhel_7_6[]; ++extern const size_t hw_compat_rhel_7_6_len; ++ ++extern GlobalProperty hw_compat_rhel_7_5[]; ++extern const size_t hw_compat_rhel_7_5_len; ++ ++extern GlobalProperty hw_compat_rhel_7_4[]; ++extern const size_t hw_compat_rhel_7_4_len; ++ ++extern GlobalProperty hw_compat_rhel_7_3[]; ++extern const size_t hw_compat_rhel_7_3_len; ++ ++extern GlobalProperty hw_compat_rhel_7_2[]; ++extern const size_t hw_compat_rhel_7_2_len; ++ ++extern GlobalProperty hw_compat_rhel_7_1[]; ++extern const size_t hw_compat_rhel_7_1_len; ++ + #endif +diff --git a/include/hw/usb.h b/include/hw/usb.h +index c24d968a19..b353438ea0 100644 +--- a/include/hw/usb.h ++++ b/include/hw/usb.h +@@ -605,4 +605,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, + uint8_t interface_class, uint8_t interface_subclass, + uint8_t interface_protocol); + ++ ++/* hcd-xhci.c -- rhel7.0.0 machine type compatibility */ ++extern bool migrate_cve_2014_5263_xhci_fields; ++ + #endif +diff --git a/migration/migration.c b/migration/migration.c +index 354ad072fa..30c53c623b 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -121,6 +121,8 @@ enum mig_rp_message_type { + MIG_RP_MSG_MAX + }; + ++bool migrate_pre_2_2; ++ + /* When we add fault 
tolerance, we could have several + migrations at once. For now we don't need to add + dynamic creation of migration */ +diff --git a/migration/migration.h b/migration/migration.h +index 79b3dda146..0b1b0d4df5 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -335,6 +335,11 @@ void init_dirty_bitmap_incoming_migration(void); + void migrate_add_address(SocketAddress *address); + + int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); ++/* ++ * Disables a load of subsections that were added in 2.2/rh7.2 for backwards ++ * migration compatibility. ++ */ ++extern bool migrate_pre_2_2; + + #define qemu_ram_foreach_block \ + #warning "Use foreach_not_ignored_block in migration code" +-- +2.21.0 + diff --git a/SOURCES/0008-Add-aarch64-machine-types.patch b/SOURCES/0008-Add-aarch64-machine-types.patch new file mode 100644 index 0000000..5397c8b --- /dev/null +++ b/SOURCES/0008-Add-aarch64-machine-types.patch @@ -0,0 +1,276 @@ +From 49164264d9928f73961acbbe4d56d8dfa23d8099 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 12:53:31 +0200 +Subject: Add aarch64 machine types + +Adding changes to add RHEL machine types for aarch64 architecture. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- Use upstream compat handling + +Rebase changes (4.1.0-rc0): +- Removed a15memmap (upstream) +- Use virt_flash_create in rhel800_virt_instance_init + +Rebase changes (4.2.0-rc0): +- Set numa_mem_supported + +Rebase notes (4.2.0-rc3): +- aarch64: Add virt-rhel8.2.0 machine type for ARM (patch 92246) +- aarch64: virt: Allow more than 1TB of RAM (patch 92249) +- aarch64: virt: Allow PCDIMM instantiation (patch 92247) +- aarch64: virt: Enhance the comment related to gic-version (patch 92248) + +Merged patches (4.0.0): +- 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM +- 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 +- 4d20863 aarch64: Use 256MB ECAM region by default + +Merged patches (4.1.0): +- c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM +- 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine + +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/virt.c | 161 +++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 11 +++ + 2 files changed, 171 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index d4bedc2607..e10839100e 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -72,6 +72,7 @@ + #include "hw/mem/nvdimm.h" + #include "hw/acpi/generic_event_device.h" + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ + void *data) \ +@@ -98,7 +99,49 @@ + DEFINE_VIRT_MACHINE_LATEST(major, minor, true) + #define DEFINE_VIRT_MACHINE(major, minor) \ + DEFINE_VIRT_MACHINE_LATEST(major, minor, false) +- ++#endif /* disabled for RHEL */ ++ ++#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ ++ static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ ++ void *data) \ ++ { \ ++ MachineClass *mc = MACHINE_CLASS(oc); \ ++ rhel##m##n##s##_virt_options(mc); \ ++ mc->desc = "RHEL " # m "." # n "." 
# s " ARM Virtual Machine"; \ ++ if (latest) { \ ++ mc->alias = "virt"; \ ++ mc->is_default = 1; \ ++ } \ ++ } \ ++ static const TypeInfo rhel##m##n##s##_machvirt_info = { \ ++ .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." # s), \ ++ .parent = TYPE_RHEL_MACHINE, \ ++ .instance_init = rhel##m##n##s##_virt_instance_init, \ ++ .class_init = rhel##m##n##s##_virt_class_init, \ ++ }; \ ++ static void rhel##m##n##s##_machvirt_init(void) \ ++ { \ ++ type_register_static(&rhel##m##n##s##_machvirt_info); \ ++ } \ ++ type_init(rhel##m##n##s##_machvirt_init); ++ ++#define DEFINE_RHEL_MACHINE_AS_LATEST(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true) ++#define DEFINE_RHEL_MACHINE(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) ++ ++/* This variable is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. ++ */ ++GlobalProperty arm_rhel_compat[] = { ++ { ++ .driver = "virtio-net-pci", ++ .property = "romfile", ++ .value = "", ++ }, ++}; ++const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); + + /* Number of external interrupt lines to configure the GIC with */ + #define NUM_IRQS 256 +@@ -1763,6 +1806,7 @@ static void machvirt_init(MachineState *machine) + qemu_add_machine_init_done_notifier(&vms->machine_done); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_secure(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -1791,6 +1835,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) + vms->virt = value; + } + ++#endif /* disabled for RHEL */ + static bool virt_get_highmem(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2022,6 +2067,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + return requested_pa_size > 40 ? 
requested_pa_size : 0; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void virt_machine_class_init(ObjectClass *oc, void *data) + { + MachineClass *mc = MACHINE_CLASS(oc); +@@ -2258,3 +2304,116 @@ static void virt_machine_2_6_options(MachineClass *mc) + vmc->no_pmu = true; + } + DEFINE_VIRT_MACHINE(2, 6) ++#endif /* disabled for RHEL */ ++ ++static void rhel_machine_class_init(ObjectClass *oc, void *data) ++{ ++ MachineClass *mc = MACHINE_CLASS(oc); ++ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); ++ ++ mc->family = "virt-rhel-Z"; ++ mc->init = machvirt_init; ++ /* Start with max_cpus set to 512, which is the maximum supported by KVM. ++ * The value may be reduced later when we have more information about the ++ * configuration of the particular instance. ++ */ ++ mc->max_cpus = 512; ++ mc->block_default_type = IF_VIRTIO; ++ mc->no_cdrom = 1; ++ mc->pci_allow_0_address = true; ++ /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */ ++ mc->minimum_page_bits = 12; ++ mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; ++ mc->cpu_index_to_instance_props = virt_cpu_index_to_props; ++ mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a57"); ++ mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; ++ mc->kvm_type = virt_kvm_type; ++ assert(!mc->get_hotplug_handler); ++ mc->get_hotplug_handler = virt_machine_get_hotplug_handler; ++ hc->pre_plug = virt_machine_device_pre_plug_cb; ++ hc->plug = virt_machine_device_plug_cb; ++ hc->unplug_request = virt_machine_device_unplug_request_cb; ++ mc->numa_mem_supported = true; ++ mc->auto_enable_numa_with_memhp = true; ++} ++ ++static const TypeInfo rhel_machine_info = { ++ .name = TYPE_RHEL_MACHINE, ++ .parent = TYPE_MACHINE, ++ .abstract = true, ++ .instance_size = sizeof(VirtMachineState), ++ .class_size = sizeof(VirtMachineClass), ++ .class_init = rhel_machine_class_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_HOTPLUG_HANDLER }, ++ { } ++ }, ++}; ++ ++static void 
rhel_machine_init(void) ++{ ++ type_register_static(&rhel_machine_info); ++} ++type_init(rhel_machine_init); ++ ++static void rhel820_virt_instance_init(Object *obj) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ ++ /* EL3 is disabled by default and non-configurable for RHEL */ ++ vms->secure = false; ++ /* EL2 is disabled by default and non-configurable for RHEL */ ++ vms->virt = false; ++ /* High memory is enabled by default for RHEL */ ++ vms->highmem = true; ++ object_property_add_bool(obj, "highmem", virt_get_highmem, ++ virt_set_highmem, NULL); ++ object_property_set_description(obj, "highmem", ++ "Set on/off to enable/disable using " ++ "physical address space above 32 bits", ++ NULL); ++ /* ++ * Default GIC type is still v2, but became configurable for RHEL. We ++ * keep v2 instead of max as TCG CI test cases require an MSI controller ++ * and there is no userspace ITS MSI emulation available. ++ */ ++ vms->gic_version = 2; ++ object_property_add_str(obj, "gic-version", virt_get_gic_version, ++ virt_set_gic_version, NULL); ++ object_property_set_description(obj, "gic-version", ++ "Set GIC version. " ++ "Valid values are 2, 3 and host", NULL); ++ ++ vms->highmem_ecam = !vmc->no_highmem_ecam; ++ ++ if (vmc->no_its) { ++ vms->its = false; ++ } else { ++ /* Default allows ITS instantiation */ ++ vms->its = true; ++ object_property_add_bool(obj, "its", virt_get_its, ++ virt_set_its, NULL); ++ object_property_set_description(obj, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation", ++ NULL); ++ } ++ ++ /* Default disallows iommu instantiation */ ++ vms->iommu = VIRT_IOMMU_NONE; ++ object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu, NULL); ++ object_property_set_description(obj, "iommu", ++ "Set the IOMMU type. 
" ++ "Valid values are none and smmuv3", ++ NULL); ++ ++ vms->irqmap=a15irqmap; ++ virt_flash_create(vms); ++} ++ ++static void rhel820_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 0b41083e9d..53fdf16563 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -142,6 +142,7 @@ typedef struct { + + #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) + ++#if 0 /* disabled for Red Hat Enterprise Linux */ + #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") + #define VIRT_MACHINE(obj) \ + OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) +@@ -150,6 +151,16 @@ typedef struct { + #define VIRT_MACHINE_CLASS(klass) \ + OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) + ++#else ++#define TYPE_RHEL_MACHINE MACHINE_TYPE_NAME("virt-rhel") ++#define VIRT_MACHINE(obj) \ ++ OBJECT_CHECK(VirtMachineState, (obj), TYPE_RHEL_MACHINE) ++#define VIRT_MACHINE_GET_CLASS(obj) \ ++ OBJECT_GET_CLASS(VirtMachineClass, obj, TYPE_RHEL_MACHINE) ++#define VIRT_MACHINE_CLASS(klass) \ ++ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_RHEL_MACHINE) ++#endif ++ + void virt_acpi_setup(VirtMachineState *vms); + + /* Return the number of used redistributor regions */ +-- +2.21.0 + diff --git a/SOURCES/0009-Add-ppc64-machine-types.patch b/SOURCES/0009-Add-ppc64-machine-types.patch new file mode 100644 index 0000000..a3f1a54 --- /dev/null +++ b/SOURCES/0009-Add-ppc64-machine-types.patch @@ -0,0 +1,463 @@ +From 136eae41007e2e5b0d693cc656f3ec36cbabf16f Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:27:13 +0200 +Subject: Add ppc64 machine types + +Adding changes to add RHEL machine types for ppc64 architecture. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- remove instance options and use upstream solution +- Use upstream compat handling +- Replace SPAPR_PCI_2_7_MMIO_WIN_SIZE with value (changed upstream) +- re-add handling of instance_options (removed upstream) +- Use p8 as default for rhel machine types (p9 default upstream) +- sPAPRMachineClass renamed to SpaprMachineClass (upstream) + +Rebase changes (4.1.0): +- Update format for compat structures + +Merged patches (4.0.0): +- 467d59a redhat: define pseries-rhel8.0.0 machine type + +Merged patches (4.1.0): +- f21757edc target/ppc/spapr: Enable mitigations by default for pseries-4.0 machine type +- 2511c63 redhat: sync pseries-rhel7.6.0 with rhel-av-8.0.1 +- 89f01da redhat: define pseries-rhel8.1.0 machine type + +Merged patches (4.2.0): +- bcba728 redhat: update pseries-rhel8.1.0 machine type +- redhat: update pseries-rhel-7.6.0 machine type (patch 93039) +- redhat: define pseries-rhel8.2.0 machine type (patch 93041) + +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr.c | 278 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr_cpu_core.c | 13 ++ + include/hw/ppc/spapr.h | 1 + + target/ppc/compat.c | 13 +- + target/ppc/cpu.h | 1 + + 5 files changed, 305 insertions(+), 1 deletion(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index e076f6023c..8749c72066 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4447,6 +4447,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + smc->linux_pci_probe = true; + smc->smp_threads_vsmt = true; + smc->nr_xirqs = SPAPR_NR_XIRQS; ++ smc->has_power9_support = true; + } + + static const TypeInfo spapr_machine_info = { +@@ -4491,6 +4492,7 @@ static const TypeInfo spapr_machine_info = { + } \ + type_init(spapr_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* + * pseries-4.2 + */ +@@ -4520,6 +4522,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) + } + + DEFINE_SPAPR_MACHINE(4_1, "4.1", false); ++#endif + + /* + * pseries-4.0 +@@ -4536,6 +4539,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, + *nv2atsd = 0; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_4_0_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -4695,6 +4699,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); + /* + * pseries-2.7 + */ ++#endif + + static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, +@@ -4749,6 +4754,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + *nv2atsd = 0; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_2_7_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -4863,6 +4869,278 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); + } + DEFINE_SPAPR_MACHINE(2_1, "2.1", 
false); ++#endif ++ ++/* ++ * pseries-rhel8.2.0 ++ */ ++ ++static void spapr_machine_rhel820_class_options(MachineClass *mc) ++{ ++ /* Defaults for the latest behaviour inherited from the base class */ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", true); ++ ++/* ++ * pseries-rhel8.1.0 ++ * like pseries-4.1 ++ */ ++ ++static void spapr_machine_rhel810_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ static GlobalProperty compat[] = { ++ /* Only allow 4kiB and 64kiB IOMMU pagesizes */ ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" }, ++ }; ++ ++ spapr_machine_rhel820_class_options(mc); ++ ++ /* from pseries-4.1 */ ++ smc->linux_pci_probe = false; ++ smc->smp_threads_vsmt = false; ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, ++ hw_compat_rhel_8_1_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); ++ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); ++ ++/* ++ * pseries-rhel8.0.0 ++ * like pseries-3.1 and pseries-4.0 ++ * except SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and SPAPR_CAP_IBS ++ * that have been backported to pseries-rhel8.0.0 ++ */ ++ ++static void spapr_machine_rhel800_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel810_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, ++ hw_compat_rhel_8_0_len); ++ ++ /* pseries-4.0 */ ++ smc->phb_placement = phb_placement_4_0; ++ smc->irq = &spapr_irq_xics; ++ smc->pre_4_1_migration = true; ++ ++ /* pseries-3.1 */ ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); ++ smc->update_dt_enabled = false; ++ smc->dr_phb_enabled = false; ++ smc->broken_host_serial_model = true; ++ smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", false); ++ ++/* ++ * pseries-rhel7.6.0 ++ * like spapr_compat_2_12 and spapr_compat_3_0 ++ * spapr_compat_0 is empty ++ */ ++GlobalProperty 
spapr_compat_rhel7_6[] = { ++ { TYPE_POWERPC_CPU, "pre-3.0-migration", "on" }, ++ { TYPE_SPAPR_CPU_CORE, "pre-3.0-migration", "on" }, ++}; ++const size_t spapr_compat_rhel7_6_len = G_N_ELEMENTS(spapr_compat_rhel7_6); ++ ++ ++static void spapr_machine_rhel760_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel800_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_6, spapr_compat_rhel7_6_len); ++ ++ /* from spapr_machine_3_0_class_options() */ ++ smc->legacy_irq_allocation = true; ++ smc->nr_xirqs = 0x400; ++ smc->irq = &spapr_irq_xics_legacy; ++ ++ /* from spapr_machine_2_12_class_options() */ ++ /* We depend on kvm_enabled() to choose a default value for the ++ * hpt-max-page-size capability. Of course we can't do it here ++ * because this is too early and the HW accelerator isn't initialized ++ * yet. Postpone this to machine init (see default_caps_with_cpu()). 
++ */ ++ smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0; ++ ++ /* SPAPR_CAP_WORKAROUND enabled in pseries-rhel800 by ++ * f21757edc554 ++ * ("Enable mitigations by default for pseries-4.0 machine type") ++ */ ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", false); ++ ++/* ++ * pseries-rhel7.6.0-sxxm ++ * ++ * pseries-rhel7.6.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel760_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); ++ ++static void spapr_machine_rhel750_class_options(MachineClass *mc) ++{ ++ spapr_machine_rhel760_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel750, "rhel7.5.0", false); ++ ++/* ++ * pseries-rhel7.5.0-sxxm ++ * ++ * pseries-rhel7.5.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel750sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel750_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel750sxxm, "rhel7.5.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.4.0 ++ * like spapr_compat_2_9 ++ */ ++GlobalProperty spapr_compat_rhel7_4[] = { ++ { 
TYPE_POWERPC_CPU, "pre-2.10-migration", "on" }, ++}; ++const size_t spapr_compat_rhel7_4_len = G_N_ELEMENTS(spapr_compat_rhel7_4); ++ ++static void spapr_machine_rhel740_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel750_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_4, spapr_compat_rhel7_4_len); ++ mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram; ++ smc->has_power9_support = false; ++ smc->pre_2_10_has_unused_icps = true; ++ smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; ++ smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel740, "rhel7.4.0", false); ++ ++/* ++ * pseries-rhel7.4.0-sxxm ++ * ++ * pseries-rhel7.4.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel740sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel740_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel740sxxm, "rhel7.4.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.3.0 ++ * like spapr_compat_2_6/_2_7/_2_8 but "ddw" has been backported to RHEL7_3 ++ */ ++GlobalProperty spapr_compat_rhel7_3[] = { ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0xf80000000" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem64_win_size", "0" }, ++ { TYPE_POWERPC_CPU, "pre-2.8-migration", "on" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-2.8-migration", "on" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pcie-extended-configuration-space", "off" }, ++}; ++const size_t spapr_compat_rhel7_3_len = G_N_ELEMENTS(spapr_compat_rhel7_3); ++ ++static void spapr_machine_rhel730_class_options(MachineClass *mc) ++{ 
++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel740_class_options(mc); ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3"); ++ mc->default_machine_opts = "modern-hotplug-events=off"; ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_3, spapr_compat_rhel7_3_len); ++ ++ smc->phb_placement = phb_placement_2_7; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel730, "rhel7.3.0", false); ++ ++/* ++ * pseries-rhel7.3.0-sxxm ++ * ++ * pseries-rhel7.3.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel730sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel730_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel730sxxm, "rhel7.3.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.2.0 ++ */ ++/* Should be like spapr_compat_2_5 + 2_4 + 2_3, but "dynamic-reconfiguration" ++ * has been backported to RHEL7_2 so we don't need it here. 
++ */ ++ ++GlobalProperty spapr_compat_rhel7_2[] = { ++ { "spapr-vlan", "use-rx-buffer-pools", "off" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "ddw", "off" }, ++}; ++const size_t spapr_compat_rhel7_2_len = G_N_ELEMENTS(spapr_compat_rhel7_2); ++ ++static void spapr_machine_rhel720_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel730_class_options(mc); ++ smc->use_ohci_by_default = true; ++ mc->has_hotpluggable_cpus = NULL; ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_2, spapr_compat_rhel7_2_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel720, "rhel7.2.0", false); + + static void spapr_machine_register_types(void) + { +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index 301cd7b4e4..ba5a8fb82b 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -24,6 +24,7 @@ + #include "sysemu/reset.h" + #include "sysemu/hw_accel.h" + #include "qemu/error-report.h" ++#include "cpu-models.h" + + static void spapr_reset_vcpu(PowerPCCPU *cpu) + { +@@ -242,6 +243,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); + Error *local_err = NULL; ++ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); + if (local_err) { +@@ -254,6 +256,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); + kvmppc_set_papr(cpu); + ++ if (!smc->has_power9_support && ++ (((spapr->max_compat_pvr && ++ ppc_compat_cmp(spapr->max_compat_pvr, ++ CPU_POWERPC_LOGICAL_3_00) >= 0)) || ++ (!spapr->max_compat_pvr && ++ ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, 0)))) { ++ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, ++ "POWER9 CPU is not supported by this machine class"); ++ return; ++ } ++ + 
if (spapr_irq_cpu_intc_create(spapr, cpu, &local_err) < 0) { + goto error_intc_create; + } +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index d5ab5ea7b2..aa89cc4a95 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -125,6 +125,7 @@ struct SpaprMachineClass { + bool linux_pci_probe; + bool smp_threads_vsmt; /* set VSMT to smp_threads by default */ + ++ bool has_power9_support; + void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, +diff --git a/target/ppc/compat.c b/target/ppc/compat.c +index 7de4bf3122..3e2e35342d 100644 +--- a/target/ppc/compat.c ++++ b/target/ppc/compat.c +@@ -105,8 +105,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) + return NULL; + } + ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2) ++{ ++ const CompatInfo *compat1 = compat_by_pvr(pvr1); ++ const CompatInfo *compat2 = compat_by_pvr(pvr2); ++ ++ g_assert(compat1); ++ g_assert(compat2); ++ ++ return compat1 - compat2; ++} ++ + static bool pcc_compat(PowerPCCPUClass *pcc, uint32_t compat_pvr, +- uint32_t min_compat_pvr, uint32_t max_compat_pvr) ++ uint32_t min_compat_pvr, uint32_t max_compat_pvr) + { + const CompatInfo *compat = compat_by_pvr(compat_pvr); + const CompatInfo *min = compat_by_pvr(min_compat_pvr); +diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h +index e3e82327b7..5c53801cfd 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1367,6 +1367,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) + + /* Compatibility modes */ + #if defined(TARGET_PPC64) ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2); + bool ppc_check_compat(PowerPCCPU *cpu, uint32_t compat_pvr, + uint32_t min_compat_pvr, uint32_t max_compat_pvr); + bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, +-- +2.21.0 + diff --git a/SOURCES/0010-Add-s390x-machine-types.patch b/SOURCES/0010-Add-s390x-machine-types.patch new file mode 100644 index 
0000000..d0f6669 --- /dev/null +++ b/SOURCES/0010-Add-s390x-machine-types.patch @@ -0,0 +1,126 @@ +From 0842700b3a01891c316e9169fa651f26714cafa5 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:47:32 +0200 +Subject: Add s390x machine types + +Adding changes to add RHEL machine types for s390x architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (weekly-4.1.0): +- Use upstream compat handling + +Merged patches (3.1.0): +- 29df663 s390x/cpumodel: default enable bpb and ppa15 for z196 and later + +Merged patches (4.1.0): +- 6c200d665b hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 + +Merged patches (4.2.0): +- fb192e5 redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to s390-ccw-virtio-rhel8.1.0 +- a9b22e8 redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine +- hw/s390x: Add the s390-ccw-virtio-rhel8.2.0 machine types (patch 92954) + +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/s390-virtio-ccw.c | 70 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 69 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index d3edeef0ad..c2c83d2fce 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -615,7 +615,7 @@ bool css_migration_enabled(void) + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + ccw_machine_##suffix##_class_options(mc); \ +- mc->desc = "VirtIO-ccw based S390 machine v" verstr; \ ++ mc->desc = "VirtIO-ccw based S390 machine " verstr; \ + if (latest) { \ + mc->alias = "s390-ccw-virtio"; \ + mc->is_default = 1; \ +@@ -639,6 +639,7 @@ bool css_migration_enabled(void) + } \ + type_init(ccw_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void ccw_machine_4_2_instance_options(MachineState *machine) + { + } +@@ -866,6 +867,73 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + } + 
DEFINE_CCW_MACHINE(2_4, "2.4", false); ++#endif ++ ++static void ccw_machine_rhel820_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel820_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", true); ++ ++static void ccw_machine_rhel760_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V3_1 }; ++ ++ ccw_machine_rhel820_instance_options(machine); ++ ++ s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); ++ ++ /* The multiple-epoch facility was not available with rhel7.6.0 on z14GA1 */ ++ s390_cpudef_featoff(14, 1, S390_FEAT_MULTIPLE_EPOCH); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QSIE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QTOUE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOUE); ++} ++ ++static void ccw_machine_rhel760_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel820_class_options(mc); ++ /* We never published the s390x version of RHEL-AV 8.0 and 8.1, so add this here */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++} ++DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false); ++ ++static void ccw_machine_rhel750_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 }; ++ ccw_machine_rhel760_instance_options(machine); ++ ++ /* before 2.12 we emulated the very first z900, and RHEL 7.5 is ++ based on 2.10 */ ++ s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); ++ ++ /* bpb and ppa15 were only in the full model in RHEL 7.5 */ ++ s390_cpudef_featoff_greater(11, 1, S390_FEAT_PPA15); ++ s390_cpudef_featoff_greater(11, 1, S390_FEAT_BPB); ++} ++ ++GlobalProperty ccw_compat_rhel_7_5[] = { ++ { ++ .driver = TYPE_SCLP_EVENT_FACILITY, ++ .property = "allow_all_mask_sizes", ++ .value = "off", ++ }, ++}; 
++const size_t ccw_compat_rhel_7_5_len = G_N_ELEMENTS(ccw_compat_rhel_7_5); ++ ++static void ccw_machine_rhel750_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel760_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(mc->compat_props, ccw_compat_rhel_7_5, ccw_compat_rhel_7_5_len); ++ S390_MACHINE_CLASS(mc)->hpage_1m_allowed = false; ++} ++DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); + + static void ccw_machine_register_types(void) + { +-- +2.21.0 + diff --git a/SOURCES/0011-Add-x86_64-machine-types.patch b/SOURCES/0011-Add-x86_64-machine-types.patch new file mode 100644 index 0000000..72a5159 --- /dev/null +++ b/SOURCES/0011-Add-x86_64-machine-types.patch @@ -0,0 +1,897 @@ +From 2ebaeca6e26950f401a8169d1324be2bafd11741 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:10:31 +0200 +Subject: Add x86_64 machine types + +Adding changes to add RHEL machine types for x86_64 architecture. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase changes (qemu-4.0.0): +- Use upstream compat handling + +Rebase notes (3.1.0): +- Removed xsave changes + +Rebase notes (4.1.0): +- Updated format for compat structures + +Rebase notes (4.2.0-rc2): +- Use X86MachineClass for save_tsc_khz (upstream change) + +Merged patches (4.1.0): +- f4dc802 pc: 7.5 compat entries +- 456ed3e pc: PC_RHEL7_6_COMPAT +- 04119ee pc: Add compat for pc-i440fx-rhel7.6.0 machine type +- b3b3687 pc: Add pc-q35-8.0.0 machine type +- 8d46fc6 pc: Add x-migrate-smi-count=off to PC_RHEL7_6_COMPAT +- 1de7949 kvm: clear out KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT for older machine types +- 18cf0d7 target/i386: Disable MPX support on named CPU models (partially) +- 2660667 rhel: Set host-phys-bits-limit=48 on rhel machine-types + +Merged patches (4.2.0): +- 7d5c2ef pc: Don't make die-id mandatory unless necessary +- e42808c x86 machine types: pc_rhel_8_0_compat +- 9de83a8 x86 machine types: q35: Fixup units_per_default_bus +- 6df1559 x86 machine types: Fixup dynamic sysbus entries +- 0784125 x86 machine types: add pc-q35-rhel8.1.0 +- machines/x86: Add rhel 8.2 machine type (patch 92959) + +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/i386/acpi-build.c | 3 + + hw/i386/pc.c | 263 ++++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 210 +++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 156 ++++++++++++++++++++++++- + include/hw/boards.h | 2 + + include/hw/i386/pc.h | 33 ++++++ + target/i386/cpu.c | 9 +- + target/i386/kvm.c | 4 + + 8 files changed, 673 insertions(+), 7 deletions(-) + +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index 12ff55fcfb..64001893ab 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -204,6 +204,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) + pm->fadt.reset_reg = r; + pm->fadt.reset_val = 0xf; + pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; ++ if (object_property_get_bool(lpc, ++ "__com.redhat_force-rev1-fadt", NULL)) ++ pm->fadt.rev = 1; + pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; + } + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index ac08e63604..61e70e4811 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -344,6 +344,261 @@ GlobalProperty pc_compat_1_4[] = { + }; + const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + ++/* This variable is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. 
++ */ ++GlobalProperty pc_rhel_compat[] = { ++ { TYPE_X86_CPU, "host-phys-bits", "on" }, ++ { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, ++ /* bz 1508330 */ ++ { "vfio-pci", "x-no-geforce-quirks", "on" }, ++}; ++const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); ++ ++/* pc_rhel_8_1_compat is empty since pc_4_1_compat is */ ++GlobalProperty pc_rhel_8_1_compat[] = { }; ++const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); ++ ++GlobalProperty pc_rhel_8_0_compat[] = { ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "intel-iommu", "dma-drain", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /** The mpx=on entries from pc_compat_3_1 are in pc_rhel_7_6_compat **/ ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" }, ++}; ++const 
size_t pc_rhel_8_0_compat_len = G_N_ELEMENTS(pc_rhel_8_0_compat); ++ ++/* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but: ++ * all of the 2_12 stuff was already in 7.6 from bz 1481253 ++ * x-migrate-smi-count comes from PC_COMPAT_2_11 but ++ * is really tied to kernel version so keep it off on 7.x ++ * machine types irrespective of host. ++ */ ++GlobalProperty pc_rhel_7_6_compat[] = { ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { TYPE_X86_CPU, "x-migrate-smi-count", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++}; ++const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); ++ ++/* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: ++ * - x-hv-max-vps was backported to 7.5 ++ * - x-pci-hole64-fix was backported to 7.5 ++ */ ++GlobalProperty pc_rhel_7_5_compat[] = { ++ /* pc_rhel_7_5_compat from pc_compat_2_11 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "clflushopt", "off" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { TYPE_X86_CPU, "legacy-cache", 
"on" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { TYPE_X86_CPU, "topoext", "off" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { "EPYC-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { "EPYC-IBPB-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, ++}; ++const size_t pc_rhel_7_5_compat_len = G_N_ELEMENTS(pc_rhel_7_5_compat); ++ ++GlobalProperty pc_rhel_7_4_compat[] = { ++ /* pc_rhel_7_4_compat from pc_compat_2_9 */ ++ { "mch", "extended-tseg-mbytes", stringify(0) }, ++ /* bz 1489800 */ ++ { "ICH9-LPC", "__com.redhat_force-rev1-fadt", "on" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { "i440FX-pcihost", "x-pci-hole64-fix", "off" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { "q35-pcihost", "x-pci-hole64-fix", "off" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { TYPE_X86_CPU, "x-hv-max-vps", "0x40" }, ++}; ++const size_t pc_rhel_7_4_compat_len = G_N_ELEMENTS(pc_rhel_7_4_compat); ++ ++GlobalProperty pc_rhel_7_3_compat[] = { ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "kvmclock", "x-mach-use-reliable-get-clock", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { TYPE_X86_CPU, "l3-cache", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { TYPE_X86_CPU, "full-cpuid-auto-level", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "family", "15" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "model", "6" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "stepping", "1" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "isa-pcspk", "migrate", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_6 */ ++ { TYPE_X86_CPU, "cpuid-0xb", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "ICH9-LPC", "x-smi-broadcast", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { TYPE_X86_CPU, "vmware-cpuid-freq", "off" }, ++ /* 
pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "Haswell-" TYPE_X86_CPU, "stepping", "1" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_3 added in 2.9*/ ++ { TYPE_X86_CPU, "kvm-no-smi-migration", "on" }, ++}; ++const size_t pc_rhel_7_3_compat_len = G_N_ELEMENTS(pc_rhel_7_3_compat); ++ ++GlobalProperty pc_rhel_7_2_compat[] = { ++ { "phenom" "-" TYPE_X86_CPU, "rdtscp", "off"}, ++ { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" }, ++ { "qemu64" "-" TYPE_X86_CPU, "abm", "on" }, ++ { "Haswell-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" }, ++ { TYPE_X86_CPU, "check", "off" }, ++ { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" }, ++ { TYPE_X86_CPU, "arat", "off" }, ++ { "usb-redir", "streams", "off" }, ++ { TYPE_X86_CPU, "fill-mtrr-mask", "off" }, ++ { "apic-common", "legacy-instance-id", "on" }, ++}; ++const size_t pc_rhel_7_2_compat_len = G_N_ELEMENTS(pc_rhel_7_2_compat); ++ ++GlobalProperty pc_rhel_7_1_compat[] = { ++ { "kvm64" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "kvm32" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Conroe" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Penryn" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Nehalem" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Nehalem-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Westmere" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Westmere-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "SandyBridge" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "SandyBridge-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "vme", "off" 
}, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G1" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G2" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G3" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G4" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G5" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "coreduo" "-" TYPE_X86_CPU, "vmx", "on" }, ++ { "core2duo" "-" TYPE_X86_CPU, "vmx", "on" }, ++ { "qemu64" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "kvm64" "-" TYPE_X86_CPU, "min-level", stringify(5) }, ++ { "pentium3" "-" TYPE_X86_CPU, "min-level", stringify(2) }, ++ { "n270" "-" TYPE_X86_CPU, "min-level", stringify(5) }, ++ { "Conroe" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "Penryn" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "Nehalem" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "n270" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Penryn" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Conroe" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Nehalem" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Westmere" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "SandyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "IvyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Haswell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Haswell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Broadwell" "-" TYPE_X86_CPU, "min-xlevel", 
stringify(0x8000000a) }, ++ { "Broadwell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++}; ++const size_t pc_rhel_7_1_compat_len = G_N_ELEMENTS(pc_rhel_7_1_compat); ++ ++/* ++ * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine ++ * types as the PC_COMPAT_* do for upstream types. ++ * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. ++ */ ++ ++/* ++ * RHEL-7 is based on QEMU 1.5.3, so this needs the PC_COMPAT_* ++ * between our base and 1.5, less stuff backported to RHEL-7.0 ++ * (usb-device.msos-desc), less stuff for devices we changed ++ * (qemu64-x86_64-cpu) or don't support (hpet, pci-serial-2x, ++ * pci-serial-4x) in 7.0. ++ */ ++GlobalProperty pc_rhel_7_0_compat[] = { ++ { "virtio-scsi-pci", "any_layout", "off" }, ++ { "PIIX4_PM", "memory-hotplug-support", "off" }, ++ { "apic", "version", stringify(0x11) }, ++ { "nec-usb-xhci", "superspeed-ports-first", "off" }, ++ { "nec-usb-xhci", "force-pcie-endcap", "on" }, ++ { "pci-serial", "prog_if", stringify(0) }, ++ { "virtio-net-pci", "guest_announce", "off" }, ++ { "ICH9-LPC", "memory-hotplug-support", "off" }, ++ { "xio3130-downstream", COMPAT_PROP_PCP, "off" }, ++ { "ioh3420", COMPAT_PROP_PCP, "off" }, ++ { "PIIX4_PM", "acpi-pci-hotplug-with-bridge-support", "off" }, ++ { "e1000", "mitigation", "off" }, ++ { "virtio-net-pci", "ctrl_guest_offloads", "off" }, ++ { "Conroe" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Penryn" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Nehalem" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Nehalem-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Westmere" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Westmere-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G1" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G2" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G3" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G4" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G5" "-" TYPE_X86_CPU, "x2apic", "on" }, ++}; ++const size_t pc_rhel_7_0_compat_len 
= G_N_ELEMENTS(pc_rhel_7_0_compat); ++ + void gsi_handler(void *opaque, int n, int level) + { + GSIState *s = opaque; +@@ -1225,7 +1480,8 @@ void pc_memory_init(PCMachineState *pcms, + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); +- if (pcmc->pci_enabled) { ++ /* RH difference: See bz 1489800, explicitly make ROM ro */ ++ if (pcmc->pc_rom_ro) { + memory_region_set_readonly(option_rom_mr, true); + } + memory_region_add_subregion_overlap(rom_memory, +@@ -2198,6 +2454,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->linuxboot_dma_enabled = true; + pcmc->pvh_enabled = true; + assert(!mc->get_hotplug_handler); ++ pcmc->pc_rom_ro = true; ++ mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; + mc->hotplug_allowed = pc_hotplug_allowed; + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; +@@ -2209,7 +2467,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->hot_add_cpu = pc_hot_add_cpu; + mc->smp_parse = pc_smp_parse; + mc->block_default_type = IF_IDE; +- mc->max_cpus = 255; ++ /* 240: max CPU count for RHEL */ ++ mc->max_cpus = 240; + mc->reset = pc_machine_reset; + mc->wakeup = pc_machine_wakeup; + hc->pre_plug = pc_machine_device_pre_plug_cb; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 1bd70d1abb..bd7fdb99bb 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -53,6 +53,7 @@ + #include "cpu.h" + #include "qapi/error.h" + #include "qemu/error-report.h" ++#include "migration/migration.h" + #ifdef CONFIG_XEN + #include + #include "hw/xen/xen_pt.h" +@@ -173,8 +174,8 @@ static void pc_init1(MachineState *machine, + if (pcmc->smbios_defaults) { + MachineClass *mc = MACHINE_GET_CLASS(machine); + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", +- mc->name, pcmc->smbios_legacy_mode, ++ 
smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + SMBIOS_ENTRY_POINT_21); + } +@@ -307,6 +308,7 @@ else { + * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). + */ + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_compat_2_3_fn(MachineState *machine) + { + PCMachineState *pcms = PC_MACHINE(machine); +@@ -1026,3 +1028,207 @@ static void xenfv_machine_options(MachineClass *m) + DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, + xenfv_machine_options); + #endif ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel7 machine type */ ++static void pc_machine_rhel7_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ m->family = "pc_piix_Y"; ++ m->default_machine_opts = "firmware=bios-256k.bin"; ++ pcmc->default_nic_model = "e1000"; ++ m->default_display = "std"; ++ m->no_parallel = 1; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++ m->alias = "pc"; ++ m->is_default = 1; ++} ++ ++static void pc_init_rhel760(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel760_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel7_options(m); ++ m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; ++ m->async_pf_vmexit_disable = true; ++ m->smbus_no_migration_support = true; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, 
pc_rhel_8_0_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, ++ pc_machine_rhel760_options); ++ ++static void pc_init_rhel750(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel750_options(MachineClass *m) ++{ ++ pc_machine_rhel760_options(m); ++ m->alias = NULL; ++ m->is_default = 0; ++ m->desc = "RHEL 7.5.0 PC (i440FX + PIIX, 1996)"; ++ m->auto_enable_numa_with_memhp = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel750, "pc-i440fx-rhel7.5.0", pc_init_rhel750, ++ pc_machine_rhel750_options); ++ ++static void pc_init_rhel740(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel740_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel750_options(m); ++ m->desc = "RHEL 7.4.0 PC (i440FX + PIIX, 1996)"; ++ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; ++ pcmc->pc_rom_ro = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel740, "pc-i440fx-rhel7.4.0", pc_init_rhel740, ++ pc_machine_rhel740_options); ++ ++static void pc_init_rhel730(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel730_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel740_options(m); ++ m->desc = "RHEL 7.3.0 PC (i440FX + PIIX, 1996)"; ++ 
pcmc->linuxboot_dma_enabled = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel730, "pc-i440fx-rhel7.3.0", pc_init_rhel730, ++ pc_machine_rhel730_options); ++ ++ ++static void pc_init_rhel720(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel720_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ X86MachineClass *x86mc = X86_MACHINE_CLASS(m); ++ pc_machine_rhel730_options(m); ++ m->desc = "RHEL 7.2.0 PC (i440FX + PIIX, 1996)"; ++ /* From pc_i440fx_2_5_machine_options */ ++ x86mc->save_tsc_khz = false; ++ m->legacy_fw_cfg_order = 1; ++ /* Note: broken_reserved_end was already in 7.2 */ ++ /* From pc_i440fx_2_6_machine_options */ ++ pcmc->legacy_cpu_hotplug = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); ++ compat_props_add(m->compat_props, pc_rhel_7_2_compat, pc_rhel_7_2_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel720, "pc-i440fx-rhel7.2.0", pc_init_rhel720, ++ pc_machine_rhel720_options); ++ ++static void pc_compat_rhel710(MachineState *machine) ++{ ++ PCMachineState *pcms = PC_MACHINE(machine); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ ++ /* From pc_compat_2_2 */ ++ pcmc->rsdp_in_ram = false; ++ machine->suppress_vmdesc = true; ++ ++ /* From pc_compat_2_1 */ ++ pcmc->smbios_uuid_encoded = false; ++ x86_cpu_change_kvm_default("svm", NULL); ++ pcmc->enforce_aligned_dimm = false; ++ ++ /* Disable all the extra subsections that were added in 2.2 */ ++ migrate_pre_2_2 = true; ++ ++ /* From pc_i440fx_2_4_machine_options */ ++ pcmc->broken_reserved_end = true; ++} ++ ++static void pc_init_rhel710(MachineState *machine) ++{ ++ pc_compat_rhel710(machine); ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static 
void pc_machine_rhel710_options(MachineClass *m) ++{ ++ pc_machine_rhel720_options(m); ++ m->family = "pc_piix_Y"; ++ m->desc = "RHEL 7.1.0 PC (i440FX + PIIX, 1996)"; ++ m->default_display = "cirrus"; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_1, hw_compat_rhel_7_1_len); ++ compat_props_add(m->compat_props, pc_rhel_7_1_compat, pc_rhel_7_1_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel710, "pc-i440fx-rhel7.1.0", pc_init_rhel710, ++ pc_machine_rhel710_options); ++ ++static void pc_compat_rhel700(MachineState *machine) ++{ ++ PCMachineState *pcms = PC_MACHINE(machine); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ ++ pc_compat_rhel710(machine); ++ ++ /* Upstream enables it for everyone, we're a little more selective */ ++ x86_cpu_change_kvm_default("x2apic", NULL); ++ x86_cpu_change_kvm_default("svm", NULL); ++ pcmc->legacy_acpi_table_size = 6418; /* see pc_compat_2_0() */ ++ pcmc->smbios_legacy_mode = true; ++ pcmc->has_reserved_memory = false; ++ migrate_cve_2014_5263_xhci_fields = true; ++} ++ ++static void pc_init_rhel700(MachineState *machine) ++{ ++ pc_compat_rhel700(machine); ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel700_options(MachineClass *m) ++{ ++ pc_machine_rhel710_options(m); ++ m->family = "pc_piix_Y"; ++ m->desc = "RHEL 7.0.0 PC (i440FX + PIIX, 1996)"; ++ compat_props_add(m->compat_props, pc_rhel_7_0_compat, pc_rhel_7_0_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, ++ pc_machine_rhel700_options); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 385e5cffb1..7531d8ed76 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", 
"KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + SMBIOS_ENTRY_POINT_21); + } +@@ -330,6 +330,7 @@ static void pc_q35_init(MachineState *machine) + DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) + + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_q35_machine_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +@@ -533,3 +534,154 @@ static void pc_q35_2_4_machine_options(MachineClass *m) + + DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, + pc_q35_2_4_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel q35 machine type */ ++static void pc_q35_machine_rhel_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pcmc->default_nic_model = "e1000e"; ++ m->family = "pc_q35_Z"; ++ m->units_per_default_bus = 1; ++ m->default_machine_opts = "firmware=bios-256k.bin"; ++ m->default_display = "std"; ++ m->no_floppy = 1; ++ m->no_parallel = 1; ++ pcmc->default_cpu_version = 1; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); ++ m->alias = "q35"; ++ m->max_cpus = 384; ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++} ++ ++static void pc_q35_init_rhel820(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel820_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, ++ pc_q35_machine_rhel820_options); ++ ++static void pc_q35_init_rhel810(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel810_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel820_options(m); ++ m->desc = "RHEL-8.1.0 
PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel810, "pc-q35-rhel8.1.0", pc_q35_init_rhel810, ++ pc_q35_machine_rhel810_options); ++ ++static void pc_q35_init_rhel800(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel800_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel810_options(m); ++ m->desc = "RHEL-8.0.0 PC (Q35 + ICH9, 2009)"; ++ m->smbus_no_migration_support = true; ++ m->alias = NULL; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel800, "pc-q35-rhel8.0.0", pc_q35_init_rhel800, ++ pc_q35_machine_rhel800_options); ++ ++static void pc_q35_init_rhel760(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel760_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel800_options(m); ++ m->alias = NULL; ++ m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; ++ m->async_pf_vmexit_disable = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, ++ pc_q35_machine_rhel760_options); ++ ++static void pc_q35_init_rhel750(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel750_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel760_options(m); ++ m->alias = NULL; ++ m->desc = "RHEL-7.5.0 PC (Q35 + ICH9, 2009)"; ++ m->auto_enable_numa_with_memhp = false; ++ 
pcmc->default_nic_model = "e1000"; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel750, "pc-q35-rhel7.5.0", pc_q35_init_rhel750, ++ pc_q35_machine_rhel750_options); ++ ++static void pc_q35_init_rhel740(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel740_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel750_options(m); ++ m->desc = "RHEL-7.4.0 PC (Q35 + ICH9, 2009)"; ++ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; ++ pcmc->pc_rom_ro = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel740, "pc-q35-rhel7.4.0", pc_q35_init_rhel740, ++ pc_q35_machine_rhel740_options); ++ ++static void pc_q35_init_rhel730(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel730_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel740_options(m); ++ m->desc = "RHEL-7.3.0 PC (Q35 + ICH9, 2009)"; ++ m->max_cpus = 255; ++ pcmc->linuxboot_dma_enabled = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, ++ pc_q35_machine_rhel730_options); +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 6f85a0e032..2920bdef5b 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -222,6 +222,8 @@ struct MachineClass { + const char **valid_cpu_types; + strList *allowed_dynamic_sysbus_devices; + bool auto_enable_numa_with_memhp; ++ /* RHEL only */ ++ bool async_pf_vmexit_disable; + void 
(*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size); + bool ignore_boot_device_suffixes; +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 1f86eba3f9..2e362c8faa 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -124,6 +124,9 @@ typedef struct PCMachineClass { + + /* use PVH to load kernels that support this feature */ + bool pvh_enabled; ++ ++ /* RH only, see bz 1489800 */ ++ bool pc_rom_ro; + } PCMachineClass; + + #define TYPE_PC_MACHINE "generic-pc-machine" +@@ -300,6 +303,36 @@ extern const size_t pc_compat_1_5_len; + extern GlobalProperty pc_compat_1_4[]; + extern const size_t pc_compat_1_4_len; + ++extern GlobalProperty pc_rhel_compat[]; ++extern const size_t pc_rhel_compat_len; ++ ++extern GlobalProperty pc_rhel_8_1_compat[]; ++extern const size_t pc_rhel_8_1_compat_len; ++ ++extern GlobalProperty pc_rhel_8_0_compat[]; ++extern const size_t pc_rhel_8_0_compat_len; ++ ++extern GlobalProperty pc_rhel_7_6_compat[]; ++extern const size_t pc_rhel_7_6_compat_len; ++ ++extern GlobalProperty pc_rhel_7_5_compat[]; ++extern const size_t pc_rhel_7_5_compat_len; ++ ++extern GlobalProperty pc_rhel_7_4_compat[]; ++extern const size_t pc_rhel_7_4_compat_len; ++ ++extern GlobalProperty pc_rhel_7_3_compat[]; ++extern const size_t pc_rhel_7_3_compat_len; ++ ++extern GlobalProperty pc_rhel_7_2_compat[]; ++extern const size_t pc_rhel_7_2_compat_len; ++ ++extern GlobalProperty pc_rhel_7_1_compat[]; ++extern const size_t pc_rhel_7_1_compat_len; ++ ++extern GlobalProperty pc_rhel_7_0_compat[]; ++extern const size_t pc_rhel_7_0_compat_len; ++ + /* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. 
+ */ +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 1b7880ae3a..790db778ab 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1829,11 +1829,17 @@ static CPUCaches epyc_cache_info = { + + static X86CPUDefinition builtin_x86_defs[] = { + { ++ /* qemu64 is the default CPU model for all *-rhel7.* machine-types. ++ * The default on RHEL-6 was cpu64-rhel6. ++ * libvirt assumes that qemu64 is the default for _all_ machine-types, ++ * so we should try to keep qemu64 and cpu64-rhel6 as similar as ++ * possible. ++ */ + .name = "qemu64", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 6, +- .model = 6, ++ .model = 13, + .stepping = 3, + .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | + CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | +@@ -3932,6 +3938,7 @@ static PropValue kvm_default_props[] = { + { "acpi", "off" }, + { "monitor", "off" }, + { "svm", "off" }, ++ { "kvm-pv-unhalt", "on" }, + { NULL, NULL }, + }; + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 1d10046a6c..86d9a1f364 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -3079,6 +3079,7 @@ static int kvm_get_msrs(X86CPU *cpu) + struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; + int ret, i; + uint64_t mtrr_top_bits; ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + + kvm_msr_buf_reset(cpu); + +@@ -3388,6 +3389,9 @@ static int kvm_get_msrs(X86CPU *cpu) + break; + case MSR_KVM_ASYNC_PF_EN: + env->async_pf_en_msr = msrs[i].data; ++ if (mc->async_pf_vmexit_disable) { ++ env->async_pf_en_msr &= ~(1ULL << 2); ++ } + break; + case MSR_KVM_PV_EOI_EN: + env->pv_eoi_en_msr = msrs[i].data; +-- +2.21.0 + diff --git a/SOURCES/0012-Enable-make-check.patch b/SOURCES/0012-Enable-make-check.patch new file mode 100644 index 0000000..09f7b4e --- /dev/null +++ b/SOURCES/0012-Enable-make-check.patch @@ -0,0 +1,307 @@ +From 154215041df085271a780a2989f4f481226e3e34 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 
Oct 2018 13:48:41 +0200 +Subject: Enable make check + +Fixing tests after device disabling and machine types changes and enabling +make check run during build. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- Remove testing for pseries-2.7 in endianess test +- Disable device-plug-test on s390x as it use disabled device +- Do not run cpu-plug-tests on 7.3 and older machine types + +Rebase changes (4.1.0-rc0): +- removed iotests 068 + +Rebase changes (4.1.0-rc1): +- remove all 205 tests (unstable) + +Rebase changes (4.2.0-rc0): +- partially disable hd-geo-test (requires lsi53c895a) + +Merged patches (4.0.0): +- f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce + +Merged patches (4.1.0-rc0): +- 41288ff redhat: Remove raw iotest 205 + +Signed-off-by: Danilo C. L. de Paula +--- + redhat/qemu-kvm.spec.template | 2 +- + tests/Makefile.include | 10 +++++----- + tests/boot-serial-test.c | 6 +++++- + tests/cpu-plug-test.c | 4 ++-- + tests/e1000-test.c | 2 ++ + tests/hd-geo-test.c | 4 ++++ + tests/prom-env-test.c | 4 ++++ + tests/qemu-iotests/051 | 12 ++++++------ + tests/qemu-iotests/group | 4 ++-- + tests/test-x86-cpuid-compat.c | 2 ++ + tests/usb-hcd-xhci-test.c | 4 ++++ + 11 files changed, 37 insertions(+), 17 deletions(-) + +diff --git a/tests/Makefile.include b/tests/Makefile.include +index b483790cf3..53bdbdfee0 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -172,7 +172,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) + check-qtest-i386-y += tests/ahci-test$(EXESUF) + check-qtest-i386-y += tests/hd-geo-test$(EXESUF) + check-qtest-i386-y += tests/boot-order-test$(EXESUF) +-check-qtest-i386-y += tests/bios-tables-test$(EXESUF) ++#check-qtest-i386-y += tests/bios-tables-test$(EXESUF) + check-qtest-i386-$(CONFIG_SGA) += tests/boot-serial-test$(EXESUF) + check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) + check-qtest-i386-y += tests/rtc-test$(EXESUF) +@@ -230,7 
+230,7 @@ check-qtest-mips64el-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) + check-qtest-moxie-y += tests/boot-serial-test$(EXESUF) + + check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) +-check-qtest-ppc-y += tests/boot-order-test$(EXESUF) ++#check-qtest-ppc-y += tests/boot-order-test$(EXESUF) + check-qtest-ppc-y += tests/prom-env-test$(EXESUF) + check-qtest-ppc-y += tests/drive_del-test$(EXESUF) + check-qtest-ppc-y += tests/boot-serial-test$(EXESUF) +@@ -244,8 +244,8 @@ check-qtest-ppc64-$(CONFIG_PSERIES) += tests/rtas-test$(EXESUF) + check-qtest-ppc64-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) + check-qtest-ppc64-$(CONFIG_USB_UHCI) += tests/usb-hcd-uhci-test$(EXESUF) + check-qtest-ppc64-$(CONFIG_USB_XHCI_NEC) += tests/usb-hcd-xhci-test$(EXESUF) +-check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) +-check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) ++#check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) ++#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) + check-qtest-ppc64-$(CONFIG_RTL8139_PCI) += tests/test-filter-redirector$(EXESUF) + check-qtest-ppc64-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) + check-qtest-ppc64-y += tests/numa-test$(EXESUF) +@@ -291,7 +291,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) + check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) + check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) + check-qtest-s390x-y += tests/drive_del-test$(EXESUF) +-check-qtest-s390x-y += tests/device-plug-test$(EXESUF) ++#check-qtest-s390x-y += tests/device-plug-test$(EXESUF) + check-qtest-s390x-y += tests/virtio-ccw-test$(EXESUF) + check-qtest-s390x-y += tests/cpu-plug-test$(EXESUF) + check-qtest-s390x-y += tests/migration-test$(EXESUF) +diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c +index d3a54a0ba5..33ce72b89c 100644 +--- a/tests/boot-serial-test.c ++++ 
b/tests/boot-serial-test.c +@@ -108,19 +108,23 @@ static testdef_t tests[] = { + { "ppc", "g3beige", "", "PowerPC,750" }, + { "ppc", "mac99", "", "PowerPC,G4" }, + { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "ppce500", "", "U-Boot" }, + { "ppc64", "40p", "-m 192", "Memory: 192M" }, + { "ppc64", "mac99", "", "PowerPC,970FX" }, ++#endif + { "ppc64", "pseries", + "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken", + "Open Firmware" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "powernv8", "", "OPAL" }, + { "ppc64", "powernv9", "", "OPAL" }, + { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, ++#endif + { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "i386", "pc", "-device sga", "SGABIOS" }, + { "i386", "q35", "-device sga", "SGABIOS" }, +- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, ++ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "x86_64", "q35", "-device sga", "SGABIOS" }, + { "sparc", "LX", "", "TMS390S10" }, + { "sparc", "SS-4", "", "MB86904" }, +diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c +index 30e514bbfb..a04beae1c6 100644 +--- a/tests/cpu-plug-test.c ++++ b/tests/cpu-plug-test.c +@@ -185,8 +185,8 @@ static void add_pseries_test_case(const char *mname) + char *path; + PlugTestData *data; + +- if (!g_str_has_prefix(mname, "pseries-") || +- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { ++ if (!g_str_has_prefix(mname, "pseries-rhel") || ++ (g_str_has_prefix(mname, "pseries-rhel7.") && atoi(&mname[14]) < 4)) { + return; + } + data = g_new(PlugTestData, 1); +diff --git a/tests/e1000-test.c b/tests/e1000-test.c +index c387984ef6..c89112d6f8 100644 +--- a/tests/e1000-test.c ++++ b/tests/e1000-test.c +@@ -22,9 +22,11 @@ struct QE1000 { + + static const char *models[] = { + "e1000", ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + "e1000-82540em", + "e1000-82544gc", + 
"e1000-82545em", ++#endif + }; + + static void *e1000_get_driver(void *obj, const char *interface) +diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c +index 7e86c5416c..cc068bad87 100644 +--- a/tests/hd-geo-test.c ++++ b/tests/hd-geo-test.c +@@ -732,6 +732,7 @@ static void test_override_ide(void) + test_override(args, expected); + } + ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + static void test_override_scsi(void) + { + TestArgs *args = create_args(); +@@ -776,6 +777,7 @@ static void test_override_scsi_2_controllers(void) + add_scsi_disk(args, 3, 1, 0, 1, 2, 0, 1, 0); + test_override(args, expected); + } ++#endif + + static void test_override_virtio_blk(void) + { +@@ -951,9 +953,11 @@ int main(int argc, char **argv) + qtest_add_func("hd-geo/ide/device/user/chst", test_ide_device_user_chst); + if (have_qemu_img()) { + qtest_add_func("hd-geo/override/ide", test_override_ide); ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + qtest_add_func("hd-geo/override/scsi", test_override_scsi); + qtest_add_func("hd-geo/override/scsi_2_controllers", + test_override_scsi_2_controllers); ++#endif + qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); + qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); + qtest_add_func("hd-geo/override/scsi_hot_unplug", +diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c +index 61bc1d1e7b..028d45c7d7 100644 +--- a/tests/prom-env-test.c ++++ b/tests/prom-env-test.c +@@ -88,10 +88,14 @@ int main(int argc, char *argv[]) + if (!strcmp(arch, "ppc")) { + add_tests(ppc_machines); + } else if (!strcmp(arch, "ppc64")) { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + add_tests(ppc_machines); + if (g_test_slow()) { ++#endif + qtest_add_data_func("prom-env/pseries", "pseries", test_machine); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + } ++#endif + } else if (!strcmp(arch, "sparc")) { + add_tests(sparc_machines); + } else if (!strcmp(arch, "sparc64")) { +diff --git 
a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 +index 53bcdbc911..b387e0c233 100755 +--- a/tests/qemu-iotests/051 ++++ b/tests/qemu-iotests/051 +@@ -181,11 +181,11 @@ run_qemu -drive if=virtio + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive if=none,id=disk -device ide-cd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive if=none,id=disk -device ide-drive,drive=disk + run_qemu -drive if=none,id=disk -device ide-hd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +@@ -234,11 +234,11 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-drive,drive=disk + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-hd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; 
+diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 6b10a6a762..06cc734b26 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -92,7 +92,7 @@ + 068 rw quick + 069 rw auto quick + 070 rw quick +-071 rw auto quick ++# 071 rw auto quick -- requires whitelisted blkverify + 072 rw auto quick + 073 rw auto quick + 074 rw auto quick +@@ -120,7 +120,7 @@ + 096 rw quick + 097 rw auto backing + 098 rw auto backing quick +-099 rw auto quick ++# 099 rw auto quick -- requires whitelisted blkverify + # 100 was removed, do not reuse + 101 rw quick + 102 rw quick +diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c +index 772287bdb4..e7c075ed98 100644 +--- a/tests/test-x86-cpuid-compat.c ++++ b/tests/test-x86-cpuid-compat.c +@@ -300,6 +300,7 @@ int main(int argc, char **argv) + "-cpu 486,xlevel2=0xC0000002,+xstore", + "xlevel2", 0xC0000002); + ++#if 0 /* Disabled in Red Hat Enterprise Linux */ + /* Check compatibility of old machine-types that didn't + * auto-increase level/xlevel/xlevel2: */ + +@@ -350,6 +351,7 @@ int main(int argc, char **argv) + add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", + "-machine pc-i440fx-2.4 -cpu SandyBridge,+npt", + "xlevel", 0x80000008); ++#endif + + /* Test feature parsing */ + add_feature_test("x86/cpuid/features/plus", +diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c +index 10ef9d2a91..3855873050 100644 +--- a/tests/usb-hcd-xhci-test.c ++++ b/tests/usb-hcd-xhci-test.c +@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) + usb_test_hotplug(global_qtest, "xhci", "1", NULL); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_usb_uas_hotplug(void) + { + QTestState *qts = global_qtest; +@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void) + qtest_qmp_device_del(qts, "scsihd"); + qtest_qmp_device_del(qts, "uas"); + } ++#endif + + static void test_usb_ccid_hotplug(void) + { +@@ -56,7 +58,9 @@ int main(int argc, char 
**argv) + + qtest_add_func("/xhci/pci/init", test_xhci_init); + qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); ++#endif + qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); + + qtest_start("-device nec-usb-xhci,id=xhci" +-- +2.21.0 + diff --git a/SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch new file mode 100644 index 0000000..db776c4 --- /dev/null +++ b/SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -0,0 +1,114 @@ +From de433da59448eaad4ac1b902d07d57b57f922aff Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Tue, 3 Dec 2013 20:05:13 +0100 +Subject: vfio: cap number of devices that can be assigned + +RH-Author: Bandan Das +Message-id: <1386101113-31560-3-git-send-email-bsd@redhat.com> +Patchwork-id: 55984 +O-Subject: [PATCH RHEL7 qemu-kvm v2 2/2] vfio: cap number of devices that can be assigned +Bugzilla: 678368 +RH-Acked-by: Alex Williamson +RH-Acked-by: Marcelo Tosatti +RH-Acked-by: Michael S. Tsirkin + +Go through all groups to get count of total number of devices +active to enforce limit + +Reasoning from Alex for the limit(32) - Assuming 3 slots per +device, with 125 slots (number of memory slots for RHEL 7), +we can support almost 40 devices and still have few slots left +for other uses. Stepping down a bit, the number 32 arbitrarily +matches the number of slots on a PCI bus and is also a nice power +of two. 
+ +Signed-off-by: Bandan Das + +Rebase notes (2.8.0): +- removed return value for vfio_realize (commit 1a22aca) + +Merged patches (2.9.0): +- 17eb774 vfio: Use error_setg when reporting max assigned device overshoot + + Merged patches (4.1.0-rc3): +- 2b89558 vfio: increase the cap on number of assigned devices to 64 + +(cherry picked from commit 9fa3c9fc6dfcde76d80db1aa601b2d577f72ceec) +(cherry picked from commit 3cb35556dc7d994f203d732fe952f95fcdb03c0a) +Signed-off-by: Danilo C. L. de Paula +--- + hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- + hw/vfio/pci.h | 1 + + 2 files changed, 29 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index c8534d3035..309535f306 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -47,6 +47,9 @@ + + #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" + ++/* RHEL only: Set once for the first assigned dev */ ++static uint16_t device_limit; ++ + static void vfio_disable_interrupts(VFIOPCIDevice *vdev); + static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); + +@@ -2722,9 +2725,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ssize_t len; + struct stat st; + int groupid; +- int i, ret; ++ int ret, i = 0; + bool is_mdev; + ++ if (device_limit && device_limit != vdev->assigned_device_limit) { ++ error_setg(errp, "Assigned device limit has been redefined. 
" ++ "Old:%d, New:%d", ++ device_limit, vdev->assigned_device_limit); ++ return; ++ } else { ++ device_limit = vdev->assigned_device_limit; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ i++; ++ } ++ } ++ ++ if (i >= vdev->assigned_device_limit) { ++ error_setg(errp, "Maximum supported vfio devices (%d) " ++ "already attached", vdev->assigned_device_limit); ++ return; ++ } ++ + if (!vdev->vbasedev.sysfsdev) { + if (!(~vdev->host.domain || ~vdev->host.bus || + ~vdev->host.slot || ~vdev->host.function)) { +@@ -3167,6 +3191,9 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), + DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, + no_geforce_quirks, false), ++ /* RHEL only */ ++ DEFINE_PROP_UINT16("x-assigned-device-limit", VFIOPCIDevice, ++ assigned_device_limit, 64), + DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd, + false), + DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 35626cd63e..0cd4803aee 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice { + EventNotifier err_notifier; + EventNotifier req_notifier; + int (*resetfn)(struct VFIOPCIDevice *); ++ uint16_t assigned_device_limit; + uint32_t vendor_id; + uint32_t device_id; + uint32_t sub_vendor_id; +-- +2.21.0 + diff --git a/SOURCES/0014-Add-support-statement-to-help-output.patch b/SOURCES/0014-Add-support-statement-to-help-output.patch new file mode 100644 index 0000000..cb77bfe --- /dev/null +++ b/SOURCES/0014-Add-support-statement-to-help-output.patch @@ -0,0 +1,58 @@ +From 2754dd8da8975757753fd491985d5e7b36966106 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 4 Dec 2013 18:53:17 +0100 +Subject: Add support statement to -help output + +RH-Author: Eduardo Habkost +Message-id: 
<1386183197-27761-1-git-send-email-ehabkost@redhat.com> +Patchwork-id: 55994 +O-Subject: [qemu-kvm RHEL7 PATCH] Add support statement to -help output +Bugzilla: 972773 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: knoel@redhat.com +RH-Acked-by: Paolo Bonzini + +Add support statement to -help output, reporting direct qemu-kvm usage +as unsupported by Red Hat, and advising users to use libvirt instead. + +Signed-off-by: Eduardo Habkost +(cherry picked from commit 2a07700936e39856cc9f149c6a6517f0715536a6) +(cherry picked from commit 5dd2f4706e2fef945771949e59a8fcc1b5452de9) +Signed-off-by: Danilo C. L. de Paula +--- + vl.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/vl.c b/vl.c +index 668a34577e..9f3e7e7733 100644 +--- a/vl.c ++++ b/vl.c +@@ -1822,9 +1822,17 @@ static void version(void) + QEMU_COPYRIGHT "\n"); + } + ++static void print_rh_warning(void) ++{ ++ printf("\nWARNING: Direct use of qemu-kvm from the command line is not supported by Red Hat.\n" ++ "WARNING: Use libvirt as the stable management interface.\n" ++ "WARNING: Some command line options listed here may not be available in future releases.\n\n"); ++} ++ + static void help(int exitcode) + { + version(); ++ print_rh_warning(); + printf("usage: %s [options] [disk_image]\n\n" + "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", + error_get_progname()); +@@ -1841,6 +1849,7 @@ static void help(int exitcode) + "\n" + QEMU_HELP_BOTTOM "\n"); + ++ print_rh_warning(); + exit(exitcode); + } + +-- +2.21.0 + diff --git a/SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch b/SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch new file mode 100644 index 0000000..cec862d --- /dev/null +++ b/SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -0,0 +1,152 @@ +From c9c3cf721b0e9e359418f64c2a5121c3f8b5d27a Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Tue, 21 Jan 2014 10:46:52 +0100 +Subject: globally limit the maximum number of CPUs + +We now globally 
limit the number of VCPUs. +Especially, there is no way one can specify more than +max_cpus VCPUs for a VM. + +This allows us to restore the ppc max_cpus limitation to the upstream +default and minimize the ppc hack in kvm-all.c. + +Signed-off-by: David Hildenbrand +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo Cesar Lemes de Paula + +Rebase notes (2.11.0): +- Removed CONFIG_RHV reference +- Update commit log + +Merged patches (2.11.0): +- 92fef14623 redhat: remove manual max_cpus limitations for ppc +- bb722e9eff redhat: globally limit the maximum number of CPUs +- fdeef3c1c7 RHEL: Set vcpus hard limit to 240 for Power +- 0584216921 Match POWER max cpus to x86 + +Signed-off-by: Andrew Jones +(cherry picked from commit a4ceb63bdc5cbac19f5f633ec761b9de0dedb55e) +(cherry picked from commit a1f26d85171b4d554225150053700e93ba6eba10) + +redhat: globally limit the maximum number of CPUs + +RH-Author: David Hildenbrand +Message-id: <20180109103253.24517-2-david@redhat.com> +Patchwork-id: 78531 +O-Subject: [RHEL-7.5 qemu-kvm-ma PATCH v2 1/2] redhat: globally limit the maximum number of CPUs +Bugzilla: 1527449 +RH-Acked-by: David Gibson +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +Upstream-status: n/a + +For RHEL, we support 240, for RHV up to 384 VCPUs. Let's limit this +globally instead of fixing up all machines. This way, we can easily +change (increase) the product specific levels later. 
+ +Signed-off-by: David Hildenbrand +Signed-off-by: Miroslav Rezanina + +redhat: remove manual max_cpus limitations for ppc + +RH-Author: David Hildenbrand +Message-id: <20180109103253.24517-3-david@redhat.com> +Patchwork-id: 78532 +O-Subject: [RHEL-7.5 qemu-kvm-ma PATCH v2 2/2] redhat: remove manual max_cpus limitations for ppc +Bugzilla: 1527449 +RH-Acked-by: David Gibson +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +Upstream-status: n/a + +RH-Author: Andrew Jones +Message-id: <1390301212-15344-1-git-send-email-drjones@redhat.com> +Patchwork-id: 56862 +O-Subject: [RHEL7.0 qemu-kvm PATCH v6] use recommended max vcpu count +Bugzilla: 998708 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Marcelo Tosatti + +The recommended vcpu max limit (KVM_CAP_NR_VCPUS) should be used instead +of the actual max vcpu limit (KVM_CAP_MAX_VCPUS) to give an error. + +This commit matches the limit to current KVM_CAP_NR_VCPUS value. + +Signed-off-by: Danilo C. L. de Paula +--- + accel/kvm/kvm-all.c | 12 ++++++++++++ + vl.c | 18 ++++++++++++++++++ + 2 files changed, 30 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index ca00daa2f5..dc3ed7f04e 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -1943,6 +1943,18 @@ static int kvm_init(MachineState *ms) + soft_vcpus_limit = kvm_recommended_vcpus(s); + hard_vcpus_limit = kvm_max_vcpus(s); + ++#ifdef HOST_PPC64 ++ /* ++ * On POWER, the kernel advertises a soft limit based on the ++ * number of CPU threads on the host. We want to allow exceeding ++ * this for testing purposes, so we don't want to set hard limit ++ * to soft limit as on x86. 
++ */ ++#else ++ /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ ++ hard_vcpus_limit = soft_vcpus_limit; ++#endif ++ + while (nc->name) { + if (nc->num > soft_vcpus_limit) { + warn_report("Number of %s cpus requested (%d) exceeds " +diff --git a/vl.c b/vl.c +index 9f3e7e7733..1550aa2aaa 100644 +--- a/vl.c ++++ b/vl.c +@@ -134,6 +134,8 @@ int main(int argc, char **argv) + + #define MAX_VIRTIO_CONSOLES 1 + ++#define RHEL_MAX_CPUS 384 ++ + static const char *data_dir[16]; + static int data_dir_idx; + const char *bios_name = NULL; +@@ -1339,6 +1341,20 @@ static MachineClass *find_default_machine(GSList *machines) + return NULL; + } + ++/* Maximum number of CPUs limited for Red Hat Enterprise Linux */ ++static void limit_max_cpus_in_machines(void) ++{ ++ GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); ++ ++ for (el = machines; el; el = el->next) { ++ MachineClass *mc = el->data; ++ ++ if (mc->max_cpus > RHEL_MAX_CPUS) { ++ mc->max_cpus = RHEL_MAX_CPUS; ++ } ++ } ++} ++ + static int machine_help_func(QemuOpts *opts, MachineState *machine) + { + ObjectProperty *prop; +@@ -3857,6 +3873,8 @@ int main(int argc, char **argv, char **envp) + "mutually exclusive"); + exit(EXIT_FAILURE); + } ++ /* Maximum number of CPUs limited for Red Hat Enterprise Linux */ ++ limit_max_cpus_in_machines(); + + configure_rtc(qemu_find_opts_singleton("rtc")); + +-- +2.21.0 + diff --git a/SOURCES/0016-Add-support-for-simpletrace.patch b/SOURCES/0016-Add-support-for-simpletrace.patch new file mode 100644 index 0000000..9624855 --- /dev/null +++ b/SOURCES/0016-Add-support-for-simpletrace.patch @@ -0,0 +1,121 @@ +From 26128b3ede339e292a3c50a84e3248af46ecd0ec Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 8 Oct 2015 09:50:17 +0200 +Subject: Add support for simpletrace + +As simpletrace is upstream, we just need to properly handle it during rpmbuild. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase notes (3.1.0): +- Fixed python 2 to python3 switch + +Rebase notes (2.9.0): +- Added group argument for tracetool.py (upstream) + +Rebase notes (2.8.0): +- Changed tracetool.py parameters + +Merged patches (2.3.0): +- db959d6 redhat/qemu-kvm.spec.template: Install qemu-kvm-simpletrace.stp +- 5292fc3 trace: add SystemTap init scripts for simpletrace bridge +- eda9e5e simpletrace: install simpletrace.py +- 85c4c8f trace: add systemtap-initscript README file to RPM + +Signed-off-by: Danilo C. L. de Paula +--- + .gitignore | 2 ++ + Makefile | 4 +++ + README.systemtap | 43 +++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 26 ++++++++++++++- + scripts/systemtap/conf.d/qemu_kvm.conf | 4 +++ + scripts/systemtap/script.d/qemu_kvm.stp | 1 + + 6 files changed, 79 insertions(+), 1 deletion(-) + create mode 100644 README.systemtap + create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf + create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp + +diff --git a/Makefile b/Makefile +index 086727dbb9..4254950f7f 100644 +--- a/Makefile ++++ b/Makefile +@@ -939,6 +939,10 @@ endif + $(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \ + done + $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all" ++ $(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/systemtap/script.d" ++ $(INSTALL_DATA) $(SRC_PATH)/scripts/systemtap/script.d/qemu_kvm.stp "$(DESTDIR)$(qemu_datadir)/systemtap/script.d/" ++ $(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/systemtap/conf.d" ++ $(INSTALL_DATA) $(SRC_PATH)/scripts/systemtap/conf.d/qemu_kvm.conf "$(DESTDIR)$(qemu_datadir)/systemtap/conf.d/" + + .PHONY: ctags + ctags: +diff --git a/README.systemtap b/README.systemtap +new file mode 100644 +index 0000000000..ad913fc990 +--- /dev/null ++++ b/README.systemtap +@@ -0,0 +1,43 @@ ++QEMU tracing using systemtap-initscript ++--------------------------------------- ++ ++You can capture QEMU trace data 
all the time using systemtap-initscript. This ++uses SystemTap's flight recorder mode to trace all running guests to a ++fixed-size buffer on the host. Old trace entries are overwritten by new ++entries when the buffer size wraps. ++ ++1. Install the systemtap-initscript package: ++ # yum install systemtap-initscript ++ ++2. Install the systemtap scripts and the conf file: ++ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/ ++ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/ ++ ++The set of trace events to enable is given in qemu_kvm.stp. This SystemTap ++script can be customized to add or remove trace events provided in ++/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp. ++ ++SystemTap customizations can be made to qemu_kvm.conf to control the flight ++recorder buffer size and whether to store traces in memory only or disk too. ++See stap(1) for option documentation. ++ ++3. Start the systemtap service. ++ # service systemtap start qemu_kvm ++ ++4. Make the service start at boot time. ++ # chkconfig systemtap on ++ ++5. Confirm that the service works. ++ # service systemtap status qemu_kvm ++ qemu_kvm is running... ++ ++When you want to inspect the trace buffer, perform the following steps: ++ ++1. Dump the trace buffer. ++ # staprun -A qemu_kvm >/tmp/trace.log ++ ++2. Start the systemtap service because the preceding step stops the service. ++ # service systemtap start qemu_kvm ++ ++3. Translate the trace record to readable format. 
++ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log +diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf +new file mode 100644 +index 0000000000..372d8160a4 +--- /dev/null ++++ b/scripts/systemtap/conf.d/qemu_kvm.conf +@@ -0,0 +1,4 @@ ++# Force load uprobes (see BZ#1118352) ++stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true ++ ++qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes +diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp +new file mode 100644 +index 0000000000..c04abf9449 +--- /dev/null ++++ b/scripts/systemtap/script.d/qemu_kvm.stp +@@ -0,0 +1 @@ ++probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} +-- +2.21.0 + diff --git a/SOURCES/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/SOURCES/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch new file mode 100644 index 0000000..ef83445 --- /dev/null +++ b/SOURCES/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -0,0 +1,118 @@ +From 97ed62562b883c384346bfef3e1c7e379f03ccab Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 30 Nov 2018 09:11:03 +0100 +Subject: Use qemu-kvm in documentation instead of qemu-system- + +Patchwork-id: 62380 +O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 +Bugzilla: 1140620 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi + +From: Miroslav Rezanina + +We change the name and location of qemu-kvm binaries. Update documentation +to reflect this change. Only architectures available in RHEL are updated. + +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + docs/qemu-block-drivers.texi | 2 +- + docs/qemu-cpu-models.texi | 2 +- + qemu-doc.texi | 6 +++--- + qemu-options.hx | 16 ++++++++-------- + 4 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi +index 2c7ea49c32..5d0afb3dee 100644 +--- a/docs/qemu-block-drivers.texi ++++ b/docs/qemu-block-drivers.texi +@@ -2,7 +2,7 @@ + QEMU block driver reference manual + @c man end + +-@set qemu_system qemu-system-x86_64 ++@set qemu_system qemu-kvm + + @c man begin DESCRIPTION + +diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi +index f88a1def0d..c82cf8fab7 100644 +--- a/docs/qemu-cpu-models.texi ++++ b/docs/qemu-cpu-models.texi +@@ -2,7 +2,7 @@ + QEMU / KVM CPU model configuration + @c man end + +-@set qemu_system_x86 qemu-system-x86_64 ++@set qemu_system_x86 qemu-kvm + + @c man begin DESCRIPTION + +diff --git a/qemu-doc.texi b/qemu-doc.texi +index 3ddf5c0a68..d460f8d2c0 100644 +--- a/qemu-doc.texi ++++ b/qemu-doc.texi +@@ -11,8 +11,8 @@ + @paragraphindent 0 + @c %**end of header + +-@set qemu_system qemu-system-x86_64 +-@set qemu_system_x86 qemu-system-x86_64 ++@set qemu_system qemu-kvm ++@set qemu_system_x86 qemu-kvm + + @ifinfo + @direntry +@@ -1827,7 +1827,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. + Set OpenBIOS variables in NVRAM, for example: + + @example +-qemu-system-ppc -prom-env 'auto-boot?=false' \ ++qemu-kvm -prom-env 'auto-boot?=false' \ + -prom-env 'boot-device=hd:2,\yaboot' \ + -prom-env 'boot-args=conf=hd:2,\yaboot.conf' + @end example +diff --git a/qemu-options.hx b/qemu-options.hx +index fc17aca631..df1d27b6f2 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2737,11 +2737,11 @@ be created for multiqueue vhost-user. 
+ + Example: + @example +-qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ +- -numa node,memdev=mem \ +- -chardev socket,id=chr0,path=/path/to/socket \ +- -netdev type=vhost-user,id=net0,chardev=chr0 \ +- -device virtio-net-pci,netdev=net0 ++qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 + @end example + + @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}] +@@ -3631,14 +3631,14 @@ ETEXI + + DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, + "-realtime [mlock=on|off]\n" +- " run qemu with realtime features\n" ++ " run qemu-kvm with realtime features\n" + " mlock=on|off controls mlock support (default: on)\n", + QEMU_ARCH_ALL) + STEXI + @item -realtime mlock=on|off + @findex -realtime +-Run qemu with realtime features. +-mlocking qemu and guest memory can be enabled via @option{mlock=on} ++Run qemu-kvm with realtime features. ++mlocking qemu-kvm and guest memory can be enabled via @option{mlock=on} + (enabled by default). + ETEXI + +-- +2.21.0 + diff --git a/SOURCES/0018-usb-xhci-Fix-PCI-capability-order.patch b/SOURCES/0018-usb-xhci-Fix-PCI-capability-order.patch new file mode 100644 index 0000000..bc6146d --- /dev/null +++ b/SOURCES/0018-usb-xhci-Fix-PCI-capability-order.patch @@ -0,0 +1,96 @@ +From b13a7d3527c5c91e7a50236de30a2244b8453911 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 5 May 2017 19:06:14 +0200 +Subject: usb-xhci: Fix PCI capability order + +RH-Author: Dr. David Alan Gilbert +Message-id: <20170505190614.15987-2-dgilbert@redhat.com> +Patchwork-id: 75038 +O-Subject: [RHEL-7.4 qemu-kvm-rhev PATCH 1/1] usb-xhci: Fix PCI capability order +Bugzilla: 1447874 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Michael S. 
Tsirkin +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Juan Quintela + +From: "Dr. David Alan Gilbert" + +Upstream commit 1108b2f8a9 in 2.7.0 changed the order +of the PCI capability chain in the XHCI pci device in the case +where the device has the PCIe endpoint capability (i.e. only +older machine types, pc-i440fx-2.0 upstream, pc-i440fx-rhel7.0.0 +apparently for us). + +Changing the order breaks migration compatibility; fixing this +upstream would mean breaking the same case going from 2.7.0->current +that currently works 2.7.0->2.9.0 - so upstream it's a choice +of two breakages. + +Since we never released 2.7.0/2.8.0 we can fix this downstream. + +This reverts the order so that we create the capabilities in the +order: + PCIe + MSI + MSI-X + +The symptom is: +qemu-kvm: get_pci_config_device: Bad config data: i=0x71 read: a0 device: 0 cmask: ff wmask: 0 w1cmask:0 +qemu-kvm: Failed to load PCIDevice:config +qemu-kvm: Failed to load xhci:parent_obj +qemu-kvm: error while loading state for instance 0x0 of device '0000:00:0d.0/xhci' +qemu-kvm: load of migration failed: Invalid argument + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Miroslav Rezanina + +-- +Rebase notes (2.9.0): +- Change in assert condition (upstream) + +(cherry picked from commit aad727a5ecde1ad4935eb8427604d4df5a1f1f35) +(cherry picked from commit 2dd7402227e77d748a7375233ac9e7feab244bda) + +Conflicts: + hw/usb/hcd-xhci.c + +(cherry picked from commit a42f86dc906cc7d2c16d02bf125ed76847b469cb) +(cherry picked from commit 992ab2e4f6e15d3e51bc716763aa8d6f45c6d29d) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/usb/hcd-xhci.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index 8fed2eedd6..d2b9744030 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -3403,6 +3403,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) + xhci->max_pstreams_mask = 0; + } + ++ if (pci_bus_is_express(pci_get_bus(dev)) || ++ xhci_get_flag(xhci, XHCI_FLAG_FORCE_PCIE_ENDCAP)) { ++ ret = pcie_endpoint_cap_init(dev, 0xa0); ++ assert(ret > 0); ++ } ++ + if (xhci->msi != ON_OFF_AUTO_OFF) { + ret = msi_init(dev, 0x70, xhci->numintrs, true, false, &err); + /* Any error other than -ENOTSUP(board's MSI support is broken) +@@ -3451,12 +3457,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) + PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64, + &xhci->mem); + +- if (pci_bus_is_express(pci_get_bus(dev)) || +- xhci_get_flag(xhci, XHCI_FLAG_FORCE_PCIE_ENDCAP)) { +- ret = pcie_endpoint_cap_init(dev, 0xa0); +- assert(ret > 0); +- } +- + if (xhci->msix != ON_OFF_AUTO_OFF) { + /* TODO check for errors, and should fail when msix=on */ + msix_init(dev, xhci->numintrs, +-- +2.21.0 + diff --git a/SOURCES/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/SOURCES/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch new file mode 100644 index 0000000..e167b2e --- /dev/null +++ b/SOURCES/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -0,0 +1,69 @@ +From 3fab8f5e8a9e190c1ed6916ac13c7c4d65e874b7 Mon Sep 17 00:00:00 2001 +From: Fam Zheng +Date: Wed, 14 Jun 2017 15:37:01 +0200 +Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] + +RH-Author: Fam Zheng +Message-id: <20170614153701.14757-1-famz@redhat.com> +Patchwork-id: 75613 +O-Subject: [RHV-7.4 qemu-kvm-rhev PATCH v3] virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] +Bugzilla: 1378816 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi 
+RH-Acked-by: Max Reitz + +We need a fix for RHEL 7.4 and 7.3.z, but unfortunately upstream isn't +ready. If it were, the changes would be too invasive. To have an idea: + +https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05400.html + +is an incomplete attempt to fix part of the issue, and the remaining +work unfortunately involves even more complex changes. + +As a band-aid, this partially reverts the effect of ef8875b +(virtio-scsi: Remove op blocker for dataplane, since v2.7). We cannot +simply revert that commit as a whole because we already shipped it in +qemu-kvm-rhev 7.3, since when, block jobs have been possible. We should +only block what has been broken. Also, faithfully reverting the above +commit means adding back the removed op blocker, but that is not enough, +because it still crashes when inserting media into an initially empty +scsi-cd. + +All in all, scsi-cd on virtio-scsi-dataplane has basically been unusable +unless the scsi-cd never enters an empty state, so, disable it +altogether. Otherwise it would be much more difficult to avoid +crashing. + +Signed-off-by: Fam Zheng +Signed-off-by: Miroslav Rezanina +(cherry picked from commit b0caf00bbc35c7d89e02999bdce86e1f867728e8) +(cherry picked from commit c9c4f117d8b507c2f86035c282d537c0a327364f) +(cherry picked from commit 5d586bb2543337f0ff172c6ce942dba3acbcedff) +Signed-off-by: Danilo C. L. de Paula +--- + hw/scsi/virtio-scsi.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index e8b2b64d09..54108c0056 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -808,6 +808,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + SCSIDevice *sd = SCSI_DEVICE(dev); + int ret; + ++ /* XXX: Remove this check once block backend is capable of handling ++ * AioContext change upon eject/insert. 
++ * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if ++ * data plane is not used, both cases are safe for scsi-cd. */ ++ if (s->ctx && s->ctx != qemu_get_aio_context() && ++ object_dynamic_cast(OBJECT(dev), "scsi-cd")) { ++ error_setg(errp, "scsi-cd is not supported by data plane"); ++ return; ++ } + if (s->ctx && !s->dataplane_fenced) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; +-- +2.21.0 + diff --git a/SOURCES/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/SOURCES/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch new file mode 100644 index 0000000..b3350da --- /dev/null +++ b/SOURCES/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -0,0 +1,60 @@ +From 148e9e80a3a430615b552075082fad22d007d851 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Wed, 6 Feb 2019 03:58:56 +0000 +Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts + +RH-Author: David Gibson +Message-id: <20190206035856.19058-1-dgibson@redhat.com> +Patchwork-id: 84246 +O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts +Bugzilla: 1653590 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Serhii Popovych +RH-Acked-by: Thomas Huth + +Most current POWER guests require 64kiB page support, so that's the default +for the cap-hpt-max-pagesize option in qemu which limits available guest +page sizes. We warn if the value is set smaller than that, but don't +outright fail upstream, because we need to allow for the possibility of +guest (and/or host) kernels configured for 4kiB page sizes. + +Downstream, however, we simply don't support 4kiB pagesize configured +kernels in guest or host, so we can have qemu simply error out in this +situation. 
+ +Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified + it failed immediately with a qemu error + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr_caps.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c +index 481dfd2a27..805f38533e 100644 +--- a/hw/ppc/spapr_caps.c ++++ b/hw/ppc/spapr_caps.c +@@ -351,12 +351,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) + { ++#if 0 /* disabled for RHEL */ + if (val < 12) { + error_setg(errp, "Require at least 4kiB hpt-max-page-size"); + return; + } else if (val < 16) { + warn_report("Many guests require at least 64kiB hpt-max-page-size"); + } ++#else /* Only page sizes >=64kiB supported for RHEL */ ++ if (val < 16) { ++ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); ++ return; ++ } ++#endif + + spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); + } +-- +2.21.0 + diff --git a/SOURCES/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch b/SOURCES/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch new file mode 100644 index 0000000..a2a800b --- /dev/null +++ b/SOURCES/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch @@ -0,0 +1,61 @@ +From ab9ebc29bb9bb142e73a160750a451d40bfe9746 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Mon, 16 Sep 2019 17:07:00 +0100 +Subject: Using ip_deq after m_free might read pointers from an allocation + reuse. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Philippe Mathieu-Daudé +Message-id: <20190916170700.647-2-philmd@redhat.com> +Patchwork-id: 90470 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] Using ip_deq after m_free might read pointers from an allocation reuse. 
+Bugzilla: 1749737 +RH-Acked-by: Danilo de Paula +RH-Acked-by: John Snow + +From: Samuel Thibault + +This would be difficult to exploit, but that is still related with +CVE-2019-14378 which generates fragmented IP packets that would trigger this +issue and at least produce a DoS. + +Signed-off-by: Samuel Thibault +(cherry picked from libslirp commit c59279437eda91841b9d26079c70b8a540d41204) +Signed-off-by: Philippe Mathieu-Daudé + +Signed-off-by: Danilo C. L. de Paula +--- + slirp/src/ip_input.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c +index 8c75d91495..df1c846ade 100644 +--- a/slirp/src/ip_input.c ++++ b/slirp/src/ip_input.c +@@ -292,6 +292,7 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) + */ + while (q != (struct ipasfrag *)&fp->frag_link && + ip->ip_off + ip->ip_len > q->ipf_off) { ++ struct ipasfrag *prev; + i = (ip->ip_off + ip->ip_len) - q->ipf_off; + if (i < q->ipf_len) { + q->ipf_len -= i; +@@ -299,9 +300,11 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) + m_adj(dtom(slirp, q), i); + break; + } ++ prev = q; + q = q->ipf_next; +- m_free(dtom(slirp, q->ipf_prev)); +- ip_deq(q->ipf_prev); ++ ip_deq(prev); ++ m_free(dtom(slirp, prev)); ++ + } + + insert: +-- +2.21.0 + diff --git a/SOURCES/81-kvm-rhel.rules b/SOURCES/81-kvm-rhel.rules new file mode 100644 index 0000000..787cad6 --- /dev/null +++ b/SOURCES/81-kvm-rhel.rules @@ -0,0 +1 @@ +DEVPATH=="*/kvm", ACTION=="change", RUN+="/lib/udev/udev-kvm-check $env{COUNT} $env{EVENT}" diff --git a/SOURCES/85-kvm.preset b/SOURCES/85-kvm.preset new file mode 100644 index 0000000..8024052 --- /dev/null +++ b/SOURCES/85-kvm.preset @@ -0,0 +1,5 @@ +# Enable kvm-setup by default. This can have odd side effects on +# PowerNV systems that aren't intended as KVM hosts, but at present we +# only support RHEL on PowerNV for the purpose of being a RHEV host. 
+ +enable kvm-setup.service diff --git a/SOURCES/95-kvm-memlock.conf b/SOURCES/95-kvm-memlock.conf new file mode 100644 index 0000000..fc59dbe --- /dev/null +++ b/SOURCES/95-kvm-memlock.conf @@ -0,0 +1,10 @@ +# The KVM HV implementation on Power can require a significant amount +# of unswappable memory (about half of which also needs to be host +# physically contiguous) to hold the guest's Hash Page Table (HPT) - +# roughly 1/64th of the guest's RAM size, minimum 16MiB. +# +# These limits allow unprivileged users to start smallish VMs, such as +# those used by libguestfs. +# +* hard memlock 65536 +* soft memlock 65536 diff --git a/SOURCES/99-qemu-guest-agent.rules b/SOURCES/99-qemu-guest-agent.rules new file mode 100644 index 0000000..8a290ab --- /dev/null +++ b/SOURCES/99-qemu-guest-agent.rules @@ -0,0 +1,2 @@ +SUBSYSTEM=="virtio-ports", ATTR{name}=="org.qemu.guest_agent.0", \ + TAG+="systemd" ENV{SYSTEMD_WANTS}="qemu-guest-agent.service" diff --git a/SOURCES/README.tests b/SOURCES/README.tests new file mode 100644 index 0000000..9932773 --- /dev/null +++ b/SOURCES/README.tests @@ -0,0 +1,39 @@ +qemu-kvm-tests README +===================== + +The qemu-kvm-tests rpm contains tests that can be used to verify the +functionality of the installed qemu-kvm package + +When installed, the files from this rpm will be arranged in the following +directory structure + +tests-src/ +├── README +├── scripts +│   ├── qemu.py +│   └── qmp +└── tests + ├── acceptance + ├── Makefile.include + └── qemu-iotests + +The tests/ directory within the tests-src/ directory is setup to remain a copy +of a subset of the tests/ directory from the QEMU source tree + +The avocado_qemu tests and qemu-iotests, along with files required for the +execution of the avocado_qemu tests (scripts/qemu.py and scripts/qmp/) will be +installed in a new location - /usr/lib64/qemu-kvm/tests-src/ + +avocado_qemu tests: +The avocado_qemu tests can be executed by running the following avocado command: +avocado run 
-p qemu_bin=/usr/libexec/qemu-kvm /usr/lib64/qemu-kvm/tests-src/tests/acceptance/ +Avocado needs to be installed separately using either pip or from source as +Avocado is not being packaged for RHEL-8. + +qemu-iotests: +symlinks to corresponding binaries need to be created for QEMU_PROG, +QEMU_IO_PROG, QEMU_IMG_PROG, and QEMU_NBD_PROG before the iotests can be +executed. + +The primary purpose of this package is to make these tests available to be +executed as gating tests for the virt module in the RHEL-8 OSCI environment. diff --git a/SOURCES/bridge.conf b/SOURCES/bridge.conf new file mode 100644 index 0000000..a573665 --- /dev/null +++ b/SOURCES/bridge.conf @@ -0,0 +1 @@ +allow virbr0 diff --git a/SOURCES/ksm.service b/SOURCES/ksm.service new file mode 100644 index 0000000..35c6f1d --- /dev/null +++ b/SOURCES/ksm.service @@ -0,0 +1,13 @@ +[Unit] +Description=Kernel Samepage Merging +ConditionPathExists=/sys/kernel/mm/ksm + +[Service] +Type=oneshot +RemainAfterExit=yes +EnvironmentFile=-/etc/sysconfig/ksm +ExecStart=/usr/libexec/ksmctl start +ExecStop=/usr/libexec/ksmctl stop + +[Install] +WantedBy=multi-user.target diff --git a/SOURCES/ksm.sysconfig b/SOURCES/ksm.sysconfig new file mode 100644 index 0000000..d99656d --- /dev/null +++ b/SOURCES/ksm.sysconfig @@ -0,0 +1,4 @@ +# The maximum number of unswappable kernel pages +# which may be allocated by ksm (0 for unlimited) +# If unset, defaults to half of total memory +# KSM_MAX_KERNEL_PAGES= diff --git a/SOURCES/ksmctl.c b/SOURCES/ksmctl.c new file mode 100644 index 0000000..af39591 --- /dev/null +++ b/SOURCES/ksmctl.c @@ -0,0 +1,77 @@ +/* Start/stop KSM, for systemd. + * Copyright (C) 2009, 2011 Red Hat, Inc. + * Written by Paolo Bonzini <pbonzini@redhat.com>. + * Based on the original sysvinit script by Dan Kenigsberg <danken@redhat.com> + * This file is distributed under the GNU General Public License, version 2 + * or later.
*/ + +#include <unistd.h> +#include <stdio.h> +#include <limits.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#define KSM_MAX_KERNEL_PAGES_FILE "/sys/kernel/mm/ksm/max_kernel_pages" +#define KSM_RUN_FILE "/sys/kernel/mm/ksm/run" + +char *program_name; + +int usage(void) +{ + fprintf(stderr, "Usage: %s {start|stop}\n", program_name); + return 1; +} + +int write_value(uint64_t value, char *filename) +{ + FILE *fp; + if (!(fp = fopen(filename, "w")) || + fprintf(fp, "%llu\n", (unsigned long long) value) == EOF || + fflush(fp) == EOF || + fclose(fp) == EOF) + return 1; + + return 0; +} + +uint64_t ksm_max_kernel_pages() +{ + char *var = getenv("KSM_MAX_KERNEL_PAGES"); + char *endptr; + uint64_t value; + if (var && *var) { + value = strtoll(var, &endptr, 0); + if (value < LLONG_MAX && !*endptr) + return value; + } + /* Unless KSM_MAX_KERNEL_PAGES is set, let KSM munch up to half of + * total memory. */ + return sysconf(_SC_PHYS_PAGES) / 2; +} + +int start(void) +{ + if (access(KSM_MAX_KERNEL_PAGES_FILE, R_OK) >= 0) + write_value(ksm_max_kernel_pages(), KSM_MAX_KERNEL_PAGES_FILE); + return write_value(1, KSM_RUN_FILE); +} + +int stop(void) +{ + return write_value(0, KSM_RUN_FILE); +} + +int main(int argc, char **argv) +{ + program_name = argv[0]; + if (argc < 2) { + return usage(); + } else if (!strcmp(argv[1], "start")) { + return start(); + } else if (!strcmp(argv[1], "stop")) { + return stop(); + } else { + return usage(); + } +} diff --git a/SOURCES/ksmtuned b/SOURCES/ksmtuned new file mode 100644 index 0000000..7bc5743 --- /dev/null +++ b/SOURCES/ksmtuned @@ -0,0 +1,139 @@ +#!/bin/bash +# +# Copyright 2009 Red Hat, Inc. and/or its affiliates. +# Released under the GPL +# +# Author: Dan Kenigsberg <danken@redhat.com> +# +# ksmtuned - a simple script that controls whether (and with what vigor) ksm +# should search for duplicated pages. +# +# starts ksm when memory committed to qemu processes exceeds a threshold, and +# make ksm work harder and harder until memory load falls below that +# threshold.
+# +# send SIGUSR1 to this process right after a new qemu process is started, or +# following its death, to retune ksm accordingly +# +# needs testing and ironing. contact danken@redhat.com if something breaks. + +if [ -f /etc/ksmtuned.conf ]; then + . /etc/ksmtuned.conf +fi + +debug() { + if [ -n "$DEBUG" ]; then + s="`/bin/date`: $*" + [ -n "$LOGFILE" ] && echo "$s" >> "$LOGFILE" || echo "$s" + fi +} + + +KSM_MONITOR_INTERVAL=${KSM_MONITOR_INTERVAL:-60} +KSM_NPAGES_BOOST=${KSM_NPAGES_BOOST:-300} +KSM_NPAGES_DECAY=${KSM_NPAGES_DECAY:--50} + +KSM_NPAGES_MIN=${KSM_NPAGES_MIN:-64} +KSM_NPAGES_MAX=${KSM_NPAGES_MAX:-1250} +# millisecond sleep between ksm scans for 16Gb server. Smaller servers sleep +# more, bigger sleep less. +KSM_SLEEP_MSEC=${KSM_SLEEP_MSEC:-10} + +KSM_THRES_COEF=${KSM_THRES_COEF:-20} +KSM_THRES_CONST=${KSM_THRES_CONST:-2048} + +total=`awk '/^MemTotal:/ {print $2}' /proc/meminfo` +debug total $total + +npages=0 +sleep=$[KSM_SLEEP_MSEC * 16 * 1024 * 1024 / total] +[ $sleep -le 10 ] && sleep=10 +debug sleep $sleep +thres=$[total * KSM_THRES_COEF / 100] +if [ $KSM_THRES_CONST -gt $thres ]; then + thres=$KSM_THRES_CONST +fi +debug thres $thres + +KSMCTL () { + case x$1 in + xstop) + echo 0 > /sys/kernel/mm/ksm/run + ;; + xstart) + echo $2 > /sys/kernel/mm/ksm/pages_to_scan + echo $3 > /sys/kernel/mm/ksm/sleep_millisecs + echo 1 > /sys/kernel/mm/ksm/run + ;; + esac +} + +committed_memory () { + # calculate how much memory is committed to running qemu processes + local pidlist + pidlist=$(pgrep -d ' ' -- '^qemu(-(kvm|system-.+)|:.{1,11})$') + if [ -n "$pidlist" ]; then + ps -p "$pidlist" -o rsz= + fi | awk '{ sum += $1 }; END { print 0+sum }' +} + +free_memory () { + awk '/^(MemFree|Buffers|Cached):/ {free += $2}; END {print free}' \ + /proc/meminfo +} + +increase_npages() { + local delta + delta=${1:-0} + npages=$[npages + delta] + if [ $npages -lt $KSM_NPAGES_MIN ]; then + npages=$KSM_NPAGES_MIN + elif [ $npages -gt $KSM_NPAGES_MAX ]; then + 
npages=$KSM_NPAGES_MAX + fi + echo $npages +} + + +adjust () { + local free committed + free=`free_memory` + committed=`committed_memory` + debug committed $committed free $free + if [ $[committed + thres] -lt $total -a $free -gt $thres ]; then + KSMCTL stop + debug "$[committed + thres] < $total and free > $thres, stop ksm" + return 1 + fi + debug "$[committed + thres] > $total, start ksm" + if [ $free -lt $thres ]; then + npages=`increase_npages $KSM_NPAGES_BOOST` + debug "$free < $thres, boost" + else + npages=`increase_npages $KSM_NPAGES_DECAY` + debug "$free > $thres, decay" + fi + KSMCTL start $npages $sleep + debug "KSMCTL start $npages $sleep" + return 0 +} + +function nothing () { + : +} + +loop () { + trap nothing SIGUSR1 + while true + do + sleep $KSM_MONITOR_INTERVAL & + wait $! + adjust + done +} + +PIDFILE=${PIDFILE-/var/run/ksmtune.pid} +if touch "$PIDFILE"; then + loop & + echo $! > "$PIDFILE" +fi diff --git a/SOURCES/ksmtuned.conf b/SOURCES/ksmtuned.conf new file mode 100644 index 0000000..fc4518c --- /dev/null +++ b/SOURCES/ksmtuned.conf @@ -0,0 +1,21 @@ +# Configuration file for ksmtuned. + +# How long ksmtuned should sleep between tuning adjustments +# KSM_MONITOR_INTERVAL=60 + +# Millisecond sleep between ksm scans for 16Gb server. +# Smaller servers sleep more, bigger sleep less. 
+# KSM_SLEEP_MSEC=10 + +# KSM_NPAGES_BOOST=300 +# KSM_NPAGES_DECAY=-50 +# KSM_NPAGES_MIN=64 +# KSM_NPAGES_MAX=1250 + +# KSM_THRES_COEF=20 +# KSM_THRES_CONST=2048 + +# uncomment the following if you want ksmtuned debug info + +# LOGFILE=/var/log/ksmtuned +# DEBUG=1 diff --git a/SOURCES/ksmtuned.service b/SOURCES/ksmtuned.service new file mode 100644 index 0000000..39febcc --- /dev/null +++ b/SOURCES/ksmtuned.service @@ -0,0 +1,12 @@ +[Unit] +Description=Kernel Samepage Merging (KSM) Tuning Daemon +After=ksm.service +Requires=ksm.service + +[Service] +ExecStart=/usr/sbin/ksmtuned +ExecReload=/bin/kill -USR1 $MAINPID +Type=forking + +[Install] +WantedBy=multi-user.target diff --git a/SOURCES/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch b/SOURCES/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch new file mode 100644 index 0000000..1435017 --- /dev/null +++ b/SOURCES/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch @@ -0,0 +1,53 @@ +From 481357ea8ae32b6894860c296cf6a2898260195f Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 17 Jan 2020 13:18:27 +0100 +Subject: [PATCH 4/4] RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR + support + +RH-Author: Paolo Bonzini +Message-id: <20200117131827.20361-1-pbonzini@redhat.com> +Patchwork-id: 93405 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v3] RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR support +Bugzilla: 1559846 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Miroslav Rezanina + +BZ: 1559846 +BRANCH: rhel-av-8.2.0 +BREW: 25775160 +UPSTREAM: RHEL only + +Nested PERF_GLOBAL_CTRL support is not present in the 8.2 kernel. Drop the +features via compat properties, they will be moved to 8.2 machine type compat +properties in the 8.3 timeframe. + +Signed-off-by: Paolo Bonzini +--- + No change, for v2 I mistakenly wrote "origin/rhel-av-8.2.0" as the + branch. 
:( + + hw/i386/pc.c | 2 ++ + 1 file changed, 2 insertions(+) + +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 61e70e4..73a0f11 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -351,6 +351,8 @@ const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + GlobalProperty pc_rhel_compat[] = { + { TYPE_X86_CPU, "host-phys-bits", "on" }, + { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, ++ { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, ++ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, + /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, + }; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch b/SOURCES/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch new file mode 100644 index 0000000..d717ae2 --- /dev/null +++ b/SOURCES/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch @@ -0,0 +1,115 @@ +From c477581ccc6962651d4d6c702a6c3e2fcc5e4205 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 2 Jan 2020 11:56:51 +0000 +Subject: [PATCH 2/2] kvm: Reallocate dirty_bmap when we change a slot + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200102115651.140177-1-dgilbert@redhat.com> +Patchwork-id: 93256 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] kvm: Reallocate dirty_bmap when we change a slot +Bugzilla: 1772774 +RH-Acked-by: Peter Xu +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Laszlo Ersek + +From: "Dr. David Alan Gilbert" + +bz: https://bugzilla.redhat.com/show_bug.cgi?id=1772774 +brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25575691 +branch: rhel-av-8.2.0 + +kvm_set_phys_mem can be called to reallocate a slot by something the +guest does (e.g. writing to PAM and other chipset registers). 
+This can happen in the middle of a migration, and if we're unlucky +it can now happen between the split 'sync' and 'clear'; the clear +asserts if there's no bmap to clear. Recreate the bmap whenever +we change the slot, keeping the clear path happy. + +Typically this is triggered by the guest rebooting during a migrate. + +Corresponds to: +https://bugzilla.redhat.com/show_bug.cgi?id=1772774 +https://bugzilla.redhat.com/show_bug.cgi?id=1771032 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Peter Xu +(cherry picked from commit 9b3a31c745b61758aaa5466a3a9fc0526d409188) +Signed-off-by: Danilo C. L. de Paula +--- + accel/kvm/kvm-all.c | 44 +++++++++++++++++++++++++++++--------------- + 1 file changed, 29 insertions(+), 15 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index dc3ed7f..5007bda 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -518,6 +518,27 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, + + #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) + ++/* Allocate the dirty bitmap for a slot */ ++static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) ++{ ++ /* ++ * XXX bad kernel interface alert ++ * For dirty bitmap, kernel allocates array of size aligned to ++ * bits-per-long. But for case when the kernel is 64bits and ++ * the userspace is 32bits, userspace can't align to the same ++ * bits-per-long, since sizeof(long) is different between kernel ++ * and user space. This way, userspace will provide buffer which ++ * may be 4 bytes less than the kernel will use, resulting in ++ * userspace memory corruption (which is not detectable by valgrind ++ * too, in most cases). ++ * So for now, let's align to 64 instead of HOST_LONG_BITS here, in ++ * a hope that sizeof(long) won't become >8 any time soon. 
++ */ ++ hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), ++ /*HOST_LONG_BITS*/ 64) / 8; ++ mem->dirty_bmap = g_malloc0(bitmap_size); ++} ++ + /** + * kvm_physical_sync_dirty_bitmap - Sync dirty bitmap from kernel space + * +@@ -550,23 +571,9 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + goto out; + } + +- /* XXX bad kernel interface alert +- * For dirty bitmap, kernel allocates array of size aligned to +- * bits-per-long. But for case when the kernel is 64bits and +- * the userspace is 32bits, userspace can't align to the same +- * bits-per-long, since sizeof(long) is different between kernel +- * and user space. This way, userspace will provide buffer which +- * may be 4 bytes less than the kernel will use, resulting in +- * userspace memory corruption (which is not detectable by valgrind +- * too, in most cases). +- * So for now, let's align to 64 instead of HOST_LONG_BITS here, in +- * a hope that sizeof(long) won't become >8 any time soon. +- */ + if (!mem->dirty_bmap) { +- hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), +- /*HOST_LONG_BITS*/ 64) / 8; + /* Allocate on the first log_sync, once and for all */ +- mem->dirty_bmap = g_malloc0(bitmap_size); ++ kvm_memslot_init_dirty_bitmap(mem); + } + + d.dirty_bitmap = mem->dirty_bmap; +@@ -1067,6 +1074,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + mem->ram = ram; + mem->flags = kvm_mem_flags(mr); + ++ if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { ++ /* ++ * Reallocate the bmap; it means it doesn't disappear in ++ * middle of a migrate. 
++ */ ++ kvm_memslot_init_dirty_bitmap(mem); ++ } + err = kvm_set_user_memory_region(kml, mem, true); + if (err) { + fprintf(stderr, "%s: error registering slot: %s\n", __func__, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch b/SOURCES/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch new file mode 100644 index 0000000..0c1c37f --- /dev/null +++ b/SOURCES/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch @@ -0,0 +1,121 @@ +From 71b5267ed33f9e60bc98acbabcbed62f01a96ff4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 30 Mar 2020 11:19:23 +0100 +Subject: [PATCH 3/4] Revert "mirror: Don't let an operation wait for itself" + +RH-Author: Kevin Wolf +Message-id: <20200330111924.22938-2-kwolf@redhat.com> +Patchwork-id: 94464 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] Revert "mirror: Don't let an operation wait for itself" +Bugzilla: 1794692 +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +This reverts commit 7e6c4ff792734e196c8ca82564c56b5e7c6288ca. + +The fix was incomplete as it only protected against requests waiting for +themselves, but not against requests waiting for each other. We need a +different solution. + +Signed-off-by: Kevin Wolf +Message-Id: <20200326153628.4869-2-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 9178f4fe5f083064f5c91f04d98c815ce5a5af1c) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/mirror.c | 21 +++++++++------------ + 1 file changed, 9 insertions(+), 12 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index cacbc70..8959e42 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -283,14 +283,11 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, + } + + static inline void coroutine_fn +-mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) ++mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) + { + MirrorOp *op; + + QTAILQ_FOREACH(op, &s->ops_in_flight, next) { +- if (self == op) { +- continue; +- } + /* Do not wait on pseudo ops, because it may in turn wait on + * some other operation to start, which may in fact be the + * caller of this function. Since there is only one pseudo op +@@ -305,10 +302,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) + } + + static inline void coroutine_fn +-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self) ++mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) + { + /* Only non-active operations use up in-flight slots */ +- mirror_wait_for_any_operation(s, self, false); ++ mirror_wait_for_any_operation(s, false); + } + + /* Perform a mirror copy operation. 
+@@ -351,7 +348,7 @@ static void coroutine_fn mirror_co_read(void *opaque) + + while (s->buf_free_count < nb_chunks) { + trace_mirror_yield_in_flight(s, op->offset, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s, op); ++ mirror_wait_for_free_in_flight_slot(s); + } + + /* Now make a QEMUIOVector taking enough granularity-sized chunks +@@ -558,7 +555,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) + + while (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield_in_flight(s, offset, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s, pseudo_op); ++ mirror_wait_for_free_in_flight_slot(s); + } + + if (s->ret < 0) { +@@ -612,7 +609,7 @@ static void mirror_free_init(MirrorBlockJob *s) + static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) + { + while (s->in_flight > 0) { +- mirror_wait_for_free_in_flight_slot(s, NULL); ++ mirror_wait_for_free_in_flight_slot(s); + } + } + +@@ -797,7 +794,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) + if (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, + s->in_flight); +- mirror_wait_for_free_in_flight_slot(s, NULL); ++ mirror_wait_for_free_in_flight_slot(s); + continue; + } + +@@ -950,7 +947,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + /* Do not start passive operations while there are active + * writes in progress */ + while (s->in_active_write_counter) { +- mirror_wait_for_any_operation(s, NULL, true); ++ mirror_wait_for_any_operation(s, true); + } + + if (s->ret < 0) { +@@ -976,7 +973,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || + (cnt == 0 && s->in_flight > 0)) { + trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s, NULL); ++ mirror_wait_for_free_in_flight_slot(s); + continue; + } else if (cnt != 0) { + delay_ns = mirror_iteration(s); +-- +1.8.3.1 + diff --git 
a/SOURCES/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch b/SOURCES/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch new file mode 100644 index 0000000..dc65c26 --- /dev/null +++ b/SOURCES/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch @@ -0,0 +1,63 @@ +From ceb6d97674b8bc9a072db1be4167411bc0ee48d7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:02 +0100 +Subject: [PATCH 091/116] Virtiofsd: fix memory leak on fuse queueinfo +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-88-dgilbert@redhat.com> +Patchwork-id: 93542 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 087/112] Virtiofsd: fix memory leak on fuse queueinfo +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +For fuse's queueinfo, both queueinfo array and queueinfos are allocated in +fv_queue_set_started() but not cleaned up when the daemon process quits. + +This fixes the leak in proper places. + +Signed-off-by: Liu Bo +Signed-off-by: Eric Ren +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 740b0b700a6338a1cf60c26229651ac5f6724944) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index b7948de..fb8d6d1 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -625,6 +625,8 @@ static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) + } + close(ourqi->kill_fd); + ourqi->kick_fd = -1; ++ free(vud->qi[qidx]); ++ vud->qi[qidx] = NULL; + } + + /* Callback from libvhost-user on start or stop of a queue */ +@@ -884,6 +886,12 @@ int virtio_session_mount(struct fuse_session *se) + void virtio_session_close(struct fuse_session *se) + { + close(se->vu_socketfd); ++ ++ if (!se->virtio_dev) { ++ return; ++ } ++ ++ free(se->virtio_dev->qi); + free(se->virtio_dev); + se->virtio_dev = NULL; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch b/SOURCES/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch new file mode 100644 index 0000000..becba21 --- /dev/null +++ b/SOURCES/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch @@ -0,0 +1,62 @@ +From 0d5a09173eb75b7e56122c2aefb2646a2be58400 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 31 Jan 2020 17:12:57 +0000 +Subject: [PATCH 15/15] apic: Use 32bit APIC ID for migration instance ID + +RH-Author: Peter Xu +Message-id: <20200131171257.1066593-4-peterx@redhat.com> +Patchwork-id: 93628 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/3] apic: Use 32bit APIC ID for migration instance ID +Bugzilla: 1529231 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. 
David Alan Gilbert + +Migration is silently broken now with x2apic config like this: + + -smp 200,maxcpus=288,sockets=2,cores=72,threads=2 \ + -device intel-iommu,intremap=on,eim=on + +After migration, the guest kernel could hang at anything, due to +x2apic bit not migrated correctly in IA32_APIC_BASE on some vcpus, so +any operations related to x2apic could be broken then (e.g., RDMSR on +x2apic MSRs could fail because KVM would think that the vcpu hasn't +enabled x2apic at all). + +The issue is that the x2apic bit was never applied correctly for vcpus +whose ID > 255 when migrate completes, and that's because when we +migrate APIC we use the APICCommonState.id as instance ID of the +migration stream, while that's too short for x2apic. + +Let's use the newly introduced initial_apic_id for that. + +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Reviewed-by: Eduardo Habkost +Signed-off-by: Juan Quintela +(cherry picked from commit 0ab994867c365db21e15f9503922c79234d8e40e) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/intc/apic_common.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c +index 54b8731..b5dbeb6 100644 +--- a/hw/intc/apic_common.c ++++ b/hw/intc/apic_common.c +@@ -268,7 +268,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp) + APICCommonState *s = APIC_COMMON(dev); + APICCommonClass *info; + static DeviceState *vapic; +- uint32_t instance_id = s->id; ++ uint32_t instance_id = s->initial_apic_id; ++ ++ /* Normally initial APIC ID should be no more than hundreds */ ++ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); + + info = APIC_COMMON_GET_CLASS(s); + info->realize(dev, errp); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch b/SOURCES/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch new file mode 100644 index 0000000..7fb76c1 --- /dev/null +++ b/SOURCES/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch @@ -0,0 +1,57 @@ +From 619b3aac9790a7ca7c01846144395a318a9ab250 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:14 +0100 +Subject: [PATCH 3/6] backup: don't acquire aio_context in backup_clean + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-4-kwolf@redhat.com> +Patchwork-id: 94596 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/6] backup: don't acquire aio_context in backup_clean +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +From: Stefan Reiter + +All code-paths leading to backup_clean (via job_clean) have the job's +context already acquired. The job's context is guaranteed to be the same +as the one used by backup_top via backup_job_create. + +Since the previous logic effectively acquired the lock twice, this +broke cleanup of backups for disks using IO threads, since the BDRV_POLL_WHILE +in bdrv_backup_top_drop -> bdrv_do_drained_begin would only release the lock +once, thus deadlocking with the IO thread. 
+ +This is a partial revert of 0abf2581717a19. + +Signed-off-by: Stefan Reiter +Reviewed-by: Max Reitz +Message-Id: <20200407115651.69472-4-s.reiter@proxmox.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit eca0f3524a4eb57d03a56b0cbcef5527a0981ce4) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/backup.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/block/backup.c b/block/backup.c +index 1383e21..ec50946 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -135,11 +135,7 @@ static void backup_abort(Job *job) + static void backup_clean(Job *job) + { + BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); +- AioContext *aio_context = bdrv_get_aio_context(s->backup_top); +- +- aio_context_acquire(aio_context); + bdrv_backup_top_drop(s->backup_top); +- aio_context_release(aio_context); + } + + void backup_do_checkpoint(BlockJob *job, Error **errp) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-backup-top-Begin-drain-earlier.patch b/SOURCES/kvm-backup-top-Begin-drain-earlier.patch new file mode 100644 index 0000000..ef289b7 --- /dev/null +++ b/SOURCES/kvm-backup-top-Begin-drain-earlier.patch @@ -0,0 +1,56 @@ +From bc78ee07bf400cbff0021367e05d308870471710 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:45 +0000 +Subject: [PATCH 12/18] backup-top: Begin drain earlier + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-6-slp@redhat.com> +Patchwork-id: 93757 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/9] backup-top: Begin drain earlier +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +From: Max Reitz + +When dropping backup-top, we need to drain the node before freeing the +BlockCopyState. Otherwise, requests may still be in flight and then the +assertion in shres_destroy() will fail. 
+ +(This becomes visible in intermittent failure of 056.) + +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-id: 20191219182638.104621-1-mreitz@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit 503ca1262bab2c11c533a4816d1ff4297d4f58a6) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + block/backup-top.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/backup-top.c b/block/backup-top.c +index 7cdb1f8..818d3f2 100644 +--- a/block/backup-top.c ++++ b/block/backup-top.c +@@ -257,12 +257,12 @@ void bdrv_backup_top_drop(BlockDriverState *bs) + BDRVBackupTopState *s = bs->opaque; + AioContext *aio_context = bdrv_get_aio_context(bs); + +- block_copy_state_free(s->bcs); +- + aio_context_acquire(aio_context); + + bdrv_drained_begin(bs); + ++ block_copy_state_free(s->bcs); ++ + s->active = false; + bdrv_child_refresh_perms(bs, bs->backing, &error_abort); + bdrv_replace_node(bs, backing_bs(bs), &error_abort); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Activate-recursively-even-for-already-active-n.patch b/SOURCES/kvm-block-Activate-recursively-even-for-already-active-n.patch new file mode 100644 index 0000000..d6cad06 --- /dev/null +++ b/SOURCES/kvm-block-Activate-recursively-even-for-already-active-n.patch @@ -0,0 +1,116 @@ +From 0ef6691ce8964bb2bbd677756c4e594793ca3ad8 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:01 +0000 +Subject: [PATCH 04/18] block: Activate recursively even for already active + nodes + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-4-kwolf@redhat.com> +Patchwork-id: 93749 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/6] block: Activate recursively even for already active nodes +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +bdrv_invalidate_cache_all() assumes that all nodes in a given subtree +are either active or inactive when it starts. 
Therefore, as soon as it +arrives at an already active node, it stops. + +However, this assumption is wrong. For example, it's possible to take a +snapshot of an inactive node, which results in an active overlay over an +inactive backing file. The active overlay is probably also the root node +of an inactive BlockBackend (blk->disable_perm == true). + +In this case, bdrv_invalidate_cache_all() does not need to do anything +to activate the overlay node, but it still needs to recurse into the +children and the parents to make sure that after returning success, +really everything is activated. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Max Reitz +(cherry picked from commit 7bb4941ace471fc7dd6ded4749b95b9622baa6ed) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block.c | 50 ++++++++++++++++++++++++-------------------------- + 1 file changed, 24 insertions(+), 26 deletions(-) + +diff --git a/block.c b/block.c +index 473eb6e..2e5e8b6 100644 +--- a/block.c ++++ b/block.c +@@ -5335,10 +5335,6 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, + return; + } + +- if (!(bs->open_flags & BDRV_O_INACTIVE)) { +- return; +- } +- + QLIST_FOREACH(child, &bs->children, next) { + bdrv_co_invalidate_cache(child->bs, &local_err); + if (local_err) { +@@ -5360,34 +5356,36 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, + * just keep the extended permissions for the next time that an activation + * of the image is tried. 
+ */ +- bs->open_flags &= ~BDRV_O_INACTIVE; +- bdrv_get_cumulative_perm(bs, &perm, &shared_perm); +- ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, &local_err); +- if (ret < 0) { +- bs->open_flags |= BDRV_O_INACTIVE; +- error_propagate(errp, local_err); +- return; +- } +- bdrv_set_perm(bs, perm, shared_perm); +- +- if (bs->drv->bdrv_co_invalidate_cache) { +- bs->drv->bdrv_co_invalidate_cache(bs, &local_err); +- if (local_err) { ++ if (bs->open_flags & BDRV_O_INACTIVE) { ++ bs->open_flags &= ~BDRV_O_INACTIVE; ++ bdrv_get_cumulative_perm(bs, &perm, &shared_perm); ++ ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, &local_err); ++ if (ret < 0) { + bs->open_flags |= BDRV_O_INACTIVE; + error_propagate(errp, local_err); + return; + } +- } ++ bdrv_set_perm(bs, perm, shared_perm); + +- FOR_EACH_DIRTY_BITMAP(bs, bm) { +- bdrv_dirty_bitmap_skip_store(bm, false); +- } ++ if (bs->drv->bdrv_co_invalidate_cache) { ++ bs->drv->bdrv_co_invalidate_cache(bs, &local_err); ++ if (local_err) { ++ bs->open_flags |= BDRV_O_INACTIVE; ++ error_propagate(errp, local_err); ++ return; ++ } ++ } + +- ret = refresh_total_sectors(bs, bs->total_sectors); +- if (ret < 0) { +- bs->open_flags |= BDRV_O_INACTIVE; +- error_setg_errno(errp, -ret, "Could not refresh total sector count"); +- return; ++ FOR_EACH_DIRTY_BITMAP(bs, bm) { ++ bdrv_dirty_bitmap_skip_store(bm, false); ++ } ++ ++ ret = refresh_total_sectors(bs, bs->total_sectors); ++ if (ret < 0) { ++ bs->open_flags |= BDRV_O_INACTIVE; ++ error_setg_errno(errp, -ret, "Could not refresh total sector count"); ++ return; ++ } + } + + QLIST_FOREACH(parent, &bs->parents, next_parent) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch b/SOURCES/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch new file mode 100644 index 0000000..b16c0b7 --- /dev/null +++ b/SOURCES/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch @@ -0,0 +1,84 @@ +From 
f17b37b58a57d849d2ff5fa04f149d9415803a39 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:17 +0100 +Subject: [PATCH 6/6] block: Fix blk->in_flight during blk_wait_while_drained() + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-7-kwolf@redhat.com> +Patchwork-id: 94599 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/6] block: Fix blk->in_flight during blk_wait_while_drained() +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +Waiting in blk_wait_while_drained() while blk->in_flight is increased +for the current request is wrong because it will cause the drain +operation to deadlock. + +This patch makes sure that blk_wait_while_drained() is called with +blk->in_flight increased exactly once for the current request, and that +it temporarily decreases the counter while it waits. + +Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Max Reitz +Message-Id: <20200407121259.21350-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 7f16476fab14fc32388e0ebae793f64673848efa) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/block-backend.c | 17 +++++------------ + 1 file changed, 5 insertions(+), 12 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 610dbfa..38ae413 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1140,10 +1140,15 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, + return 0; + } + ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ + static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) + { ++ assert(blk->in_flight > 0); ++ + if (blk->quiesce_counter && !blk->disable_request_queuing) { ++ blk_dec_in_flight(blk); + qemu_co_queue_wait(&blk->queued_requests, NULL); ++ blk_inc_in_flight(blk); + } + } + +@@ -1418,12 +1423,6 @@ static void blk_aio_read_entry(void *opaque) + BlkRwCo *rwco = &acb->rwco; + QEMUIOVector *qiov = rwco->iobuf; + +- if (rwco->blk->quiesce_counter) { +- blk_dec_in_flight(rwco->blk); +- blk_wait_while_drained(rwco->blk); +- blk_inc_in_flight(rwco->blk); +- } +- + assert(qiov->size == acb->bytes); + rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, + qiov, rwco->flags); +@@ -1436,12 +1435,6 @@ static void blk_aio_write_entry(void *opaque) + BlkRwCo *rwco = &acb->rwco; + QEMUIOVector *qiov = rwco->iobuf; + +- if (rwco->blk->quiesce_counter) { +- blk_dec_in_flight(rwco->blk); +- blk_wait_while_drained(rwco->blk); +- blk_inc_in_flight(rwco->blk); +- } +- + assert(!qiov || qiov->size == acb->bytes); + rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, + qiov, 0, rwco->flags); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch b/SOURCES/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch new file mode 100644 index 0000000..0bad890 --- /dev/null +++ b/SOURCES/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch @@ -0,0 +1,91 @@ +From 5774af5a3c713d0c93010c30453812eae6a749cd Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:37 +0000 
+Subject: [PATCH 17/20] block: Fix cross-AioContext blockdev-snapshot + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-12-kwolf@redhat.com> +Patchwork-id: 94286 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 11/13] block: Fix cross-AioContext blockdev-snapshot +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +external_snapshot_prepare() tries to move the overlay to the AioContext +of the backing file (the snapshotted node). However, it's possible that +this doesn't work, but the backing file can instead be moved to the +overlay's AioContext (e.g. opening the backing chain for a mirror +target). + +bdrv_append() already indirectly uses bdrv_attach_node(), which takes +care to move nodes to make sure they use the same AioContext and which +tries both directions. + +So the problem has a simple fix: Just delete the unnecessary extra +bdrv_try_set_aio_context() call in external_snapshot_prepare() and +instead assert in bdrv_append() that both nodes were indeed moved to the +same AioContext. + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-6-kwolf@redhat.com> +Tested-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit 30dd65f307b647eef8156c4a33bd007823ef85cb) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 1 + + blockdev.c | 16 ---------------- + 2 files changed, 1 insertion(+), 16 deletions(-) + +diff --git a/block.c b/block.c +index 354d388..ec29b1e 100644 +--- a/block.c ++++ b/block.c +@@ -4327,6 +4327,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, + bdrv_ref(from); + + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); ++ assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); + bdrv_drained_begin(from); + + /* Put all parents into @list and calculate their cumulative permissions */ +diff --git a/blockdev.c b/blockdev.c +index 7918533..c8d4b51 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1535,9 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common, + DO_UPCAST(ExternalSnapshotState, common, common); + TransactionAction *action = common->action; + AioContext *aio_context; +- AioContext *old_context; + uint64_t perm, shared; +- int ret; + + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar + * purpose but a different set of parameters */ +@@ -1678,20 +1676,6 @@ static void external_snapshot_prepare(BlkActionState *common, + goto out; + } + +- /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ +- old_context = bdrv_get_aio_context(state->new_bs); +- aio_context_release(aio_context); +- aio_context_acquire(old_context); +- +- ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); +- +- aio_context_release(old_context); +- aio_context_acquire(aio_context); +- +- if (ret < 0) { +- goto out; +- } +- + /* This removes our old bs and adds the new bs. This is an operation that + * can fail, so we need to do it in .prepare; undoing it for abort is + * always possible. 
*/ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch b/SOURCES/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch new file mode 100644 index 0000000..1735dc0 --- /dev/null +++ b/SOURCES/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch @@ -0,0 +1,60 @@ +From 05452efd7e0fb0522099ae09a396f8f97e66014a Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:47 +0000 +Subject: [PATCH 06/20] block: Fix leak in bdrv_create_file_fallback() + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-7-mlevitsk@redhat.com> +Patchwork-id: 94229 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/6] block: Fix leak in bdrv_create_file_fallback() +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +@options is leaked by the first two return statements in this function. + +Note that blk_new_open() takes the reference to @options even on +failure, so all we need to do to fix the leak is to move the QDict +allocation down to where we actually need it. + +Reported-by: Coverity (CID 1419884) +Fixes: fd17146cd93d1704cd96d7c2757b325fc7aac6fd + ("block: Generic file creation fallback") +Signed-off-by: Max Reitz +Message-Id: <20200225155618.133412-1-mreitz@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit eeea1faa099f82328f5831cf252f8ce0a59a9287) +Signed-off-by: Maxim Levitsky + +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/block.c b/block.c +index 3beec7f..e1a4e38 100644 +--- a/block.c ++++ b/block.c +@@ -600,7 +600,7 @@ static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, + QemuOpts *opts, Error **errp) + { + BlockBackend *blk; +- QDict *options = qdict_new(); ++ QDict *options; + int64_t size = 0; + char *buf = NULL; + PreallocMode prealloc; +@@ -623,6 +623,7 @@ static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, + return -ENOTSUP; + } + ++ options = qdict_new(); + qdict_put_str(options, "driver", drv->format_name); + + blk = blk_new_open(filename, NULL, options, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Generic-file-creation-fallback.patch b/SOURCES/kvm-block-Generic-file-creation-fallback.patch new file mode 100644 index 0000000..a5dd1d7 --- /dev/null +++ b/SOURCES/kvm-block-Generic-file-creation-fallback.patch @@ -0,0 +1,227 @@ +From 882d09226b7f45b72c5b7763c4c4aba182e0f8a1 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:43 +0000 +Subject: [PATCH 02/20] block: Generic file creation fallback + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-3-mlevitsk@redhat.com> +Patchwork-id: 94227 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/6] block: Generic file creation fallback +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +If a protocol driver does not support image creation, we can see whether +maybe the file exists already. If so, just truncating it will be +sufficient. + +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-3-mreitz@redhat.com> +Signed-off-by: Max Reitz +(cherry picked from commit fd17146cd93d1704cd96d7c2757b325fc7aac6fd) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 147 insertions(+), 12 deletions(-) + +diff --git a/block.c b/block.c +index 2e5e8b6..3beec7f 100644 +--- a/block.c ++++ b/block.c +@@ -532,20 +532,139 @@ out: + return ret; + } + +-int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) ++/** ++ * Helper function for bdrv_create_file_fallback(): Resize @blk to at ++ * least the given @minimum_size. ++ * ++ * On success, return @blk's actual length. ++ * Otherwise, return -errno. ++ */ ++static int64_t create_file_fallback_truncate(BlockBackend *blk, ++ int64_t minimum_size, Error **errp) + { +- BlockDriver *drv; ++ Error *local_err = NULL; ++ int64_t size; ++ int ret; ++ ++ ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err); ++ if (ret < 0 && ret != -ENOTSUP) { ++ error_propagate(errp, local_err); ++ return ret; ++ } ++ ++ size = blk_getlength(blk); ++ if (size < 0) { ++ error_free(local_err); ++ error_setg_errno(errp, -size, ++ "Failed to inquire the new image file's length"); ++ return size; ++ } ++ ++ if (size < minimum_size) { ++ /* Need to grow the image, but we failed to do that */ ++ error_propagate(errp, local_err); ++ return -ENOTSUP; ++ } ++ ++ error_free(local_err); ++ local_err = NULL; ++ ++ return size; ++} ++ ++/** ++ * Helper function for bdrv_create_file_fallback(): Zero the first ++ * sector to remove any potentially pre-existing image header. 
++ */ ++static int create_file_fallback_zero_first_sector(BlockBackend *blk, ++ int64_t current_size, ++ Error **errp) ++{ ++ int64_t bytes_to_clear; ++ int ret; ++ ++ bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); ++ if (bytes_to_clear) { ++ ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, ++ "Failed to clear the new image's first sector"); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, ++ QemuOpts *opts, Error **errp) ++{ ++ BlockBackend *blk; ++ QDict *options = qdict_new(); ++ int64_t size = 0; ++ char *buf = NULL; ++ PreallocMode prealloc; + Error *local_err = NULL; + int ret; + ++ size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); ++ buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); ++ prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, ++ PREALLOC_MODE_OFF, &local_err); ++ g_free(buf); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return -EINVAL; ++ } ++ ++ if (prealloc != PREALLOC_MODE_OFF) { ++ error_setg(errp, "Unsupported preallocation mode '%s'", ++ PreallocMode_str(prealloc)); ++ return -ENOTSUP; ++ } ++ ++ qdict_put_str(options, "driver", drv->format_name); ++ ++ blk = blk_new_open(filename, NULL, options, ++ BDRV_O_RDWR | BDRV_O_RESIZE, errp); ++ if (!blk) { ++ error_prepend(errp, "Protocol driver '%s' does not support image " ++ "creation, and opening the image failed: ", ++ drv->format_name); ++ return -EINVAL; ++ } ++ ++ size = create_file_fallback_truncate(blk, size, errp); ++ if (size < 0) { ++ ret = size; ++ goto out; ++ } ++ ++ ret = create_file_fallback_zero_first_sector(blk, size, errp); ++ if (ret < 0) { ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ blk_unref(blk); ++ return ret; ++} ++ ++int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) ++{ ++ BlockDriver *drv; ++ + drv = bdrv_find_protocol(filename, true, errp); + if (drv == NULL) { + return 
-ENOENT; + } + +- ret = bdrv_create(drv, filename, opts, &local_err); +- error_propagate(errp, local_err); +- return ret; ++ if (drv->bdrv_co_create_opts) { ++ return bdrv_create(drv, filename, opts, errp); ++ } else { ++ return bdrv_create_file_fallback(filename, drv, opts, errp); ++ } + } + + /** +@@ -1422,6 +1541,24 @@ QemuOptsList bdrv_runtime_opts = { + }, + }; + ++static QemuOptsList fallback_create_opts = { ++ .name = "fallback-create-opts", ++ .head = QTAILQ_HEAD_INITIALIZER(fallback_create_opts.head), ++ .desc = { ++ { ++ .name = BLOCK_OPT_SIZE, ++ .type = QEMU_OPT_SIZE, ++ .help = "Virtual disk size" ++ }, ++ { ++ .name = BLOCK_OPT_PREALLOC, ++ .type = QEMU_OPT_STRING, ++ .help = "Preallocation mode (allowed values: off)" ++ }, ++ { /* end of list */ } ++ } ++}; ++ + /* + * Common part for opening disk images and files + * +@@ -5743,14 +5880,12 @@ void bdrv_img_create(const char *filename, const char *fmt, + return; + } + +- if (!proto_drv->create_opts) { +- error_setg(errp, "Protocol driver '%s' does not support image creation", +- proto_drv->format_name); +- return; +- } +- + create_opts = qemu_opts_append(create_opts, drv->create_opts); +- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); ++ if (proto_drv->create_opts) { ++ create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); ++ } else { ++ create_opts = qemu_opts_append(create_opts, &fallback_create_opts); ++ } + + /* Create parameter list with default values */ + opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch b/SOURCES/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch new file mode 100644 index 0000000..463501a --- /dev/null +++ b/SOURCES/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch @@ -0,0 +1,295 @@ +From 52cc1d1cd2f695c5761d65baec961d14552a79ed Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:16 
+0100 +Subject: [PATCH 5/6] block: Increase BB.in_flight for coroutine and sync + interfaces + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-6-kwolf@redhat.com> +Patchwork-id: 94600 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/6] block: Increase BB.in_flight for coroutine and sync interfaces +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +External callers of blk_co_*() and of the synchronous blk_*() functions +don't currently increase the BlockBackend.in_flight counter, but calls +from blk_aio_*() do, so there is an inconsistency whether the counter +has been increased or not. + +This patch moves the actual operations to static functions that can +later know they will always be called with in_flight increased exactly +once, even for external callers using the blk_co_*() coroutine +interfaces. + +If the public blk_co_*() interface is unused, remove it. + +Signed-off-by: Kevin Wolf +Message-Id: <20200407121259.21350-3-kwolf@redhat.com> +Reviewed-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit fbb92b6798894d3bf62fe3578d99fa62c720b242) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/block-backend.c | 103 ++++++++++++++++++++++++++++++++--------- + include/sysemu/block-backend.h | 1 - + 2 files changed, 80 insertions(+), 24 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 17b2e87..610dbfa 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1147,9 +1147,10 @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) + } + } + +-int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, +- unsigned int bytes, QEMUIOVector *qiov, +- BdrvRequestFlags flags) ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn ++blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, ++ QEMUIOVector *qiov, BdrvRequestFlags flags) + { + int ret; + BlockDriverState *bs; +@@ -1178,10 +1179,24 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, + return ret; + } + +-int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, +- unsigned int bytes, +- QEMUIOVector *qiov, size_t qiov_offset, +- BdrvRequestFlags flags) ++int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, ++ unsigned int bytes, QEMUIOVector *qiov, ++ BdrvRequestFlags flags) ++{ ++ int ret; ++ ++ blk_inc_in_flight(blk); ++ ret = blk_do_preadv(blk, offset, bytes, qiov, flags); ++ blk_dec_in_flight(blk); ++ ++ return ret; ++} ++ ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn ++blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, ++ QEMUIOVector *qiov, size_t qiov_offset, ++ BdrvRequestFlags flags) + { + int ret; + BlockDriverState *bs; +@@ -1214,6 +1229,20 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, + return ret; + } + ++int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, ++ unsigned int bytes, ++ QEMUIOVector *qiov, size_t qiov_offset, ++ BdrvRequestFlags flags) ++{ ++ int ret; ++ ++ 
blk_inc_in_flight(blk); ++ ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); ++ blk_dec_in_flight(blk); ++ ++ return ret; ++} ++ + int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) +@@ -1234,7 +1263,7 @@ static void blk_read_entry(void *opaque) + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + +- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, ++ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size, + qiov, rwco->flags); + aio_wait_kick(); + } +@@ -1244,8 +1273,8 @@ static void blk_write_entry(void *opaque) + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + +- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, +- qiov, rwco->flags); ++ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size, ++ qiov, 0, rwco->flags); + aio_wait_kick(); + } + +@@ -1262,6 +1291,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, + .ret = NOT_DONE, + }; + ++ blk_inc_in_flight(blk); + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + co_entry(&rwco); +@@ -1270,6 +1300,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, + bdrv_coroutine_enter(blk_bs(blk), co); + BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); + } ++ blk_dec_in_flight(blk); + + return rwco.ret; + } +@@ -1394,7 +1425,7 @@ static void blk_aio_read_entry(void *opaque) + } + + assert(qiov->size == acb->bytes); +- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, ++ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, + qiov, rwco->flags); + blk_aio_complete(acb); + } +@@ -1412,8 +1443,8 @@ static void blk_aio_write_entry(void *opaque) + } + + assert(!qiov || qiov->size == acb->bytes); +- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, +- qiov, rwco->flags); ++ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, ++ 
qiov, 0, rwco->flags); + blk_aio_complete(acb); + } + +@@ -1498,7 +1529,9 @@ void blk_aio_cancel_async(BlockAIOCB *acb) + bdrv_aio_cancel_async(acb); + } + +-int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn ++blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) + { + blk_wait_while_drained(blk); + +@@ -1514,8 +1547,7 @@ static void blk_ioctl_entry(void *opaque) + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + +- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, +- qiov->iov[0].iov_base); ++ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base); + aio_wait_kick(); + } + +@@ -1529,7 +1561,7 @@ static void blk_aio_ioctl_entry(void *opaque) + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + +- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); ++ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); + + blk_aio_complete(acb); + } +@@ -1540,7 +1572,9 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, + return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); + } + +-int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn ++blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes) + { + int ret; + +@@ -1559,7 +1593,7 @@ static void blk_aio_pdiscard_entry(void *opaque) + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + +- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); ++ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); + blk_aio_complete(acb); + } + +@@ -1571,12 +1605,23 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, + cb, opaque); + } + ++int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) ++{ ++ int ret; ++ ++ blk_inc_in_flight(blk); 
++ ret = blk_do_pdiscard(blk, offset, bytes); ++ blk_dec_in_flight(blk); ++ ++ return ret; ++} ++ + static void blk_pdiscard_entry(void *opaque) + { + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + +- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); ++ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size); + aio_wait_kick(); + } + +@@ -1585,7 +1630,8 @@ int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) + return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); + } + +-int blk_co_flush(BlockBackend *blk) ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn blk_do_flush(BlockBackend *blk) + { + blk_wait_while_drained(blk); + +@@ -1601,7 +1647,7 @@ static void blk_aio_flush_entry(void *opaque) + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + +- rwco->ret = blk_co_flush(rwco->blk); ++ rwco->ret = blk_do_flush(rwco->blk); + blk_aio_complete(acb); + } + +@@ -1611,10 +1657,21 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk, + return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); + } + ++int coroutine_fn blk_co_flush(BlockBackend *blk) ++{ ++ int ret; ++ ++ blk_inc_in_flight(blk); ++ ret = blk_do_flush(blk); ++ blk_dec_in_flight(blk); ++ ++ return ret; ++} ++ + static void blk_flush_entry(void *opaque) + { + BlkRwCo *rwco = opaque; +- rwco->ret = blk_co_flush(rwco->blk); ++ rwco->ret = blk_do_flush(rwco->blk); + aio_wait_kick(); + } + +diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h +index b198dec..9bbdbd6 100644 +--- a/include/sysemu/block-backend.h ++++ b/include/sysemu/block-backend.h +@@ -171,7 +171,6 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes, + BlockCompletionFunc *cb, void *opaque); + void blk_aio_cancel(BlockAIOCB *acb); + void blk_aio_cancel_async(BlockAIOCB *acb); +-int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf); + int 
blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); + BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch b/SOURCES/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch new file mode 100644 index 0000000..72c8986 --- /dev/null +++ b/SOURCES/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch @@ -0,0 +1,65 @@ +From f7dd953c2d0380cef3c351afb03d68c6fcda1dca Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:28 +0000 +Subject: [PATCH 08/20] block: Introduce 'bdrv_reopen_commit_post' step + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-3-kwolf@redhat.com> +Patchwork-id: 94278 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 02/13] block: Introduce 'bdrv_reopen_commit_post' step +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +From: Peter Krempa + +Add another step in the reopen process where the driver can execute code +after permission changes are committed. + +Signed-off-by: Peter Krempa +Message-Id: +Signed-off-by: Kevin Wolf +(cherry picked from commit 17e1e2be5f9e84e0298e28e70675655b43e225ea) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 9 +++++++++ + include/block/block_int.h | 1 + + 2 files changed, 10 insertions(+) + +diff --git a/block.c b/block.c +index e1a4e38..a744bb5 100644 +--- a/block.c ++++ b/block.c +@@ -3657,6 +3657,15 @@ cleanup_perm: + } + } + } ++ ++ if (ret == 0) { ++ QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { ++ BlockDriverState *bs = bs_entry->state.bs; ++ ++ if (bs->drv->bdrv_reopen_commit_post) ++ bs->drv->bdrv_reopen_commit_post(&bs_entry->state); ++ } ++ } + cleanup: + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + if (ret) { +diff --git a/include/block/block_int.h b/include/block/block_int.h +index dd033d0..c168690 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -123,6 +123,7 @@ struct BlockDriver { + int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp); + void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); ++ void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state); + void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); + void (*bdrv_join_options)(QDict *options, QDict *old_options); + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Make-bdrv_get_cumulative_perm-public.patch b/SOURCES/kvm-block-Make-bdrv_get_cumulative_perm-public.patch new file mode 100644 index 0000000..2f0f999 --- /dev/null +++ b/SOURCES/kvm-block-Make-bdrv_get_cumulative_perm-public.patch @@ -0,0 +1,67 @@ +From 294ab4c4963295556d12ac15150b48c8536175a7 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:33 +0000 +Subject: [PATCH 13/20] block: Make bdrv_get_cumulative_perm() public + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-8-kwolf@redhat.com> +Patchwork-id: 94287 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 07/13] block: Make bdrv_get_cumulative_perm() public +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. 
Berrange +RH-Acked-by: Peter Krempa + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-2-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit c7a0f2be8f95b220cdadbba9a9236eaf115951dc) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block.c | 6 ++---- + include/block/block_int.h | 3 +++ + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/block.c b/block.c +index 39e4647..354d388 100644 +--- a/block.c ++++ b/block.c +@@ -1850,8 +1850,6 @@ static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, + bool *tighten_restrictions, Error **errp); + static void bdrv_child_abort_perm_update(BdrvChild *c); + static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +-static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, +- uint64_t *shared_perm); + + typedef struct BlockReopenQueueEntry { + bool prepared; +@@ -2075,8 +2073,8 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, + } + } + +-static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, +- uint64_t *shared_perm) ++void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, ++ uint64_t *shared_perm) + { + BdrvChild *c; + uint64_t cumulative_perms = 0; +diff --git a/include/block/block_int.h b/include/block/block_int.h +index c168690..96e327b 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -1228,6 +1228,9 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, + void *opaque, Error **errp); + void bdrv_root_unref_child(BdrvChild *child); + ++void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, ++ uint64_t *shared_perm); ++ + /** + * Sets a BdrvChild's permissions. 
Avoid if the parent is a BDS; use + * bdrv_child_refresh_perms() instead and make the parent's +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch b/SOURCES/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch new file mode 100644 index 0000000..de85205 --- /dev/null +++ b/SOURCES/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch @@ -0,0 +1,117 @@ +From 9ba321e18a357c1a3a238ceee301bbb174f96eee Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:34 +0000 +Subject: [PATCH 14/20] block: Relax restrictions for blockdev-snapshot + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-9-kwolf@redhat.com> +Patchwork-id: 94285 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 08/13] block: Relax restrictions for blockdev-snapshot +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +blockdev-snapshot returned an error if the overlay was already in use, +which it defined as having any BlockBackend parent. This is in fact both +too strict (some parents can tolerate the change of visible data caused +by attaching a backing file) and too loose (some non-BlockBackend +parents may not be happy with it). + +One important use case that is prevented by the too strict check is live +storage migration with blockdev-mirror. Here, the target node is +usually opened without a backing file so that the active layer is +mirrored while its backing chain can be copied in the background. + +The backing chain should be attached to the mirror target node when +finalising the job, just before switching the users of the source node +to the new copy (at which point the mirror job still has a reference to +the node). drive-mirror did this automatically, but with blockdev-mirror +this is the job of the QMP client, so it needs a way to do this. + +blockdev-snapshot is the obvious way, so this patch makes it work in +this scenario. 
The new condition is that no parent uses CONSISTENT_READ +permissions. This will ensure that the operation will still be blocked +when the node is attached to the guest device, so blockdev-snapshot +remains safe. + +(For the sake of completeness, x-blockdev-reopen can be used to achieve +the same, however it is a big hammer, performs the graph change +completely unchecked and is still experimental. So even with the option +of using x-blockdev-reopen, there are reasons why blockdev-snapshot +should be able to perform this operation.) + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-3-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Tested-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit d29d3d1f80b3947fb26e7139645c83de66d146a9) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 14 ++++++++------ + tests/qemu-iotests/085.out | 4 ++-- + 2 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 4cd9a58..7918533 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1536,6 +1536,7 @@ static void external_snapshot_prepare(BlkActionState *common, + TransactionAction *action = common->action; + AioContext *aio_context; + AioContext *old_context; ++ uint64_t perm, shared; + int ret; + + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar +@@ -1656,16 +1657,17 @@ static void external_snapshot_prepare(BlkActionState *common, + goto out; + } + +- if (bdrv_has_blk(state->new_bs)) { ++ /* ++ * Allow attaching a backing file to an overlay that's already in use only ++ * if the parents don't assume that they are already seeing a valid image. ++ * (Specifically, allow it as a mirror target, which is write-only access.) 
++ */ ++ bdrv_get_cumulative_perm(state->new_bs, &perm, &shared); ++ if (perm & BLK_PERM_CONSISTENT_READ) { + error_setg(errp, "The overlay is already in use"); + goto out; + } + +- if (bdrv_op_is_blocked(state->new_bs, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, +- errp)) { +- goto out; +- } +- + if (state->new_bs->backing != NULL) { + error_setg(errp, "The overlay already has a backing image"); + goto out; +diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out +index bb50227..487d920 100644 +--- a/tests/qemu-iotests/085.out ++++ b/tests/qemu-iotests/085.out +@@ -82,7 +82,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ + === Invalid command - cannot create a snapshot using a file BDS === + + { 'execute': 'blockdev-snapshot', 'arguments': { 'node':'virtio0', 'overlay':'file_12' } } +-{"error": {"class": "GenericError", "desc": "The overlay does not support backing images"}} ++{"error": {"class": "GenericError", "desc": "The overlay is already in use"}} + + === Invalid command - snapshot node used as active layer === + +@@ -96,7 +96,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ + === Invalid command - snapshot node used as backing hd === + + { 'execute': 'blockdev-snapshot', 'arguments': { 'node': 'virtio0', 'overlay':'snap_11' } } +-{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'snap_12'"}} ++{"error": {"class": "GenericError", "desc": "The overlay is already in use"}} + + === Invalid command - snapshot node has a backing image === + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/SOURCES/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch new file mode 100644 index 0000000..ea796d5 --- /dev/null +++ b/SOURCES/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -0,0 +1,57 @@ +From 371d312300251c0dc24522607b06b7e47e760b53 Mon Sep 17 00:00:00 2001 +From: 
Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:32 +0000 +Subject: [PATCH 12/20] block: Versioned x-blockdev-reopen API with feature + flag + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-7-kwolf@redhat.com> +Patchwork-id: 94283 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 06/13] block: Versioned x-blockdev-reopen API with feature flag +Bugzilla: 1790482 1805143 +RH-Acked-by: Eric Blake +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +x-blockdev-reopen is still considered unstable upstream. libvirt needs +(a small subset of) it for incremental backups, though. + +Add a downstream-only feature flag that effectively makes this a +versioned interface. As long as the feature is present, we promise that +we won't change the interface incompatibly. Incompatible changes to the +command will require us to drop the feature flag (and possibly introduce +a new one if the new version is still not stable upstream). + +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + qapi/block-core.json | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 0cf68fe..a1e85b0 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -4202,10 +4202,17 @@ + # image does not have a default backing file name as part of its + # metadata. + # ++# Features: ++# @__com.redhat_rhel-av-8_2_0-api: Versioning the downstream interface while ++# it's still unstable upstream. As long as ++# this flag is present, this command will not ++# change incompatibly. 
++# + # Since: 4.0 + ## + { 'command': 'x-blockdev-reopen', +- 'data': 'BlockdevOptions', 'boxed': true } ++ 'data': 'BlockdevOptions', 'boxed': true, ++ 'features': [ '__com.redhat_rhel-av-8_2_0-api' ] } + + ## + # @blockdev-del: +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch b/SOURCES/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch new file mode 100644 index 0000000..9d49cfa --- /dev/null +++ b/SOURCES/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch @@ -0,0 +1,158 @@ +From 6cc456c4c1e6557fdc7e138e8ef8171b71609222 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:15 +0100 +Subject: [PATCH 4/6] block-backend: Reorder flush/pdiscard function + definitions + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-5-kwolf@redhat.com> +Patchwork-id: 94598 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/6] block-backend: Reorder flush/pdiscard function definitions +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +Move all variants of the flush/pdiscard functions to a single place and +put the blk_co_*() version first because it is called by all other +variants (and will become static in the next patch). + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Max Reitz +Message-Id: <20200407121259.21350-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 564806c529d4e0acad209b1e5b864a8886092f1f) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/block-backend.c | 92 +++++++++++++++++++++++++-------------------------- + 1 file changed, 46 insertions(+), 46 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 8b8f2a8..17b2e87 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1488,38 +1488,6 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, + blk_aio_write_entry, flags, cb, opaque); + } + +-static void blk_aio_flush_entry(void *opaque) +-{ +- BlkAioEmAIOCB *acb = opaque; +- BlkRwCo *rwco = &acb->rwco; +- +- rwco->ret = blk_co_flush(rwco->blk); +- blk_aio_complete(acb); +-} +- +-BlockAIOCB *blk_aio_flush(BlockBackend *blk, +- BlockCompletionFunc *cb, void *opaque) +-{ +- return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); +-} +- +-static void blk_aio_pdiscard_entry(void *opaque) +-{ +- BlkAioEmAIOCB *acb = opaque; +- BlkRwCo *rwco = &acb->rwco; +- +- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); +- blk_aio_complete(acb); +-} +- +-BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, +- int64_t offset, int bytes, +- BlockCompletionFunc *cb, void *opaque) +-{ +- return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, +- cb, opaque); +-} +- + void blk_aio_cancel(BlockAIOCB *acb) + { + bdrv_aio_cancel(acb); +@@ -1586,6 +1554,37 @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) + return bdrv_co_pdiscard(blk->root, offset, bytes); + } + ++static void blk_aio_pdiscard_entry(void *opaque) ++{ ++ BlkAioEmAIOCB *acb = opaque; ++ BlkRwCo *rwco = &acb->rwco; ++ ++ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); ++ blk_aio_complete(acb); ++} ++ ++BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, ++ int64_t offset, int bytes, ++ BlockCompletionFunc *cb, void *opaque) ++{ ++ return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, ++ cb, opaque); ++} ++ ++static void blk_pdiscard_entry(void *opaque) ++{ ++ BlkRwCo *rwco = opaque; ++ 
QEMUIOVector *qiov = rwco->iobuf; ++ ++ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); ++ aio_wait_kick(); ++} ++ ++int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) ++{ ++ return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); ++} ++ + int blk_co_flush(BlockBackend *blk) + { + blk_wait_while_drained(blk); +@@ -1597,6 +1596,21 @@ int blk_co_flush(BlockBackend *blk) + return bdrv_co_flush(blk_bs(blk)); + } + ++static void blk_aio_flush_entry(void *opaque) ++{ ++ BlkAioEmAIOCB *acb = opaque; ++ BlkRwCo *rwco = &acb->rwco; ++ ++ rwco->ret = blk_co_flush(rwco->blk); ++ blk_aio_complete(acb); ++} ++ ++BlockAIOCB *blk_aio_flush(BlockBackend *blk, ++ BlockCompletionFunc *cb, void *opaque) ++{ ++ return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); ++} ++ + static void blk_flush_entry(void *opaque) + { + BlkRwCo *rwco = opaque; +@@ -2083,20 +2097,6 @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, + return bdrv_truncate(blk->root, offset, exact, prealloc, errp); + } + +-static void blk_pdiscard_entry(void *opaque) +-{ +- BlkRwCo *rwco = opaque; +- QEMUIOVector *qiov = rwco->iobuf; +- +- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); +- aio_wait_kick(); +-} +- +-int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +-{ +- return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); +-} +- + int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, + int64_t pos, int size) + { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch b/SOURCES/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch new file mode 100644 index 0000000..45f506c --- /dev/null +++ b/SOURCES/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch @@ -0,0 +1,130 @@ +From aefff389c4d11bd69180db7177135c4645a9b1bd Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:46 +0000 +Subject: [PATCH 13/18] 
block/backup-top: Don't acquire context while dropping + top + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-7-slp@redhat.com> +Patchwork-id: 93759 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/9] block/backup-top: Don't acquire context while dropping top +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +All paths that lead to bdrv_backup_top_drop(), except for the call +from backup_clean(), imply that the BDS AioContext has already been +acquired, so doing it there too can potentially lead to QEMU hanging +on AIO_WAIT_WHILE(). + +An easy way to trigger this situation is by issuing a two actions +transaction, with a proper and a bogus blockdev-backup, so the second +one will trigger a rollback. This will trigger a hang with an stack +trace like this one: + + #0 0x00007fb680c75016 in __GI_ppoll (fds=0x55e74580f7c0, nfds=1, timeout=, + timeout@entry=0x0, sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:39 + #1 0x000055e743386e09 in ppoll (__ss=0x0, __timeout=0x0, __nfds=, __fds=) + at /usr/include/bits/poll2.h:77 + #2 0x000055e743386e09 in qemu_poll_ns + (fds=, nfds=, timeout=) at util/qemu-timer.c:336 + #3 0x000055e743388dc4 in aio_poll (ctx=0x55e7458925d0, blocking=blocking@entry=true) + at util/aio-posix.c:669 + #4 0x000055e743305dea in bdrv_flush (bs=bs@entry=0x55e74593c0d0) at block/io.c:2878 + #5 0x000055e7432be58e in bdrv_close (bs=0x55e74593c0d0) at block.c:4017 + #6 0x000055e7432be58e in bdrv_delete (bs=) at block.c:4262 + #7 0x000055e7432be58e in bdrv_unref (bs=bs@entry=0x55e74593c0d0) at block.c:5644 + #8 0x000055e743316b9b in bdrv_backup_top_drop (bs=bs@entry=0x55e74593c0d0) at block/backup-top.c:273 + #9 0x000055e74331461f in backup_job_create + (job_id=0x0, bs=bs@entry=0x55e7458d5820, target=target@entry=0x55e74589f640, speed=0, sync_mode=MIRROR_SYNC_MODE_FULL, 
sync_bitmap=sync_bitmap@entry=0x0, bitmap_mode=BITMAP_SYNC_MODE_ON_SUCCESS, compress=false, filter_node_name=0x0, on_source_error=BLOCKDEV_ON_ERROR_REPORT, on_target_error=BLOCKDEV_ON_ERROR_REPORT, creation_flags=0, cb=0x0, opaque=0x0, txn=0x0, errp=0x7ffddfd1efb0) at block/backup.c:478 + #10 0x000055e74315bc52 in do_backup_common + (backup=backup@entry=0x55e746c066d0, bs=bs@entry=0x55e7458d5820, target_bs=target_bs@entry=0x55e74589f640, aio_context=aio_context@entry=0x55e7458a91e0, txn=txn@entry=0x0, errp=errp@entry=0x7ffddfd1efb0) + at blockdev.c:3580 + #11 0x000055e74315c37c in do_blockdev_backup + (backup=backup@entry=0x55e746c066d0, txn=0x0, errp=errp@entry=0x7ffddfd1efb0) + at /usr/src/debug/qemu-kvm-4.2.0-2.module+el8.2.0+5135+ed3b2489.x86_64/./qapi/qapi-types-block-core.h:1492 + #12 0x000055e74315c449 in blockdev_backup_prepare (common=0x55e746a8de90, errp=0x7ffddfd1f018) + at blockdev.c:1885 + #13 0x000055e743160152 in qmp_transaction + (dev_list=, has_props=, props=0x55e7467fe2c0, errp=errp@entry=0x7ffddfd1f088) at blockdev.c:2340 + #14 0x000055e743287ff5 in qmp_marshal_transaction + (args=, ret=, errp=0x7ffddfd1f0f8) + at qapi/qapi-commands-transaction.c:44 + #15 0x000055e74333de6c in do_qmp_dispatch + (errp=0x7ffddfd1f0f0, allow_oob=, request=, cmds=0x55e743c28d60 ) at qapi/qmp-dispatch.c:132 + #16 0x000055e74333de6c in qmp_dispatch + (cmds=0x55e743c28d60 , request=, allow_oob=) + at qapi/qmp-dispatch.c:175 + #17 0x000055e74325c061 in monitor_qmp_dispatch (mon=0x55e745908030, req=) + at monitor/qmp.c:145 + #18 0x000055e74325c6fa in monitor_qmp_bh_dispatcher (data=) at monitor/qmp.c:234 + #19 0x000055e743385866 in aio_bh_call (bh=0x55e745807ae0) at util/async.c:117 + #20 0x000055e743385866 in aio_bh_poll (ctx=ctx@entry=0x55e7458067a0) at util/async.c:117 + #21 0x000055e743388c54 in aio_dispatch (ctx=0x55e7458067a0) at util/aio-posix.c:459 + #22 0x000055e743385742 in aio_ctx_dispatch + (source=, callback=, user_data=) at util/async.c:260 + #23 
0x00007fb68543e67d in g_main_dispatch (context=0x55e745893a40) at gmain.c:3176 + #24 0x00007fb68543e67d in g_main_context_dispatch (context=context@entry=0x55e745893a40) at gmain.c:3829 + #25 0x000055e743387d08 in glib_pollfds_poll () at util/main-loop.c:219 + #26 0x000055e743387d08 in os_host_main_loop_wait (timeout=) at util/main-loop.c:242 + #27 0x000055e743387d08 in main_loop_wait (nonblocking=) at util/main-loop.c:518 + #28 0x000055e74316a3c1 in main_loop () at vl.c:1828 + #29 0x000055e743016a72 in main (argc=, argv=, envp=) + at vl.c:4504 + +Fix this by not acquiring the AioContext there, and ensuring all paths +leading to it have it already acquired (backup_clean()). + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1782111 +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 0abf2581717a19d9749d5c2ff8acd0ac203452c2) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + block/backup-top.c | 5 ----- + block/backup.c | 3 +++ + 2 files changed, 3 insertions(+), 5 deletions(-) + +diff --git a/block/backup-top.c b/block/backup-top.c +index 818d3f2..b8d863f 100644 +--- a/block/backup-top.c ++++ b/block/backup-top.c +@@ -255,9 +255,6 @@ append_failed: + void bdrv_backup_top_drop(BlockDriverState *bs) + { + BDRVBackupTopState *s = bs->opaque; +- AioContext *aio_context = bdrv_get_aio_context(bs); +- +- aio_context_acquire(aio_context); + + bdrv_drained_begin(bs); + +@@ -271,6 +268,4 @@ void bdrv_backup_top_drop(BlockDriverState *bs) + bdrv_drained_end(bs); + + bdrv_unref(bs); +- +- aio_context_release(aio_context); + } +diff --git a/block/backup.c b/block/backup.c +index cf62b1a..1383e21 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -135,8 +135,11 @@ static void backup_abort(Job *job) + static void backup_clean(Job *job) + { + BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); ++ AioContext *aio_context = bdrv_get_aio_context(s->backup_top); + ++ aio_context_acquire(aio_context); + 
bdrv_backup_top_drop(s->backup_top); ++ aio_context_release(aio_context); + } + + void backup_do_checkpoint(BlockJob *job, Error **errp) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch b/SOURCES/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch new file mode 100644 index 0000000..745be9f --- /dev/null +++ b/SOURCES/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch @@ -0,0 +1,114 @@ +From 1e0582ad34e77a060e2067a35992979c9eae82c9 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:31 +0000 +Subject: [PATCH 11/20] block: bdrv_reopen() with backing file in different + AioContext + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-6-kwolf@redhat.com> +Patchwork-id: 94282 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 05/13] block: bdrv_reopen() with backing file in different AioContext +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +This patch allows bdrv_reopen() (and therefore the x-blockdev-reopen QMP +command) to attach a node as the new backing file even if the node is in +a different AioContext than the parent if one of both nodes can be moved +to the AioContext of the other node. + +Signed-off-by: Kevin Wolf +Tested-by: Peter Krempa +Message-Id: <20200306141413.30705-3-kwolf@redhat.com> +Reviewed-by: Alberto Garcia +Signed-off-by: Kevin Wolf +(cherry picked from commit 1de6b45fb5c1489b450df7d1a4c692bba9678ce6) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 32 ++++++++++++++++++++++++++------ + tests/qemu-iotests/245 | 8 +++----- + 2 files changed, 29 insertions(+), 11 deletions(-) + +diff --git a/block.c b/block.c +index a744bb5..39e4647 100644 +--- a/block.c ++++ b/block.c +@@ -3749,6 +3749,29 @@ static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs, + *shared = cumulative_shared_perms; + } + ++static bool bdrv_reopen_can_attach(BlockDriverState *parent, ++ BdrvChild *child, ++ BlockDriverState *new_child, ++ Error **errp) ++{ ++ AioContext *parent_ctx = bdrv_get_aio_context(parent); ++ AioContext *child_ctx = bdrv_get_aio_context(new_child); ++ GSList *ignore; ++ bool ret; ++ ++ ignore = g_slist_prepend(NULL, child); ++ ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL); ++ g_slist_free(ignore); ++ if (ret) { ++ return ret; ++ } ++ ++ ignore = g_slist_prepend(NULL, child); ++ ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp); ++ g_slist_free(ignore); ++ return ret; ++} ++ + /* + * Take a BDRVReopenState and check if the value of 'backing' in the + * reopen_state->options QDict is valid or not. +@@ -3800,14 +3823,11 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, + } + + /* +- * TODO: before removing the x- prefix from x-blockdev-reopen we +- * should move the new backing file into the right AioContext +- * instead of returning an error. ++ * Check AioContext compatibility so that the bdrv_set_backing_hd() call in ++ * bdrv_reopen_commit() won't fail. 
+ */ + if (new_backing_bs) { +- if (bdrv_get_aio_context(new_backing_bs) != bdrv_get_aio_context(bs)) { +- error_setg(errp, "Cannot use a new backing file " +- "with a different AioContext"); ++ if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) { + return -EINVAL; + } + } +diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 +index f69c2fa..919131d 100644 +--- a/tests/qemu-iotests/245 ++++ b/tests/qemu-iotests/245 +@@ -1013,18 +1013,16 @@ class TestBlockdevReopen(iotests.QMPTestCase): + # neither of them can switch to the other AioContext + def test_iothreads_error(self): + self.run_test_iothreads('iothread0', 'iothread1', +- "Cannot use a new backing file with a different AioContext") ++ "Cannot change iothread of active block backend") + + def test_iothreads_compatible_users(self): + self.run_test_iothreads('iothread0', 'iothread0') + + def test_iothreads_switch_backing(self): +- self.run_test_iothreads('iothread0', None, +- "Cannot use a new backing file with a different AioContext") ++ self.run_test_iothreads('iothread0', None) + + def test_iothreads_switch_overlay(self): +- self.run_test_iothreads(None, 'iothread0', +- "Cannot use a new backing file with a different AioContext") ++ self.run_test_iothreads(None, 'iothread0') + + if __name__ == '__main__': + iotests.main(supported_fmts=["qcow2"], +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch b/SOURCES/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch new file mode 100644 index 0000000..378ae1a --- /dev/null +++ b/SOURCES/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch @@ -0,0 +1,78 @@ +From 4ef2c464a54b0b618d933641ac0a7012e629fed9 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:42 +0000 +Subject: [PATCH 01/20] block/nbd: Fix hang in .bdrv_close() + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-2-mlevitsk@redhat.com> +Patchwork-id: 94224 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/6] block/nbd: Fix hang in 
.bdrv_close() +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +When nbd_close() is called from a coroutine, the connection_co never +gets to run, and thus nbd_teardown_connection() hangs. + +This is because aio_co_enter() only puts the connection_co into the main +coroutine's wake-up queue, so this main coroutine needs to yield and +wait for connection_co to terminate. + +Suggested-by: Kevin Wolf +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-2-mreitz@redhat.com> +Reviewed-by: Eric Blake +Reviewed-by: Maxim Levitsky +Signed-off-by: Max Reitz +(cherry picked from commit 78c81a3f108870d325b0a39d88711366afe6f703) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. de Paula +--- + block/nbd.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/block/nbd.c b/block/nbd.c +index 5f18f78..a73f0d9 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -70,6 +70,7 @@ typedef struct BDRVNBDState { + CoMutex send_mutex; + CoQueue free_sema; + Coroutine *connection_co; ++ Coroutine *teardown_co; + QemuCoSleepState *connection_co_sleep_ns_state; + bool drained; + bool wait_drained_end; +@@ -203,7 +204,15 @@ static void nbd_teardown_connection(BlockDriverState *bs) + qemu_co_sleep_wake(s->connection_co_sleep_ns_state); + } + } +- BDRV_POLL_WHILE(bs, s->connection_co); ++ if (qemu_in_coroutine()) { ++ s->teardown_co = qemu_coroutine_self(); ++ /* connection_co resumes us when it terminates */ ++ qemu_coroutine_yield(); ++ s->teardown_co = NULL; ++ } else { ++ BDRV_POLL_WHILE(bs, s->connection_co); ++ } ++ assert(!s->connection_co); + } + + static bool nbd_client_connecting(BDRVNBDState *s) +@@ -395,6 +404,9 @@ static coroutine_fn void nbd_connection_entry(void *opaque) + s->ioc = NULL; + } + ++ if (s->teardown_co) { ++ aio_co_wake(s->teardown_co); ++ } + aio_wait_kick(); + } + +-- +1.8.3.1 + diff --git 
a/SOURCES/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch b/SOURCES/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch new file mode 100644 index 0000000..43f9ffc --- /dev/null +++ b/SOURCES/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch @@ -0,0 +1,328 @@ +From 25c528b30f8774f33e957d14060805398da524d9 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Thu, 26 Mar 2020 20:23:06 +0000 +Subject: [PATCH 1/4] block: pass BlockDriver reference to the .bdrv_co_create + +RH-Author: Maxim Levitsky +Message-id: <20200326202307.9264-2-mlevitsk@redhat.com> +Patchwork-id: 94447 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] block: pass BlockDriver reference to the .bdrv_co_create +Bugzilla: 1816007 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz + +This will allow the reuse of a single generic .bdrv_co_create +implementation for several drivers. +No functional changes. + +Signed-off-by: Maxim Levitsky +Message-Id: <20200326011218.29230-2-mlevitsk@redhat.com> +Reviewed-by: Denis V. Lunev +Signed-off-by: Max Reitz +(cherry picked from commit b92902dfeaafbceaf744ab7473f2d070284f6172) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 3 ++- + block/crypto.c | 3 ++- + block/file-posix.c | 4 +++- + block/file-win32.c | 4 +++- + block/gluster.c | 3 ++- + block/nfs.c | 4 +++- + block/parallels.c | 3 ++- + block/qcow.c | 3 ++- + block/qcow2.c | 4 +++- + block/qed.c | 3 ++- + block/raw-format.c | 4 +++- + block/rbd.c | 3 ++- + block/sheepdog.c | 4 +++- + block/ssh.c | 4 +++- + block/vdi.c | 4 +++- + block/vhdx.c | 3 ++- + block/vmdk.c | 4 +++- + block/vpc.c | 6 ++++-- + include/block/block_int.h | 3 ++- + 19 files changed, 49 insertions(+), 20 deletions(-) + +diff --git a/block.c b/block.c +index ec29b1e..f9a1c5b 100644 +--- a/block.c ++++ b/block.c +@@ -482,7 +482,8 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque) + CreateCo *cco = opaque; + assert(cco->drv); + +- ret = cco->drv->bdrv_co_create_opts(cco->filename, cco->opts, &local_err); ++ ret = cco->drv->bdrv_co_create_opts(cco->drv, ++ cco->filename, cco->opts, &local_err); + error_propagate(&cco->err, local_err); + cco->ret = ret; + } +diff --git a/block/crypto.c b/block/crypto.c +index 2482383..970d463 100644 +--- a/block/crypto.c ++++ b/block/crypto.c +@@ -539,7 +539,8 @@ fail: + return ret; + } + +-static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename, ++static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/file-posix.c b/block/file-posix.c +index fd29372..a2e0a74 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -2346,7 +2346,9 @@ out: + return result; + } + +-static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn raw_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions options; +diff --git a/block/file-win32.c b/block/file-win32.c +index 77e8ff7..1585983 100644 +--- a/block/file-win32.c ++++ b/block/file-win32.c +@@ -588,7 +588,9 @@ static int 
raw_co_create(BlockdevCreateOptions *options, Error **errp) + return 0; + } + +-static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn raw_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions options; +diff --git a/block/gluster.c b/block/gluster.c +index 4fa4a77..0aa1f2c 100644 +--- a/block/gluster.c ++++ b/block/gluster.c +@@ -1130,7 +1130,8 @@ out: + return ret; + } + +-static int coroutine_fn qemu_gluster_co_create_opts(const char *filename, ++static int coroutine_fn qemu_gluster_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/nfs.c b/block/nfs.c +index 9a6311e..cc2413d 100644 +--- a/block/nfs.c ++++ b/block/nfs.c +@@ -662,7 +662,9 @@ out: + return ret; + } + +-static int coroutine_fn nfs_file_co_create_opts(const char *url, QemuOpts *opts, ++static int coroutine_fn nfs_file_co_create_opts(BlockDriver *drv, ++ const char *url, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions *create_options; +diff --git a/block/parallels.c b/block/parallels.c +index 7a01997..6d4ed77 100644 +--- a/block/parallels.c ++++ b/block/parallels.c +@@ -609,7 +609,8 @@ exit: + goto out; + } + +-static int coroutine_fn parallels_co_create_opts(const char *filename, ++static int coroutine_fn parallels_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/qcow.c b/block/qcow.c +index fce8989..8973e4e 100644 +--- a/block/qcow.c ++++ b/block/qcow.c +@@ -934,7 +934,8 @@ exit: + return ret; + } + +-static int coroutine_fn qcow_co_create_opts(const char *filename, ++static int coroutine_fn qcow_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, Error **errp) + { + BlockdevCreateOptions *create_options = NULL; +diff --git a/block/qcow2.c b/block/qcow2.c +index 83b1fc0..71067c6 100644 +--- a/block/qcow2.c ++++ 
b/block/qcow2.c +@@ -3558,7 +3558,9 @@ out: + return ret; + } + +-static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions *create_options = NULL; +diff --git a/block/qed.c b/block/qed.c +index d8c4e5f..1af9b3c 100644 +--- a/block/qed.c ++++ b/block/qed.c +@@ -720,7 +720,8 @@ out: + return ret; + } + +-static int coroutine_fn bdrv_qed_co_create_opts(const char *filename, ++static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/raw-format.c b/block/raw-format.c +index 3a76ec7..93b25e1 100644 +--- a/block/raw-format.c ++++ b/block/raw-format.c +@@ -419,7 +419,9 @@ static int raw_has_zero_init_truncate(BlockDriverState *bs) + return bdrv_has_zero_init_truncate(bs->file->bs); + } + +-static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn raw_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + return bdrv_create_file(filename, opts, errp); +diff --git a/block/rbd.c b/block/rbd.c +index 027cbcc..8847259 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -425,7 +425,8 @@ static int qemu_rbd_co_create(BlockdevCreateOptions *options, Error **errp) + return qemu_rbd_do_create(options, NULL, NULL, errp); + } + +-static int coroutine_fn qemu_rbd_co_create_opts(const char *filename, ++static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/sheepdog.c b/block/sheepdog.c +index cfa8433..a8a7e32 100644 +--- a/block/sheepdog.c ++++ b/block/sheepdog.c +@@ -2157,7 +2157,9 @@ out: + return ret; + } + +-static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn 
sd_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions *create_options = NULL; +diff --git a/block/ssh.c b/block/ssh.c +index b4375cf..84e9282 100644 +--- a/block/ssh.c ++++ b/block/ssh.c +@@ -963,7 +963,9 @@ fail: + return ret; + } + +-static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn ssh_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions *create_options; +diff --git a/block/vdi.c b/block/vdi.c +index 0142da7..e1a11f2 100644 +--- a/block/vdi.c ++++ b/block/vdi.c +@@ -896,7 +896,9 @@ static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options, + return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp); + } + +-static int coroutine_fn vdi_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn vdi_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + QDict *qdict = NULL; +diff --git a/block/vhdx.c b/block/vhdx.c +index f02d261..33e57cd 100644 +--- a/block/vhdx.c ++++ b/block/vhdx.c +@@ -2046,7 +2046,8 @@ delete_and_exit: + return ret; + } + +-static int coroutine_fn vhdx_co_create_opts(const char *filename, ++static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/vmdk.c b/block/vmdk.c +index 20e909d..eb726f2 100644 +--- a/block/vmdk.c ++++ b/block/vmdk.c +@@ -2588,7 +2588,9 @@ exit: + return blk; + } + +-static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + Error *local_err = NULL; +diff --git a/block/vpc.c b/block/vpc.c +index a655502..6df75e2 100644 +--- a/block/vpc.c ++++ b/block/vpc.c +@@ -1089,8 +1089,10 @@ out: + return ret; + } + 
+-static int coroutine_fn vpc_co_create_opts(const char *filename, +- QemuOpts *opts, Error **errp) ++static int coroutine_fn vpc_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, ++ Error **errp) + { + BlockdevCreateOptions *create_options = NULL; + QDict *qdict; +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 96e327b..7ff81be 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -136,7 +136,8 @@ struct BlockDriver { + void (*bdrv_close)(BlockDriverState *bs); + int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts, + Error **errp); +- int coroutine_fn (*bdrv_co_create_opts)(const char *filename, ++ int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp); + int (*bdrv_make_empty)(BlockDriverState *bs); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch b/SOURCES/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch new file mode 100644 index 0000000..2c27fd2 --- /dev/null +++ b/SOURCES/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch @@ -0,0 +1,78 @@ +From ec5408763c49cd0b63ee324bdc38a429ed1adeee Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:29 +0000 +Subject: [PATCH 09/20] block/qcow2: Move bitmap reopen into + bdrv_reopen_commit_post + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-4-kwolf@redhat.com> +Patchwork-id: 94280 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 03/13] block/qcow2: Move bitmap reopen into bdrv_reopen_commit_post +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +From: Peter Krempa + +The bitmap code requires writing the 'file' child when the qcow2 driver +is reopened in read-write mode. + +If the 'file' child is being reopened due to a permissions change, the +modification is commited yet when qcow2_reopen_commit is called. 
This +means that any attempt to write the 'file' child will end with EBADFD +as the original fd was already closed. + +Moving bitmap reopening to the new callback which is called after +permission modifications are commited fixes this as the file descriptor +will be replaced with the correct one. + +The above problem manifests itself when reopening 'qcow2' format layer +which uses a 'file-posix' file child which was opened with the +'auto-read-only' property set. + +Signed-off-by: Peter Krempa +Message-Id: +Signed-off-by: Kevin Wolf +(cherry picked from commit 65eb7c85a3e62529e2bad782e94d5a7b11dd5a92) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/qcow2.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 7c18721..83b1fc0 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -1881,6 +1881,11 @@ fail: + static void qcow2_reopen_commit(BDRVReopenState *state) + { + qcow2_update_options_commit(state->bs, state->opaque); ++ g_free(state->opaque); ++} ++ ++static void qcow2_reopen_commit_post(BDRVReopenState *state) ++{ + if (state->flags & BDRV_O_RDWR) { + Error *local_err = NULL; + +@@ -1895,7 +1900,6 @@ static void qcow2_reopen_commit(BDRVReopenState *state) + bdrv_get_node_name(state->bs)); + } + } +- g_free(state->opaque); + } + + static void qcow2_reopen_abort(BDRVReopenState *state) +@@ -5492,6 +5496,7 @@ BlockDriver bdrv_qcow2 = { + .bdrv_close = qcow2_close, + .bdrv_reopen_prepare = qcow2_reopen_prepare, + .bdrv_reopen_commit = qcow2_reopen_commit, ++ .bdrv_reopen_commit_post = qcow2_reopen_commit_post, + .bdrv_reopen_abort = qcow2_reopen_abort, + .bdrv_join_options = qcow2_join_options, + .bdrv_child_perm = bdrv_format_default_perms, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-block-trickle-down-the-fallback-image-creation-funct.patch b/SOURCES/kvm-block-trickle-down-the-fallback-image-creation-funct.patch new file mode 100644 index 0000000..5ba1521 --- /dev/null +++ 
b/SOURCES/kvm-block-trickle-down-the-fallback-image-creation-funct.patch @@ -0,0 +1,296 @@ +From a1f7b929ae1fe6fa424c520c3a5eb497333b0fd9 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Thu, 26 Mar 2020 20:23:07 +0000 +Subject: [PATCH 2/4] block: trickle down the fallback image creation function + use to the block drivers + +RH-Author: Maxim Levitsky +Message-id: <20200326202307.9264-3-mlevitsk@redhat.com> +Patchwork-id: 94446 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] block: trickle down the fallback image creation function use to the block drivers +Bugzilla: 1816007 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz + +Instead of checking the .bdrv_co_create_opts to see if we need the +fallback, just implement the .bdrv_co_create_opts in the drivers that +need it. + +This way we don't break various places that need to know if the +underlying protocol/format really supports image creation, and this way +we still allow some drivers to not support image creation. + +Fixes: fd17146cd93d1704cd96d7c2757b325fc7aac6fd +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1816007 + +Note that technically this driver reverts the image creation fallback +for the vxhs driver since I don't have a means to test it, and IMHO it +is better to leave it not supported as it was prior to generic image +creation patches. + +Also drop iscsi_create_opts which was left accidentally. + +Signed-off-by: Maxim Levitsky +Message-Id: <20200326011218.29230-3-mlevitsk@redhat.com> +Reviewed-by: Denis V. Lunev +[mreitz: Fixed alignment, and moved bdrv_co_create_opts_simple() and + bdrv_create_opts_simple from block.h into block_int.h] +Signed-off-by: Max Reitz +(cherry picked from commit 5a5e7f8cd86b7ced0732b1b6e28c82baa65b09c9) + +Contextual conflicts in block.c and include/block/block_int.h + +(conflict in block.c by default shows as functional but +with --diff-algorithm=patience it becomes a contextual conflict) + +... 
+001/2:[----] [--] 'block: pass BlockDriver reference to the .bdrv_co_create' +002/2:[0014] [FC] 'block: trickle down the fallback image creation function use to the block drivers' +... +002/2: 'meld <(git show 5a5e7f8^\!) <(git show 6d3bca5^\!)' + +So now running: +meld <(git show 5a5e7f8^\! --diff-algorithm=patience) <(git show 6d3bca5^\! --diff-algorithm=patience) + +shows no contextual conflicts +It is mostly due to missing commit f6dc1c31d3801dcbdf0c56574f9ff4f05180810c +Thanks to Max Reitz for helping me with this. + +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. de Paula +--- + block.c | 35 ++++++++++++++++++++--------------- + block/file-posix.c | 7 ++++++- + block/iscsi.c | 16 ++++------------ + block/nbd.c | 6 ++++++ + block/nvme.c | 3 +++ + include/block/block.h | 1 + + include/block/block_int.h | 11 +++++++++++ + 7 files changed, 51 insertions(+), 28 deletions(-) + +diff --git a/block.c b/block.c +index f9a1c5b..ba3b40d7 100644 +--- a/block.c ++++ b/block.c +@@ -597,8 +597,15 @@ static int create_file_fallback_zero_first_sector(BlockBackend *blk, + return 0; + } + +-static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, +- QemuOpts *opts, Error **errp) ++/** ++ * Simple implementation of bdrv_co_create_opts for protocol drivers ++ * which only support creation via opening a file ++ * (usually existing raw storage device) ++ */ ++int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, ++ Error **errp) + { + BlockBackend *blk; + QDict *options; +@@ -662,11 +669,7 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) + return -ENOENT; + } + +- if (drv->bdrv_co_create_opts) { +- return bdrv_create(drv, filename, opts, errp); +- } else { +- return bdrv_create_file_fallback(filename, drv, opts, errp); +- } ++ return bdrv_create(drv, filename, opts, errp); + } + + /** +@@ -1543,9 +1546,9 @@ QemuOptsList bdrv_runtime_opts = { + }, + }; + +-static 
QemuOptsList fallback_create_opts = { +- .name = "fallback-create-opts", +- .head = QTAILQ_HEAD_INITIALIZER(fallback_create_opts.head), ++QemuOptsList bdrv_create_opts_simple = { ++ .name = "simple-create-opts", ++ .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, +@@ -5910,13 +5913,15 @@ void bdrv_img_create(const char *filename, const char *fmt, + return; + } + +- create_opts = qemu_opts_append(create_opts, drv->create_opts); +- if (proto_drv->create_opts) { +- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); +- } else { +- create_opts = qemu_opts_append(create_opts, &fallback_create_opts); ++ if (!proto_drv->create_opts) { ++ error_setg(errp, "Protocol driver '%s' does not support image creation", ++ proto_drv->format_name); ++ return; + } + ++ create_opts = qemu_opts_append(create_opts, drv->create_opts); ++ create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); ++ + /* Create parameter list with default values */ + opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); +diff --git a/block/file-posix.c b/block/file-posix.c +index a2e0a74..dd18d40 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -3432,6 +3432,8 @@ static BlockDriver bdrv_host_device = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .mutable_opts = mutable_opts, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, +@@ -3558,10 +3560,11 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = 
&bdrv_create_opts_simple, + .mutable_opts = mutable_opts, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + +- + .bdrv_co_preadv = raw_co_preadv, + .bdrv_co_pwritev = raw_co_pwritev, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, +@@ -3690,6 +3693,8 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .mutable_opts = mutable_opts, + + .bdrv_co_preadv = raw_co_preadv, +diff --git a/block/iscsi.c b/block/iscsi.c +index b45da65..16b0716 100644 +--- a/block/iscsi.c ++++ b/block/iscsi.c +@@ -2399,18 +2399,6 @@ out_unlock: + return r; + } + +-static QemuOptsList iscsi_create_opts = { +- .name = "iscsi-create-opts", +- .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head), +- .desc = { +- { +- .name = BLOCK_OPT_SIZE, +- .type = QEMU_OPT_SIZE, +- .help = "Virtual disk size" +- }, +- { /* end of list */ } +- } +-}; + + static const char *const iscsi_strong_runtime_opts[] = { + "transport", +@@ -2434,6 +2422,8 @@ static BlockDriver bdrv_iscsi = { + .bdrv_parse_filename = iscsi_parse_filename, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, +@@ -2471,6 +2461,8 @@ static BlockDriver bdrv_iser = { + .bdrv_parse_filename = iscsi_parse_filename, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, +diff --git a/block/nbd.c b/block/nbd.c +index 
a73f0d9..927915d 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -2030,6 +2030,8 @@ static BlockDriver bdrv_nbd = { + .protocol_name = "nbd", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = nbd_parse_filename, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_file_open = nbd_open, + .bdrv_reopen_prepare = nbd_client_reopen_prepare, + .bdrv_co_preadv = nbd_client_co_preadv, +@@ -2055,6 +2057,8 @@ static BlockDriver bdrv_nbd_tcp = { + .protocol_name = "nbd+tcp", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = nbd_parse_filename, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_file_open = nbd_open, + .bdrv_reopen_prepare = nbd_client_reopen_prepare, + .bdrv_co_preadv = nbd_client_co_preadv, +@@ -2080,6 +2084,8 @@ static BlockDriver bdrv_nbd_unix = { + .protocol_name = "nbd+unix", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = nbd_parse_filename, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_file_open = nbd_open, + .bdrv_reopen_prepare = nbd_client_reopen_prepare, + .bdrv_co_preadv = nbd_client_co_preadv, +diff --git a/block/nvme.c b/block/nvme.c +index d41c4bd..7b7c0cc 100644 +--- a/block/nvme.c ++++ b/block/nvme.c +@@ -1333,6 +1333,9 @@ static BlockDriver bdrv_nvme = { + .protocol_name = "nvme", + .instance_size = sizeof(BDRVNVMeState), + ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, ++ + .bdrv_parse_filename = nvme_parse_filename, + .bdrv_file_open = nvme_file_open, + .bdrv_close = nvme_close, +diff --git a/include/block/block.h b/include/block/block.h +index 1df9848..92685d2 100644 +--- a/include/block/block.h ++++ b/include/block/block.h +@@ -293,6 +293,7 @@ BlockDriver *bdrv_find_format(const char *format_name); + int bdrv_create(BlockDriver *drv, const char* filename, + QemuOpts *opts, 
Error **errp); + int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); ++ + BlockDriverState *bdrv_new(void); + void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp); +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 7ff81be..529f153 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -1325,4 +1325,15 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, + + int refresh_total_sectors(BlockDriverState *bs, int64_t hint); + ++/** ++ * Simple implementation of bdrv_co_create_opts for protocol drivers ++ * which only support creation via opening a file ++ * (usually existing raw storage device) ++ */ ++int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, ++ Error **errp); ++extern QemuOptsList bdrv_create_opts_simple; ++ + #endif /* BLOCK_INT_H */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch b/SOURCES/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch new file mode 100644 index 0000000..9a69130 --- /dev/null +++ b/SOURCES/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch @@ -0,0 +1,176 @@ +From dc2654f2319ad6c379e0ba10be143726c6f0e9e0 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:47 +0000 +Subject: [PATCH 14/18] blockdev: Acquire AioContext on dirty bitmap functions + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-8-slp@redhat.com> +Patchwork-id: 93760 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 7/9] blockdev: Acquire AioContext on dirty bitmap functions +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Dirty map addition and removal functions are not acquiring to BDS +AioContext, while they may call to code that expects it to 
be +acquired. + +This may trigger a crash with a stack trace like this one: + + #0 0x00007f0ef146370f in __GI_raise (sig=sig@entry=6) + at ../sysdeps/unix/sysv/linux/raise.c:50 + #1 0x00007f0ef144db25 in __GI_abort () at abort.c:79 + #2 0x0000565022294dce in error_exit + (err=, msg=msg@entry=0x56502243a730 <__func__.16350> "qemu_mutex_unlock_impl") at util/qemu-thread-posix.c:36 + #3 0x00005650222950ba in qemu_mutex_unlock_impl + (mutex=mutex@entry=0x5650244b0240, file=file@entry=0x565022439adf "util/async.c", line=line@entry=526) at util/qemu-thread-posix.c:108 + #4 0x0000565022290029 in aio_context_release + (ctx=ctx@entry=0x5650244b01e0) at util/async.c:526 + #5 0x000056502221cd08 in bdrv_can_store_new_dirty_bitmap + (bs=bs@entry=0x5650244dc820, name=name@entry=0x56502481d360 "bitmap1", granularity=granularity@entry=65536, errp=errp@entry=0x7fff22831718) + at block/dirty-bitmap.c:542 + #6 0x000056502206ae53 in qmp_block_dirty_bitmap_add + (errp=0x7fff22831718, disabled=false, has_disabled=, persistent=, has_persistent=true, granularity=65536, has_granularity=, name=0x56502481d360 "bitmap1", node=) at blockdev.c:2894 + #7 0x000056502206ae53 in qmp_block_dirty_bitmap_add + (node=, name=0x56502481d360 "bitmap1", has_granularity=, granularity=, has_persistent=true, persistent=, has_disabled=false, disabled=false, errp=0x7fff22831718) at blockdev.c:2856 + #8 0x00005650221847a3 in qmp_marshal_block_dirty_bitmap_add + (args=, ret=, errp=0x7fff22831798) + at qapi/qapi-commands-block-core.c:651 + #9 0x0000565022247e6c in do_qmp_dispatch + (errp=0x7fff22831790, allow_oob=, request=, cmds=0x565022b32d60 ) at qapi/qmp-dispatch.c:132 + #10 0x0000565022247e6c in qmp_dispatch + (cmds=0x565022b32d60 , request=, allow_oob=) at qapi/qmp-dispatch.c:175 + #11 0x0000565022166061 in monitor_qmp_dispatch + (mon=0x56502450faa0, req=) at monitor/qmp.c:145 + #12 0x00005650221666fa in monitor_qmp_bh_dispatcher + (data=) at monitor/qmp.c:234 + #13 0x000056502228f866 in aio_bh_call 
(bh=0x56502440eae0) + at util/async.c:117 + #14 0x000056502228f866 in aio_bh_poll (ctx=ctx@entry=0x56502440d7a0) + at util/async.c:117 + #15 0x0000565022292c54 in aio_dispatch (ctx=0x56502440d7a0) + at util/aio-posix.c:459 + #16 0x000056502228f742 in aio_ctx_dispatch + (source=, callback=, user_data=) at util/async.c:260 + #17 0x00007f0ef5ce667d in g_main_dispatch (context=0x56502449aa40) + at gmain.c:3176 + #18 0x00007f0ef5ce667d in g_main_context_dispatch + (context=context@entry=0x56502449aa40) at gmain.c:3829 + #19 0x0000565022291d08 in glib_pollfds_poll () at util/main-loop.c:219 + #20 0x0000565022291d08 in os_host_main_loop_wait + (timeout=) at util/main-loop.c:242 + #21 0x0000565022291d08 in main_loop_wait (nonblocking=) + at util/main-loop.c:518 + #22 0x00005650220743c1 in main_loop () at vl.c:1828 + #23 0x0000565021f20a72 in main + (argc=, argv=, envp=) + at vl.c:4504 + +Fix this by acquiring the AioContext at qmp_block_dirty_bitmap_add() +and qmp_block_dirty_bitmap_add(). + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1782175 +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 91005a495e228ebd7e5e173cd18f952450eef82d) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. 
de Paula +--- + blockdev.c | 22 ++++++++++++++++++---- + 1 file changed, 18 insertions(+), 4 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 1dacbc2..d4ef6cd 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -2984,6 +2984,7 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; ++ AioContext *aio_context; + + if (!name || name[0] == '\0') { + error_setg(errp, "Bitmap name cannot be empty"); +@@ -2995,11 +2996,14 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + return; + } + ++ aio_context = bdrv_get_aio_context(bs); ++ aio_context_acquire(aio_context); ++ + if (has_granularity) { + if (granularity < 512 || !is_power_of_2(granularity)) { + error_setg(errp, "Granularity must be power of 2 " + "and at least 512"); +- return; ++ goto out; + } + } else { + /* Default to cluster size, if available: */ +@@ -3017,12 +3021,12 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + if (persistent && + !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) + { +- return; ++ goto out; + } + + bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); + if (bitmap == NULL) { +- return; ++ goto out; + } + + if (disabled) { +@@ -3030,6 +3034,9 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + } + + bdrv_dirty_bitmap_set_persistence(bitmap, persistent); ++ ++out: ++ aio_context_release(aio_context); + } + + static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( +@@ -3038,21 +3045,27 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; ++ AioContext *aio_context; + + bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); + if (!bitmap || !bs) { + return NULL; + } + ++ aio_context = bdrv_get_aio_context(bs); ++ aio_context_acquire(aio_context); ++ + if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, + errp)) { ++ aio_context_release(aio_context); + 
return NULL; + } + + if (bdrv_dirty_bitmap_get_persistence(bitmap) && + bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) + { +- return NULL; ++ aio_context_release(aio_context); ++ return NULL; + } + + if (release) { +@@ -3063,6 +3076,7 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( + *bitmap_bs = bs; + } + ++ aio_context_release(aio_context); + return release ? NULL : bitmap; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch b/SOURCES/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch new file mode 100644 index 0000000..b2dd453 --- /dev/null +++ b/SOURCES/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch @@ -0,0 +1,107 @@ +From 24e5eca4218b294bd013e2d85a38345045506bec Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:48 +0000 +Subject: [PATCH 15/18] blockdev: Return bs to the proper context on snapshot + abort + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-9-slp@redhat.com> +Patchwork-id: 93761 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 8/9] blockdev: Return bs to the proper context on snapshot abort +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +external_snapshot_abort() calls to bdrv_set_backing_hd(), which +returns state->old_bs to the main AioContext, as it's intended to be +used then the BDS is going to be released. As that's not the case when +aborting an external snapshot, return it to the AioContext it was +before the call. + +This issue can be triggered by issuing a transaction with two actions, +a proper blockdev-snapshot-sync and a bogus one, so the second will +trigger a transaction abort. 
This results in a crash with an stack +trace like this one: + + #0 0x00007fa1048b28df in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 + #1 0x00007fa10489ccf5 in __GI_abort () at abort.c:79 + #2 0x00007fa10489cbc9 in __assert_fail_base + (fmt=0x7fa104a03300 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=0x557224014d30 "block.c", line=2240, function=) at assert.c:92 + #3 0x00007fa1048aae96 in __GI___assert_fail + (assertion=assertion@entry=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=file@entry=0x557224014d30 "block.c", line=line@entry=2240, function=function@entry=0x5572240b5d60 <__PRETTY_FUNCTION__.31620> "bdrv_replace_child_noperm") at assert.c:101 + #4 0x0000557223e631f8 in bdrv_replace_child_noperm (child=0x557225b9c980, new_bs=new_bs@entry=0x557225c42e40) at block.c:2240 + #5 0x0000557223e68be7 in bdrv_replace_node (from=0x557226951a60, to=0x557225c42e40, errp=0x5572247d6138 ) at block.c:4196 + #6 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1731 + #7 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1717 + #8 0x0000557223d09013 in qmp_transaction (dev_list=, has_props=, props=0x557225cc7d70, errp=errp@entry=0x7ffe704c0c98) at blockdev.c:2360 + #9 0x0000557223e32085 in qmp_marshal_transaction (args=, ret=, errp=0x7ffe704c0d08) at qapi/qapi-commands-transaction.c:44 + #10 0x0000557223ee798c in do_qmp_dispatch (errp=0x7ffe704c0d00, allow_oob=, request=, cmds=0x5572247d3cc0 ) at qapi/qmp-dispatch.c:132 + #11 0x0000557223ee798c in qmp_dispatch (cmds=0x5572247d3cc0 , request=, allow_oob=) at qapi/qmp-dispatch.c:175 + #12 0x0000557223e06141 in monitor_qmp_dispatch (mon=0x557225c69ff0, req=) at monitor/qmp.c:120 + #13 0x0000557223e0678a in monitor_qmp_bh_dispatcher (data=) at monitor/qmp.c:209 + #14 0x0000557223f2f366 in aio_bh_call 
(bh=0x557225b9dc60) at util/async.c:117 + #15 0x0000557223f2f366 in aio_bh_poll (ctx=ctx@entry=0x557225b9c840) at util/async.c:117 + #16 0x0000557223f32754 in aio_dispatch (ctx=0x557225b9c840) at util/aio-posix.c:459 + #17 0x0000557223f2f242 in aio_ctx_dispatch (source=, callback=, user_data=) at util/async.c:260 + #18 0x00007fa10913467d in g_main_dispatch (context=0x557225c28e80) at gmain.c:3176 + #19 0x00007fa10913467d in g_main_context_dispatch (context=context@entry=0x557225c28e80) at gmain.c:3829 + #20 0x0000557223f31808 in glib_pollfds_poll () at util/main-loop.c:219 + #21 0x0000557223f31808 in os_host_main_loop_wait (timeout=) at util/main-loop.c:242 + #22 0x0000557223f31808 in main_loop_wait (nonblocking=) at util/main-loop.c:518 + #23 0x0000557223d13201 in main_loop () at vl.c:1828 + #24 0x0000557223bbfb82 in main (argc=, argv=, envp=) at vl.c:4504 + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1779036 +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 377410f6fb4f6b0d26d4a028c20766fae05de17e) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/blockdev.c b/blockdev.c +index d4ef6cd..4cd9a58 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1731,6 +1731,8 @@ static void external_snapshot_abort(BlkActionState *common) + if (state->new_bs) { + if (state->overlay_appended) { + AioContext *aio_context; ++ AioContext *tmp_context; ++ int ret; + + aio_context = bdrv_get_aio_context(state->old_bs); + aio_context_acquire(aio_context); +@@ -1738,6 +1740,25 @@ static void external_snapshot_abort(BlkActionState *common) + bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd() + close state->old_bs; we need it */ + bdrv_set_backing_hd(state->new_bs, NULL, &error_abort); ++ ++ /* ++ * The call to bdrv_set_backing_hd() above returns state->old_bs to ++ * the main AioContext. 
As we're still going to be using it, return ++ * it to the AioContext it was before. ++ */ ++ tmp_context = bdrv_get_aio_context(state->old_bs); ++ if (aio_context != tmp_context) { ++ aio_context_release(aio_context); ++ aio_context_acquire(tmp_context); ++ ++ ret = bdrv_try_set_aio_context(state->old_bs, ++ aio_context, NULL); ++ assert(ret == 0); ++ ++ aio_context_release(tmp_context); ++ aio_context_acquire(aio_context); ++ } ++ + bdrv_replace_node(state->new_bs, state->old_bs, &error_abort); + bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch b/SOURCES/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch new file mode 100644 index 0000000..399a06a --- /dev/null +++ b/SOURCES/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch @@ -0,0 +1,62 @@ +From d56b53cd75c4146eae7a06d1cc30ab823a9bde93 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:41 +0000 +Subject: [PATCH 08/18] blockdev: fix coding style issues in + drive_backup_prepare +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-2-slp@redhat.com> +Patchwork-id: 93754 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/9] blockdev: fix coding style issues in drive_backup_prepare +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Fix a couple of minor coding style issues in drive_backup_prepare. + +Signed-off-by: Sergio Lopez +Reviewed-by: Max Reitz +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 471ded690e19689018535e3f48480507ed073e22) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. 
de Paula +--- + blockdev.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 8e029e9..553e315 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3620,7 +3620,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, + + if (!backup->has_format) { + backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? +- NULL : (char*) bs->drv->format_name; ++ NULL : (char *) bs->drv->format_name; + } + + /* Early check to avoid creating target */ +@@ -3630,8 +3630,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, + + flags = bs->open_flags | BDRV_O_RDWR; + +- /* See if we have a backing HD we can use to create our new image +- * on top of. */ ++ /* ++ * See if we have a backing HD we can use to create our new image ++ * on top of. ++ */ + if (backup->sync == MIRROR_SYNC_MODE_TOP) { + source = backing_bs(bs); + if (!source) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch b/SOURCES/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch new file mode 100644 index 0000000..a94ee75 --- /dev/null +++ b/SOURCES/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch @@ -0,0 +1,204 @@ +From da4ee4c0d56200042cb86f8ccd2777009bd82df3 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:44 +0000 +Subject: [PATCH 11/18] blockdev: honor bdrv_try_set_aio_context() context + requirements + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-5-slp@redhat.com> +Patchwork-id: 93758 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/9] blockdev: honor bdrv_try_set_aio_context() context requirements +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +bdrv_try_set_aio_context() requires that the old context is held, and +the new context is not held. 
Fix all the occurrences where it's not +done this way. + +Suggested-by: Max Reitz +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 3ea67e08832775a28d0bd2795f01bc77e7ea1512) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 60 insertions(+), 8 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 152a0f7..1dacbc2 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1535,6 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common, + DO_UPCAST(ExternalSnapshotState, common, common); + TransactionAction *action = common->action; + AioContext *aio_context; ++ AioContext *old_context; + int ret; + + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar +@@ -1675,7 +1676,16 @@ static void external_snapshot_prepare(BlkActionState *common, + goto out; + } + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ ++ old_context = bdrv_get_aio_context(state->new_bs); ++ aio_context_release(aio_context); ++ aio_context_acquire(old_context); ++ + ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); ++ ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + if (ret < 0) { + goto out; + } +@@ -1775,11 +1785,13 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) + BlockDriverState *target_bs; + BlockDriverState *source = NULL; + AioContext *aio_context; ++ AioContext *old_context; + QDict *options; + Error *local_err = NULL; + int flags; + int64_t size; + bool set_backing_hd = false; ++ int ret; + + assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); + backup = common->action->u.drive_backup.data; +@@ -1868,6 +1880,21 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) + goto out; + } + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ ++ old_context = bdrv_get_aio_context(target_bs); ++ aio_context_release(aio_context); ++ aio_context_acquire(old_context); ++ ++ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); ++ if (ret < 0) { ++ bdrv_unref(target_bs); ++ aio_context_release(old_context); ++ return; ++ } ++ ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + if (set_backing_hd) { + bdrv_set_backing_hd(target_bs, source, &local_err); + if (local_err) { +@@ -1947,6 +1974,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + BlockDriverState *bs; + BlockDriverState *target_bs; + AioContext *aio_context; ++ AioContext *old_context; ++ int ret; + + assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); + backup = common->action->u.blockdev_backup.data; +@@ -1961,7 +1990,18 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + return; + } + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ + aio_context = bdrv_get_aio_context(bs); ++ old_context = bdrv_get_aio_context(target_bs); ++ aio_context_acquire(old_context); ++ ++ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); ++ if (ret < 0) { ++ aio_context_release(old_context); ++ return; ++ } ++ ++ aio_context_release(old_context); + aio_context_acquire(aio_context); + state->bs = bs; + +@@ -3562,7 +3602,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, + BlockJob *job = NULL; + BdrvDirtyBitmap *bmap = NULL; + int job_flags = JOB_DEFAULT; +- int ret; + + if (!backup->has_speed) { + backup->speed = 0; +@@ -3586,11 +3625,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, + backup->compress = false; + } + +- ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); +- if (ret < 0) { +- return NULL; +- } +- + if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) || + (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) { + /* done before desugaring 'incremental' to print the right message */ +@@ -3825,6 +3859,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + BlockDriverState *bs; + BlockDriverState *source, *target_bs; + AioContext *aio_context; ++ AioContext *old_context; + BlockMirrorBackingMode backing_mode; + Error *local_err = NULL; + QDict *options = NULL; +@@ -3937,12 +3972,22 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + (arg->mode == NEW_IMAGE_MODE_EXISTING || + !bdrv_has_zero_init(target_bs))); + ++ ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ ++ old_context = bdrv_get_aio_context(target_bs); ++ aio_context_release(aio_context); ++ aio_context_acquire(old_context); ++ + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + if (ret < 0) { + bdrv_unref(target_bs); +- goto out; ++ aio_context_release(old_context); ++ return; + } + ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + blockdev_mirror_common(arg->has_job_id ? 
arg->job_id : NULL, bs, target_bs, + arg->has_replaces, arg->replaces, arg->sync, + backing_mode, zero_target, +@@ -3984,6 +4029,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, + BlockDriverState *bs; + BlockDriverState *target_bs; + AioContext *aio_context; ++ AioContext *old_context; + BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; + Error *local_err = NULL; + bool zero_target; +@@ -4001,10 +4047,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, + + zero_target = (sync == MIRROR_SYNC_MODE_FULL); + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ ++ old_context = bdrv_get_aio_context(target_bs); + aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); ++ aio_context_acquire(old_context); + + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); ++ ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + if (ret < 0) { + goto out; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch b/SOURCES/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch new file mode 100644 index 0000000..c426384 --- /dev/null +++ b/SOURCES/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch @@ -0,0 +1,144 @@ +From 959955217f745f1ee6cbea97314efe69f2d7dc08 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:43 +0000 +Subject: [PATCH 10/18] blockdev: unify qmp_blockdev_backup and blockdev-backup + transaction paths + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-4-slp@redhat.com> +Patchwork-id: 93756 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/9] blockdev: unify qmp_blockdev_backup and blockdev-backup transaction paths +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Issuing a blockdev-backup from 
qmp_blockdev_backup takes a slightly +different path than when it's issued from a transaction. In the code, +this is manifested as some redundancy between do_blockdev_backup() and +blockdev_backup_prepare(). + +This change unifies both paths, merging do_blockdev_backup() and +blockdev_backup_prepare(), and changing qmp_blockdev_backup() to +create a transaction instead of calling do_backup_common() directly. + +As a side-effect, now qmp_blockdev_backup() is executed inside a +drained section, as it happens when creating a blockdev-backup +transaction. This change is visible from the user's perspective, as +the job gets paused and immediately resumed before starting the actual +work. + +Signed-off-by: Sergio Lopez +Reviewed-by: Max Reitz +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 5b7bfe515ecbd584b40ff6e41d2fd8b37c7d5139) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 60 +++++++++++++----------------------------------------------- + 1 file changed, 13 insertions(+), 47 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 5e85fc0..152a0f7 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1940,16 +1940,13 @@ typedef struct BlockdevBackupState { + BlockJob *job; + } BlockdevBackupState; + +-static BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, +- Error **errp); +- + static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + { + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + BlockdevBackup *backup; +- BlockDriverState *bs, *target; ++ BlockDriverState *bs; ++ BlockDriverState *target_bs; + AioContext *aio_context; +- Error *local_err = NULL; + + assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); + backup = common->action->u.blockdev_backup.data; +@@ -1959,8 +1956,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + return; + } + +- target = bdrv_lookup_bs(backup->target, 
backup->target, errp); +- if (!target) { ++ target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); ++ if (!target_bs) { + return; + } + +@@ -1971,13 +1968,10 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + /* Paired with .clean() */ + bdrv_drained_begin(state->bs); + +- state->job = do_blockdev_backup(backup, common->block_job_txn, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- goto out; +- } ++ state->job = do_backup_common(qapi_BlockdevBackup_base(backup), ++ bs, target_bs, aio_context, ++ common->block_job_txn, errp); + +-out: + aio_context_release(aio_context); + } + +@@ -3695,41 +3689,13 @@ XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp) + return bdrv_get_xdbg_block_graph(errp); + } + +-BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, +- Error **errp) ++void qmp_blockdev_backup(BlockdevBackup *backup, Error **errp) + { +- BlockDriverState *bs; +- BlockDriverState *target_bs; +- AioContext *aio_context; +- BlockJob *job; +- +- bs = bdrv_lookup_bs(backup->device, backup->device, errp); +- if (!bs) { +- return NULL; +- } +- +- target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); +- if (!target_bs) { +- return NULL; +- } +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- +- job = do_backup_common(qapi_BlockdevBackup_base(backup), +- bs, target_bs, aio_context, txn, errp); +- +- aio_context_release(aio_context); +- return job; +-} +- +-void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp) +-{ +- BlockJob *job; +- job = do_blockdev_backup(arg, NULL, errp); +- if (job) { +- job_start(&job->job); +- } ++ TransactionAction action = { ++ .type = TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP, ++ .u.blockdev_backup.data = backup, ++ }; ++ blockdev_do_action(&action, errp); + } + + /* Parameter check and block job starting for drive mirroring. 
+-- +1.8.3.1 + diff --git a/SOURCES/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch b/SOURCES/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch new file mode 100644 index 0000000..9ec1975 --- /dev/null +++ b/SOURCES/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch @@ -0,0 +1,419 @@ +From 4a03ab2a6cc4974d8d43240d1297b09160818af3 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:42 +0000 +Subject: [PATCH 09/18] blockdev: unify qmp_drive_backup and drive-backup + transaction paths + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-3-slp@redhat.com> +Patchwork-id: 93755 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/9] blockdev: unify qmp_drive_backup and drive-backup transaction paths +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Issuing a drive-backup from qmp_drive_backup takes a slightly +different path than when it's issued from a transaction. In the code, +this is manifested as some redundancy between do_drive_backup() and +drive_backup_prepare(). + +This change unifies both paths, merging do_drive_backup() and +drive_backup_prepare(), and changing qmp_drive_backup() to create a +transaction instead of calling do_backup_common() directly. + +As a side-effect, now qmp_drive_backup() is executed inside a drained +section, as it happens when creating a drive-backup transaction. This +change is visible from the user's perspective, as the job gets paused +and immediately resumed before starting the actual work. + +Also fix tests 141, 185 and 219 to cope with the extra +JOB_STATUS_CHANGE lines. + +Signed-off-by: Sergio Lopez +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 2288ccfac96281c316db942d10e3f921c1373064) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. 
de Paula +--- + blockdev.c | 224 ++++++++++++++++++++------------------------- + tests/qemu-iotests/141.out | 2 + + tests/qemu-iotests/185.out | 2 + + tests/qemu-iotests/219 | 7 +- + tests/qemu-iotests/219.out | 8 ++ + 5 files changed, 117 insertions(+), 126 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 553e315..5e85fc0 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1761,39 +1761,128 @@ typedef struct DriveBackupState { + BlockJob *job; + } DriveBackupState; + +-static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, +- Error **errp); ++static BlockJob *do_backup_common(BackupCommon *backup, ++ BlockDriverState *bs, ++ BlockDriverState *target_bs, ++ AioContext *aio_context, ++ JobTxn *txn, Error **errp); + + static void drive_backup_prepare(BlkActionState *common, Error **errp) + { + DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); +- BlockDriverState *bs; + DriveBackup *backup; ++ BlockDriverState *bs; ++ BlockDriverState *target_bs; ++ BlockDriverState *source = NULL; + AioContext *aio_context; ++ QDict *options; + Error *local_err = NULL; ++ int flags; ++ int64_t size; ++ bool set_backing_hd = false; + + assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); + backup = common->action->u.drive_backup.data; + ++ if (!backup->has_mode) { ++ backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; ++ } ++ + bs = bdrv_lookup_bs(backup->device, backup->device, errp); + if (!bs) { + return; + } + ++ if (!bs->drv) { ++ error_setg(errp, "Device has no medium"); ++ return; ++ } ++ + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + /* Paired with .clean() */ + bdrv_drained_begin(bs); + +- state->bs = bs; ++ if (!backup->has_format) { ++ backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? 
++ NULL : (char *) bs->drv->format_name; ++ } ++ ++ /* Early check to avoid creating target */ ++ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { ++ goto out; ++ } ++ ++ flags = bs->open_flags | BDRV_O_RDWR; ++ ++ /* ++ * See if we have a backing HD we can use to create our new image ++ * on top of. ++ */ ++ if (backup->sync == MIRROR_SYNC_MODE_TOP) { ++ source = backing_bs(bs); ++ if (!source) { ++ backup->sync = MIRROR_SYNC_MODE_FULL; ++ } ++ } ++ if (backup->sync == MIRROR_SYNC_MODE_NONE) { ++ source = bs; ++ flags |= BDRV_O_NO_BACKING; ++ set_backing_hd = true; ++ } ++ ++ size = bdrv_getlength(bs); ++ if (size < 0) { ++ error_setg_errno(errp, -size, "bdrv_getlength failed"); ++ goto out; ++ } ++ ++ if (backup->mode != NEW_IMAGE_MODE_EXISTING) { ++ assert(backup->format); ++ if (source) { ++ bdrv_refresh_filename(source); ++ bdrv_img_create(backup->target, backup->format, source->filename, ++ source->drv->format_name, NULL, ++ size, flags, false, &local_err); ++ } else { ++ bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, ++ size, flags, false, &local_err); ++ } ++ } + +- state->job = do_drive_backup(backup, common->block_job_txn, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + ++ options = qdict_new(); ++ qdict_put_str(options, "discard", "unmap"); ++ qdict_put_str(options, "detect-zeroes", "unmap"); ++ if (backup->format) { ++ qdict_put_str(options, "driver", backup->format); ++ } ++ ++ target_bs = bdrv_open(backup->target, NULL, options, flags, errp); ++ if (!target_bs) { ++ goto out; ++ } ++ ++ if (set_backing_hd) { ++ bdrv_set_backing_hd(target_bs, source, &local_err); ++ if (local_err) { ++ goto unref; ++ } ++ } ++ ++ state->bs = bs; ++ ++ state->job = do_backup_common(qapi_DriveBackup_base(backup), ++ bs, target_bs, aio_context, ++ common->block_job_txn, errp); ++ ++unref: ++ bdrv_unref(target_bs); + out: + aio_context_release(aio_context); + } +@@ -3587,126 +3676,13 @@ static 
BlockJob *do_backup_common(BackupCommon *backup, + return job; + } + +-static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, +- Error **errp) +-{ +- BlockDriverState *bs; +- BlockDriverState *target_bs; +- BlockDriverState *source = NULL; +- BlockJob *job = NULL; +- AioContext *aio_context; +- QDict *options; +- Error *local_err = NULL; +- int flags; +- int64_t size; +- bool set_backing_hd = false; +- +- if (!backup->has_mode) { +- backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; +- } +- +- bs = bdrv_lookup_bs(backup->device, backup->device, errp); +- if (!bs) { +- return NULL; +- } +- +- if (!bs->drv) { +- error_setg(errp, "Device has no medium"); +- return NULL; +- } +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- +- if (!backup->has_format) { +- backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? +- NULL : (char *) bs->drv->format_name; +- } +- +- /* Early check to avoid creating target */ +- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { +- goto out; +- } +- +- flags = bs->open_flags | BDRV_O_RDWR; +- +- /* +- * See if we have a backing HD we can use to create our new image +- * on top of. 
+- */ +- if (backup->sync == MIRROR_SYNC_MODE_TOP) { +- source = backing_bs(bs); +- if (!source) { +- backup->sync = MIRROR_SYNC_MODE_FULL; +- } +- } +- if (backup->sync == MIRROR_SYNC_MODE_NONE) { +- source = bs; +- flags |= BDRV_O_NO_BACKING; +- set_backing_hd = true; +- } +- +- size = bdrv_getlength(bs); +- if (size < 0) { +- error_setg_errno(errp, -size, "bdrv_getlength failed"); +- goto out; +- } +- +- if (backup->mode != NEW_IMAGE_MODE_EXISTING) { +- assert(backup->format); +- if (source) { +- bdrv_refresh_filename(source); +- bdrv_img_create(backup->target, backup->format, source->filename, +- source->drv->format_name, NULL, +- size, flags, false, &local_err); +- } else { +- bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, +- size, flags, false, &local_err); +- } +- } +- +- if (local_err) { +- error_propagate(errp, local_err); +- goto out; +- } +- +- options = qdict_new(); +- qdict_put_str(options, "discard", "unmap"); +- qdict_put_str(options, "detect-zeroes", "unmap"); +- if (backup->format) { +- qdict_put_str(options, "driver", backup->format); +- } +- +- target_bs = bdrv_open(backup->target, NULL, options, flags, errp); +- if (!target_bs) { +- goto out; +- } +- +- if (set_backing_hd) { +- bdrv_set_backing_hd(target_bs, source, &local_err); +- if (local_err) { +- goto unref; +- } +- } +- +- job = do_backup_common(qapi_DriveBackup_base(backup), +- bs, target_bs, aio_context, txn, errp); +- +-unref: +- bdrv_unref(target_bs); +-out: +- aio_context_release(aio_context); +- return job; +-} +- +-void qmp_drive_backup(DriveBackup *arg, Error **errp) ++void qmp_drive_backup(DriveBackup *backup, Error **errp) + { +- +- BlockJob *job; +- job = do_drive_backup(arg, NULL, errp); +- if (job) { +- job_start(&job->job); +- } ++ TransactionAction action = { ++ .type = TRANSACTION_ACTION_KIND_DRIVE_BACKUP, ++ .u.drive_backup.data = backup, ++ }; ++ blockdev_do_action(&action, errp); + } + + BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) 
+diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out +index 3645675..263b680 100644 +--- a/tests/qemu-iotests/141.out ++++ b/tests/qemu-iotests/141.out +@@ -13,6 +13,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m. + Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} + {'execute': 'blockdev-del', 'arguments': {'node-name': 'drv0'}} + {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} + {'execute': 'block-job-cancel', 'arguments': {'device': 'job0'}} +diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out +index 8379ac5..9a3b657 100644 +--- a/tests/qemu-iotests/185.out ++++ b/tests/qemu-iotests/185.out +@@ -65,6 +65,8 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 l + Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "disk"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": 
"disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} + {"return": {}} + { 'execute': 'quit' } + {"return": {}} +diff --git a/tests/qemu-iotests/219 b/tests/qemu-iotests/219 +index e0c5166..655f54d 100755 +--- a/tests/qemu-iotests/219 ++++ b/tests/qemu-iotests/219 +@@ -63,7 +63,7 @@ def test_pause_resume(vm): + # logged immediately + iotests.log(vm.qmp('query-jobs')) + +-def test_job_lifecycle(vm, job, job_args, has_ready=False): ++def test_job_lifecycle(vm, job, job_args, has_ready=False, is_mirror=False): + global img_size + + iotests.log('') +@@ -135,6 +135,9 @@ def test_job_lifecycle(vm, job, job_args, has_ready=False): + iotests.log('Waiting for PENDING state...') + iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) + iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) ++ if is_mirror: ++ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) ++ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) + + if not job_args.get('auto-finalize', True): + # PENDING state: +@@ -218,7 +221,7 @@ with iotests.FilePath('disk.img') as disk_path, \ + + for auto_finalize in [True, False]: + for auto_dismiss in [True, False]: +- test_job_lifecycle(vm, 'drive-backup', job_args={ ++ test_job_lifecycle(vm, 'drive-backup', is_mirror=True, job_args={ + 'device': 'drive0-node', + 'target': copy_path, + 'sync': 'full', +diff --git a/tests/qemu-iotests/219.out b/tests/qemu-iotests/219.out +index 8ebd3fe..0ea5d0b 100644 +--- a/tests/qemu-iotests/219.out ++++ b/tests/qemu-iotests/219.out +@@ -135,6 +135,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... 
++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +@@ -186,6 +188,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... ++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +@@ -245,6 +249,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... 
++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} +@@ -304,6 +310,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... ++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch b/SOURCES/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch new file mode 100644 index 0000000..5d21bf8 --- /dev/null +++ b/SOURCES/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch @@ -0,0 +1,137 @@ +From f756c1c4590a37c533ec0429644a7034ba35dada Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:38 +0100 +Subject: [PATCH 007/116] build: rename CONFIG_LIBCAP to CONFIG_LIBCAP_NG +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-4-dgilbert@redhat.com> +Patchwork-id: 93459 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 003/112] build: rename CONFIG_LIBCAP to CONFIG_LIBCAP_NG +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Paolo Bonzini + +Since we are actually testing for the newer capng library, rename the +symbol to match. + +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Paolo Bonzini +(cherry picked from commit a358bca24026a377e0804e137a4499e4e041918d) +Signed-off-by: Miroslav Rezanina +--- + configure | 2 +- + qemu-bridge-helper.c | 6 +++--- + scsi/qemu-pr-helper.c | 12 ++++++------ + 3 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/configure b/configure +index 16564f8..7831618 100755 +--- a/configure ++++ b/configure +@@ -6760,7 +6760,7 @@ if test "$l2tpv3" = "yes" ; then + echo "CONFIG_L2TPV3=y" >> $config_host_mak + fi + if test "$cap_ng" = "yes" ; then +- echo "CONFIG_LIBCAP=y" >> $config_host_mak ++ echo "CONFIG_LIBCAP_NG=y" >> $config_host_mak + fi + echo "CONFIG_AUDIO_DRIVERS=$audio_drv_list" >> $config_host_mak + for drv in $audio_drv_list; do +diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c +index 3d50ec0..88b2674 100644 +--- a/qemu-bridge-helper.c ++++ b/qemu-bridge-helper.c +@@ -43,7 +43,7 @@ + + #include "net/tap-linux.h" + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + #include + #endif + +@@ -207,7 +207,7 @@ static int send_fd(int c, int fd) + return sendmsg(c, &msg, 0); + } + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + static int drop_privileges(void) + { + /* clear all capabilities */ +@@ -246,7 +246,7 @@ int main(int argc, char **argv) + int access_allowed, 
access_denied; + int ret = EXIT_SUCCESS; + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + /* if we're run from an suid binary, immediately drop privileges preserving + * cap_net_admin */ + if (geteuid() == 0 && getuid() != geteuid()) { +diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c +index debb18f..0659cee 100644 +--- a/scsi/qemu-pr-helper.c ++++ b/scsi/qemu-pr-helper.c +@@ -24,7 +24,7 @@ + #include + #include + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + #include + #endif + #include +@@ -70,7 +70,7 @@ static int num_active_sockets = 1; + static int noisy; + static int verbose; + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + static int uid = -1; + static int gid = -1; + #endif +@@ -97,7 +97,7 @@ static void usage(const char *name) + " (default '%s')\n" + " -T, --trace [[enable=]][,events=][,file=]\n" + " specify tracing options\n" +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + " -u, --user=USER user to drop privileges to\n" + " -g, --group=GROUP group to drop privileges to\n" + #endif +@@ -827,7 +827,7 @@ static void close_server_socket(void) + num_active_sockets--; + } + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + static int drop_privileges(void) + { + /* clear all capabilities */ +@@ -920,7 +920,7 @@ int main(int argc, char **argv) + pidfile = g_strdup(optarg); + pidfile_specified = true; + break; +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + case 'u': { + unsigned long res; + struct passwd *userinfo = getpwnam(optarg); +@@ -1056,7 +1056,7 @@ int main(int argc, char **argv) + exit(EXIT_FAILURE); + } + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + if (drop_privileges() < 0) { + error_report("Failed to drop privileges: %s", strerror(errno)); + exit(EXIT_FAILURE); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch b/SOURCES/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch new file mode 100644 index 0000000..5b1b170 --- /dev/null +++ 
b/SOURCES/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch @@ -0,0 +1,2463 @@ +From fc2d0dfe60b14992a9b67e7a18394ba6365dc5ed Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 18 Mar 2020 18:10:40 +0000 +Subject: [PATCH 2/2] build-sys: do not make qemu-ga link with pixman +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200318181040.256425-1-marcandre.lureau@redhat.com> +Patchwork-id: 94381 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] build-sys: do not make qemu-ga link with pixman +Bugzilla: 1811670 +RH-Acked-by: Markus Armbruster +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange + +Since commit d52c454aadcdae74506f315ebf8b58bb79a05573 ("contrib: add +vhost-user-gpu"), qemu-ga is linking with pixman. + +This is because the Make-based build-system uses a global namespace for +variables, and we rely on "main.o-libs" for different linking targets. + +Note: this kind of variable clashing is hard to fix or prevent +currently. meson should help, as declarations have a linear +dependency and don't rely so much on variables and clever tricks. + +Note2: we have a lot of main.c (or other duplicated names!) in +tree. Imho, it would be annoying and a bad workaround to rename all +those to avoid conflicts like I did here. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1811670 + +Signed-off-by: Marc-André Lureau +Message-Id: <20200311160923.882474-1-marcandre.lureau@redhat.com> +Signed-off-by: Paolo Bonzini + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1811670 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=27330493 + +(cherry picked from commit 5b42bc5ce9ab4a3171819feea5042931817211fd) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + contrib/vhost-user-gpu/Makefile.objs | 6 +- + contrib/vhost-user-gpu/main.c | 1191 ------------------------------- + contrib/vhost-user-gpu/vhost-user-gpu.c | 1191 +++++++++++++++++++++++++++++++ + 3 files changed, 1194 insertions(+), 1194 deletions(-) + delete mode 100644 contrib/vhost-user-gpu/main.c + create mode 100644 contrib/vhost-user-gpu/vhost-user-gpu.c + +diff --git a/contrib/vhost-user-gpu/Makefile.objs b/contrib/vhost-user-gpu/Makefile.objs +index 6170c91..0929609 100644 +--- a/contrib/vhost-user-gpu/Makefile.objs ++++ b/contrib/vhost-user-gpu/Makefile.objs +@@ -1,7 +1,7 @@ +-vhost-user-gpu-obj-y = main.o virgl.o vugbm.o ++vhost-user-gpu-obj-y = vhost-user-gpu.o virgl.o vugbm.o + +-main.o-cflags := $(PIXMAN_CFLAGS) $(GBM_CFLAGS) +-main.o-libs := $(PIXMAN_LIBS) ++vhost-user-gpu.o-cflags := $(PIXMAN_CFLAGS) $(GBM_CFLAGS) ++vhost-user-gpu.o-libs := $(PIXMAN_LIBS) + + virgl.o-cflags := $(VIRGL_CFLAGS) $(GBM_CFLAGS) + virgl.o-libs := $(VIRGL_LIBS) +diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c +deleted file mode 100644 +index b45d201..0000000 +--- a/contrib/vhost-user-gpu/main.c ++++ /dev/null +@@ -1,1191 +0,0 @@ +-/* +- * Virtio vhost-user GPU Device +- * +- * Copyright Red Hat, Inc. 2013-2018 +- * +- * Authors: +- * Dave Airlie +- * Gerd Hoffmann +- * Marc-André Lureau +- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. 
+- */ +-#include "qemu/osdep.h" +-#include "qemu/drm.h" +-#include "qapi/error.h" +-#include "qemu/sockets.h" +- +-#include +-#include +- +-#include "vugpu.h" +-#include "hw/virtio/virtio-gpu-bswap.h" +-#include "hw/virtio/virtio-gpu-pixman.h" +-#include "virgl.h" +-#include "vugbm.h" +- +-enum { +- VHOST_USER_GPU_MAX_QUEUES = 2, +-}; +- +-struct virtio_gpu_simple_resource { +- uint32_t resource_id; +- uint32_t width; +- uint32_t height; +- uint32_t format; +- struct iovec *iov; +- unsigned int iov_cnt; +- uint32_t scanout_bitmask; +- pixman_image_t *image; +- struct vugbm_buffer buffer; +- QTAILQ_ENTRY(virtio_gpu_simple_resource) next; +-}; +- +-static gboolean opt_print_caps; +-static int opt_fdnum = -1; +-static char *opt_socket_path; +-static char *opt_render_node; +-static gboolean opt_virgl; +- +-static void vg_handle_ctrl(VuDev *dev, int qidx); +- +-static const char * +-vg_cmd_to_string(int cmd) +-{ +-#define CMD(cmd) [cmd] = #cmd +- static const char *vg_cmd_str[] = { +- CMD(VIRTIO_GPU_UNDEFINED), +- +- /* 2d commands */ +- CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO), +- CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D), +- CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF), +- CMD(VIRTIO_GPU_CMD_SET_SCANOUT), +- CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH), +- CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D), +- CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING), +- CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING), +- CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO), +- CMD(VIRTIO_GPU_CMD_GET_CAPSET), +- +- /* 3d commands */ +- CMD(VIRTIO_GPU_CMD_CTX_CREATE), +- CMD(VIRTIO_GPU_CMD_CTX_DESTROY), +- CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE), +- CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE), +- CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D), +- CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D), +- CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D), +- CMD(VIRTIO_GPU_CMD_SUBMIT_3D), +- +- /* cursor commands */ +- CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR), +- CMD(VIRTIO_GPU_CMD_MOVE_CURSOR), +- }; +-#undef REQ +- +- if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) { +- return 
vg_cmd_str[cmd]; +- } else { +- return "unknown"; +- } +-} +- +-static int +-vg_sock_fd_read(int sock, void *buf, ssize_t buflen) +-{ +- int ret; +- +- do { +- ret = read(sock, buf, buflen); +- } while (ret < 0 && (errno == EINTR || errno == EAGAIN)); +- +- g_warn_if_fail(ret == buflen); +- return ret; +-} +- +-static void +-vg_sock_fd_close(VuGpu *g) +-{ +- if (g->sock_fd >= 0) { +- close(g->sock_fd); +- g->sock_fd = -1; +- } +-} +- +-static gboolean +-source_wait_cb(gint fd, GIOCondition condition, gpointer user_data) +-{ +- VuGpu *g = user_data; +- +- if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) { +- return G_SOURCE_CONTINUE; +- } +- +- /* resume */ +- g->wait_ok = 0; +- vg_handle_ctrl(&g->dev.parent, 0); +- +- return G_SOURCE_REMOVE; +-} +- +-void +-vg_wait_ok(VuGpu *g) +-{ +- assert(g->wait_ok == 0); +- g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP, +- source_wait_cb, g); +-} +- +-static int +-vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd) +-{ +- ssize_t ret; +- struct iovec iov = { +- .iov_base = (void *)buf, +- .iov_len = buflen, +- }; +- struct msghdr msg = { +- .msg_iov = &iov, +- .msg_iovlen = 1, +- }; +- union { +- struct cmsghdr cmsghdr; +- char control[CMSG_SPACE(sizeof(int))]; +- } cmsgu; +- struct cmsghdr *cmsg; +- +- if (fd != -1) { +- msg.msg_control = cmsgu.control; +- msg.msg_controllen = sizeof(cmsgu.control); +- +- cmsg = CMSG_FIRSTHDR(&msg); +- cmsg->cmsg_len = CMSG_LEN(sizeof(int)); +- cmsg->cmsg_level = SOL_SOCKET; +- cmsg->cmsg_type = SCM_RIGHTS; +- +- *((int *)CMSG_DATA(cmsg)) = fd; +- } +- +- do { +- ret = sendmsg(sock, &msg, 0); +- } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); +- +- g_warn_if_fail(ret == buflen); +- return ret; +-} +- +-void +-vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd) +-{ +- if (vg_sock_fd_write(vg->sock_fd, msg, +- VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) { +- vg_sock_fd_close(vg); +- } +-} +- +-bool +-vg_recv_msg(VuGpu *g, uint32_t 
expect_req, uint32_t expect_size, +- gpointer payload) +-{ +- uint32_t req, flags, size; +- +- if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 || +- vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 || +- vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) { +- goto err; +- } +- +- g_return_val_if_fail(req == expect_req, false); +- g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false); +- g_return_val_if_fail(size == expect_size, false); +- +- if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) { +- goto err; +- } +- +- return true; +- +-err: +- vg_sock_fd_close(g); +- return false; +-} +- +-static struct virtio_gpu_simple_resource * +-virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id) +-{ +- struct virtio_gpu_simple_resource *res; +- +- QTAILQ_FOREACH(res, &g->reslist, next) { +- if (res->resource_id == resource_id) { +- return res; +- } +- } +- return NULL; +-} +- +-void +-vg_ctrl_response(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd, +- struct virtio_gpu_ctrl_hdr *resp, +- size_t resp_len) +-{ +- size_t s; +- +- if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) { +- resp->flags |= VIRTIO_GPU_FLAG_FENCE; +- resp->fence_id = cmd->cmd_hdr.fence_id; +- resp->ctx_id = cmd->cmd_hdr.ctx_id; +- } +- virtio_gpu_ctrl_hdr_bswap(resp); +- s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len); +- if (s != resp_len) { +- g_critical("%s: response size incorrect %zu vs %zu", +- __func__, s, resp_len); +- } +- vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s); +- vu_queue_notify(&g->dev.parent, cmd->vq); +- cmd->finished = true; +-} +- +-void +-vg_ctrl_response_nodata(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd, +- enum virtio_gpu_ctrl_type type) +-{ +- struct virtio_gpu_ctrl_hdr resp = { +- .type = type, +- }; +- +- vg_ctrl_response(g, cmd, &resp, sizeof(resp)); +-} +- +-void +-vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_resp_display_info dpy_info = { {} }; 
+- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_GET_DISPLAY_INFO, +- .size = 0, +- }; +- +- assert(vg->wait_ok == 0); +- +- vg_send_msg(vg, &msg, -1); +- if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) { +- return; +- } +- +- vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info)); +-} +- +-static void +-vg_resource_create_2d(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- pixman_format_code_t pformat; +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_create_2d c2d; +- +- VUGPU_FILL_CMD(c2d); +- virtio_gpu_bswap_32(&c2d, sizeof(c2d)); +- +- if (c2d.resource_id == 0) { +- g_critical("%s: resource id 0 is not allowed", __func__); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- res = virtio_gpu_find_resource(g, c2d.resource_id); +- if (res) { +- g_critical("%s: resource already exists %d", __func__, c2d.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- res = g_new0(struct virtio_gpu_simple_resource, 1); +- res->width = c2d.width; +- res->height = c2d.height; +- res->format = c2d.format; +- res->resource_id = c2d.resource_id; +- +- pformat = virtio_gpu_get_pixman_format(c2d.format); +- if (!pformat) { +- g_critical("%s: host couldn't handle guest format %d", +- __func__, c2d.format); +- g_free(res); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; +- return; +- } +- vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height); +- res->image = pixman_image_create_bits(pformat, +- c2d.width, +- c2d.height, +- (uint32_t *)res->buffer.mmap, +- res->buffer.stride); +- if (!res->image) { +- g_critical("%s: resource creation failed %d %d %d", +- __func__, c2d.resource_id, c2d.width, c2d.height); +- g_free(res); +- cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; +- return; +- } +- +- QTAILQ_INSERT_HEAD(&g->reslist, res, next); +-} +- +-static void +-vg_disable_scanout(VuGpu *g, int scanout_id) +-{ +- struct virtio_gpu_scanout *scanout 
= &g->scanout[scanout_id]; +- struct virtio_gpu_simple_resource *res; +- +- if (scanout->resource_id == 0) { +- return; +- } +- +- res = virtio_gpu_find_resource(g, scanout->resource_id); +- if (res) { +- res->scanout_bitmask &= ~(1 << scanout_id); +- } +- +- scanout->width = 0; +- scanout->height = 0; +- +- if (g->sock_fd >= 0) { +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_SCANOUT, +- .size = sizeof(VhostUserGpuScanout), +- .payload.scanout.scanout_id = scanout_id, +- }; +- vg_send_msg(g, &msg, -1); +- } +-} +- +-static void +-vg_resource_destroy(VuGpu *g, +- struct virtio_gpu_simple_resource *res) +-{ +- int i; +- +- if (res->scanout_bitmask) { +- for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { +- if (res->scanout_bitmask & (1 << i)) { +- vg_disable_scanout(g, i); +- } +- } +- } +- +- vugbm_buffer_destroy(&res->buffer); +- pixman_image_unref(res->image); +- QTAILQ_REMOVE(&g->reslist, res, next); +- g_free(res); +-} +- +-static void +-vg_resource_unref(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_unref unref; +- +- VUGPU_FILL_CMD(unref); +- virtio_gpu_bswap_32(&unref, sizeof(unref)); +- +- res = virtio_gpu_find_resource(g, unref.resource_id); +- if (!res) { +- g_critical("%s: illegal resource specified %d", +- __func__, unref.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- vg_resource_destroy(g, res); +-} +- +-int +-vg_create_mapping_iov(VuGpu *g, +- struct virtio_gpu_resource_attach_backing *ab, +- struct virtio_gpu_ctrl_command *cmd, +- struct iovec **iov) +-{ +- struct virtio_gpu_mem_entry *ents; +- size_t esize, s; +- int i; +- +- if (ab->nr_entries > 16384) { +- g_critical("%s: nr_entries is too big (%d > 16384)", +- __func__, ab->nr_entries); +- return -1; +- } +- +- esize = sizeof(*ents) * ab->nr_entries; +- ents = g_malloc(esize); +- s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, +- sizeof(*ab), ents, esize); +- if (s 
!= esize) { +- g_critical("%s: command data size incorrect %zu vs %zu", +- __func__, s, esize); +- g_free(ents); +- return -1; +- } +- +- *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); +- for (i = 0; i < ab->nr_entries; i++) { +- uint64_t len = ents[i].length; +- (*iov)[i].iov_len = ents[i].length; +- (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr); +- if (!(*iov)[i].iov_base || len != ents[i].length) { +- g_critical("%s: resource %d element %d", +- __func__, ab->resource_id, i); +- g_free(*iov); +- g_free(ents); +- *iov = NULL; +- return -1; +- } +- } +- g_free(ents); +- return 0; +-} +- +-static void +-vg_resource_attach_backing(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_attach_backing ab; +- int ret; +- +- VUGPU_FILL_CMD(ab); +- virtio_gpu_bswap_32(&ab, sizeof(ab)); +- +- res = virtio_gpu_find_resource(g, ab.resource_id); +- if (!res) { +- g_critical("%s: illegal resource specified %d", +- __func__, ab.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); +- if (ret != 0) { +- cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; +- return; +- } +- +- res->iov_cnt = ab.nr_entries; +-} +- +-static void +-vg_resource_detach_backing(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_detach_backing detach; +- +- VUGPU_FILL_CMD(detach); +- virtio_gpu_bswap_32(&detach, sizeof(detach)); +- +- res = virtio_gpu_find_resource(g, detach.resource_id); +- if (!res || !res->iov) { +- g_critical("%s: illegal resource specified %d", +- __func__, detach.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- g_free(res->iov); +- res->iov = NULL; +- res->iov_cnt = 0; +-} +- +-static void +-vg_transfer_to_host_2d(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct 
virtio_gpu_simple_resource *res; +- int h; +- uint32_t src_offset, dst_offset, stride; +- int bpp; +- pixman_format_code_t format; +- struct virtio_gpu_transfer_to_host_2d t2d; +- +- VUGPU_FILL_CMD(t2d); +- virtio_gpu_t2d_bswap(&t2d); +- +- res = virtio_gpu_find_resource(g, t2d.resource_id); +- if (!res || !res->iov) { +- g_critical("%s: illegal resource specified %d", +- __func__, t2d.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- if (t2d.r.x > res->width || +- t2d.r.y > res->height || +- t2d.r.width > res->width || +- t2d.r.height > res->height || +- t2d.r.x + t2d.r.width > res->width || +- t2d.r.y + t2d.r.height > res->height) { +- g_critical("%s: transfer bounds outside resource" +- " bounds for resource %d: %d %d %d %d vs %d %d", +- __func__, t2d.resource_id, t2d.r.x, t2d.r.y, +- t2d.r.width, t2d.r.height, res->width, res->height); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; +- return; +- } +- +- format = pixman_image_get_format(res->image); +- bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8; +- stride = pixman_image_get_stride(res->image); +- +- if (t2d.offset || t2d.r.x || t2d.r.y || +- t2d.r.width != pixman_image_get_width(res->image)) { +- void *img_data = pixman_image_get_data(res->image); +- for (h = 0; h < t2d.r.height; h++) { +- src_offset = t2d.offset + stride * h; +- dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp); +- +- iov_to_buf(res->iov, res->iov_cnt, src_offset, +- img_data +- + dst_offset, t2d.r.width * bpp); +- } +- } else { +- iov_to_buf(res->iov, res->iov_cnt, 0, +- pixman_image_get_data(res->image), +- pixman_image_get_stride(res->image) +- * pixman_image_get_height(res->image)); +- } +-} +- +-static void +-vg_set_scanout(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res, *ores; +- struct virtio_gpu_scanout *scanout; +- struct virtio_gpu_set_scanout ss; +- int fd; +- +- VUGPU_FILL_CMD(ss); +- virtio_gpu_bswap_32(&ss, sizeof(ss)); +- +- if 
(ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) { +- g_critical("%s: illegal scanout id specified %d", +- __func__, ss.scanout_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; +- return; +- } +- +- if (ss.resource_id == 0) { +- vg_disable_scanout(g, ss.scanout_id); +- return; +- } +- +- /* create a surface for this scanout */ +- res = virtio_gpu_find_resource(g, ss.resource_id); +- if (!res) { +- g_critical("%s: illegal resource specified %d", +- __func__, ss.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- if (ss.r.x > res->width || +- ss.r.y > res->height || +- ss.r.width > res->width || +- ss.r.height > res->height || +- ss.r.x + ss.r.width > res->width || +- ss.r.y + ss.r.height > res->height) { +- g_critical("%s: illegal scanout %d bounds for" +- " resource %d, (%d,%d)+%d,%d vs %d %d", +- __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, +- ss.r.width, ss.r.height, res->width, res->height); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; +- return; +- } +- +- scanout = &g->scanout[ss.scanout_id]; +- +- ores = virtio_gpu_find_resource(g, scanout->resource_id); +- if (ores) { +- ores->scanout_bitmask &= ~(1 << ss.scanout_id); +- } +- +- res->scanout_bitmask |= (1 << ss.scanout_id); +- scanout->resource_id = ss.resource_id; +- scanout->x = ss.r.x; +- scanout->y = ss.r.y; +- scanout->width = ss.r.width; +- scanout->height = ss.r.height; +- +- struct vugbm_buffer *buffer = &res->buffer; +- +- if (vugbm_buffer_can_get_dmabuf_fd(buffer)) { +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_DMABUF_SCANOUT, +- .size = sizeof(VhostUserGpuDMABUFScanout), +- .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) { +- .scanout_id = ss.scanout_id, +- .x = ss.r.x, +- .y = ss.r.y, +- .width = ss.r.width, +- .height = ss.r.height, +- .fd_width = buffer->width, +- .fd_height = buffer->height, +- .fd_stride = buffer->stride, +- .fd_drm_fourcc = buffer->format +- } +- }; +- +- if 
(vugbm_buffer_get_dmabuf_fd(buffer, &fd)) { +- vg_send_msg(g, &msg, fd); +- close(fd); +- } +- } else { +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_SCANOUT, +- .size = sizeof(VhostUserGpuScanout), +- .payload.scanout = (VhostUserGpuScanout) { +- .scanout_id = ss.scanout_id, +- .width = scanout->width, +- .height = scanout->height +- } +- }; +- vg_send_msg(g, &msg, -1); +- } +-} +- +-static void +-vg_resource_flush(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_flush rf; +- pixman_region16_t flush_region; +- int i; +- +- VUGPU_FILL_CMD(rf); +- virtio_gpu_bswap_32(&rf, sizeof(rf)); +- +- res = virtio_gpu_find_resource(g, rf.resource_id); +- if (!res) { +- g_critical("%s: illegal resource specified %d\n", +- __func__, rf.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- if (rf.r.x > res->width || +- rf.r.y > res->height || +- rf.r.width > res->width || +- rf.r.height > res->height || +- rf.r.x + rf.r.width > res->width || +- rf.r.y + rf.r.height > res->height) { +- g_critical("%s: flush bounds outside resource" +- " bounds for resource %d: %d %d %d %d vs %d %d\n", +- __func__, rf.resource_id, rf.r.x, rf.r.y, +- rf.r.width, rf.r.height, res->width, res->height); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; +- return; +- } +- +- pixman_region_init_rect(&flush_region, +- rf.r.x, rf.r.y, rf.r.width, rf.r.height); +- for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { +- struct virtio_gpu_scanout *scanout; +- pixman_region16_t region, finalregion; +- pixman_box16_t *extents; +- +- if (!(res->scanout_bitmask & (1 << i))) { +- continue; +- } +- scanout = &g->scanout[i]; +- +- pixman_region_init(&finalregion); +- pixman_region_init_rect(®ion, scanout->x, scanout->y, +- scanout->width, scanout->height); +- +- pixman_region_intersect(&finalregion, &flush_region, ®ion); +- +- extents = pixman_region_extents(&finalregion); +- size_t width = 
extents->x2 - extents->x1; +- size_t height = extents->y2 - extents->y1; +- +- if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) { +- VhostUserGpuMsg vmsg = { +- .request = VHOST_USER_GPU_DMABUF_UPDATE, +- .size = sizeof(VhostUserGpuUpdate), +- .payload.update = (VhostUserGpuUpdate) { +- .scanout_id = i, +- .x = extents->x1, +- .y = extents->y1, +- .width = width, +- .height = height, +- } +- }; +- vg_send_msg(g, &vmsg, -1); +- vg_wait_ok(g); +- } else { +- size_t bpp = +- PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8; +- size_t size = width * height * bpp; +- +- void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE + +- sizeof(VhostUserGpuUpdate) + size); +- VhostUserGpuMsg *msg = p; +- msg->request = VHOST_USER_GPU_UPDATE; +- msg->size = sizeof(VhostUserGpuUpdate) + size; +- msg->payload.update = (VhostUserGpuUpdate) { +- .scanout_id = i, +- .x = extents->x1, +- .y = extents->y1, +- .width = width, +- .height = height, +- }; +- pixman_image_t *i = +- pixman_image_create_bits(pixman_image_get_format(res->image), +- msg->payload.update.width, +- msg->payload.update.height, +- p + offsetof(VhostUserGpuMsg, +- payload.update.data), +- width * bpp); +- pixman_image_composite(PIXMAN_OP_SRC, +- res->image, NULL, i, +- extents->x1, extents->y1, +- 0, 0, 0, 0, +- width, height); +- pixman_image_unref(i); +- vg_send_msg(g, msg, -1); +- g_free(msg); +- } +- pixman_region_fini(®ion); +- pixman_region_fini(&finalregion); +- } +- pixman_region_fini(&flush_region); +-} +- +-static void +-vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) +-{ +- switch (cmd->cmd_hdr.type) { +- case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: +- vg_get_display_info(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: +- vg_resource_create_2d(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_UNREF: +- vg_resource_unref(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_FLUSH: +- vg_resource_flush(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: +- 
vg_transfer_to_host_2d(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_SET_SCANOUT: +- vg_set_scanout(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: +- vg_resource_attach_backing(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: +- vg_resource_detach_backing(vg, cmd); +- break; +- /* case VIRTIO_GPU_CMD_GET_EDID: */ +- /* break */ +- default: +- g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type); +- cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; +- break; +- } +- if (!cmd->finished) { +- vg_ctrl_response_nodata(vg, cmd, cmd->error ? cmd->error : +- VIRTIO_GPU_RESP_OK_NODATA); +- } +-} +- +-static void +-vg_handle_ctrl(VuDev *dev, int qidx) +-{ +- VuGpu *vg = container_of(dev, VuGpu, dev.parent); +- VuVirtq *vq = vu_get_queue(dev, qidx); +- struct virtio_gpu_ctrl_command *cmd = NULL; +- size_t len; +- +- for (;;) { +- if (vg->wait_ok != 0) { +- return; +- } +- +- cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command)); +- if (!cmd) { +- break; +- } +- cmd->vq = vq; +- cmd->error = 0; +- cmd->finished = false; +- +- len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, +- 0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr)); +- if (len != sizeof(cmd->cmd_hdr)) { +- g_warning("%s: command size incorrect %zu vs %zu\n", +- __func__, len, sizeof(cmd->cmd_hdr)); +- } +- +- virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); +- g_debug("%d %s\n", cmd->cmd_hdr.type, +- vg_cmd_to_string(cmd->cmd_hdr.type)); +- +- if (vg->virgl) { +- vg_virgl_process_cmd(vg, cmd); +- } else { +- vg_process_cmd(vg, cmd); +- } +- +- if (!cmd->finished) { +- QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next); +- vg->inflight++; +- } else { +- g_free(cmd); +- } +- } +-} +- +-static void +-update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data) +-{ +- struct virtio_gpu_simple_resource *res; +- +- res = virtio_gpu_find_resource(g, resource_id); +- g_return_if_fail(res != NULL); +- g_return_if_fail(pixman_image_get_width(res->image) == 64); +- 
g_return_if_fail(pixman_image_get_height(res->image) == 64); +- g_return_if_fail( +- PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32); +- +- memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t)); +-} +- +-static void +-vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor) +-{ +- bool move = cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR; +- +- g_debug("%s move:%d\n", G_STRFUNC, move); +- +- if (move) { +- VhostUserGpuMsg msg = { +- .request = cursor->resource_id ? +- VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE, +- .size = sizeof(VhostUserGpuCursorPos), +- .payload.cursor_pos = { +- .scanout_id = cursor->pos.scanout_id, +- .x = cursor->pos.x, +- .y = cursor->pos.y, +- } +- }; +- vg_send_msg(g, &msg, -1); +- } else { +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_CURSOR_UPDATE, +- .size = sizeof(VhostUserGpuCursorUpdate), +- .payload.cursor_update = { +- .pos = { +- .scanout_id = cursor->pos.scanout_id, +- .x = cursor->pos.x, +- .y = cursor->pos.y, +- }, +- .hot_x = cursor->hot_x, +- .hot_y = cursor->hot_y, +- } +- }; +- if (g->virgl) { +- vg_virgl_update_cursor_data(g, cursor->resource_id, +- msg.payload.cursor_update.data); +- } else { +- update_cursor_data_simple(g, cursor->resource_id, +- msg.payload.cursor_update.data); +- } +- vg_send_msg(g, &msg, -1); +- } +-} +- +-static void +-vg_handle_cursor(VuDev *dev, int qidx) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- VuVirtq *vq = vu_get_queue(dev, qidx); +- VuVirtqElement *elem; +- size_t len; +- struct virtio_gpu_update_cursor cursor; +- +- for (;;) { +- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); +- if (!elem) { +- break; +- } +- g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num); +- +- len = iov_to_buf(elem->out_sg, elem->out_num, +- 0, &cursor, sizeof(cursor)); +- if (len != sizeof(cursor)) { +- g_warning("%s: cursor size incorrect %zu vs %zu\n", +- __func__, len, sizeof(cursor)); +- } else { +- 
virtio_gpu_bswap_32(&cursor, sizeof(cursor)); +- vg_process_cursor_cmd(g, &cursor); +- } +- vu_queue_push(dev, vq, elem, 0); +- vu_queue_notify(dev, vq); +- g_free(elem); +- } +-} +- +-static void +-vg_panic(VuDev *dev, const char *msg) +-{ +- g_critical("%s\n", msg); +- exit(1); +-} +- +-static void +-vg_queue_set_started(VuDev *dev, int qidx, bool started) +-{ +- VuVirtq *vq = vu_get_queue(dev, qidx); +- +- g_debug("queue started %d:%d\n", qidx, started); +- +- switch (qidx) { +- case 0: +- vu_set_queue_handler(dev, vq, started ? vg_handle_ctrl : NULL); +- break; +- case 1: +- vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL); +- break; +- default: +- break; +- } +-} +- +-static void +-set_gpu_protocol_features(VuGpu *g) +-{ +- uint64_t u64; +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES +- }; +- +- assert(g->wait_ok == 0); +- vg_send_msg(g, &msg, -1); +- if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) { +- return; +- } +- +- msg = (VhostUserGpuMsg) { +- .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES, +- .size = sizeof(uint64_t), +- .payload.u64 = 0 +- }; +- vg_send_msg(g, &msg, -1); +-} +- +-static int +-vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- +- switch (msg->request) { +- case VHOST_USER_GPU_SET_SOCKET: { +- g_return_val_if_fail(msg->fd_num == 1, 1); +- g_return_val_if_fail(g->sock_fd == -1, 1); +- g->sock_fd = msg->fds[0]; +- set_gpu_protocol_features(g); +- return 1; +- } +- default: +- return 0; +- } +- +- return 0; +-} +- +-static uint64_t +-vg_get_features(VuDev *dev) +-{ +- uint64_t features = 0; +- +- if (opt_virgl) { +- features |= 1 << VIRTIO_GPU_F_VIRGL; +- } +- +- return features; +-} +- +-static void +-vg_set_features(VuDev *dev, uint64_t features) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL); +- +- if (virgl && !g->virgl_inited) { +- if (!vg_virgl_init(g)) 
{ +- vg_panic(dev, "Failed to initialize virgl"); +- } +- g->virgl_inited = true; +- } +- +- g->virgl = virgl; +-} +- +-static int +-vg_get_config(VuDev *dev, uint8_t *config, uint32_t len) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- +- g_return_val_if_fail(len <= sizeof(struct virtio_gpu_config), -1); +- +- if (opt_virgl) { +- g->virtio_config.num_capsets = vg_virgl_get_num_capsets(); +- } +- +- memcpy(config, &g->virtio_config, len); +- +- return 0; +-} +- +-static int +-vg_set_config(VuDev *dev, const uint8_t *data, +- uint32_t offset, uint32_t size, +- uint32_t flags) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- struct virtio_gpu_config *config = (struct virtio_gpu_config *)data; +- +- if (config->events_clear) { +- g->virtio_config.events_read &= ~config->events_clear; +- } +- +- return 0; +-} +- +-static const VuDevIface vuiface = { +- .set_features = vg_set_features, +- .get_features = vg_get_features, +- .queue_set_started = vg_queue_set_started, +- .process_msg = vg_process_msg, +- .get_config = vg_get_config, +- .set_config = vg_set_config, +-}; +- +-static void +-vg_destroy(VuGpu *g) +-{ +- struct virtio_gpu_simple_resource *res, *tmp; +- +- vug_deinit(&g->dev); +- +- vg_sock_fd_close(g); +- +- QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { +- vg_resource_destroy(g, res); +- } +- +- vugbm_device_destroy(&g->gdev); +-} +- +-static GOptionEntry entries[] = { +- { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, +- "Print capabilities", NULL }, +- { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, +- "Use inherited fd socket", "FDNUM" }, +- { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, +- "Use UNIX socket path", "PATH" }, +- { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node, +- "Specify DRM render node", "PATH" }, +- { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl, +- "Turn virgl rendering on", NULL }, +- { NULL, } +-}; +- +-int +-main(int argc, char *argv[]) +-{ +- 
GOptionContext *context; +- GError *error = NULL; +- GMainLoop *loop = NULL; +- int fd; +- VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 }; +- +- QTAILQ_INIT(&g.reslist); +- QTAILQ_INIT(&g.fenceq); +- +- context = g_option_context_new("QEMU vhost-user-gpu"); +- g_option_context_add_main_entries(context, entries, NULL); +- if (!g_option_context_parse(context, &argc, &argv, &error)) { +- g_printerr("Option parsing failed: %s\n", error->message); +- exit(EXIT_FAILURE); +- } +- g_option_context_free(context); +- +- if (opt_print_caps) { +- g_print("{\n"); +- g_print(" \"type\": \"gpu\",\n"); +- g_print(" \"features\": [\n"); +- g_print(" \"render-node\",\n"); +- g_print(" \"virgl\"\n"); +- g_print(" ]\n"); +- g_print("}\n"); +- exit(EXIT_SUCCESS); +- } +- +- g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node); +- if (opt_render_node && g.drm_rnode_fd == -1) { +- g_printerr("Failed to open DRM rendernode.\n"); +- exit(EXIT_FAILURE); +- } +- +- if (g.drm_rnode_fd >= 0) { +- if (!vugbm_device_init(&g.gdev, g.drm_rnode_fd)) { +- g_warning("Failed to init DRM device, using fallback path"); +- } +- } +- +- if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) { +- g_printerr("Please specify either --fd or --socket-path\n"); +- exit(EXIT_FAILURE); +- } +- +- if (opt_socket_path) { +- int lsock = unix_listen(opt_socket_path, &error_fatal); +- if (lsock < 0) { +- g_printerr("Failed to listen on %s.\n", opt_socket_path); +- exit(EXIT_FAILURE); +- } +- fd = accept(lsock, NULL, NULL); +- close(lsock); +- } else { +- fd = opt_fdnum; +- } +- if (fd == -1) { +- g_printerr("Invalid vhost-user socket.\n"); +- exit(EXIT_FAILURE); +- } +- +- if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) { +- g_printerr("Failed to initialize libvhost-user-glib.\n"); +- exit(EXIT_FAILURE); +- } +- +- loop = g_main_loop_new(NULL, FALSE); +- g_main_loop_run(loop); +- g_main_loop_unref(loop); +- +- vg_destroy(&g); +- if (g.drm_rnode_fd >= 0) { +- close(g.drm_rnode_fd); +- } +- 
+- +- return 0; +-} +diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c +new file mode 100644 +index 0000000..b45d201 +--- /dev/null ++++ b/contrib/vhost-user-gpu/vhost-user-gpu.c +@@ -0,0 +1,1191 @@ ++/* ++ * Virtio vhost-user GPU Device ++ * ++ * Copyright Red Hat, Inc. 2013-2018 ++ * ++ * Authors: ++ * Dave Airlie ++ * Gerd Hoffmann ++ * Marc-André Lureau ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++#include "qemu/osdep.h" ++#include "qemu/drm.h" ++#include "qapi/error.h" ++#include "qemu/sockets.h" ++ ++#include <pixman.h> ++#include <glib-unix.h> ++ ++#include "vugpu.h" ++#include "hw/virtio/virtio-gpu-bswap.h" ++#include "hw/virtio/virtio-gpu-pixman.h" ++#include "virgl.h" ++#include "vugbm.h" ++ ++enum { ++ VHOST_USER_GPU_MAX_QUEUES = 2, ++}; ++ ++struct virtio_gpu_simple_resource { ++ uint32_t resource_id; ++ uint32_t width; ++ uint32_t height; ++ uint32_t format; ++ struct iovec *iov; ++ unsigned int iov_cnt; ++ uint32_t scanout_bitmask; ++ pixman_image_t *image; ++ struct vugbm_buffer buffer; ++ QTAILQ_ENTRY(virtio_gpu_simple_resource) next; ++}; ++ ++static gboolean opt_print_caps; ++static int opt_fdnum = -1; ++static char *opt_socket_path; ++static char *opt_render_node; ++static gboolean opt_virgl; ++ ++static void vg_handle_ctrl(VuDev *dev, int qidx); ++ ++static const char * ++vg_cmd_to_string(int cmd) ++{ ++#define CMD(cmd) [cmd] = #cmd ++ static const char *vg_cmd_str[] = { ++ CMD(VIRTIO_GPU_UNDEFINED), ++ ++ /* 2d commands */ ++ CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF), ++ CMD(VIRTIO_GPU_CMD_SET_SCANOUT), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH), ++ CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING), ++ CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO), ++ CMD(VIRTIO_GPU_CMD_GET_CAPSET), 
++ ++ /* 3d commands */ ++ CMD(VIRTIO_GPU_CMD_CTX_CREATE), ++ CMD(VIRTIO_GPU_CMD_CTX_DESTROY), ++ CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE), ++ CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D), ++ CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D), ++ CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D), ++ CMD(VIRTIO_GPU_CMD_SUBMIT_3D), ++ ++ /* cursor commands */ ++ CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR), ++ CMD(VIRTIO_GPU_CMD_MOVE_CURSOR), ++ }; ++#undef CMD ++ ++ if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) { ++ return vg_cmd_str[cmd]; ++ } else { ++ return "unknown"; ++ } ++} ++ ++static int ++vg_sock_fd_read(int sock, void *buf, ssize_t buflen) ++{ ++ int ret; ++ ++ do { ++ ret = read(sock, buf, buflen); ++ } while (ret < 0 && (errno == EINTR || errno == EAGAIN)); ++ ++ g_warn_if_fail(ret == buflen); ++ return ret; ++} ++ ++static void ++vg_sock_fd_close(VuGpu *g) ++{ ++ if (g->sock_fd >= 0) { ++ close(g->sock_fd); ++ g->sock_fd = -1; ++ } ++} ++ ++static gboolean ++source_wait_cb(gint fd, GIOCondition condition, gpointer user_data) ++{ ++ VuGpu *g = user_data; ++ ++ if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) { ++ return G_SOURCE_CONTINUE; ++ } ++ ++ /* resume */ ++ g->wait_ok = 0; ++ vg_handle_ctrl(&g->dev.parent, 0); ++ ++ return G_SOURCE_REMOVE; ++} ++ ++void ++vg_wait_ok(VuGpu *g) ++{ ++ assert(g->wait_ok == 0); ++ g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP, ++ source_wait_cb, g); ++} ++ ++static int ++vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd) ++{ ++ ssize_t ret; ++ struct iovec iov = { ++ .iov_base = (void *)buf, ++ .iov_len = buflen, ++ }; ++ struct msghdr msg = { ++ .msg_iov = &iov, ++ .msg_iovlen = 1, ++ }; ++ union { ++ struct cmsghdr cmsghdr; ++ char control[CMSG_SPACE(sizeof(int))]; ++ } cmsgu; ++ struct cmsghdr *cmsg; ++ ++ if (fd != -1) { ++ msg.msg_control = cmsgu.control; ++ msg.msg_controllen = sizeof(cmsgu.control); ++ ++ cmsg = CMSG_FIRSTHDR(&msg); ++ cmsg->cmsg_len = 
CMSG_LEN(sizeof(int)); ++ cmsg->cmsg_level = SOL_SOCKET; ++ cmsg->cmsg_type = SCM_RIGHTS; ++ ++ *((int *)CMSG_DATA(cmsg)) = fd; ++ } ++ ++ do { ++ ret = sendmsg(sock, &msg, 0); ++ } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); ++ ++ g_warn_if_fail(ret == buflen); ++ return ret; ++} ++ ++void ++vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd) ++{ ++ if (vg_sock_fd_write(vg->sock_fd, msg, ++ VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) { ++ vg_sock_fd_close(vg); ++ } ++} ++ ++bool ++vg_recv_msg(VuGpu *g, uint32_t expect_req, uint32_t expect_size, ++ gpointer payload) ++{ ++ uint32_t req, flags, size; ++ ++ if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 || ++ vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 || ++ vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) { ++ goto err; ++ } ++ ++ g_return_val_if_fail(req == expect_req, false); ++ g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false); ++ g_return_val_if_fail(size == expect_size, false); ++ ++ if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) { ++ goto err; ++ } ++ ++ return true; ++ ++err: ++ vg_sock_fd_close(g); ++ return false; ++} ++ ++static struct virtio_gpu_simple_resource * ++virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id) ++{ ++ struct virtio_gpu_simple_resource *res; ++ ++ QTAILQ_FOREACH(res, &g->reslist, next) { ++ if (res->resource_id == resource_id) { ++ return res; ++ } ++ } ++ return NULL; ++} ++ ++void ++vg_ctrl_response(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd, ++ struct virtio_gpu_ctrl_hdr *resp, ++ size_t resp_len) ++{ ++ size_t s; ++ ++ if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) { ++ resp->flags |= VIRTIO_GPU_FLAG_FENCE; ++ resp->fence_id = cmd->cmd_hdr.fence_id; ++ resp->ctx_id = cmd->cmd_hdr.ctx_id; ++ } ++ virtio_gpu_ctrl_hdr_bswap(resp); ++ s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len); ++ if (s != resp_len) { ++ g_critical("%s: response size incorrect %zu vs %zu", ++ 
__func__, s, resp_len); ++ } ++ vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s); ++ vu_queue_notify(&g->dev.parent, cmd->vq); ++ cmd->finished = true; ++} ++ ++void ++vg_ctrl_response_nodata(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd, ++ enum virtio_gpu_ctrl_type type) ++{ ++ struct virtio_gpu_ctrl_hdr resp = { ++ .type = type, ++ }; ++ ++ vg_ctrl_response(g, cmd, &resp, sizeof(resp)); ++} ++ ++void ++vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_resp_display_info dpy_info = { {} }; ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_GET_DISPLAY_INFO, ++ .size = 0, ++ }; ++ ++ assert(vg->wait_ok == 0); ++ ++ vg_send_msg(vg, &msg, -1); ++ if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) { ++ return; ++ } ++ ++ vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info)); ++} ++ ++static void ++vg_resource_create_2d(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ pixman_format_code_t pformat; ++ struct virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_create_2d c2d; ++ ++ VUGPU_FILL_CMD(c2d); ++ virtio_gpu_bswap_32(&c2d, sizeof(c2d)); ++ ++ if (c2d.resource_id == 0) { ++ g_critical("%s: resource id 0 is not allowed", __func__); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ res = virtio_gpu_find_resource(g, c2d.resource_id); ++ if (res) { ++ g_critical("%s: resource already exists %d", __func__, c2d.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ res = g_new0(struct virtio_gpu_simple_resource, 1); ++ res->width = c2d.width; ++ res->height = c2d.height; ++ res->format = c2d.format; ++ res->resource_id = c2d.resource_id; ++ ++ pformat = virtio_gpu_get_pixman_format(c2d.format); ++ if (!pformat) { ++ g_critical("%s: host couldn't handle guest format %d", ++ __func__, c2d.format); ++ g_free(res); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; ++ return; ++ } ++ 
vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height); ++ res->image = pixman_image_create_bits(pformat, ++ c2d.width, ++ c2d.height, ++ (uint32_t *)res->buffer.mmap, ++ res->buffer.stride); ++ if (!res->image) { ++ g_critical("%s: resource creation failed %d %d %d", ++ __func__, c2d.resource_id, c2d.width, c2d.height); ++ g_free(res); ++ cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; ++ return; ++ } ++ ++ QTAILQ_INSERT_HEAD(&g->reslist, res, next); ++} ++ ++static void ++vg_disable_scanout(VuGpu *g, int scanout_id) ++{ ++ struct virtio_gpu_scanout *scanout = &g->scanout[scanout_id]; ++ struct virtio_gpu_simple_resource *res; ++ ++ if (scanout->resource_id == 0) { ++ return; ++ } ++ ++ res = virtio_gpu_find_resource(g, scanout->resource_id); ++ if (res) { ++ res->scanout_bitmask &= ~(1 << scanout_id); ++ } ++ ++ scanout->width = 0; ++ scanout->height = 0; ++ ++ if (g->sock_fd >= 0) { ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_SCANOUT, ++ .size = sizeof(VhostUserGpuScanout), ++ .payload.scanout.scanout_id = scanout_id, ++ }; ++ vg_send_msg(g, &msg, -1); ++ } ++} ++ ++static void ++vg_resource_destroy(VuGpu *g, ++ struct virtio_gpu_simple_resource *res) ++{ ++ int i; ++ ++ if (res->scanout_bitmask) { ++ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { ++ if (res->scanout_bitmask & (1 << i)) { ++ vg_disable_scanout(g, i); ++ } ++ } ++ } ++ ++ vugbm_buffer_destroy(&res->buffer); ++ pixman_image_unref(res->image); ++ QTAILQ_REMOVE(&g->reslist, res, next); ++ g_free(res); ++} ++ ++static void ++vg_resource_unref(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_unref unref; ++ ++ VUGPU_FILL_CMD(unref); ++ virtio_gpu_bswap_32(&unref, sizeof(unref)); ++ ++ res = virtio_gpu_find_resource(g, unref.resource_id); ++ if (!res) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, unref.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ 
return; ++ } ++ vg_resource_destroy(g, res); ++} ++ ++int ++vg_create_mapping_iov(VuGpu *g, ++ struct virtio_gpu_resource_attach_backing *ab, ++ struct virtio_gpu_ctrl_command *cmd, ++ struct iovec **iov) ++{ ++ struct virtio_gpu_mem_entry *ents; ++ size_t esize, s; ++ int i; ++ ++ if (ab->nr_entries > 16384) { ++ g_critical("%s: nr_entries is too big (%d > 16384)", ++ __func__, ab->nr_entries); ++ return -1; ++ } ++ ++ esize = sizeof(*ents) * ab->nr_entries; ++ ents = g_malloc(esize); ++ s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, ++ sizeof(*ab), ents, esize); ++ if (s != esize) { ++ g_critical("%s: command data size incorrect %zu vs %zu", ++ __func__, s, esize); ++ g_free(ents); ++ return -1; ++ } ++ ++ *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); ++ for (i = 0; i < ab->nr_entries; i++) { ++ uint64_t len = ents[i].length; ++ (*iov)[i].iov_len = ents[i].length; ++ (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr); ++ if (!(*iov)[i].iov_base || len != ents[i].length) { ++ g_critical("%s: resource %d element %d", ++ __func__, ab->resource_id, i); ++ g_free(*iov); ++ g_free(ents); ++ *iov = NULL; ++ return -1; ++ } ++ } ++ g_free(ents); ++ return 0; ++} ++ ++static void ++vg_resource_attach_backing(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_attach_backing ab; ++ int ret; ++ ++ VUGPU_FILL_CMD(ab); ++ virtio_gpu_bswap_32(&ab, sizeof(ab)); ++ ++ res = virtio_gpu_find_resource(g, ab.resource_id); ++ if (!res) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, ab.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); ++ if (ret != 0) { ++ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; ++ return; ++ } ++ ++ res->iov_cnt = ab.nr_entries; ++} ++ ++static void ++vg_resource_detach_backing(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct 
virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_detach_backing detach; ++ ++ VUGPU_FILL_CMD(detach); ++ virtio_gpu_bswap_32(&detach, sizeof(detach)); ++ ++ res = virtio_gpu_find_resource(g, detach.resource_id); ++ if (!res || !res->iov) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, detach.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ g_free(res->iov); ++ res->iov = NULL; ++ res->iov_cnt = 0; ++} ++ ++static void ++vg_transfer_to_host_2d(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res; ++ int h; ++ uint32_t src_offset, dst_offset, stride; ++ int bpp; ++ pixman_format_code_t format; ++ struct virtio_gpu_transfer_to_host_2d t2d; ++ ++ VUGPU_FILL_CMD(t2d); ++ virtio_gpu_t2d_bswap(&t2d); ++ ++ res = virtio_gpu_find_resource(g, t2d.resource_id); ++ if (!res || !res->iov) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, t2d.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ if (t2d.r.x > res->width || ++ t2d.r.y > res->height || ++ t2d.r.width > res->width || ++ t2d.r.height > res->height || ++ t2d.r.x + t2d.r.width > res->width || ++ t2d.r.y + t2d.r.height > res->height) { ++ g_critical("%s: transfer bounds outside resource" ++ " bounds for resource %d: %d %d %d %d vs %d %d", ++ __func__, t2d.resource_id, t2d.r.x, t2d.r.y, ++ t2d.r.width, t2d.r.height, res->width, res->height); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; ++ return; ++ } ++ ++ format = pixman_image_get_format(res->image); ++ bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8; ++ stride = pixman_image_get_stride(res->image); ++ ++ if (t2d.offset || t2d.r.x || t2d.r.y || ++ t2d.r.width != pixman_image_get_width(res->image)) { ++ void *img_data = pixman_image_get_data(res->image); ++ for (h = 0; h < t2d.r.height; h++) { ++ src_offset = t2d.offset + stride * h; ++ dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp); ++ 
++ iov_to_buf(res->iov, res->iov_cnt, src_offset, ++ img_data ++ + dst_offset, t2d.r.width * bpp); ++ } ++ } else { ++ iov_to_buf(res->iov, res->iov_cnt, 0, ++ pixman_image_get_data(res->image), ++ pixman_image_get_stride(res->image) ++ * pixman_image_get_height(res->image)); ++ } ++} ++ ++static void ++vg_set_scanout(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res, *ores; ++ struct virtio_gpu_scanout *scanout; ++ struct virtio_gpu_set_scanout ss; ++ int fd; ++ ++ VUGPU_FILL_CMD(ss); ++ virtio_gpu_bswap_32(&ss, sizeof(ss)); ++ ++ if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) { ++ g_critical("%s: illegal scanout id specified %d", ++ __func__, ss.scanout_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; ++ return; ++ } ++ ++ if (ss.resource_id == 0) { ++ vg_disable_scanout(g, ss.scanout_id); ++ return; ++ } ++ ++ /* create a surface for this scanout */ ++ res = virtio_gpu_find_resource(g, ss.resource_id); ++ if (!res) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, ss.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ if (ss.r.x > res->width || ++ ss.r.y > res->height || ++ ss.r.width > res->width || ++ ss.r.height > res->height || ++ ss.r.x + ss.r.width > res->width || ++ ss.r.y + ss.r.height > res->height) { ++ g_critical("%s: illegal scanout %d bounds for" ++ " resource %d, (%d,%d)+%d,%d vs %d %d", ++ __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, ++ ss.r.width, ss.r.height, res->width, res->height); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; ++ return; ++ } ++ ++ scanout = &g->scanout[ss.scanout_id]; ++ ++ ores = virtio_gpu_find_resource(g, scanout->resource_id); ++ if (ores) { ++ ores->scanout_bitmask &= ~(1 << ss.scanout_id); ++ } ++ ++ res->scanout_bitmask |= (1 << ss.scanout_id); ++ scanout->resource_id = ss.resource_id; ++ scanout->x = ss.r.x; ++ scanout->y = ss.r.y; ++ scanout->width = ss.r.width; ++ scanout->height = 
ss.r.height; ++ ++ struct vugbm_buffer *buffer = &res->buffer; ++ ++ if (vugbm_buffer_can_get_dmabuf_fd(buffer)) { ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_DMABUF_SCANOUT, ++ .size = sizeof(VhostUserGpuDMABUFScanout), ++ .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) { ++ .scanout_id = ss.scanout_id, ++ .x = ss.r.x, ++ .y = ss.r.y, ++ .width = ss.r.width, ++ .height = ss.r.height, ++ .fd_width = buffer->width, ++ .fd_height = buffer->height, ++ .fd_stride = buffer->stride, ++ .fd_drm_fourcc = buffer->format ++ } ++ }; ++ ++ if (vugbm_buffer_get_dmabuf_fd(buffer, &fd)) { ++ vg_send_msg(g, &msg, fd); ++ close(fd); ++ } ++ } else { ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_SCANOUT, ++ .size = sizeof(VhostUserGpuScanout), ++ .payload.scanout = (VhostUserGpuScanout) { ++ .scanout_id = ss.scanout_id, ++ .width = scanout->width, ++ .height = scanout->height ++ } ++ }; ++ vg_send_msg(g, &msg, -1); ++ } ++} ++ ++static void ++vg_resource_flush(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_flush rf; ++ pixman_region16_t flush_region; ++ int i; ++ ++ VUGPU_FILL_CMD(rf); ++ virtio_gpu_bswap_32(&rf, sizeof(rf)); ++ ++ res = virtio_gpu_find_resource(g, rf.resource_id); ++ if (!res) { ++ g_critical("%s: illegal resource specified %d\n", ++ __func__, rf.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ if (rf.r.x > res->width || ++ rf.r.y > res->height || ++ rf.r.width > res->width || ++ rf.r.height > res->height || ++ rf.r.x + rf.r.width > res->width || ++ rf.r.y + rf.r.height > res->height) { ++ g_critical("%s: flush bounds outside resource" ++ " bounds for resource %d: %d %d %d %d vs %d %d\n", ++ __func__, rf.resource_id, rf.r.x, rf.r.y, ++ rf.r.width, rf.r.height, res->width, res->height); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; ++ return; ++ } ++ ++ pixman_region_init_rect(&flush_region, ++ rf.r.x, 
rf.r.y, rf.r.width, rf.r.height); ++ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { ++ struct virtio_gpu_scanout *scanout; ++ pixman_region16_t region, finalregion; ++ pixman_box16_t *extents; ++ ++ if (!(res->scanout_bitmask & (1 << i))) { ++ continue; ++ } ++ scanout = &g->scanout[i]; ++ ++ pixman_region_init(&finalregion); ++ pixman_region_init_rect(&region, scanout->x, scanout->y, ++ scanout->width, scanout->height); ++ ++ pixman_region_intersect(&finalregion, &flush_region, &region); ++ ++ extents = pixman_region_extents(&finalregion); ++ size_t width = extents->x2 - extents->x1; ++ size_t height = extents->y2 - extents->y1; ++ ++ if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) { ++ VhostUserGpuMsg vmsg = { ++ .request = VHOST_USER_GPU_DMABUF_UPDATE, ++ .size = sizeof(VhostUserGpuUpdate), ++ .payload.update = (VhostUserGpuUpdate) { ++ .scanout_id = i, ++ .x = extents->x1, ++ .y = extents->y1, ++ .width = width, ++ .height = height, ++ } ++ }; ++ vg_send_msg(g, &vmsg, -1); ++ vg_wait_ok(g); ++ } else { ++ size_t bpp = ++ PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8; ++ size_t size = width * height * bpp; ++ ++ void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE + ++ sizeof(VhostUserGpuUpdate) + size); ++ VhostUserGpuMsg *msg = p; ++ msg->request = VHOST_USER_GPU_UPDATE; ++ msg->size = sizeof(VhostUserGpuUpdate) + size; ++ msg->payload.update = (VhostUserGpuUpdate) { ++ .scanout_id = i, ++ .x = extents->x1, ++ .y = extents->y1, ++ .width = width, ++ .height = height, ++ }; ++ pixman_image_t *i = ++ pixman_image_create_bits(pixman_image_get_format(res->image), ++ msg->payload.update.width, ++ msg->payload.update.height, ++ p + offsetof(VhostUserGpuMsg, ++ payload.update.data), ++ width * bpp); ++ pixman_image_composite(PIXMAN_OP_SRC, ++ res->image, NULL, i, ++ extents->x1, extents->y1, ++ 0, 0, 0, 0, ++ width, height); ++ pixman_image_unref(i); ++ vg_send_msg(g, msg, -1); ++ g_free(msg); ++ } ++ pixman_region_fini(&region); ++ pixman_region_fini(&finalregion); ++ 
} ++ pixman_region_fini(&flush_region); ++} ++ ++static void ++vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) ++{ ++ switch (cmd->cmd_hdr.type) { ++ case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: ++ vg_get_display_info(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: ++ vg_resource_create_2d(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_UNREF: ++ vg_resource_unref(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_FLUSH: ++ vg_resource_flush(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: ++ vg_transfer_to_host_2d(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_SET_SCANOUT: ++ vg_set_scanout(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: ++ vg_resource_attach_backing(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: ++ vg_resource_detach_backing(vg, cmd); ++ break; ++ /* case VIRTIO_GPU_CMD_GET_EDID: */ ++ /* break */ ++ default: ++ g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type); ++ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; ++ break; ++ } ++ if (!cmd->finished) { ++ vg_ctrl_response_nodata(vg, cmd, cmd->error ? 
cmd->error : ++ VIRTIO_GPU_RESP_OK_NODATA); ++ } ++} ++ ++static void ++vg_handle_ctrl(VuDev *dev, int qidx) ++{ ++ VuGpu *vg = container_of(dev, VuGpu, dev.parent); ++ VuVirtq *vq = vu_get_queue(dev, qidx); ++ struct virtio_gpu_ctrl_command *cmd = NULL; ++ size_t len; ++ ++ for (;;) { ++ if (vg->wait_ok != 0) { ++ return; ++ } ++ ++ cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command)); ++ if (!cmd) { ++ break; ++ } ++ cmd->vq = vq; ++ cmd->error = 0; ++ cmd->finished = false; ++ ++ len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, ++ 0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr)); ++ if (len != sizeof(cmd->cmd_hdr)) { ++ g_warning("%s: command size incorrect %zu vs %zu\n", ++ __func__, len, sizeof(cmd->cmd_hdr)); ++ } ++ ++ virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); ++ g_debug("%d %s\n", cmd->cmd_hdr.type, ++ vg_cmd_to_string(cmd->cmd_hdr.type)); ++ ++ if (vg->virgl) { ++ vg_virgl_process_cmd(vg, cmd); ++ } else { ++ vg_process_cmd(vg, cmd); ++ } ++ ++ if (!cmd->finished) { ++ QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next); ++ vg->inflight++; ++ } else { ++ g_free(cmd); ++ } ++ } ++} ++ ++static void ++update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data) ++{ ++ struct virtio_gpu_simple_resource *res; ++ ++ res = virtio_gpu_find_resource(g, resource_id); ++ g_return_if_fail(res != NULL); ++ g_return_if_fail(pixman_image_get_width(res->image) == 64); ++ g_return_if_fail(pixman_image_get_height(res->image) == 64); ++ g_return_if_fail( ++ PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32); ++ ++ memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t)); ++} ++ ++static void ++vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor) ++{ ++ bool move = cursor->hdr.type == VIRTIO_GPU_CMD_MOVE_CURSOR; ++ ++ g_debug("%s move:%d\n", G_STRFUNC, move); ++ ++ if (move) { ++ VhostUserGpuMsg msg = { ++ .request = cursor->resource_id ? 
++ VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE, ++ .size = sizeof(VhostUserGpuCursorPos), ++ .payload.cursor_pos = { ++ .scanout_id = cursor->pos.scanout_id, ++ .x = cursor->pos.x, ++ .y = cursor->pos.y, ++ } ++ }; ++ vg_send_msg(g, &msg, -1); ++ } else { ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_CURSOR_UPDATE, ++ .size = sizeof(VhostUserGpuCursorUpdate), ++ .payload.cursor_update = { ++ .pos = { ++ .scanout_id = cursor->pos.scanout_id, ++ .x = cursor->pos.x, ++ .y = cursor->pos.y, ++ }, ++ .hot_x = cursor->hot_x, ++ .hot_y = cursor->hot_y, ++ } ++ }; ++ if (g->virgl) { ++ vg_virgl_update_cursor_data(g, cursor->resource_id, ++ msg.payload.cursor_update.data); ++ } else { ++ update_cursor_data_simple(g, cursor->resource_id, ++ msg.payload.cursor_update.data); ++ } ++ vg_send_msg(g, &msg, -1); ++ } ++} ++ ++static void ++vg_handle_cursor(VuDev *dev, int qidx) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ VuVirtq *vq = vu_get_queue(dev, qidx); ++ VuVirtqElement *elem; ++ size_t len; ++ struct virtio_gpu_update_cursor cursor; ++ ++ for (;;) { ++ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); ++ if (!elem) { ++ break; ++ } ++ g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num); ++ ++ len = iov_to_buf(elem->out_sg, elem->out_num, ++ 0, &cursor, sizeof(cursor)); ++ if (len != sizeof(cursor)) { ++ g_warning("%s: cursor size incorrect %zu vs %zu\n", ++ __func__, len, sizeof(cursor)); ++ } else { ++ virtio_gpu_bswap_32(&cursor, sizeof(cursor)); ++ vg_process_cursor_cmd(g, &cursor); ++ } ++ vu_queue_push(dev, vq, elem, 0); ++ vu_queue_notify(dev, vq); ++ g_free(elem); ++ } ++} ++ ++static void ++vg_panic(VuDev *dev, const char *msg) ++{ ++ g_critical("%s\n", msg); ++ exit(1); ++} ++ ++static void ++vg_queue_set_started(VuDev *dev, int qidx, bool started) ++{ ++ VuVirtq *vq = vu_get_queue(dev, qidx); ++ ++ g_debug("queue started %d:%d\n", qidx, started); ++ ++ switch (qidx) { ++ case 0: ++ vu_set_queue_handler(dev, vq, 
started ? vg_handle_ctrl : NULL); ++ break; ++ case 1: ++ vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL); ++ break; ++ default: ++ break; ++ } ++} ++ ++static void ++set_gpu_protocol_features(VuGpu *g) ++{ ++ uint64_t u64; ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES ++ }; ++ ++ assert(g->wait_ok == 0); ++ vg_send_msg(g, &msg, -1); ++ if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) { ++ return; ++ } ++ ++ msg = (VhostUserGpuMsg) { ++ .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES, ++ .size = sizeof(uint64_t), ++ .payload.u64 = 0 ++ }; ++ vg_send_msg(g, &msg, -1); ++} ++ ++static int ++vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ ++ switch (msg->request) { ++ case VHOST_USER_GPU_SET_SOCKET: { ++ g_return_val_if_fail(msg->fd_num == 1, 1); ++ g_return_val_if_fail(g->sock_fd == -1, 1); ++ g->sock_fd = msg->fds[0]; ++ set_gpu_protocol_features(g); ++ return 1; ++ } ++ default: ++ return 0; ++ } ++ ++ return 0; ++} ++ ++static uint64_t ++vg_get_features(VuDev *dev) ++{ ++ uint64_t features = 0; ++ ++ if (opt_virgl) { ++ features |= 1 << VIRTIO_GPU_F_VIRGL; ++ } ++ ++ return features; ++} ++ ++static void ++vg_set_features(VuDev *dev, uint64_t features) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL); ++ ++ if (virgl && !g->virgl_inited) { ++ if (!vg_virgl_init(g)) { ++ vg_panic(dev, "Failed to initialize virgl"); ++ } ++ g->virgl_inited = true; ++ } ++ ++ g->virgl = virgl; ++} ++ ++static int ++vg_get_config(VuDev *dev, uint8_t *config, uint32_t len) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ ++ g_return_val_if_fail(len <= sizeof(struct virtio_gpu_config), -1); ++ ++ if (opt_virgl) { ++ g->virtio_config.num_capsets = vg_virgl_get_num_capsets(); ++ } ++ ++ memcpy(config, &g->virtio_config, len); ++ ++ return 0; ++} ++ ++static int ++vg_set_config(VuDev *dev, const 
uint8_t *data, ++ uint32_t offset, uint32_t size, ++ uint32_t flags) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ struct virtio_gpu_config *config = (struct virtio_gpu_config *)data; ++ ++ if (config->events_clear) { ++ g->virtio_config.events_read &= ~config->events_clear; ++ } ++ ++ return 0; ++} ++ ++static const VuDevIface vuiface = { ++ .set_features = vg_set_features, ++ .get_features = vg_get_features, ++ .queue_set_started = vg_queue_set_started, ++ .process_msg = vg_process_msg, ++ .get_config = vg_get_config, ++ .set_config = vg_set_config, ++}; ++ ++static void ++vg_destroy(VuGpu *g) ++{ ++ struct virtio_gpu_simple_resource *res, *tmp; ++ ++ vug_deinit(&g->dev); ++ ++ vg_sock_fd_close(g); ++ ++ QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { ++ vg_resource_destroy(g, res); ++ } ++ ++ vugbm_device_destroy(&g->gdev); ++} ++ ++static GOptionEntry entries[] = { ++ { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, ++ "Print capabilities", NULL }, ++ { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, ++ "Use inherited fd socket", "FDNUM" }, ++ { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, ++ "Use UNIX socket path", "PATH" }, ++ { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node, ++ "Specify DRM render node", "PATH" }, ++ { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl, ++ "Turn virgl rendering on", NULL }, ++ { NULL, } ++}; ++ ++int ++main(int argc, char *argv[]) ++{ ++ GOptionContext *context; ++ GError *error = NULL; ++ GMainLoop *loop = NULL; ++ int fd; ++ VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 }; ++ ++ QTAILQ_INIT(&g.reslist); ++ QTAILQ_INIT(&g.fenceq); ++ ++ context = g_option_context_new("QEMU vhost-user-gpu"); ++ g_option_context_add_main_entries(context, entries, NULL); ++ if (!g_option_context_parse(context, &argc, &argv, &error)) { ++ g_printerr("Option parsing failed: %s\n", error->message); ++ exit(EXIT_FAILURE); ++ } ++ g_option_context_free(context); ++ ++ if (opt_print_caps) 
{ ++ g_print("{\n"); ++ g_print(" \"type\": \"gpu\",\n"); ++ g_print(" \"features\": [\n"); ++ g_print(" \"render-node\",\n"); ++ g_print(" \"virgl\"\n"); ++ g_print(" ]\n"); ++ g_print("}\n"); ++ exit(EXIT_SUCCESS); ++ } ++ ++ g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node); ++ if (opt_render_node && g.drm_rnode_fd == -1) { ++ g_printerr("Failed to open DRM rendernode.\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (g.drm_rnode_fd >= 0) { ++ if (!vugbm_device_init(&g.gdev, g.drm_rnode_fd)) { ++ g_warning("Failed to init DRM device, using fallback path"); ++ } ++ } ++ ++ if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) { ++ g_printerr("Please specify either --fd or --socket-path\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (opt_socket_path) { ++ int lsock = unix_listen(opt_socket_path, &error_fatal); ++ if (lsock < 0) { ++ g_printerr("Failed to listen on %s.\n", opt_socket_path); ++ exit(EXIT_FAILURE); ++ } ++ fd = accept(lsock, NULL, NULL); ++ close(lsock); ++ } else { ++ fd = opt_fdnum; ++ } ++ if (fd == -1) { ++ g_printerr("Invalid vhost-user socket.\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) { ++ g_printerr("Failed to initialize libvhost-user-glib.\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ loop = g_main_loop_new(NULL, FALSE); ++ g_main_loop_run(loop); ++ g_main_loop_unref(loop); ++ ++ vg_destroy(&g); ++ if (g.drm_rnode_fd >= 0) { ++ close(g.drm_rnode_fd); ++ } ++ ++ return 0; ++} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch b/SOURCES/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch new file mode 100644 index 0000000..4212f1c --- /dev/null +++ b/SOURCES/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch @@ -0,0 +1,134 @@ +From 548de8acbf0137b6e49a14b63682badfff037d23 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:44 +0100 +Subject: [PATCH 073/116] contrib/libvhost-user: Protect slave fd with mutex +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-70-dgilbert@redhat.com> +Patchwork-id: 93523 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 069/112] contrib/libvhost-user: Protect slave fd with mutex +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +In future patches we'll be performing commands on the slave-fd driven +by commands on queues, since those queues will be driven by individual +threads we need to make sure they don't attempt to use the slave-fd +for multiple commands in parallel. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit c25c02b9e6a196be87a818f459c426556b24770d) +Signed-off-by: Miroslav Rezanina +--- + contrib/libvhost-user/libvhost-user.c | 24 ++++++++++++++++++++---- + contrib/libvhost-user/libvhost-user.h | 3 +++ + 2 files changed, 23 insertions(+), 4 deletions(-) + +diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c +index ec27b78..63e4106 100644 +--- a/contrib/libvhost-user/libvhost-user.c ++++ b/contrib/libvhost-user/libvhost-user.c +@@ -392,26 +392,37 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) + return vu_message_write(dev, conn_fd, vmsg); + } + ++/* ++ * Processes a reply on the slave channel. ++ * Entered with slave_mutex held and releases it before exit. ++ * Returns true on success. 
++ */ + static bool + vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg) + { + VhostUserMsg msg_reply; ++ bool result = false; + + if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { +- return true; ++ result = true; ++ goto out; + } + + if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) { +- return false; ++ goto out; + } + + if (msg_reply.request != vmsg->request) { + DPRINT("Received unexpected msg type. Expected %d received %d", + vmsg->request, msg_reply.request); +- return false; ++ goto out; + } + +- return msg_reply.payload.u64 == 0; ++ result = msg_reply.payload.u64 == 0; ++ ++out: ++ pthread_mutex_unlock(&dev->slave_mutex); ++ return result; + } + + /* Kick the log_call_fd if required. */ +@@ -1105,10 +1116,13 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, + return false; + } + ++ pthread_mutex_lock(&dev->slave_mutex); + if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { ++ pthread_mutex_unlock(&dev->slave_mutex); + return false; + } + ++ /* Also unlocks the slave_mutex */ + return vu_process_message_reply(dev, &vmsg); + } + +@@ -1628,6 +1642,7 @@ vu_deinit(VuDev *dev) + close(dev->slave_fd); + dev->slave_fd = -1; + } ++ pthread_mutex_destroy(&dev->slave_mutex); + + if (dev->sock != -1) { + close(dev->sock); +@@ -1663,6 +1678,7 @@ vu_init(VuDev *dev, + dev->remove_watch = remove_watch; + dev->iface = iface; + dev->log_call_fd = -1; ++ pthread_mutex_init(&dev->slave_mutex, NULL); + dev->slave_fd = -1; + dev->max_queues = max_queues; + +diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h +index 46b6007..1844b6f 100644 +--- a/contrib/libvhost-user/libvhost-user.h ++++ b/contrib/libvhost-user/libvhost-user.h +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include "standard-headers/linux/virtio_ring.h" + + /* Based on qemu/hw/virtio/vhost-user.c */ +@@ -355,6 +356,8 @@ struct VuDev { + VuVirtq *vq; + VuDevInflightInfo inflight_info; + int log_call_fd; ++ /* Must be 
held while using slave_fd */ ++ pthread_mutex_t slave_mutex; + int slave_fd; + uint64_t log_size; + uint8_t *log_table; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch b/SOURCES/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch new file mode 100644 index 0000000..a6177c6 --- /dev/null +++ b/SOURCES/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch @@ -0,0 +1,56 @@ +From f01178897c8f5ff98692a22059dd65e35677eaa3 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Mon, 10 Feb 2020 17:33:58 +0000 +Subject: [PATCH 18/18] docs/arm-cpu-features: Make kvm-no-adjvtime comment + clearer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200210173358.16896-3-drjones@redhat.com> +Patchwork-id: 93772 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] docs/arm-cpu-features: Make kvm-no-adjvtime comment clearer +Bugzilla: 1801320 +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan +RH-Acked-by: Philippe Mathieu-Daudé + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1801320 + +Author: Philippe Mathieu-Daudé +Date: Fri, 07 Feb 2020 14:04:28 +0000 + + docs/arm-cpu-features: Make kvm-no-adjvtime comment clearer + + The bold text sounds like 'knock knock'. Only bolding the + second 'not' makes it easier to read. + + Fixes: dea101a1ae + Signed-off-by: Philippe Mathieu-Daudé + Reviewed-by: Andrew Jones + Message-id: 20200206225148.23923-1-philmd@redhat.com + Signed-off-by: Peter Maydell + +(cherry picked from commit fa3236a970b6ea5be3fa3ad258f1a75920ca1ebb) +Signed-off-by: Danilo C. L. de Paula +--- + docs/arm-cpu-features.rst | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst +index 45d1eb6..48d5054 100644 +--- a/docs/arm-cpu-features.rst ++++ b/docs/arm-cpu-features.rst +@@ -185,7 +185,7 @@ the list of KVM VCPU features and their descriptions. 
+ + kvm-no-adjvtime By default kvm-no-adjvtime is disabled. This + means that by default the virtual time +- adjustment is enabled (vtime is *not not* ++ adjustment is enabled (vtime is not *not* + adjusted). + + When virtual time adjustment is enabled each +-- +1.8.3.1 + diff --git a/SOURCES/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch b/SOURCES/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch new file mode 100644 index 0000000..5d44708 --- /dev/null +++ b/SOURCES/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch @@ -0,0 +1,85 @@ +From 5770fe43fe1e15e6f53cfd3705605e8645b95a98 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 13 Mar 2020 17:17:08 +0000 +Subject: [PATCH 20/20] exec/rom_reset: Free rom data during inmigrate skip +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200313171708.242774-1-dgilbert@redhat.com> +Patchwork-id: 94292 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] exec/rom_reset: Free rom data during inmigrate skip +Bugzilla: 1809380 +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Paolo Bonzini + +From: "Dr. David Alan Gilbert" + +bz: https://bugzilla.redhat.com/show_bug.cgi?id=1809380 +brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27249921 +branch: rhel-av-8.2.0 +upstream: Posted and with review-by, not merged yet + +Commit 355477f8c73e9 skips rom reset when we're an incoming migration +so as not to overwrite shared ram in the ignore-shared migration +optimisation. +However, it's got an unexpected side effect that because it skips +freeing the ROM data, when rom_reset gets called later on, after +migration (e.g. during a reboot), the ROM does get reset to the original +file contents. Because of seabios/x86's weird reboot process +this confuses a reboot into hanging after a migration. 
+ +Fixes: 355477f8c73e9 ("migration: do not rom_reset() during incoming migration") +https://bugzilla.redhat.com/show_bug.cgi?id=1809380 + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. de Paula +--- + hw/core/loader.c | 25 ++++++++++++++++--------- + 1 file changed, 16 insertions(+), 9 deletions(-) + +diff --git a/hw/core/loader.c b/hw/core/loader.c +index 5099f27..375b29b 100644 +--- a/hw/core/loader.c ++++ b/hw/core/loader.c +@@ -1118,19 +1118,26 @@ static void rom_reset(void *unused) + { + Rom *rom; + +- /* +- * We don't need to fill in the RAM with ROM data because we'll fill +- * the data in during the next incoming migration in all cases. Note +- * that some of those RAMs can actually be modified by the guest on ARM +- * so this is probably the only right thing to do here. +- */ +- if (runstate_check(RUN_STATE_INMIGRATE)) +- return; +- + QTAILQ_FOREACH(rom, &roms, next) { + if (rom->fw_file) { + continue; + } ++ /* ++ * We don't need to fill in the RAM with ROM data because we'll fill ++ * the data in during the next incoming migration in all cases. Note ++ * that some of those RAMs can actually be modified by the guest. ++ */ ++ if (runstate_check(RUN_STATE_INMIGRATE)) { ++ if (rom->data && rom->isrom) { ++ /* ++ * Free it so that a rom_reset after migration doesn't ++ * overwrite a potentially modified 'rom'. 
++ */ ++ rom_free_data(rom); ++ } ++ continue; ++ } ++ + if (rom->data == NULL) { + continue; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-file-posix-Drop-hdev_co_create_opts.patch b/SOURCES/kvm-file-posix-Drop-hdev_co_create_opts.patch new file mode 100644 index 0000000..ea2edbd --- /dev/null +++ b/SOURCES/kvm-file-posix-Drop-hdev_co_create_opts.patch @@ -0,0 +1,131 @@ +From 3d3509c010129bd15eb1f5ec1a7b9eedcdbf23f6 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:44 +0000 +Subject: [PATCH 03/20] file-posix: Drop hdev_co_create_opts() + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-4-mlevitsk@redhat.com> +Patchwork-id: 94225 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/6] file-posix: Drop hdev_co_create_opts() +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +The generic fallback implementation effectively does the same. + +Reviewed-by: Maxim Levitsky +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-4-mreitz@redhat.com> +Signed-off-by: Max Reitz +(cherry picked from commit 87ca3b8fa615b278b33cabf9ed22b3f44b5214ba) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. de Paula +--- + block/file-posix.c | 67 ------------------------------------------------------ + 1 file changed, 67 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 1b805bd..fd29372 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -3418,67 +3418,6 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, + return raw_do_pwrite_zeroes(bs, offset, bytes, flags, true); + } + +-static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts, +- Error **errp) +-{ +- int fd; +- int ret = 0; +- struct stat stat_buf; +- int64_t total_size = 0; +- bool has_prefix; +- +- /* This function is used by both protocol block drivers and therefore either +- * of these prefixes may be given. 
+- * The return value has to be stored somewhere, otherwise this is an error +- * due to -Werror=unused-value. */ +- has_prefix = +- strstart(filename, "host_device:", &filename) || +- strstart(filename, "host_cdrom:" , &filename); +- +- (void)has_prefix; +- +- ret = raw_normalize_devicepath(&filename, errp); +- if (ret < 0) { +- return ret; +- } +- +- /* Read out options */ +- total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), +- BDRV_SECTOR_SIZE); +- +- fd = qemu_open(filename, O_WRONLY | O_BINARY); +- if (fd < 0) { +- ret = -errno; +- error_setg_errno(errp, -ret, "Could not open device"); +- return ret; +- } +- +- if (fstat(fd, &stat_buf) < 0) { +- ret = -errno; +- error_setg_errno(errp, -ret, "Could not stat device"); +- } else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) { +- error_setg(errp, +- "The given file is neither a block nor a character device"); +- ret = -ENODEV; +- } else if (lseek(fd, 0, SEEK_END) < total_size) { +- error_setg(errp, "Device is too small"); +- ret = -ENOSPC; +- } +- +- if (!ret && total_size) { +- uint8_t buf[BDRV_SECTOR_SIZE] = { 0 }; +- int64_t zero_size = MIN(BDRV_SECTOR_SIZE, total_size); +- if (lseek(fd, 0, SEEK_SET) == -1) { +- ret = -errno; +- } else { +- ret = qemu_write_full(fd, buf, zero_size); +- ret = ret == zero_size ? 
0 : -errno; +- } +- } +- qemu_close(fd); +- return ret; +-} +- + static BlockDriver bdrv_host_device = { + .format_name = "host_device", + .protocol_name = "host_device", +@@ -3491,8 +3430,6 @@ static BlockDriver bdrv_host_device = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, +- .bdrv_co_create_opts = hdev_co_create_opts, +- .create_opts = &raw_create_opts, + .mutable_opts = mutable_opts, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, +@@ -3619,8 +3556,6 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, +- .bdrv_co_create_opts = hdev_co_create_opts, +- .create_opts = &raw_create_opts, + .mutable_opts = mutable_opts, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + +@@ -3753,8 +3688,6 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, +- .bdrv_co_create_opts = hdev_co_create_opts, +- .create_opts = &raw_create_opts, + .mutable_opts = mutable_opts, + + .bdrv_co_preadv = raw_co_preadv, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch b/SOURCES/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch new file mode 100644 index 0000000..f01dec2 --- /dev/null +++ b/SOURCES/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch @@ -0,0 +1,100 @@ +From cebc614e5ddd1f770c4d6dc26c066791f36e56df Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:02 +0000 +Subject: [PATCH 05/18] hmp: Allow using qdev ID for qemu-io command + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-5-kwolf@redhat.com> +Patchwork-id: 93750 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/6] hmp: Allow using qdev ID for qemu-io command +Bugzilla: 1781637 
+RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +In order to issue requests on an existing BlockBackend with the +'qemu-io' HMP command, allow specifying the BlockBackend not only with a +BlockBackend name, but also with a qdev ID/QOM path for a device that +owns the (possibly anonymous) BlockBackend. + +Because qdev names could be conflicting with BlockBackend and node +names, introduce a -d option to explicitly address a device. If the +option is not given, a BlockBackend or a node is addressed. + +Signed-off-by: Kevin Wolf +(cherry picked from commit 89b6fc45614bb45dcd58f1590415afe5c2791abd) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + hmp-commands.hx | 8 +++++--- + monitor/hmp-cmds.c | 28 ++++++++++++++++++---------- + 2 files changed, 23 insertions(+), 13 deletions(-) + +diff --git a/hmp-commands.hx b/hmp-commands.hx +index cfcc044..dc23185 100644 +--- a/hmp-commands.hx ++++ b/hmp-commands.hx +@@ -1875,9 +1875,11 @@ ETEXI + + { + .name = "qemu-io", +- .args_type = "device:B,command:s", +- .params = "[device] \"[command]\"", +- .help = "run a qemu-io command on a block device", ++ .args_type = "qdev:-d,device:B,command:s", ++ .params = "[-d] [device] \"[command]\"", ++ .help = "run a qemu-io command on a block device\n\t\t\t" ++ "-d: [device] is a device ID rather than a " ++ "drive ID or node name", + .cmd = hmp_qemu_io, + }, + +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index b2551c1..5f8941d 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -2468,23 +2468,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) + { + BlockBackend *blk; + BlockBackend *local_blk = NULL; ++ bool qdev = qdict_get_try_bool(qdict, "qdev", false); + const char* device = qdict_get_str(qdict, "device"); + const char* command = qdict_get_str(qdict, "command"); + Error *err = NULL; + int ret; + +- blk = blk_by_name(device); +- if (!blk) { +- BlockDriverState *bs = bdrv_lookup_bs(NULL, device, 
&err); +- if (bs) { +- blk = local_blk = blk_new(bdrv_get_aio_context(bs), +- 0, BLK_PERM_ALL); +- ret = blk_insert_bs(blk, bs, &err); +- if (ret < 0) { ++ if (qdev) { ++ blk = blk_by_qdev_id(device, &err); ++ if (!blk) { ++ goto fail; ++ } ++ } else { ++ blk = blk_by_name(device); ++ if (!blk) { ++ BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); ++ if (bs) { ++ blk = local_blk = blk_new(bdrv_get_aio_context(bs), ++ 0, BLK_PERM_ALL); ++ ret = blk_insert_bs(blk, bs, &err); ++ if (ret < 0) { ++ goto fail; ++ } ++ } else { + goto fail; + } +- } else { +- goto fail; + } + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch b/SOURCES/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch new file mode 100644 index 0000000..0f0f126 --- /dev/null +++ b/SOURCES/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch @@ -0,0 +1,262 @@ +From e6c3fbfc82863180007569cf2a9132c28a47bf1f Mon Sep 17 00:00:00 2001 +From: "Daniel P. Berrange" +Date: Mon, 20 Jan 2020 16:13:08 +0000 +Subject: [PATCH 01/18] hw/smbios: set new default SMBIOS fields for Windows + driver support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. 
Berrange +Message-id: <20200120161308.584989-2-berrange@redhat.com> +Patchwork-id: 93422 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] hw/smbios: set new default SMBIOS fields for Windows driver support +Bugzilla: 1782529 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Igor Mammedov +RH-Acked-by: Laszlo Ersek + +For Windows driver support, we have to follow this doc in order to +enable Windows to automatically determine the right drivers to install +for a given guest / host combination: + + https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer + +Out of the choices available, it was decided that the Windows drivers +will be written to expect use of the scheme documented as "HardwareID-6" +against Windows 10. This uses SMBIOS System (Type 1) and Base Board +(Type 2) tables and will match on + + System Manufacturer = Red Hat + System SKU Number = 8.2.0 + Baseboard Manufacturer = Red Hat + Baseboard Product = RHEL-AV + +The new SMBIOS fields will be tied to machine type and only reported for +pc-q35-8.2.0 machine and later. + +The old SMBIOS fields, previously reported by all machines were: + + System Manufacturer: Red Hat + System Product Name: KVM + System Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) + System Family: Red Hat Enterprise Linux + Baseboard Manufacturer: Red Hat + Baseboard Product Name: KVM + Baseboard Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) + Chassis Manufacturer: Red Hat + Chassis Product Name: KVM + Chassis Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) + Processor Manufacturer: Red Hat + Processor Product Name: KVM + Processor Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) + +This information will continue to be reported for all machines, except +where it conflicts with the requirement of the new SMBIOS data. IOW, +the "Baseboard Product Name" will change to "RHEL-AV" for pc-q35-8.2.0 +machine types and later. 
+ +Management applications MUST NEVER override the 4 new SMBIOS fields that +are used for Windows driver matching, with differing values. Aside from +this, they are free to override any other field, including those from +the old SMBIOS field data. + +In particular if a management application wants to report its own +product name and version, it is recommended to use "System product" +and "System version" as identifying fields, as these avoid a clash with +the new SMBIOS fields used for Windows drivers. + +Note that until now the Baseboard (type 2) table has only been generated +by QEMU if explicitly asked for on the CLI. This patch makes it always +present for new machine types. + +Signed-off-by: Daniel P. Berrangé +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/virt.c | 2 +- + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 8 ++++++++ + hw/smbios/smbios.c | 45 +++++++++++++++++++++++++++++++++++++++++--- + include/hw/firmware/smbios.h | 5 ++++- + include/hw/i386/pc.h | 3 +++ + 6 files changed, 60 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index d30d38c..2dcf6e7 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1423,7 +1423,7 @@ static void virt_build_smbios(VirtMachineState *vms) + + smbios_set_defaults("QEMU", product, + vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, +- true, SMBIOS_ENTRY_POINT_30); ++ true, NULL, NULL, SMBIOS_ENTRY_POINT_30); + + smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len); +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index bd7fdb9..2ac94d5 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 7531d8e..e975643 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +@@ -565,8 +567,11 @@ static void pc_q35_init_rhel820(MachineState *machine) + + static void pc_q35_machine_rhel820_options(MachineClass *m) + { ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel_options(m); + m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.2.0"; + } + + DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, +@@ -579,9 +584,12 @@ static void pc_q35_init_rhel810(MachineState *machine) + + static void pc_q35_machine_rhel810_options(MachineClass *m) + { ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel820_options(m); + m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; + m->alias = NULL; ++ pcmc->smbios_stream_product = NULL; ++ pcmc->smbios_stream_version = NULL; + compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); + compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); 
+ } +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index e6e9355..d65c149 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -57,6 +57,9 @@ static bool smbios_legacy = true; + static bool smbios_uuid_encoded = true; + /* end: legacy structures & constants for <= 2.0 machines */ + ++/* Set to true for modern Windows 10 HardwareID-6 compat */ ++static bool smbios_type2_required; ++ + + uint8_t *smbios_tables; + size_t smbios_tables_len; +@@ -532,7 +535,7 @@ static void smbios_build_type_1_table(void) + + static void smbios_build_type_2_table(void) + { +- SMBIOS_BUILD_TABLE_PRE(2, 0x200, false); /* optional */ ++ SMBIOS_BUILD_TABLE_PRE(2, 0x200, smbios_type2_required); + + SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); + SMBIOS_TABLE_SET_STR(2, product_str, type2.product); +@@ -753,7 +756,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type) ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type) + { + smbios_have_defaults = true; + smbios_legacy = legacy_mode; +@@ -774,12 +780,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + g_free(smbios_entries); + } + ++ /* ++ * If @stream_product & @stream_version are non-NULL, then ++ * we're following rules for new Windows driver support. 
++ * The data we have to report is defined in this doc: ++ * ++ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer ++ * ++ * The Windows drivers are written to expect use of the ++ * scheme documented as "HardwareID-6" against Windows 10, ++ * which uses SMBIOS System (Type 1) and Base Board (Type 2) ++ * tables and will match on ++ * ++ * System Manufacturer = Red Hat (@manufacturer) ++ * System SKU Number = 8.2.0 (@stream_version) ++ * Baseboard Manufacturer = Red Hat (@manufacturer) ++ * Baseboard Product = RHEL-AV (@stream_product) ++ * ++ * NB, SKU must be changed with each RHEL-AV release ++ * ++ * Other fields can be freely used by applications using ++ * QEMU. For example apps can use the "System product" ++ * and "System version" to identify themselves. ++ * ++ * We get 'System Manufacturer' and 'Baseboard Manufacturer' ++ */ + SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type1.product, product); + SMBIOS_SET_DEFAULT(type1.version, version); + SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); ++ if (stream_version != NULL) { ++ SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ } + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); +- SMBIOS_SET_DEFAULT(type2.product, product); ++ if (stream_product != NULL) { ++ SMBIOS_SET_DEFAULT(type2.product, stream_product); ++ smbios_type2_required = true; ++ } else { ++ SMBIOS_SET_DEFAULT(type2.product, product); ++ } + SMBIOS_SET_DEFAULT(type2.version, version); + SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type3.version, version); +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 02a0ced..67e38a1 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -267,7 +267,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char 
*manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type); ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type); + uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 2e362c8..b9f29ba 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -109,6 +109,9 @@ typedef struct PCMachineClass { + bool smbios_defaults; + bool smbios_legacy_mode; + bool smbios_uuid_encoded; ++ /* New fields needed for Windows HardwareID-6 matching */ ++ const char *smbios_stream_product; ++ const char *smbios_stream_version; + + /* RAM / address space compat: */ + bool gigabyte_align; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch b/SOURCES/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch new file mode 100644 index 0000000..5d62ace --- /dev/null +++ b/SOURCES/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch @@ -0,0 +1,77 @@ +From 4543a3c19816bd07f27eb900f20ae609df03703c Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Mon, 23 Dec 2019 21:10:31 +0000 +Subject: [PATCH 1/2] i386: Remove cpu64-rhel6 CPU model + +RH-Author: Eduardo Habkost +Message-id: <20191223211031.26503-1-ehabkost@redhat.com> +Patchwork-id: 93213 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] i386: Remove cpu64-rhel6 CPU model +Bugzilla: 1741345 +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Laszlo Ersek + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1741345 +BRANCH: rhel-av-8.2.0 +Upstream: not applicable +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25525975 + +We don't provide rhel6 machine types anymore, so we don't need to +provide compatibility with RHEL6.
cpu64-rhel6 was documented as +deprecated and scheduled for removal in 8.2, so now it's time to +remove it. + +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 26 +------------------------- + 1 file changed, 1 insertion(+), 25 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 790db77..6dce6f2 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1829,12 +1829,7 @@ static CPUCaches epyc_cache_info = { + + static X86CPUDefinition builtin_x86_defs[] = { + { +- /* qemu64 is the default CPU model for all *-rhel7.* machine-types. +- * The default on RHEL-6 was cpu64-rhel6. +- * libvirt assumes that qemu64 is the default for _all_ machine-types, +- * so we should try to keep qemu64 and cpu64-rhel6 as similar as +- * possible. +- */ ++ /* qemu64 is the default CPU model for all machine-types */ + .name = "qemu64", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, +@@ -2135,25 +2130,6 @@ static X86CPUDefinition builtin_x86_defs[] = { + .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", + }, + { +- .name = "cpu64-rhel6", +- .level = 4, +- .vendor = CPUID_VENDOR_AMD, +- .family = 6, +- .model = 13, +- .stepping = 3, +- .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | +- CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | +- CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | +- CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | +- CPUID_PSE | CPUID_DE | CPUID_FP87, +- .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, +- .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, +- .features[FEAT_8000_0001_ECX] = CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | +- CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, +- .xlevel = 0x8000000A, +- .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", +- }, +- { + .name = "Conroe", + .level = 10, + .vendor = CPUID_VENDOR_INTEL, +-- +1.8.3.1 + diff --git 
a/SOURCES/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch b/SOURCES/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch new file mode 100644 index 0000000..1027341 --- /dev/null +++ b/SOURCES/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch @@ -0,0 +1,95 @@ +From ccda4494b0ea4b81b6b0c3e539a0bcf7e673c68c Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Thu, 5 Dec 2019 21:56:50 +0000 +Subject: [PATCH 01/18] i386: Resolve CPU models to v1 by default + +RH-Author: Eduardo Habkost +Message-id: <20191205225650.772600-2-ehabkost@redhat.com> +Patchwork-id: 92907 +O-Subject: [RHEL-AV-8.1.1 qemu-kvm PATCH 1/1] i386: Resolve CPU models to v1 by default +Bugzilla: 1787291 1779078 1779078 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1779078 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25187823 +Upstream: submitted, Message-Id: <20191205223339.764534-1-ehabkost@redhat.com> + +When using `query-cpu-definitions` using `-machine none`, +QEMU is resolving all CPU models to their latest versions. The +actual CPU model version being used by another machine type (e.g. +`pc-q35-4.0`) might be different. + +In theory, this was OK because the correct CPU model +version is returned when using the correct `-machine` argument. + +Except that in practice, this breaks libvirt expectations: +libvirt always uses `-machine none` when checking if a CPU model +is runnable, because runnability is not expected to be affected +when the machine type is changed. + +For example, when running on a Haswell host without TSX, +Haswell-v4 is runnable, but Haswell-v1 is not. On those hosts, +`query-cpu-definitions` says Haswell is runnable if using +`-machine none`, but Haswell is actually not runnable using any +of the `pc-*` machine types (because they resolve Haswell to +Haswell-v1).
In other words, we're breaking the "runnability +guarantee" we promised to not break for a few releases (see +qemu-deprecated.texi). + +To address this issue, change the default CPU model version to v1 +on all machine types, so we make `query-cpu-definitions` output +when using `-machine none` match the results when using `pc-*`. +This will change in the future (the plan is to always return the +latest CPU model version if using `-machine none`), but only +after giving libvirt the opportunity to adapt. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1779078 +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. de Paula +--- + qemu-deprecated.texi | 7 +++++++ + target/i386/cpu.c | 8 +++++++- + 2 files changed, 14 insertions(+), 1 deletion(-) + +diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi +index 4b4b742..534ebe9 100644 +--- a/qemu-deprecated.texi ++++ b/qemu-deprecated.texi +@@ -374,6 +374,13 @@ guarantees must resolve the CPU model aliases using te + ``alias-of'' field returned by the ``query-cpu-definitions'' QMP + command. + ++While those guarantees are kept, the return value of ++``query-cpu-definitions'' will have existing CPU model aliases ++point to a version that doesn't break runnability guarantees ++(specifically, version 1 of those CPU models). In future QEMU ++versions, aliases will point to newer CPU model versions ++depending on the machine type, so management software must ++resolve CPU model aliases before starting a virtual machine. 
+ + @node Recently removed features + @appendix Recently removed features +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6dce6f2..863192c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3926,7 +3926,13 @@ static PropValue tcg_default_props[] = { + }; + + +-X86CPUVersion default_cpu_version = CPU_VERSION_LATEST; ++/* ++ * We resolve CPU model aliases using -v1 when using "-machine ++ * none", but this is just for compatibility while libvirt isn't ++ * adapted to resolve CPU model versions before creating VMs. ++ * See "Runnability guarantee of CPU models" at * qemu-deprecated.texi. ++ */ ++X86CPUVersion default_cpu_version = 1; + + void x86_cpu_set_default_version(X86CPUVersion version) + { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Add-iothread-cases-to-155.patch b/SOURCES/kvm-iotests-Add-iothread-cases-to-155.patch new file mode 100644 index 0000000..24ac90c --- /dev/null +++ b/SOURCES/kvm-iotests-Add-iothread-cases-to-155.patch @@ -0,0 +1,147 @@ +From 2366cd9066e79d6c93a3a28710aea987b2c8f454 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:38 +0000 +Subject: [PATCH 18/20] iotests: Add iothread cases to 155 + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-13-kwolf@redhat.com> +Patchwork-id: 94289 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 12/13] iotests: Add iothread cases to 155 +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +This patch adds test cases for attaching the backing chain to a mirror +job target right before finalising the job, where the image is in a +non-mainloop AioContext (i.e. the backing chain needs to be moved to the +AioContext of the mirror target). + +This requires switching the test case from virtio-blk to virtio-scsi +because virtio-blk only actually starts using the iothreads when the +guest driver initialises the device (which never happens in a test case +without a guest OS). 
virtio-scsi always keeps its block nodes in the +AioContext of the requested iothread without guest interaction. + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-7-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit 6a5f6403a11307794ec79d277a065c137cfc12b2) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/155 | 32 +++++++++++++++++++++++--------- + tests/qemu-iotests/155.out | 4 ++-- + 2 files changed, 25 insertions(+), 11 deletions(-) + +diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 +index 3053e50..b552d1f 100755 +--- a/tests/qemu-iotests/155 ++++ b/tests/qemu-iotests/155 +@@ -49,11 +49,14 @@ target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) + # chain opened right away. If False, blockdev-add + # opens it without a backing file and job completion + # is supposed to open the backing chain. ++# use_iothread: If True, an iothread is configured for the virtio-blk device ++# that uses the image being mirrored + + class BaseClass(iotests.QMPTestCase): + target_blockdev_backing = None + target_real_backing = None + target_open_with_backing = True ++ use_iothread = False + + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, back0_img, '1440K') +@@ -69,7 +72,16 @@ class BaseClass(iotests.QMPTestCase): + 'file': {'driver': 'file', + 'filename': source_img}} + self.vm.add_blockdev(self.vm.qmp_to_opts(blockdev)) +- self.vm.add_device('virtio-blk,id=qdev0,drive=source') ++ ++ if self.use_iothread: ++ self.vm.add_object('iothread,id=iothread0') ++ iothread = ",iothread=iothread0" ++ else: ++ iothread = "" ++ ++ self.vm.add_device('virtio-scsi%s' % iothread) ++ self.vm.add_device('scsi-hd,id=qdev0,drive=source') ++ + self.vm.launch() + + self.assertIntactSourceBackingChain() +@@ -182,24 +194,21 @@ class MirrorBaseClass(BaseClass): + def testFull(self): + self.runMirror('full') + +- node = self.findBlockNode('target', +-
'/machine/peripheral/qdev0/virtio-backend') ++ node = self.findBlockNode('target', 'qdev0') + self.assertCorrectBackingImage(node, None) + self.assertIntactSourceBackingChain() + + def testTop(self): + self.runMirror('top') + +- node = self.findBlockNode('target', +- '/machine/peripheral/qdev0/virtio-backend') ++ node = self.findBlockNode('target', 'qdev0') + self.assertCorrectBackingImage(node, back2_img) + self.assertIntactSourceBackingChain() + + def testNone(self): + self.runMirror('none') + +- node = self.findBlockNode('target', +- '/machine/peripheral/qdev0/virtio-backend') ++ node = self.findBlockNode('target', 'qdev0') + self.assertCorrectBackingImage(node, source_img) + self.assertIntactSourceBackingChain() + +@@ -252,6 +261,9 @@ class TestBlockdevMirrorReopen(MirrorBaseClass): + backing="backing") + self.assert_qmp(result, 'return', {}) + ++class TestBlockdevMirrorReopenIothread(TestBlockdevMirrorReopen): ++ use_iothread = True ++ + # Attach the backing chain only during completion, with blockdev-snapshot + class TestBlockdevMirrorSnapshot(MirrorBaseClass): + cmd = 'blockdev-mirror' +@@ -268,6 +280,9 @@ class TestBlockdevMirrorSnapshot(MirrorBaseClass): + overlay="target") + self.assert_qmp(result, 'return', {}) + ++class TestBlockdevMirrorSnapshotIothread(TestBlockdevMirrorSnapshot): ++ use_iothread = True ++ + class TestCommit(BaseClass): + existing = False + +@@ -283,8 +298,7 @@ class TestCommit(BaseClass): + + self.vm.event_wait('BLOCK_JOB_COMPLETED') + +- node = self.findBlockNode(None, +- '/machine/peripheral/qdev0/virtio-backend') ++ node = self.findBlockNode(None, 'qdev0') + self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename', + back1_img) + self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename', +diff --git a/tests/qemu-iotests/155.out b/tests/qemu-iotests/155.out +index 4fd1c2d..ed714d5 100644 +--- a/tests/qemu-iotests/155.out ++++ b/tests/qemu-iotests/155.out +@@ -1,5 +1,5 @@ +-......................... 
++............................... + ---------------------------------------------------------------------- +-Ran 25 tests ++Ran 31 tests + + OK +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Add-test-for-image-creation-fallback.patch b/SOURCES/kvm-iotests-Add-test-for-image-creation-fallback.patch new file mode 100644 index 0000000..a8ea8f7 --- /dev/null +++ b/SOURCES/kvm-iotests-Add-test-for-image-creation-fallback.patch @@ -0,0 +1,138 @@ +From 55f3a02574da226299d99bd74d12dd91b0f228dc Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:46 +0000 +Subject: [PATCH 05/20] iotests: Add test for image creation fallback + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-6-mlevitsk@redhat.com> +Patchwork-id: 94228 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/6] iotests: Add test for image creation fallback +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-6-mreitz@redhat.com> +Reviewed-by: Eric Blake +Reviewed-by: Maxim Levitsky +[mreitz: Added a note that NBD does not support resizing, which is why + the second case is expected to fail] +Signed-off-by: Max Reitz +(cherry picked from commit 4dddeac115c5a2c5f74731fda0afd031a0b45490) +Signed-off-by: Maxim Levitsky + +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/259 | 62 ++++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/259.out | 14 +++++++++++ + tests/qemu-iotests/group | 1 + + 3 files changed, 77 insertions(+) + create mode 100755 tests/qemu-iotests/259 + create mode 100644 tests/qemu-iotests/259.out + +diff --git a/tests/qemu-iotests/259 b/tests/qemu-iotests/259 +new file mode 100755 +index 0000000..62e29af +--- /dev/null ++++ b/tests/qemu-iotests/259 +@@ -0,0 +1,62 @@ ++#!/usr/bin/env bash ++# ++# Test generic image creation fallback (by using NBD) ++# ++# Copyright (C) 2019 Red Hat, Inc. 
++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++# creator ++owner=mreitz@redhat.com ++ ++seq=$(basename $0) ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++. ./common.rc ++. ./common.filter ++ ++_supported_fmt raw ++_supported_proto nbd ++_supported_os Linux ++ ++ ++_make_test_img 64M ++ ++echo ++echo '--- Testing creation ---' ++ ++$QEMU_IMG create -f qcow2 "$TEST_IMG" 64M | _filter_img_create ++$QEMU_IMG info "$TEST_IMG" | _filter_img_info ++ ++echo ++echo '--- Testing creation for which the node would need to grow ---' ++ ++# NBD does not support resizing, so this will fail ++$QEMU_IMG create -f qcow2 -o preallocation=metadata "$TEST_IMG" 64M 2>&1 \ ++ | _filter_img_create ++ ++# success, all done ++echo "*** done" ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/259.out b/tests/qemu-iotests/259.out +new file mode 100644 +index 0000000..ffed19c +--- /dev/null ++++ b/tests/qemu-iotests/259.out +@@ -0,0 +1,14 @@ ++QA output created by 259 ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 ++ ++--- Testing creation --- ++Formatting 'TEST_DIR/t.IMGFMT', fmt=qcow2 size=67108864 ++image: TEST_DIR/t.IMGFMT ++file format: qcow2 ++virtual size: 64 MiB (67108864 bytes) ++disk size: unavailable ++ ++--- 
Testing creation for which the node would need to grow --- +qemu-img: TEST_DIR/t.IMGFMT: Could not resize image: Image format driver does not support resize +Formatting 'TEST_DIR/t.IMGFMT', fmt=qcow2 size=67108864 preallocation=metadata +*** done +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index c0e8197..e47cbfc 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -273,6 +273,7 @@ + 256 rw quick + 257 rw + 258 rw quick ++259 rw auto quick + 260 rw quick + 261 rw + 262 rw quick migration +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Create-VM.blockdev_create.patch b/SOURCES/kvm-iotests-Create-VM.blockdev_create.patch new file mode 100644 index 0000000..805b31a --- /dev/null +++ b/SOURCES/kvm-iotests-Create-VM.blockdev_create.patch @@ -0,0 +1,59 @@ +From 05fedde1374abb180cd2b51457385d8128aa7fe4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:00 +0000 +Subject: [PATCH 03/18] iotests: Create VM.blockdev_create() + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-3-kwolf@redhat.com> +Patchwork-id: 93748 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/6] iotests: Create VM.blockdev_create() +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +We have several almost identical copies of a blockdev_create() function +in different test cases. Time to create one unified function in +iotests.py. + +To keep the diff manageable, this patch only creates the function and +follow-up patches will convert the individual test cases. + +Signed-off-by: Kevin Wolf +(cherry picked from commit e9dbd1cae86f7cb6f8e470e1485aeb0c6e23ae64) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L.
de Paula +--- + tests/qemu-iotests/iotests.py | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 3cff671..5741efb 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -638,6 +638,22 @@ class VM(qtest.QEMUQtestMachine): + elif status == 'null': + return error + ++ # Returns None on success, and an error string on failure ++ def blockdev_create(self, options, job_id='job0', filters=None): ++ if filters is None: ++ filters = [filter_qmp_testfiles] ++ result = self.qmp_log('blockdev-create', filters=filters, ++ job_id=job_id, options=options) ++ ++ if 'return' in result: ++ assert result['return'] == {} ++ job_result = self.run_job(job_id) ++ else: ++ job_result = result['error'] ++ ++ log("") ++ return job_result ++ + def enable_migration_events(self, name): + log('Enabling migration QMP events on %s...' % name) + log(self.qmp('migrate-set-capabilities', capabilities=[ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Fix-run_job-with-use_log-False.patch b/SOURCES/kvm-iotests-Fix-run_job-with-use_log-False.patch new file mode 100644 index 0000000..b105fc2 --- /dev/null +++ b/SOURCES/kvm-iotests-Fix-run_job-with-use_log-False.patch @@ -0,0 +1,47 @@ +From bb7b968a02c97564596b73d8d080cd745d96ed6b Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:35 +0000 +Subject: [PATCH 15/20] iotests: Fix run_job() with use_log=False + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-10-kwolf@redhat.com> +Patchwork-id: 94284 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 09/13] iotests: Fix run_job() with use_log=False +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +The 'job-complete' QMP command should be run with qmp() rather than +qmp_log() if use_log=False is passed. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-4-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit b31b532122ec6f68d17168449c034d2197bf96ec) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/iotests.py | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 0c55f7b..46f880c 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -618,7 +618,10 @@ class VM(qtest.QEMUQtestMachine): + if use_log: + log('Job failed: %s' % (j['error'])) + elif status == 'ready': +- self.qmp_log('job-complete', id=job) ++ if use_log: ++ self.qmp_log('job-complete', id=job) ++ else: ++ self.qmp('job-complete', id=job) + elif status == 'pending' and not auto_finalize: + if pre_finalize: + pre_finalize() +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch b/SOURCES/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch new file mode 100644 index 0000000..17e4a41 --- /dev/null +++ b/SOURCES/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch @@ -0,0 +1,122 @@ +From 7e23b64dc20b64ca6fa887cd06cc5e52374f6268 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:30 +0000 +Subject: [PATCH 10/20] iotests: Refactor blockdev-reopen test for iothreads + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-5-kwolf@redhat.com> +Patchwork-id: 94281 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 04/13] iotests: Refactor blockdev-reopen test for iothreads +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +We'll want to test more than one successful case in the future, so +prepare the test for that by a refactoring that runs each scenario in a +separate VM. 
+ +test_iothreads_switch_{backing,overlay} currently produce errors, but +these are cases that should actually work, by switching either the +backing file node or the overlay node to the AioContext of the other +node. + +Signed-off-by: Kevin Wolf +Tested-by: Peter Krempa +Message-Id: <20200306141413.30705-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 97518e11c3d902a32386d33797044f6b79bccc6f) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/245 | 47 ++++++++++++++++++++++++++++++++++++---------- + tests/qemu-iotests/245.out | 4 ++-- + 2 files changed, 39 insertions(+), 12 deletions(-) + +diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 +index e66a23c..f69c2fa 100644 +--- a/tests/qemu-iotests/245 ++++ b/tests/qemu-iotests/245 +@@ -968,8 +968,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): + self.assertEqual(self.get_node('hd1'), None) + self.assert_qmp(self.get_node('hd2'), 'ro', True) + +- # We don't allow setting a backing file that uses a different AioContext +- def test_iothreads(self): ++ def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None): + opts = hd_opts(0) + result = self.vm.qmp('blockdev-add', conv_keys = False, **opts) + self.assert_qmp(result, 'return', {}) +@@ -984,20 +983,48 @@ class TestBlockdevReopen(iotests.QMPTestCase): + result = self.vm.qmp('object-add', qom_type='iothread', id='iothread1') + self.assert_qmp(result, 'return', {}) + +- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd0', iothread='iothread0') ++ result = self.vm.qmp('device_add', driver='virtio-scsi', id='scsi0', ++ iothread=iothread_a) + self.assert_qmp(result, 'return', {}) + +- self.reopen(opts, {'backing': 'hd2'}, "Cannot use a new backing file with a different AioContext") +- +- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd2', iothread='iothread1') ++ result = self.vm.qmp('device_add', driver='virtio-scsi', id='scsi1', ++ iothread=iothread_b) + 
self.assert_qmp(result, 'return', {}) + +- self.reopen(opts, {'backing': 'hd2'}, "Cannot use a new backing file with a different AioContext") ++ if iothread_a: ++ result = self.vm.qmp('device_add', driver='scsi-hd', drive='hd0', ++ share_rw=True, bus="scsi0.0") ++ self.assert_qmp(result, 'return', {}) + +- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd2', iothread='iothread0') +- self.assert_qmp(result, 'return', {}) ++ if iothread_b: ++ result = self.vm.qmp('device_add', driver='scsi-hd', drive='hd2', ++ share_rw=True, bus="scsi1.0") ++ self.assert_qmp(result, 'return', {}) + +- self.reopen(opts, {'backing': 'hd2'}) ++ # Attaching the backing file may or may not work ++ self.reopen(opts, {'backing': 'hd2'}, errmsg) ++ ++ # But removing the backing file should always work ++ self.reopen(opts, {'backing': None}) ++ ++ self.vm.shutdown() ++ ++ # We don't allow setting a backing file that uses a different AioContext if ++ # neither of them can switch to the other AioContext ++ def test_iothreads_error(self): ++ self.run_test_iothreads('iothread0', 'iothread1', ++ "Cannot use a new backing file with a different AioContext") ++ ++ def test_iothreads_compatible_users(self): ++ self.run_test_iothreads('iothread0', 'iothread0') ++ ++ def test_iothreads_switch_backing(self): ++ self.run_test_iothreads('iothread0', None, ++ "Cannot use a new backing file with a different AioContext") ++ ++ def test_iothreads_switch_overlay(self): ++ self.run_test_iothreads(None, 'iothread0', ++ "Cannot use a new backing file with a different AioContext") + + if __name__ == '__main__': + iotests.main(supported_fmts=["qcow2"], +diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out +index a19de52..682b933 100644 +--- a/tests/qemu-iotests/245.out ++++ b/tests/qemu-iotests/245.out +@@ -1,6 +1,6 @@ +-.................. ++..................... 
+ ---------------------------------------------------------------------- +-Ran 18 tests ++Ran 21 tests + + OK + {"execute": "job-finalize", "arguments": {"id": "commit0"}} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Support-job-complete-in-run_job.patch b/SOURCES/kvm-iotests-Support-job-complete-in-run_job.patch new file mode 100644 index 0000000..08971a0 --- /dev/null +++ b/SOURCES/kvm-iotests-Support-job-complete-in-run_job.patch @@ -0,0 +1,46 @@ +From a3778aef0be61dead835af39073a62bbf72c8e20 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:23:59 +0000 +Subject: [PATCH 02/18] iotests: Support job-complete in run_job() + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-2-kwolf@redhat.com> +Patchwork-id: 93746 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/6] iotests: Support job-complete in run_job() +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Automatically complete jobs that have a 'ready' state and need an +explicit job-complete. Without this, run_job() would hang for such +jobs. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Alberto Garcia +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 4688c4e32ec76004676470f11734478799673d6d) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/iotests.py | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index df07089..3cff671 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -617,6 +617,8 @@ class VM(qtest.QEMUQtestMachine): + error = j['error'] + if use_log: + log('Job failed: %s' % (j['error'])) ++ elif status == 'ready': ++ self.qmp_log('job-complete', id=job) + elif status == 'pending' and not auto_finalize: + if pre_finalize: + pre_finalize() +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Test-external-snapshot-with-VM-state.patch b/SOURCES/kvm-iotests-Test-external-snapshot-with-VM-state.patch new file mode 100644 index 0000000..6fcb2f6 --- /dev/null +++ b/SOURCES/kvm-iotests-Test-external-snapshot-with-VM-state.patch @@ -0,0 +1,189 @@ +From 38b0cff9703fc740c30f5874973ac1be88f94d9f Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:03 +0000 +Subject: [PATCH 06/18] iotests: Test external snapshot with VM state + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-6-kwolf@redhat.com> +Patchwork-id: 93752 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/6] iotests: Test external snapshot with VM state +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +This tests creating an external snapshot with VM state (which results in +an active overlay over an inactive backing file, which is also the root +node of an inactive BlockBackend), re-activating the images and +performing some operations to test that the re-activation worked as +intended. + +Signed-off-by: Kevin Wolf +(cherry picked from commit f62f08ab7a9d902da70078992248ec5c98f652ad) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/280 | 83 ++++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/280.out | 50 ++++++++++++++++++++++++++++ + tests/qemu-iotests/group | 1 + + 3 files changed, 134 insertions(+) + create mode 100755 tests/qemu-iotests/280 + create mode 100644 tests/qemu-iotests/280.out + +diff --git a/tests/qemu-iotests/280 b/tests/qemu-iotests/280 +new file mode 100755 +index 0000000..0b1fa8e +--- /dev/null ++++ b/tests/qemu-iotests/280 +@@ -0,0 +1,83 @@ ++#!/usr/bin/env python ++# ++# Copyright (C) 2019 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++# Creator/Owner: Kevin Wolf ++# ++# Test migration to file for taking an external snapshot with VM state. 
++ ++import iotests ++import os ++ ++iotests.verify_image_format(supported_fmts=['qcow2']) ++iotests.verify_protocol(supported=['file']) ++iotests.verify_platform(['linux']) ++ ++with iotests.FilePath('base') as base_path , \ ++ iotests.FilePath('top') as top_path, \ ++ iotests.VM() as vm: ++ ++ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base_path, '64M') ++ ++ iotests.log('=== Launch VM ===') ++ vm.add_object('iothread,id=iothread0') ++ vm.add_blockdev('file,filename=%s,node-name=base-file' % (base_path)) ++ vm.add_blockdev('%s,file=base-file,node-name=base-fmt' % (iotests.imgfmt)) ++ vm.add_device('virtio-blk,drive=base-fmt,iothread=iothread0,id=vda') ++ vm.launch() ++ ++ vm.enable_migration_events('VM') ++ ++ iotests.log('\n=== Migrate to file ===') ++ vm.qmp_log('migrate', uri='exec:cat > /dev/null') ++ ++ with iotests.Timeout(3, 'Migration does not complete'): ++ vm.wait_migration() ++ ++ iotests.log('\nVM is now stopped:') ++ iotests.log(vm.qmp('query-migrate')['return']['status']) ++ vm.qmp_log('query-status') ++ ++ iotests.log('\n=== Create a snapshot of the disk image ===') ++ vm.blockdev_create({ ++ 'driver': 'file', ++ 'filename': top_path, ++ 'size': 0, ++ }) ++ vm.qmp_log('blockdev-add', node_name='top-file', ++ driver='file', filename=top_path, ++ filters=[iotests.filter_qmp_testfiles]) ++ ++ vm.blockdev_create({ ++ 'driver': iotests.imgfmt, ++ 'file': 'top-file', ++ 'size': 1024 * 1024, ++ }) ++ vm.qmp_log('blockdev-add', node_name='top-fmt', ++ driver=iotests.imgfmt, file='top-file') ++ ++ vm.qmp_log('blockdev-snapshot', node='base-fmt', overlay='top-fmt') ++ ++ iotests.log('\n=== Resume the VM and simulate a write request ===') ++ vm.qmp_log('cont') ++ iotests.log(vm.hmp_qemu_io('-d vda/virtio-backend', 'write 4k 4k')) ++ ++ iotests.log('\n=== Commit it to the backing file ===') ++ result = vm.qmp_log('block-commit', job_id='job0', auto_dismiss=False, ++ device='top-fmt', top_node='top-fmt', ++ filters=[iotests.filter_qmp_testfiles]) ++ 
if 'return' in result: ++ vm.run_job('job0') +diff --git a/tests/qemu-iotests/280.out b/tests/qemu-iotests/280.out +new file mode 100644 +index 0000000..5d382fa +--- /dev/null ++++ b/tests/qemu-iotests/280.out +@@ -0,0 +1,50 @@ ++Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=67108864 cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++=== Launch VM === ++Enabling migration QMP events on VM... ++{"return": {}} ++ ++=== Migrate to file === ++{"execute": "migrate", "arguments": {"uri": "exec:cat > /dev/null"}} ++{"return": {}} ++{"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++ ++VM is now stopped: ++completed ++{"execute": "query-status", "arguments": {}} ++{"return": {"running": false, "singlestep": false, "status": "postmigrate"}} ++ ++=== Create a snapshot of the disk image === ++{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "file", "filename": "TEST_DIR/PID-top", "size": 0}}} ++{"return": {}} ++{"execute": "job-dismiss", "arguments": {"id": "job0"}} ++{"return": {}} ++ ++{"execute": "blockdev-add", "arguments": {"driver": "file", "filename": "TEST_DIR/PID-top", "node-name": "top-file"}} ++{"return": {}} ++{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "qcow2", "file": "top-file", "size": 1048576}}} ++{"return": {}} ++{"execute": "job-dismiss", "arguments": {"id": "job0"}} ++{"return": {}} ++ ++{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": "top-file", "node-name": "top-fmt"}} ++{"return": {}} ++{"execute": "blockdev-snapshot", "arguments": {"node": "base-fmt", "overlay": "top-fmt"}} ++{"return": {}} ++ ++=== Resume the VM and simulate a write request === ++{"execute": 
"cont", "arguments": {}} ++{"return": {}} ++{"return": ""} ++ ++=== Commit it to the backing file === ++{"execute": "block-commit", "arguments": {"auto-dismiss": false, "device": "top-fmt", "job-id": "job0", "top-node": "top-fmt"}} ++{"return": {}} ++{"execute": "job-complete", "arguments": {"id": "job0"}} ++{"return": {}} ++{"data": {"device": "job0", "len": 65536, "offset": 65536, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"device": "job0", "len": 65536, "offset": 65536, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "job-dismiss", "arguments": {"id": "job0"}} ++{"return": {}} +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 06cc734..01301cd 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -286,3 +286,4 @@ + 272 rw + 273 backing quick + 277 rw quick ++280 rw migration quick +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch b/SOURCES/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch new file mode 100644 index 0000000..b09439b --- /dev/null +++ b/SOURCES/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch @@ -0,0 +1,322 @@ +From 6b9a6ba9ed753ad7aa714b35de938ebeeb4fa6cb Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 10:27:49 +0000 +Subject: [PATCH 16/18] iotests: Test handling of AioContexts with some + blockdev actions + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-10-slp@redhat.com> +Patchwork-id: 93762 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 9/9] iotests: Test handling of AioContexts with some blockdev actions +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Includes the following 
tests: + + - Adding a dirty bitmap. + * RHBZ: 1782175 + + - Starting a drive-mirror to an NBD-backed target. + * RHBZ: 1746217, 1773517 + + - Aborting an external snapshot transaction. + * RHBZ: 1779036 + + - Aborting a blockdev backup transaction. + * RHBZ: 1782111 + +For each one of them, a VM with a number of disks running in an +IOThread AioContext is used. + +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 9b8c59e7610b9c5315ef093d801843dbe8debfac) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/281 | 247 +++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/281.out | 5 + + tests/qemu-iotests/group | 1 + + 3 files changed, 253 insertions(+) + create mode 100755 tests/qemu-iotests/281 + create mode 100644 tests/qemu-iotests/281.out + +diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 +new file mode 100755 +index 0000000..269d583 +--- /dev/null ++++ b/tests/qemu-iotests/281 +@@ -0,0 +1,247 @@ ++#!/usr/bin/env python ++# ++# Test cases for blockdev + IOThread interactions ++# ++# Copyright (C) 2019 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++# ++ ++import os ++import iotests ++from iotests import qemu_img ++ ++image_len = 64 * 1024 * 1024 ++ ++# Test for RHBZ#1782175 ++class TestDirtyBitmapIOThread(iotests.QMPTestCase): ++ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') ++ images = { 'drive0': drive0_img } ++ ++ def setUp(self): ++ for name in self.images: ++ qemu_img('create', '-f', iotests.imgfmt, ++ self.images[name], str(image_len)) ++ ++ self.vm = iotests.VM() ++ self.vm.add_object('iothread,id=iothread0') ++ ++ for name in self.images: ++ self.vm.add_blockdev('driver=file,filename=%s,node-name=file_%s' ++ % (self.images[name], name)) ++ self.vm.add_blockdev('driver=qcow2,file=file_%s,node-name=%s' ++ % (name, name)) ++ ++ self.vm.launch() ++ self.vm.qmp('x-blockdev-set-iothread', ++ node_name='drive0', iothread='iothread0', ++ force=True) ++ ++ def tearDown(self): ++ self.vm.shutdown() ++ for name in self.images: ++ os.remove(self.images[name]) ++ ++ def test_add_dirty_bitmap(self): ++ result = self.vm.qmp( ++ 'block-dirty-bitmap-add', ++ node='drive0', ++ name='bitmap1', ++ persistent=True, ++ ) ++ ++ self.assert_qmp(result, 'return', {}) ++ ++ ++# Test for RHBZ#1746217 & RHBZ#1773517 ++class TestNBDMirrorIOThread(iotests.QMPTestCase): ++ nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') ++ mirror_img = os.path.join(iotests.test_dir, 'mirror.img') ++ images = { 'drive0': drive0_img, 'mirror': mirror_img } ++ ++ def setUp(self): ++ for name in self.images: ++ qemu_img('create', '-f', iotests.imgfmt, ++ self.images[name], str(image_len)) ++ ++ self.vm_src = iotests.VM(path_suffix='src') ++ self.vm_src.add_object('iothread,id=iothread0') ++ self.vm_src.add_blockdev('driver=file,filename=%s,node-name=file0' ++ % (self.drive0_img)) ++ self.vm_src.add_blockdev('driver=qcow2,file=file0,node-name=drive0') ++ self.vm_src.launch() ++ self.vm_src.qmp('x-blockdev-set-iothread', ++ node_name='drive0', iothread='iothread0', ++ 
force=True) ++ ++ self.vm_tgt = iotests.VM(path_suffix='tgt') ++ self.vm_tgt.add_object('iothread,id=iothread0') ++ self.vm_tgt.add_blockdev('driver=file,filename=%s,node-name=file0' ++ % (self.mirror_img)) ++ self.vm_tgt.add_blockdev('driver=qcow2,file=file0,node-name=drive0') ++ self.vm_tgt.launch() ++ self.vm_tgt.qmp('x-blockdev-set-iothread', ++ node_name='drive0', iothread='iothread0', ++ force=True) ++ ++ def tearDown(self): ++ self.vm_src.shutdown() ++ self.vm_tgt.shutdown() ++ for name in self.images: ++ os.remove(self.images[name]) ++ ++ def test_nbd_mirror(self): ++ result = self.vm_tgt.qmp( ++ 'nbd-server-start', ++ addr={ ++ 'type': 'unix', ++ 'data': { 'path': self.nbd_sock } ++ } ++ ) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.vm_tgt.qmp( ++ 'nbd-server-add', ++ device='drive0', ++ writable=True ++ ) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.vm_src.qmp( ++ 'drive-mirror', ++ device='drive0', ++ target='nbd+unix:///drive0?socket=' + self.nbd_sock, ++ sync='full', ++ mode='existing', ++ speed=64*1024*1024, ++ job_id='j1' ++ ) ++ self.assert_qmp(result, 'return', {}) ++ ++ self.vm_src.event_wait(name="BLOCK_JOB_READY") ++ ++ ++# Test for RHBZ#1779036 ++class TestExternalSnapshotAbort(iotests.QMPTestCase): ++ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') ++ snapshot_img = os.path.join(iotests.test_dir, 'snapshot.img') ++ images = { 'drive0': drive0_img, 'snapshot': snapshot_img } ++ ++ def setUp(self): ++ for name in self.images: ++ qemu_img('create', '-f', iotests.imgfmt, ++ self.images[name], str(image_len)) ++ ++ self.vm = iotests.VM() ++ self.vm.add_object('iothread,id=iothread0') ++ self.vm.add_blockdev('driver=file,filename=%s,node-name=file0' ++ % (self.drive0_img)) ++ self.vm.add_blockdev('driver=qcow2,file=file0,node-name=drive0') ++ self.vm.launch() ++ self.vm.qmp('x-blockdev-set-iothread', ++ node_name='drive0', iothread='iothread0', ++ force=True) ++ ++ def tearDown(self): ++ self.vm.shutdown() 
++ for name in self.images: ++ os.remove(self.images[name]) ++ ++ def test_external_snapshot_abort(self): ++ # Use a two actions transaction with a bogus values on the second ++ # one to trigger an abort of the transaction. ++ result = self.vm.qmp('transaction', actions=[ ++ { ++ 'type': 'blockdev-snapshot-sync', ++ 'data': { 'node-name': 'drive0', ++ 'snapshot-file': self.snapshot_img, ++ 'snapshot-node-name': 'snap1', ++ 'mode': 'absolute-paths', ++ 'format': 'qcow2' } ++ }, ++ { ++ 'type': 'blockdev-snapshot-sync', ++ 'data': { 'node-name': 'drive0', ++ 'snapshot-file': '/fakesnapshot', ++ 'snapshot-node-name': 'snap2', ++ 'mode': 'absolute-paths', ++ 'format': 'qcow2' } ++ }, ++ ]) ++ ++ # Crashes on failure, we expect this error. ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ ++ ++# Test for RHBZ#1782111 ++class TestBlockdevBackupAbort(iotests.QMPTestCase): ++ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') ++ drive1_img = os.path.join(iotests.test_dir, 'drive1.img') ++ snap0_img = os.path.join(iotests.test_dir, 'snap0.img') ++ snap1_img = os.path.join(iotests.test_dir, 'snap1.img') ++ images = { 'drive0': drive0_img, ++ 'drive1': drive1_img, ++ 'snap0': snap0_img, ++ 'snap1': snap1_img } ++ ++ def setUp(self): ++ for name in self.images: ++ qemu_img('create', '-f', iotests.imgfmt, ++ self.images[name], str(image_len)) ++ ++ self.vm = iotests.VM() ++ self.vm.add_object('iothread,id=iothread0') ++ self.vm.add_device('virtio-scsi,iothread=iothread0') ++ ++ for name in self.images: ++ self.vm.add_blockdev('driver=file,filename=%s,node-name=file_%s' ++ % (self.images[name], name)) ++ self.vm.add_blockdev('driver=qcow2,file=file_%s,node-name=%s' ++ % (name, name)) ++ ++ self.vm.add_device('scsi-hd,drive=drive0') ++ self.vm.add_device('scsi-hd,drive=drive1') ++ self.vm.launch() ++ ++ def tearDown(self): ++ self.vm.shutdown() ++ for name in self.images: ++ os.remove(self.images[name]) ++ ++ def test_blockdev_backup_abort(self): ++ # Use a two 
actions transaction with a bogus values on the second ++ # one to trigger an abort of the transaction. ++ result = self.vm.qmp('transaction', actions=[ ++ { ++ 'type': 'blockdev-backup', ++ 'data': { 'device': 'drive0', ++ 'target': 'snap0', ++ 'sync': 'full', ++ 'job-id': 'j1' } ++ }, ++ { ++ 'type': 'blockdev-backup', ++ 'data': { 'device': 'drive1', ++ 'target': 'snap1', ++ 'sync': 'full' } ++ }, ++ ]) ++ ++ # Hangs on failure, we expect this error. ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ ++if __name__ == '__main__': ++ iotests.main(supported_fmts=['qcow2'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out +new file mode 100644 +index 0000000..89968f3 +--- /dev/null ++++ b/tests/qemu-iotests/281.out +@@ -0,0 +1,5 @@ ++.... ++---------------------------------------------------------------------- ++Ran 4 tests ++ ++OK +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 01301cd..c0e8197 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -287,3 +287,4 @@ + 273 backing quick + 277 rw quick + 280 rw migration quick ++281 rw quick +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch b/SOURCES/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch new file mode 100644 index 0000000..58ef198 --- /dev/null +++ b/SOURCES/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch @@ -0,0 +1,162 @@ +From 239f7bdeef48a3c0b07098617371b9955dc55348 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:36 +0000 +Subject: [PATCH 16/20] iotests: Test mirror with temporarily disabled target + backing file + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-11-kwolf@redhat.com> +Patchwork-id: 94288 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 10/13] iotests: Test mirror with temporarily disabled target backing file +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: 
Daniel P. Berrange +RH-Acked-by: Peter Krempa + +The newly tested scenario is a common live storage migration scenario: +The target node is opened without a backing file so that the active +layer is mirrored while its backing chain can be copied in the +background. + +The backing chain should be attached to the mirror target node when +finalising the job, just before switching the users of the source node +to the new copy (at which point the mirror job still has a reference to +the node). drive-mirror did this automatically, but with blockdev-mirror +this is the job of the QMP client. + +This patch adds test cases for two ways to achieve the desired result, +using either x-blockdev-reopen or blockdev-snapshot. + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-5-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit 8bdee9f10eac2aefdcc5095feef756354c87bdec) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/155 | 56 +++++++++++++++++++++++++++++++++++++++++----- + tests/qemu-iotests/155.out | 4 ++-- + 2 files changed, 53 insertions(+), 7 deletions(-) + +diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 +index d7ef257..3053e50 100755 +--- a/tests/qemu-iotests/155 ++++ b/tests/qemu-iotests/155 +@@ -45,10 +45,15 @@ target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) + # image during runtime, only makes sense if + # target_blockdev_backing is not None + # (None: same as target_backing) ++# target_open_with_backing: If True, the target image is added with its backing ++# chain opened right away. If False, blockdev-add ++# opens it without a backing file and job completion ++# is supposed to open the backing chain. 
+ + class BaseClass(iotests.QMPTestCase): + target_blockdev_backing = None + target_real_backing = None ++ target_open_with_backing = True + + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, back0_img, '1440K') +@@ -80,9 +85,13 @@ class BaseClass(iotests.QMPTestCase): + options = { 'node-name': 'target', + 'driver': iotests.imgfmt, + 'file': { 'driver': 'file', ++ 'node-name': 'target-file', + 'filename': target_img } } +- if self.target_blockdev_backing: +- options['backing'] = self.target_blockdev_backing ++ ++ if not self.target_open_with_backing: ++ options['backing'] = None ++ elif self.target_blockdev_backing: ++ options['backing'] = self.target_blockdev_backing + + result = self.vm.qmp('blockdev-add', **options) + self.assert_qmp(result, 'return', {}) +@@ -147,10 +156,14 @@ class BaseClass(iotests.QMPTestCase): + # cmd: Mirroring command to execute, either drive-mirror or blockdev-mirror + + class MirrorBaseClass(BaseClass): ++ def openBacking(self): ++ pass ++ + def runMirror(self, sync): + if self.cmd == 'blockdev-mirror': + result = self.vm.qmp(self.cmd, job_id='mirror-job', device='source', +- sync=sync, target='target') ++ sync=sync, target='target', ++ auto_finalize=False) + else: + if self.existing: + mode = 'existing' +@@ -159,11 +172,12 @@ class MirrorBaseClass(BaseClass): + result = self.vm.qmp(self.cmd, job_id='mirror-job', device='source', + sync=sync, target=target_img, + format=iotests.imgfmt, mode=mode, +- node_name='target') ++ node_name='target', auto_finalize=False) + + self.assert_qmp(result, 'return', {}) + +- self.complete_and_wait('mirror-job') ++ self.vm.run_job('mirror-job', use_log=False, auto_finalize=False, ++ pre_finalize=self.openBacking, auto_dismiss=True) + + def testFull(self): + self.runMirror('full') +@@ -221,6 +235,38 @@ class TestBlockdevMirrorForcedBacking(MirrorBaseClass): + target_blockdev_backing = { 'driver': 'null-co' } + target_real_backing = 'null-co://' + ++# Attach the backing chain only during 
completion, with blockdev-reopen ++class TestBlockdevMirrorReopen(MirrorBaseClass): ++ cmd = 'blockdev-mirror' ++ existing = True ++ target_backing = 'null-co://' ++ target_open_with_backing = False ++ ++ def openBacking(self): ++ if not self.target_open_with_backing: ++ result = self.vm.qmp('blockdev-add', node_name="backing", ++ driver="null-co") ++ self.assert_qmp(result, 'return', {}) ++ result = self.vm.qmp('x-blockdev-reopen', node_name="target", ++ driver=iotests.imgfmt, file="target-file", ++ backing="backing") ++ self.assert_qmp(result, 'return', {}) ++ ++# Attach the backing chain only during completion, with blockdev-snapshot ++class TestBlockdevMirrorSnapshot(MirrorBaseClass): ++ cmd = 'blockdev-mirror' ++ existing = True ++ target_backing = 'null-co://' ++ target_open_with_backing = False ++ ++ def openBacking(self): ++ if not self.target_open_with_backing: ++ result = self.vm.qmp('blockdev-add', node_name="backing", ++ driver="null-co") ++ self.assert_qmp(result, 'return', {}) ++ result = self.vm.qmp('blockdev-snapshot', node="backing", ++ overlay="target") ++ self.assert_qmp(result, 'return', {}) + + class TestCommit(BaseClass): + existing = False +diff --git a/tests/qemu-iotests/155.out b/tests/qemu-iotests/155.out +index 4176bb9..4fd1c2d 100644 +--- a/tests/qemu-iotests/155.out ++++ b/tests/qemu-iotests/155.out +@@ -1,5 +1,5 @@ +-................... ++......................... 
+ ---------------------------------------------------------------------- +-Ran 19 tests ++Ran 25 tests + + OK +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests-Use-complete_and_wait-in-155.patch b/SOURCES/kvm-iotests-Use-complete_and_wait-in-155.patch new file mode 100644 index 0000000..38b41be --- /dev/null +++ b/SOURCES/kvm-iotests-Use-complete_and_wait-in-155.patch @@ -0,0 +1,50 @@ +From 872fbd32d06bda4aba3a7e67a95f76f62e475dbe Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:27 +0000 +Subject: [PATCH 07/20] iotests: Use complete_and_wait() in 155 + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-2-kwolf@redhat.com> +Patchwork-id: 94279 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 01/13] iotests: Use complete_and_wait() in 155 +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +From: Max Reitz + +This way, we get to see errors during the completion phase. + +Signed-off-by: Max Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200218103454.296704-14-mreitz@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 6644d0e6192b36cdf2902c9774e1afb8ab2e7223) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/155 | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 +index e194859..d7ef257 100755 +--- a/tests/qemu-iotests/155 ++++ b/tests/qemu-iotests/155 +@@ -163,12 +163,7 @@ class MirrorBaseClass(BaseClass): + + self.assert_qmp(result, 'return', {}) + +- self.vm.event_wait('BLOCK_JOB_READY') +- +- result = self.vm.qmp('block-job-complete', device='mirror-job') +- self.assert_qmp(result, 'return', {}) +- +- self.vm.event_wait('BLOCK_JOB_COMPLETED') ++ self.complete_and_wait('mirror-job') + + def testFull(self): + self.runMirror('full') +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iotests.py-Let-wait_migration-wait-even-more.patch b/SOURCES/kvm-iotests.py-Let-wait_migration-wait-even-more.patch new file mode 100644 index 0000000..cda8037 --- /dev/null +++ b/SOURCES/kvm-iotests.py-Let-wait_migration-wait-even-more.patch @@ -0,0 +1,123 @@ +From d6df1426ae65b3a0d50bdbb1f8a7246386dd6ebf Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:04 +0000 +Subject: [PATCH 07/18] iotests.py: Let wait_migration wait even more + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-7-kwolf@redhat.com> +Patchwork-id: 93751 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/6] iotests.py: Let wait_migration wait even more +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +From: Max Reitz + +The "migration completed" event may be sent (on the source, to be +specific) before the migration is actually completed, so the VM runstate +will still be "finish-migrate" instead of "postmigrate". So ask the +users of VM.wait_migration() to specify the final runstate they desire +and then poll the VM until it has reached that state. (This should be +over very quickly, so busy polling is fine.) + +Without this patch, I see intermittent failures in the new iotest 280 +under high system load. 
I have not yet seen such failures with other +iotests that use VM.wait_migration() and query-status afterwards, but +maybe they just occur even more rarely, or it is because they also wait +on the destination VM to be running. + +Signed-off-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 8da7969bd7014f6de037d8ae132b40721944b186) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/234 | 8 ++++---- + tests/qemu-iotests/262 | 4 ++-- + tests/qemu-iotests/280 | 2 +- + tests/qemu-iotests/iotests.py | 6 +++++- + 4 files changed, 12 insertions(+), 8 deletions(-) + +diff --git a/tests/qemu-iotests/234 b/tests/qemu-iotests/234 +index 34c818c..59a7f94 100755 +--- a/tests/qemu-iotests/234 ++++ b/tests/qemu-iotests/234 +@@ -69,9 +69,9 @@ with iotests.FilePath('img') as img_path, \ + iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo_a))) + with iotests.Timeout(3, 'Migration does not complete'): + # Wait for the source first (which includes setup=setup) +- vm_a.wait_migration() ++ vm_a.wait_migration('postmigrate') + # Wait for the destination second (which does not) +- vm_b.wait_migration() ++ vm_b.wait_migration('running') + + iotests.log(vm_a.qmp('query-migrate')['return']['status']) + iotests.log(vm_b.qmp('query-migrate')['return']['status']) +@@ -98,9 +98,9 @@ with iotests.FilePath('img') as img_path, \ + iotests.log(vm_b.qmp('migrate', uri='exec:cat >%s' % (fifo_b))) + with iotests.Timeout(3, 'Migration does not complete'): + # Wait for the source first (which includes setup=setup) +- vm_b.wait_migration() ++ vm_b.wait_migration('postmigrate') + # Wait for the destination second (which does not) +- vm_a.wait_migration() ++ vm_a.wait_migration('running') + + iotests.log(vm_a.qmp('query-migrate')['return']['status']) + iotests.log(vm_b.qmp('query-migrate')['return']['status']) +diff --git a/tests/qemu-iotests/262 b/tests/qemu-iotests/262 +index 0963daa..bbcb526 100755 +--- a/tests/qemu-iotests/262 ++++ 
b/tests/qemu-iotests/262 +@@ -71,9 +71,9 @@ with iotests.FilePath('img') as img_path, \ + iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo))) + with iotests.Timeout(3, 'Migration does not complete'): + # Wait for the source first (which includes setup=setup) +- vm_a.wait_migration() ++ vm_a.wait_migration('postmigrate') + # Wait for the destination second (which does not) +- vm_b.wait_migration() ++ vm_b.wait_migration('running') + + iotests.log(vm_a.qmp('query-migrate')['return']['status']) + iotests.log(vm_b.qmp('query-migrate')['return']['status']) +diff --git a/tests/qemu-iotests/280 b/tests/qemu-iotests/280 +index 0b1fa8e..85e9114 100755 +--- a/tests/qemu-iotests/280 ++++ b/tests/qemu-iotests/280 +@@ -45,7 +45,7 @@ with iotests.FilePath('base') as base_path , \ + vm.qmp_log('migrate', uri='exec:cat > /dev/null') + + with iotests.Timeout(3, 'Migration does not complete'): +- vm.wait_migration() ++ vm.wait_migration('postmigrate') + + iotests.log('\nVM is now stopped:') + iotests.log(vm.qmp('query-migrate')['return']['status']) +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 5741efb..0c55f7b 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -663,12 +663,16 @@ class VM(qtest.QEMUQtestMachine): + } + ])) + +- def wait_migration(self): ++ def wait_migration(self, expect_runstate): + while True: + event = self.event_wait('MIGRATION') + log(event, filters=[filter_qmp_event]) + if event['data']['status'] == 'completed': + break ++ # The event may occur in finish-migrate, so wait for the expected ++ # post-migration runstate ++ while self.qmp('query-status')['return']['status'] != expect_runstate: ++ pass + + def node_info(self, node_name): + nodes = self.qmp('query-named-block-nodes') +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch b/SOURCES/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch new file mode 100644 index 
0000000..2ee9dcd --- /dev/null +++ b/SOURCES/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch @@ -0,0 +1,79 @@ +From 1c508d56d154caf5fbf53e7dabafd707236cb16b Mon Sep 17 00:00:00 2001 +From: jmaloy +Date: Wed, 29 Jan 2020 13:45:18 +0000 +Subject: [PATCH 06/15] iscsi: Cap block count from GET LBA STATUS + (CVE-2020-1711) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: jmaloy +Message-id: <20200129134518.1293-2-jmaloy@redhat.com> +Patchwork-id: 93571 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] iscsi: Cap block count from GET LBA STATUS (CVE-2020-1711) +Bugzilla: 1794503 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Acked-by: Philippe Mathieu-Daudé + +From: Felipe Franciosi + +When querying an iSCSI server for the provisioning status of blocks (via +GET LBA STATUS), Qemu only validates that the response descriptor zero's +LBA matches the one requested. Given the SCSI spec allows servers to +respond with the status of blocks beyond the end of the LUN, Qemu may +have its heap corrupted by clearing/setting too many bits at the end of +its allocmap for the LUN. + +A malicious guest in control of the iSCSI server could carefully program +Qemu's heap (by selectively setting the bitmap) and then smash it. + +This limits the number of bits that iscsi_co_block_status() will try to +update in the allocmap so it can't overflow the bitmap. + +Fixes: CVE-2020-1711 +Cc: qemu-stable@nongnu.org +Signed-off-by: Felipe Franciosi +Signed-off-by: Peter Turschmid +Signed-off-by: Raphael Norwitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 693fd2acdf14dd86c0bf852610f1c2cca80a74dc) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. 
de Paula +--- + block/iscsi.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/block/iscsi.c b/block/iscsi.c +index 2aea7e3..cbd5729 100644 +--- a/block/iscsi.c ++++ b/block/iscsi.c +@@ -701,7 +701,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, + struct scsi_get_lba_status *lbas = NULL; + struct scsi_lba_status_descriptor *lbasd = NULL; + struct IscsiTask iTask; +- uint64_t lba; ++ uint64_t lba, max_bytes; + int ret; + + iscsi_co_init_iscsitask(iscsilun, &iTask); +@@ -721,6 +721,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, + } + + lba = offset / iscsilun->block_size; ++ max_bytes = (iscsilun->num_blocks - lba) * iscsilun->block_size; + + qemu_mutex_lock(&iscsilun->mutex); + retry: +@@ -764,7 +765,7 @@ retry: + goto out_unlock; + } + +- *pnum = (int64_t) lbasd->num_blocks * iscsilun->block_size; ++ *pnum = MIN((int64_t) lbasd->num_blocks * iscsilun->block_size, max_bytes); + + if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED || + lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-iscsi-Drop-iscsi_co_create_opts.patch b/SOURCES/kvm-iscsi-Drop-iscsi_co_create_opts.patch new file mode 100644 index 0000000..a6d0baf --- /dev/null +++ b/SOURCES/kvm-iscsi-Drop-iscsi_co_create_opts.patch @@ -0,0 +1,113 @@ +From 58b7d33e1bc17b89103ceaa39f5722a69b35d810 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:45 +0000 +Subject: [PATCH 04/20] iscsi: Drop iscsi_co_create_opts() + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-5-mlevitsk@redhat.com> +Patchwork-id: 94226 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/6] iscsi: Drop iscsi_co_create_opts() +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +The generic fallback implementation effectively does the same. 
+ +Reviewed-by: Maxim Levitsky +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-5-mreitz@redhat.com> +Signed-off-by: Max Reitz +(cherry picked from commit 80f0900905b555f00d644894c786b6d66ac2e00e) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. de Paula +--- + block/iscsi.c | 56 -------------------------------------------------------- + 1 file changed, 56 deletions(-) + +diff --git a/block/iscsi.c b/block/iscsi.c +index cbd5729..b45da65 100644 +--- a/block/iscsi.c ++++ b/block/iscsi.c +@@ -2164,58 +2164,6 @@ static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, + return 0; + } + +-static int coroutine_fn iscsi_co_create_opts(const char *filename, QemuOpts *opts, +- Error **errp) +-{ +- int ret = 0; +- int64_t total_size = 0; +- BlockDriverState *bs; +- IscsiLun *iscsilun = NULL; +- QDict *bs_options; +- Error *local_err = NULL; +- +- bs = bdrv_new(); +- +- /* Read out options */ +- total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), +- BDRV_SECTOR_SIZE); +- bs->opaque = g_new0(struct IscsiLun, 1); +- iscsilun = bs->opaque; +- +- bs_options = qdict_new(); +- iscsi_parse_filename(filename, bs_options, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- ret = -EINVAL; +- } else { +- ret = iscsi_open(bs, bs_options, 0, NULL); +- } +- qobject_unref(bs_options); +- +- if (ret != 0) { +- goto out; +- } +- iscsi_detach_aio_context(bs); +- if (iscsilun->type != TYPE_DISK) { +- ret = -ENODEV; +- goto out; +- } +- if (bs->total_sectors < total_size) { +- ret = -ENOSPC; +- goto out; +- } +- +- ret = 0; +-out: +- if (iscsilun->iscsi != NULL) { +- iscsi_destroy_context(iscsilun->iscsi); +- } +- g_free(bs->opaque); +- bs->opaque = NULL; +- bdrv_unref(bs); +- return ret; +-} +- + static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) + { + IscsiLun *iscsilun = bs->opaque; +@@ -2486,8 +2434,6 @@ static BlockDriver bdrv_iscsi = { + .bdrv_parse_filename = iscsi_parse_filename, 
+ .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, +- .bdrv_co_create_opts = iscsi_co_create_opts, +- .create_opts = &iscsi_create_opts, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, +@@ -2525,8 +2471,6 @@ static BlockDriver bdrv_iser = { + .bdrv_parse_filename = iscsi_parse_filename, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, +- .bdrv_co_create_opts = iscsi_co_create_opts, +- .create_opts = &iscsi_create_opts, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch b/SOURCES/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch new file mode 100644 index 0000000..e38428b --- /dev/null +++ b/SOURCES/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch @@ -0,0 +1,213 @@ +From 3f16b8a33bd7503cbe857fbeb45fff7301b6bb5f Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:12 +0100 +Subject: [PATCH 1/6] job: take each job's lock individually in job_txn_apply + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-2-kwolf@redhat.com> +Patchwork-id: 94597 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] job: take each job's lock individually in job_txn_apply +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +From: Stefan Reiter + +All callers of job_txn_apply hold a single job's lock, but different +jobs within a transaction can have different contexts, thus we need to +lock each one individually before applying the callback function. + +Similar to job_completed_txn_abort this also requires releasing the +caller's context before and reacquiring it after to avoid recursive +locks which might break AIO_WAIT_WHILE in the callback. 
This is safe, since +existing code would already have to take this into account, lest +job_completed_txn_abort might have broken. + +This also brings to light a different issue: When a callback function in +job_txn_apply moves it's job to a different AIO context, callers will +try to release the wrong lock (now that we re-acquire the lock +correctly, previously it would just continue with the old lock, leaving +the job unlocked for the rest of the return path). Fix this by not caching +the job's context. + +This is only necessary for qmp_block_job_finalize, qmp_job_finalize and +job_exit, since everyone else calls through job_exit. + +One test needed adapting, since it calls job_finalize directly, so it +manually needs to acquire the correct context. + +Signed-off-by: Stefan Reiter +Message-Id: <20200407115651.69472-2-s.reiter@proxmox.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit b660a84bbb0eb1a76b505648d31d5e82594fb75e) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 9 +++++++++ + job-qmp.c | 9 +++++++++ + job.c | 50 ++++++++++++++++++++++++++++++++++++++++---------- + tests/test-blockjob.c | 2 ++ + 4 files changed, 60 insertions(+), 10 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index c8d4b51..86eb115 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -4215,7 +4215,16 @@ void qmp_block_job_finalize(const char *id, Error **errp) + } + + trace_qmp_block_job_finalize(job); ++ job_ref(&job->job); + job_finalize(&job->job, errp); ++ ++ /* ++ * Job's context might have changed via job_finalize (and job_txn_apply ++ * automatically acquires the new one), so make sure we release the correct ++ * one. 
++ */ ++ aio_context = blk_get_aio_context(job->blk); ++ job_unref(&job->job); + aio_context_release(aio_context); + } + +diff --git a/job-qmp.c b/job-qmp.c +index fbfed25..a201220 100644 +--- a/job-qmp.c ++++ b/job-qmp.c +@@ -114,7 +114,16 @@ void qmp_job_finalize(const char *id, Error **errp) + } + + trace_qmp_job_finalize(job); ++ job_ref(job); + job_finalize(job, errp); ++ ++ /* ++ * Job's context might have changed via job_finalize (and job_txn_apply ++ * automatically acquires the new one), so make sure we release the correct ++ * one. ++ */ ++ aio_context = job->aio_context; ++ job_unref(job); + aio_context_release(aio_context); + } + +diff --git a/job.c b/job.c +index 04409b4..48fc4ad 100644 +--- a/job.c ++++ b/job.c +@@ -136,17 +136,38 @@ static void job_txn_del_job(Job *job) + } + } + +-static int job_txn_apply(JobTxn *txn, int fn(Job *)) ++static int job_txn_apply(Job *job, int fn(Job *)) + { +- Job *job, *next; ++ AioContext *inner_ctx; ++ Job *other_job, *next; ++ JobTxn *txn = job->txn; + int rc = 0; + +- QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) { +- rc = fn(job); ++ /* ++ * Similar to job_completed_txn_abort, we take each job's lock before ++ * applying fn, but since we assume that outer_ctx is held by the caller, ++ * we need to release it here to avoid holding the lock twice - which would ++ * break AIO_WAIT_WHILE from within fn. ++ */ ++ job_ref(job); ++ aio_context_release(job->aio_context); ++ ++ QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { ++ inner_ctx = other_job->aio_context; ++ aio_context_acquire(inner_ctx); ++ rc = fn(other_job); ++ aio_context_release(inner_ctx); + if (rc) { + break; + } + } ++ ++ /* ++ * Note that job->aio_context might have been changed by calling fn, so we ++ * can't use a local variable to cache it. 
++ */ ++ aio_context_acquire(job->aio_context); ++ job_unref(job); + return rc; + } + +@@ -774,11 +795,11 @@ static void job_do_finalize(Job *job) + assert(job && job->txn); + + /* prepare the transaction to complete */ +- rc = job_txn_apply(job->txn, job_prepare); ++ rc = job_txn_apply(job, job_prepare); + if (rc) { + job_completed_txn_abort(job); + } else { +- job_txn_apply(job->txn, job_finalize_single); ++ job_txn_apply(job, job_finalize_single); + } + } + +@@ -824,10 +845,10 @@ static void job_completed_txn_success(Job *job) + assert(other_job->ret == 0); + } + +- job_txn_apply(txn, job_transition_to_pending); ++ job_txn_apply(job, job_transition_to_pending); + + /* If no jobs need manual finalization, automatically do so */ +- if (job_txn_apply(txn, job_needs_finalize) == 0) { ++ if (job_txn_apply(job, job_needs_finalize) == 0) { + job_do_finalize(job); + } + } +@@ -849,9 +870,10 @@ static void job_completed(Job *job) + static void job_exit(void *opaque) + { + Job *job = (Job *)opaque; +- AioContext *ctx = job->aio_context; ++ AioContext *ctx; + +- aio_context_acquire(ctx); ++ job_ref(job); ++ aio_context_acquire(job->aio_context); + + /* This is a lie, we're not quiescent, but still doing the completion + * callbacks. However, completion callbacks tend to involve operations that +@@ -862,6 +884,14 @@ static void job_exit(void *opaque) + + job_completed(job); + ++ /* ++ * Note that calling job_completed can move the job to a different ++ * aio_context, so we cannot cache from above. job_txn_apply takes care of ++ * acquiring the new lock, and we ref/unref to avoid job_completed freeing ++ * the job underneath us. 
++ */ ++ ctx = job->aio_context; ++ job_unref(job); + aio_context_release(ctx); + } + +diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c +index 7844c9f..6d857fd 100644 +--- a/tests/test-blockjob.c ++++ b/tests/test-blockjob.c +@@ -368,7 +368,9 @@ static void test_cancel_concluded(void) + aio_poll(qemu_get_aio_context(), true); + assert(job->status == JOB_STATUS_PENDING); + ++ aio_context_acquire(job->aio_context); + job_finalize(job, &error_abort); ++ aio_context_release(job->aio_context); + assert(job->status == JOB_STATUS_CONCLUDED); + + cancel_common(s); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch b/SOURCES/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch new file mode 100644 index 0000000..e362efe --- /dev/null +++ b/SOURCES/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch @@ -0,0 +1,117 @@ +From ee360b70f179cf540faebe7e55b34e323e2bb179 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:09 +0100 +Subject: [PATCH 098/116] libvhost-user: Fix some memtable remap cases +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-95-dgilbert@redhat.com> +Patchwork-id: 93548 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 094/112] libvhost-user: Fix some memtable remap cases +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +If a new setmemtable command comes in once the vhost threads are +running, it will remap the guests address space and the threads +will now be looking in the wrong place. + +Fortunately we're running this command under lock, so we can +update the queue mappings so that threads will look in the new-right +place. + +Note: This doesn't fix things that the threads might be doing +without a lock (e.g. a readv/writev!) That's for another time. 
+ +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 49e9ec749d4db62ae51f76354143cee183912a1d) +Signed-off-by: Miroslav Rezanina +--- + contrib/libvhost-user/libvhost-user.c | 33 +++++++++++++++++++++++++-------- + contrib/libvhost-user/libvhost-user.h | 3 +++ + 2 files changed, 28 insertions(+), 8 deletions(-) + +diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c +index 63e4106..b89bf18 100644 +--- a/contrib/libvhost-user/libvhost-user.c ++++ b/contrib/libvhost-user/libvhost-user.c +@@ -565,6 +565,21 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg) + } + + static bool ++map_ring(VuDev *dev, VuVirtq *vq) ++{ ++ vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr); ++ vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr); ++ vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr); ++ ++ DPRINT("Setting virtq addresses:\n"); ++ DPRINT(" vring_desc at %p\n", vq->vring.desc); ++ DPRINT(" vring_used at %p\n", vq->vring.used); ++ DPRINT(" vring_avail at %p\n", vq->vring.avail); ++ ++ return !(vq->vring.desc && vq->vring.used && vq->vring.avail); ++} ++ ++static bool + vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) + { + int i; +@@ -767,6 +782,14 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) + close(vmsg->fds[i]); + } + ++ for (i = 0; i < dev->max_queues; i++) { ++ if (dev->vq[i].vring.desc) { ++ if (map_ring(dev, &dev->vq[i])) { ++ vu_panic(dev, "remaping queue %d during setmemtable", i); ++ } ++ } ++ } ++ + return false; + } + +@@ -853,18 +876,12 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg) + DPRINT(" avail_user_addr: 0x%016" PRIx64 "\n", vra->avail_user_addr); + DPRINT(" log_guest_addr: 0x%016" PRIx64 "\n", vra->log_guest_addr); + ++ vq->vra = *vra; + vq->vring.flags = vra->flags; +- vq->vring.desc = qva_to_va(dev, vra->desc_user_addr); +- vq->vring.used = qva_to_va(dev, vra->used_user_addr); +- vq->vring.avail = qva_to_va(dev, vra->avail_user_addr); + 
vq->vring.log_guest_addr = vra->log_guest_addr; + +- DPRINT("Setting virtq addresses:\n"); +- DPRINT(" vring_desc at %p\n", vq->vring.desc); +- DPRINT(" vring_used at %p\n", vq->vring.used); +- DPRINT(" vring_avail at %p\n", vq->vring.avail); + +- if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) { ++ if (map_ring(dev, vq)) { + vu_panic(dev, "Invalid vring_addr message"); + return false; + } +diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h +index 1844b6f..5cb7708 100644 +--- a/contrib/libvhost-user/libvhost-user.h ++++ b/contrib/libvhost-user/libvhost-user.h +@@ -327,6 +327,9 @@ typedef struct VuVirtq { + int err_fd; + unsigned int enable; + bool started; ++ ++ /* Guest addresses of our ring */ ++ struct vhost_vring_addr vra; + } VuVirtq; + + enum VuWatchCondtion { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch b/SOURCES/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch new file mode 100644 index 0000000..3477af5 --- /dev/null +++ b/SOURCES/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch @@ -0,0 +1,179 @@ +From 38a032829b6b8d523b4cee05f732031e66fc2e41 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 31 Jan 2020 17:12:56 +0000 +Subject: [PATCH 14/15] migration: Change SaveStateEntry.instance_id into + uint32_t + +RH-Author: Peter Xu +Message-id: <20200131171257.1066593-3-peterx@redhat.com> +Patchwork-id: 93629 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/3] migration: Change SaveStateEntry.instance_id into uint32_t +Bugzilla: 1529231 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert + +It was always used as 32bit, so define it as used to be clear. +Instead of using -1 as the auto-gen magic value, we switch to +UINT32_MAX. We also make sure that we don't auto-gen this value to +avoid overflowed instance IDs without being noticed. 
+ +Suggested-by: Juan Quintela +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 93062e23619e057743757ee53bf7f8e07f7a3710) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + include/migration/vmstate.h + migration/savevm.c + stubs/vmstate.c + Due to missing 3cad405bab ("vmstate: replace DeviceState with + VMStateIf", 2020-01-06) + +Signed-off-by: Danilo C. L. de Paula +--- + hw/intc/apic_common.c | 2 +- + include/migration/register.h | 2 +- + include/migration/vmstate.h | 2 +- + migration/savevm.c | 18 ++++++++++-------- + stubs/vmstate.c | 2 +- + 5 files changed, 14 insertions(+), 12 deletions(-) + +diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c +index f2c3a7f..54b8731 100644 +--- a/hw/intc/apic_common.c ++++ b/hw/intc/apic_common.c +@@ -268,7 +268,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) + APICCommonState *s = APIC_COMMON(dev); + APICCommonClass *info; + static DeviceState *vapic; +- int instance_id = s->id; ++ uint32_t instance_id = s->id; + + info = APIC_COMMON_GET_CLASS(s); + info->realize(dev, errp); +diff --git a/include/migration/register.h b/include/migration/register.h +index a13359a..f3ba10b 100644 +--- a/include/migration/register.h ++++ b/include/migration/register.h +@@ -69,7 +69,7 @@ typedef struct SaveVMHandlers { + } SaveVMHandlers; + + int register_savevm_live(const char *idstr, +- int instance_id, ++ uint32_t instance_id, + int version_id, + const SaveVMHandlers *ops, + void *opaque); +diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h +index 883f1cf..296609c 100644 +--- a/include/migration/vmstate.h ++++ b/include/migration/vmstate.h +@@ -1158,7 +1158,7 @@ bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); + #define VMSTATE_INSTANCE_ID_ANY -1 + + /* Returns: 0 on success, -1 on failure */ +-int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, ++int 
vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, + const VMStateDescription *vmsd, + void *base, int alias_id, + int required_for_version, +diff --git a/migration/savevm.c b/migration/savevm.c +index e2e8e0a..a80bb52 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -233,7 +233,7 @@ typedef struct CompatEntry { + typedef struct SaveStateEntry { + QTAILQ_ENTRY(SaveStateEntry) entry; + char idstr[256]; +- int instance_id; ++ uint32_t instance_id; + int alias_id; + int version_id; + /* version id read from the stream */ +@@ -665,10 +665,10 @@ void dump_vmstate_json_to_file(FILE *out_file) + fclose(out_file); + } + +-static int calculate_new_instance_id(const char *idstr) ++static uint32_t calculate_new_instance_id(const char *idstr) + { + SaveStateEntry *se; +- int instance_id = 0; ++ uint32_t instance_id = 0; + + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { + if (strcmp(idstr, se->idstr) == 0 +@@ -676,6 +676,8 @@ static int calculate_new_instance_id(const char *idstr) + instance_id = se->instance_id + 1; + } + } ++ /* Make sure we never loop over without being noticed */ ++ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); + return instance_id; + } + +@@ -730,7 +732,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) + Meanwhile pass -1 as instance_id if you do not already have a clearly + distinguishing id for all instances of your device class. 
*/ + int register_savevm_live(const char *idstr, +- int instance_id, ++ uint32_t instance_id, + int version_id, + const SaveVMHandlers *ops, + void *opaque) +@@ -784,7 +786,7 @@ void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque) + } + } + +-int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, ++int vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, + const VMStateDescription *vmsd, + void *opaque, int alias_id, + int required_for_version, +@@ -1600,7 +1602,7 @@ int qemu_save_device_state(QEMUFile *f) + return qemu_file_get_error(f); + } + +-static SaveStateEntry *find_se(const char *idstr, int instance_id) ++static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id) + { + SaveStateEntry *se; + +@@ -2267,7 +2269,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) + /* Find savevm section */ + se = find_se(idstr, instance_id); + if (se == NULL) { +- error_report("Unknown savevm section or instance '%s' %d. " ++ error_report("Unknown savevm section or instance '%s' %"PRIu32". 
" + "Make sure that your current VM setup matches your " + "saved VM setup, including any hotplugged devices", + idstr, instance_id); +@@ -2291,7 +2293,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) + + ret = vmstate_load(f, se); + if (ret < 0) { +- error_report("error while loading state for instance 0x%x of" ++ error_report("error while loading state for instance 0x%"PRIx32" of" + " device '%s'", instance_id, idstr); + return ret; + } +diff --git a/stubs/vmstate.c b/stubs/vmstate.c +index e1e89b8..4ed5cc6 100644 +--- a/stubs/vmstate.c ++++ b/stubs/vmstate.c +@@ -4,7 +4,7 @@ + const VMStateDescription vmstate_dummy = {}; + + int vmstate_register_with_alias_id(DeviceState *dev, +- int instance_id, ++ uint32_t instance_id, + const VMStateDescription *vmsd, + void *base, int alias_id, + int required_for_version, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-Create-migration_is_running.patch b/SOURCES/kvm-migration-Create-migration_is_running.patch new file mode 100644 index 0000000..c9593de --- /dev/null +++ b/SOURCES/kvm-migration-Create-migration_is_running.patch @@ -0,0 +1,119 @@ +From c9e3d13d70a24bf606ce351886b27bdca25ef4dc Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:41 +0000 +Subject: [PATCH 09/18] migration: Create migration_is_running() + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-9-quintela@redhat.com> +Patchwork-id: 94115 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 08/10] migration: Create migration_is_running() +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +This function returns true if we are in the middle of a migration. +It is like migration_is_setup_or_active() with CANCELLING and COLO. +Adapt all callers that are needed. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 392d87e21325fdb01210176faa07472b4985ccf0) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/migration.c | 29 ++++++++++++++++++++++++----- + migration/migration.h | 1 + + migration/savevm.c | 4 +--- + 3 files changed, 26 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 30c53c6..eb50d77 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -831,6 +831,27 @@ bool migration_is_setup_or_active(int state) + } + } + ++bool migration_is_running(int state) ++{ ++ switch (state) { ++ case MIGRATION_STATUS_ACTIVE: ++ case MIGRATION_STATUS_POSTCOPY_ACTIVE: ++ case MIGRATION_STATUS_POSTCOPY_PAUSED: ++ case MIGRATION_STATUS_POSTCOPY_RECOVER: ++ case MIGRATION_STATUS_SETUP: ++ case MIGRATION_STATUS_PRE_SWITCHOVER: ++ case MIGRATION_STATUS_DEVICE: ++ case MIGRATION_STATUS_WAIT_UNPLUG: ++ case MIGRATION_STATUS_CANCELLING: ++ case MIGRATION_STATUS_COLO: ++ return true; ++ ++ default: ++ return false; ++ ++ } ++} ++ + static void populate_time_info(MigrationInfo *info, MigrationState *s) + { + info->has_status = true; +@@ -1090,7 +1111,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + MigrationCapabilityStatusList *cap; + bool cap_list[MIGRATION_CAPABILITY__MAX]; + +- if (migration_is_setup_or_active(s->state)) { ++ if (migration_is_running(s->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return; + } +@@ -1603,7 +1624,7 @@ static void migrate_fd_cancel(MigrationState *s) + + do { + old_state = s->state; +- if (!migration_is_setup_or_active(old_state)) { ++ if (!migration_is_running(old_state)) { + break; + } + /* If the migration is paused, kick it out of the pause */ +@@ -1900,9 +1921,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + return true; + } + +- if (migration_is_setup_or_active(s->state) || +- s->state == MIGRATION_STATUS_CANCELLING || +- s->state == MIGRATION_STATUS_COLO) { ++ if (migration_is_running(s->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return false; + } +diff --git 
a/migration/migration.h b/migration/migration.h +index 0b1b0d4..a2b2336 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -279,6 +279,7 @@ void migrate_fd_error(MigrationState *s, const Error *error); + void migrate_fd_connect(MigrationState *s, Error *error_in); + + bool migration_is_setup_or_active(int state); ++bool migration_is_running(int state); + + void migrate_init(MigrationState *s); + bool migration_is_blocked(Error **errp); +diff --git a/migration/savevm.c b/migration/savevm.c +index a80bb52..144ecf0 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1506,9 +1506,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + MigrationState *ms = migrate_get_current(); + MigrationStatus status; + +- if (migration_is_setup_or_active(ms->state) || +- ms->state == MIGRATION_STATUS_CANCELLING || +- ms->state == MIGRATION_STATUS_COLO) { ++ if (migration_is_running(ms->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return -EINVAL; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch b/SOURCES/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch new file mode 100644 index 0000000..c2ead53 --- /dev/null +++ b/SOURCES/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch @@ -0,0 +1,257 @@ +From 2659af9267586fb626f543773bf3f844727e473b Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 31 Jan 2020 17:12:55 +0000 +Subject: [PATCH 13/15] migration: Define VMSTATE_INSTANCE_ID_ANY + +RH-Author: Peter Xu +Message-id: <20200131171257.1066593-2-peterx@redhat.com> +Patchwork-id: 93630 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/3] migration: Define VMSTATE_INSTANCE_ID_ANY +Bugzilla: 1529231 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert + +Define the new macro VMSTATE_INSTANCE_ID_ANY for callers who wants to +auto-generate the vmstate instance ID. Previously it was hard coded +as -1 instead of this macro. 
It helps to change this default value in +the follow up patches. No functional change. + +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 1df2c9a26fcb2fa32d099f8e9adcdae4207872e3) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + backends/dbus-vmstate.c + File deleted + hw/core/qdev.c + hw/misc/max111x.c + hw/net/eepro100.c + Due to missing commit 3cad405bab ("vmstate: replace + DeviceState with VMStateIf", 2020-01-06) + +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/stellaris.c | 2 +- + hw/core/qdev.c | 3 ++- + hw/display/ads7846.c | 2 +- + hw/i2c/core.c | 2 +- + hw/input/stellaris_input.c | 3 ++- + hw/intc/apic_common.c | 2 +- + hw/misc/max111x.c | 2 +- + hw/net/eepro100.c | 2 +- + hw/pci/pci.c | 2 +- + hw/ppc/spapr.c | 2 +- + hw/timer/arm_timer.c | 2 +- + hw/tpm/tpm_emulator.c | 3 ++- + include/migration/vmstate.h | 2 ++ + migration/savevm.c | 8 ++++---- + 14 files changed, 21 insertions(+), 16 deletions(-) + +diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c +index b198066..bb025e0 100644 +--- a/hw/arm/stellaris.c ++++ b/hw/arm/stellaris.c +@@ -708,7 +708,7 @@ static int stellaris_sys_init(uint32_t base, qemu_irq irq, + memory_region_init_io(&s->iomem, NULL, &ssys_ops, s, "ssys", 0x00001000); + memory_region_add_subregion(get_system_memory(), base, &s->iomem); + ssys_reset(s); +- vmstate_register(NULL, -1, &vmstate_stellaris_sys, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_stellaris_sys, s); + return 0; + } + +diff --git a/hw/core/qdev.c b/hw/core/qdev.c +index cf1ba28..40f6b2b 100644 +--- a/hw/core/qdev.c ++++ b/hw/core/qdev.c +@@ -890,7 +890,8 @@ static void device_set_realized(Object *obj, bool value, Error **errp) + dev->canonical_path = object_get_canonical_path(OBJECT(dev)); + + if (qdev_get_vmsd(dev)) { +- if (vmstate_register_with_alias_id(dev, -1, qdev_get_vmsd(dev), dev, ++ if (vmstate_register_with_alias_id(dev, 
VMSTATE_INSTANCE_ID_ANY, ++ qdev_get_vmsd(dev), dev, + dev->instance_id_alias, + dev->alias_required_for_version, + &local_err) < 0) { +diff --git a/hw/display/ads7846.c b/hw/display/ads7846.c +index c12272a..9228b40 100644 +--- a/hw/display/ads7846.c ++++ b/hw/display/ads7846.c +@@ -154,7 +154,7 @@ static void ads7846_realize(SSISlave *d, Error **errp) + + ads7846_int_update(s); + +- vmstate_register(NULL, -1, &vmstate_ads7846, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_ads7846, s); + } + + static void ads7846_class_init(ObjectClass *klass, void *data) +diff --git a/hw/i2c/core.c b/hw/i2c/core.c +index 92cd489..d770035 100644 +--- a/hw/i2c/core.c ++++ b/hw/i2c/core.c +@@ -61,7 +61,7 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name) + + bus = I2C_BUS(qbus_create(TYPE_I2C_BUS, parent, name)); + QLIST_INIT(&bus->current_devs); +- vmstate_register(NULL, -1, &vmstate_i2c_bus, bus); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus); + return bus; + } + +diff --git a/hw/input/stellaris_input.c b/hw/input/stellaris_input.c +index 59892b0..e6ee5e1 100644 +--- a/hw/input/stellaris_input.c ++++ b/hw/input/stellaris_input.c +@@ -88,5 +88,6 @@ void stellaris_gamepad_init(int n, qemu_irq *irq, const int *keycode) + } + s->num_buttons = n; + qemu_add_kbd_event_handler(stellaris_gamepad_put_key, s); +- vmstate_register(NULL, -1, &vmstate_stellaris_gamepad, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, ++ &vmstate_stellaris_gamepad, s); + } +diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c +index 375cb6a..f2c3a7f 100644 +--- a/hw/intc/apic_common.c ++++ b/hw/intc/apic_common.c +@@ -284,7 +284,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) + } + + if (s->legacy_instance_id) { +- instance_id = -1; ++ instance_id = VMSTATE_INSTANCE_ID_ANY; + } + vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common, + s, -1, 0, NULL); +diff --git a/hw/misc/max111x.c b/hw/misc/max111x.c 
+index a713149..81ee73e 100644 +--- a/hw/misc/max111x.c ++++ b/hw/misc/max111x.c +@@ -146,7 +146,7 @@ static int max111x_init(SSISlave *d, int inputs) + s->input[7] = 0x80; + s->com = 0; + +- vmstate_register(dev, -1, &vmstate_max111x, s); ++ vmstate_register(dev, VMSTATE_INSTANCE_ID_ANY, &vmstate_max111x, s); + return 0; + } + +diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c +index cc2dd8b..39920c6 100644 +--- a/hw/net/eepro100.c ++++ b/hw/net/eepro100.c +@@ -1874,7 +1874,7 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp) + + s->vmstate = g_memdup(&vmstate_eepro100, sizeof(vmstate_eepro100)); + s->vmstate->name = qemu_get_queue(s->nic)->model; +- vmstate_register(&pci_dev->qdev, -1, s->vmstate, s); ++ vmstate_register(&pci_dev->qdev, VMSTATE_INSTANCE_ID_ANY, s->vmstate, s); + } + + static void eepro100_instance_init(Object *obj) +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index cbc7a32..fed019d 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -124,7 +124,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp) + bus->machine_done.notify = pcibus_machine_done; + qemu_add_machine_init_done_notifier(&bus->machine_done); + +- vmstate_register(NULL, -1, &vmstate_pcibus, bus); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus); + } + + static void pcie_bus_realize(BusState *qbus, Error **errp) +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 8749c72..c12862d 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -3028,7 +3028,7 @@ static void spapr_machine_init(MachineState *machine) + * interface, this is a legacy from the sPAPREnvironment structure + * which predated MachineState but had a similar function */ + vmstate_register(NULL, 0, &vmstate_spapr, spapr); +- register_savevm_live("spapr/htab", -1, 1, ++ register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, + &savevm_htab_handlers, spapr); + + qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine), +diff --git a/hw/timer/arm_timer.c 
b/hw/timer/arm_timer.c +index af524fa..beaa285 100644 +--- a/hw/timer/arm_timer.c ++++ b/hw/timer/arm_timer.c +@@ -180,7 +180,7 @@ static arm_timer_state *arm_timer_init(uint32_t freq) + s->control = TIMER_CTRL_IE; + + s->timer = ptimer_init(arm_timer_tick, s, PTIMER_POLICY_DEFAULT); +- vmstate_register(NULL, -1, &vmstate_arm_timer, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_arm_timer, s); + return s; + } + +diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c +index 22f9113..da7b490 100644 +--- a/hw/tpm/tpm_emulator.c ++++ b/hw/tpm/tpm_emulator.c +@@ -914,7 +914,8 @@ static void tpm_emulator_inst_init(Object *obj) + tpm_emu->cur_locty_number = ~0; + qemu_mutex_init(&tpm_emu->mutex); + +- vmstate_register(NULL, -1, &vmstate_tpm_emulator, obj); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, ++ &vmstate_tpm_emulator, obj); + } + + /* +diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h +index ac4f46a..883f1cf 100644 +--- a/include/migration/vmstate.h ++++ b/include/migration/vmstate.h +@@ -1155,6 +1155,8 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, + + bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); + ++#define VMSTATE_INSTANCE_ID_ANY -1 ++ + /* Returns: 0 on success, -1 on failure */ + int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, + const VMStateDescription *vmsd, +diff --git a/migration/savevm.c b/migration/savevm.c +index a71b930..e2e8e0a 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -750,7 +750,7 @@ int register_savevm_live(const char *idstr, + + pstrcat(se->idstr, sizeof(se->idstr), idstr); + +- if (instance_id == -1) { ++ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { + se->instance_id = calculate_new_instance_id(se->idstr); + } else { + se->instance_id = instance_id; +@@ -817,14 +817,14 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, + + se->compat = g_new0(CompatEntry, 1); + 
pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); +- se->compat->instance_id = instance_id == -1 ? ++ se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ? + calculate_compat_instance_id(vmsd->name) : instance_id; +- instance_id = -1; ++ instance_id = VMSTATE_INSTANCE_ID_ANY; + } + } + pstrcat(se->idstr, sizeof(se->idstr), vmsd->name); + +- if (instance_id == -1) { ++ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { + se->instance_id = calculate_new_instance_id(se->idstr); + } else { + se->instance_id = instance_id; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-Don-t-send-data-if-we-have-stopped.patch b/SOURCES/kvm-migration-Don-t-send-data-if-we-have-stopped.patch new file mode 100644 index 0000000..9a36714 --- /dev/null +++ b/SOURCES/kvm-migration-Don-t-send-data-if-we-have-stopped.patch @@ -0,0 +1,42 @@ +From ab07e0b41c50a85940d798a9a65a58698fd2edfb Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:40 +0000 +Subject: [PATCH 08/18] migration: Don't send data if we have stopped + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-8-quintela@redhat.com> +Patchwork-id: 94114 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 07/10] migration: Don't send data if we have stopped +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +If we do a cancel, we got out without one error, but we can't do the +rest of the output as in a normal situation. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit b69a0227a803256ad270283872d40ff768f4d56d) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index a0257ee..902c56c 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3511,7 +3511,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_ROUND); + + out: +- if (ret >= 0) { ++ if (ret >= 0 ++ && migration_is_setup_or_active(migrate_get_current()->state)) { + multifd_send_sync_main(rs); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch b/SOURCES/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch new file mode 100644 index 0000000..01cb0f1 --- /dev/null +++ b/SOURCES/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch @@ -0,0 +1,94 @@ +From 71b05ab5782aa1e38c016be6264a14f5650d2a87 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:35 +0000 +Subject: [PATCH 03/18] migration: Make sure that we don't call write() in case + of error + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-3-quintela@redhat.com> +Patchwork-id: 94113 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 02/10] migration: Make sure that we don't call write() in case of error +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +If we are exiting due to an error/finish/.... Just don't try to even +touch the channel with one IO operation. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Juan Quintela +(cherry picked from commit 4d65a6216bfc44891ac298b74a6921d479805131) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 65580e3..8c783b3 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -899,6 +899,12 @@ struct { + uint64_t packet_num; + /* send channels ready */ + QemuSemaphore channels_ready; ++ /* ++ * Have we already run terminate threads. There is a race when it ++ * happens that we got one error while we are exiting. ++ * We will use atomic operations. Only valid values are 0 and 1. ++ */ ++ int exiting; + } *multifd_send_state; + + /* +@@ -927,6 +933,10 @@ static int multifd_send_pages(RAMState *rs) + MultiFDPages_t *pages = multifd_send_state->pages; + uint64_t transferred; + ++ if (atomic_read(&multifd_send_state->exiting)) { ++ return -1; ++ } ++ + qemu_sem_wait(&multifd_send_state->channels_ready); + for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { + p = &multifd_send_state->params[i]; +@@ -1008,6 +1018,16 @@ static void multifd_send_terminate_threads(Error *err) + } + } + ++ /* ++ * We don't want to exit each threads twice. Depending on where ++ * we get the error, or if there are two independent errors in two ++ * threads at the same time, we can end calling this function ++ * twice. 
++ */ ++ if (atomic_xchg(&multifd_send_state->exiting, 1)) { ++ return; ++ } ++ + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -1117,6 +1137,10 @@ static void *multifd_send_thread(void *opaque) + + while (true) { + qemu_sem_wait(&p->sem); ++ ++ if (atomic_read(&multifd_send_state->exiting)) { ++ break; ++ } + qemu_mutex_lock(&p->mutex); + + if (p->pending_job) { +@@ -1225,6 +1249,7 @@ int multifd_save_setup(void) + multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); + multifd_send_state->pages = multifd_pages_init(page_count); + qemu_sem_init(&multifd_send_state->channels_ready, 0); ++ atomic_set(&multifd_send_state->exiting, 0); + + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch b/SOURCES/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch new file mode 100644 index 0000000..4a7fb28 --- /dev/null +++ b/SOURCES/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch @@ -0,0 +1,70 @@ +From 3c4f6f0c2bf5562f2aa26f964848ae53e6ac4790 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:43 +0000 +Subject: [PATCH 11/18] migration: Maybe VM is paused when migration is + cancelled + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-11-quintela@redhat.com> +Patchwork-id: 94120 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 10/10] migration: Maybe VM is paused when migration is cancelled +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Zhimin Feng + +If the migration is cancelled when it is in the completion phase, +the migration state is set to MIGRATION_STATUS_CANCELLING. +The VM maybe wait for the 'pause_sem' semaphore in migration_maybe_pause +function, so that VM always is paused. 
+ +Reported-by: Euler Robot +Signed-off-by: Zhimin Feng +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 8958338b10abcb346b54a8038a491fda2db1c853) +Signed-off-by: Danilo C. L. de Paula +--- + migration/migration.c | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index eb50d77..ed18c59 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2786,14 +2786,22 @@ static int migration_maybe_pause(MigrationState *s, + /* This block intentionally left blank */ + } + +- qemu_mutex_unlock_iothread(); +- migrate_set_state(&s->state, *current_active_state, +- MIGRATION_STATUS_PRE_SWITCHOVER); +- qemu_sem_wait(&s->pause_sem); +- migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, +- new_state); +- *current_active_state = new_state; +- qemu_mutex_lock_iothread(); ++ /* ++ * If the migration is cancelled when it is in the completion phase, ++ * the migration state is set to MIGRATION_STATUS_CANCELLING. ++ * So we don't need to wait a semaphore, otherwise we would always ++ * wait for the 'pause_sem' semaphore. ++ */ ++ if (s->state != MIGRATION_STATUS_CANCELLING) { ++ qemu_mutex_unlock_iothread(); ++ migrate_set_state(&s->state, *current_active_state, ++ MIGRATION_STATUS_PRE_SWITCHOVER); ++ qemu_sem_wait(&s->pause_sem); ++ migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, ++ new_state); ++ *current_active_state = new_state; ++ qemu_mutex_lock_iothread(); ++ } + + return s->state == new_state ? 
0 : -EINVAL; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-Rate-limit-inside-host-pages.patch b/SOURCES/kvm-migration-Rate-limit-inside-host-pages.patch new file mode 100644 index 0000000..2d3d519 --- /dev/null +++ b/SOURCES/kvm-migration-Rate-limit-inside-host-pages.patch @@ -0,0 +1,172 @@ +From 8e8f421cce99543081f225acf46541312cfbc371 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Tue, 17 Mar 2020 17:05:18 +0000 +Subject: [PATCH 1/2] migration: Rate limit inside host pages + +RH-Author: Laurent Vivier +Message-id: <20200317170518.9303-1-lvivier@redhat.com> +Patchwork-id: 94374 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] migration: Rate limit inside host pages +Bugzilla: 1814336 +RH-Acked-by: Peter Xu +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert + +From: "Dr. David Alan Gilbert" + +When using hugepages, rate limiting is necessary within each huge +page, since a 1G huge page can take a significant time to send, so +you end up with bursty behaviour. + +Fixes: 4c011c37ecb3 ("postcopy: Send whole huge pages") +Reported-by: Lin Ma +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +(cherry picked from commit 97e1e06780e70f6e98a0d2df881e0c0927d3aeb6) +Signed-off-by: Laurent Vivier + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1814336 +BRANCH: rhel-av-8.2.0 +UPSTREAM: Merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27283241 +TESTED: Tested that the migration abort doesn't trigger an error message in + the kernel logs on P9 + +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/migration.c | 57 ++++++++++++++++++++++++++++---------------------- + migration/migration.h | 1 + + migration/ram.c | 2 ++ + migration/trace-events | 4 ++-- + 4 files changed, 37 insertions(+), 27 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index ed18c59..e31d0f5 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3253,6 +3253,37 @@ void migration_consume_urgent_request(void) + qemu_sem_wait(&migrate_get_current()->rate_limit_sem); + } + ++/* Returns true if the rate limiting was broken by an urgent request */ ++bool migration_rate_limit(void) ++{ ++ int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); ++ MigrationState *s = migrate_get_current(); ++ ++ bool urgent = false; ++ migration_update_counters(s, now); ++ if (qemu_file_rate_limit(s->to_dst_file)) { ++ /* ++ * Wait for a delay to do rate limiting OR ++ * something urgent to post the semaphore. ++ */ ++ int ms = s->iteration_start_time + BUFFER_DELAY - now; ++ trace_migration_rate_limit_pre(ms); ++ if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { ++ /* ++ * We were woken by one or more urgent things but ++ * the timedwait will have consumed one of them. ++ * The service routine for the urgent wake will dec ++ * the semaphore itself for each item it consumes, ++ * so add this one we just eat back. ++ */ ++ qemu_sem_post(&s->rate_limit_sem); ++ urgent = true; ++ } ++ trace_migration_rate_limit_post(urgent); ++ } ++ return urgent; ++} ++ + /* + * Master migration thread on the source VM. + * It drives the migration and pumps the data down the outgoing channel. 
+@@ -3319,8 +3350,6 @@ static void *migration_thread(void *opaque) + trace_migration_thread_setup_complete(); + + while (migration_is_active(s)) { +- int64_t current_time; +- + if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { + MigIterateState iter_state = migration_iteration_run(s); + if (iter_state == MIG_ITERATE_SKIP) { +@@ -3347,29 +3376,7 @@ static void *migration_thread(void *opaque) + update_iteration_initial_status(s); + } + +- current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +- +- migration_update_counters(s, current_time); +- +- urgent = false; +- if (qemu_file_rate_limit(s->to_dst_file)) { +- /* Wait for a delay to do rate limiting OR +- * something urgent to post the semaphore. +- */ +- int ms = s->iteration_start_time + BUFFER_DELAY - current_time; +- trace_migration_thread_ratelimit_pre(ms); +- if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { +- /* We were worken by one or more urgent things but +- * the timedwait will have consumed one of them. +- * The service routine for the urgent wake will dec +- * the semaphore itself for each item it consumes, +- * so add this one we just eat back. 
+- */ +- qemu_sem_post(&s->rate_limit_sem); +- urgent = true; +- } +- trace_migration_thread_ratelimit_post(urgent); +- } ++ urgent = migration_rate_limit(); + } + + trace_migration_thread_after_loop(); +diff --git a/migration/migration.h b/migration/migration.h +index a2b2336..a15e8d8 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -347,5 +347,6 @@ extern bool migrate_pre_2_2; + + void migration_make_urgent_request(void); + void migration_consume_urgent_request(void); ++bool migration_rate_limit(void); + + #endif +diff --git a/migration/ram.c b/migration/ram.c +index 3891eff..5344c7d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2661,6 +2661,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, + + pages += tmppages; + pss->page++; ++ /* Allow rate limiting to happen in the middle of huge pages */ ++ migration_rate_limit(); + } while ((pss->page & (pagesize_bits - 1)) && + offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); + +diff --git a/migration/trace-events b/migration/trace-events +index 6dee7b5..2f9129e 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -138,12 +138,12 @@ migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi6 + migration_completion_file_err(void) "" + migration_completion_postcopy_end(void) "" + migration_completion_postcopy_end_after_complete(void) "" ++migration_rate_limit_pre(int ms) "%d ms" ++migration_rate_limit_post(int urgent) "urgent: %d" + migration_return_path_end_before(void) "" + migration_return_path_end_after(int rp_error) "%d" + migration_thread_after_loop(void) "" + migration_thread_file_err(void) "" +-migration_thread_ratelimit_pre(int ms) "%d ms" +-migration_thread_ratelimit_post(int urgent) "urgent: %d" + migration_thread_setup_complete(void) "" + open_return_path_on_source(void) "" + open_return_path_on_source_continue(void) "" +-- +1.8.3.1 + diff --git 
a/SOURCES/kvm-migration-multifd-clean-pages-after-filling-packet.patch b/SOURCES/kvm-migration-multifd-clean-pages-after-filling-packet.patch new file mode 100644 index 0000000..5fa7fde --- /dev/null +++ b/SOURCES/kvm-migration-multifd-clean-pages-after-filling-packet.patch @@ -0,0 +1,65 @@ +From 32ee75b7f4a31d6080e5659e2a0285a046ef1036 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:34 +0000 +Subject: [PATCH 02/18] migration/multifd: clean pages after filling packet + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-2-quintela@redhat.com> +Patchwork-id: 94112 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 01/10] migration/multifd: clean pages after filling packet +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Wei Yang + +This is a preparation for the next patch: + + not use multifd during postcopy. + +Without enabling postcopy, everything looks good. While after enabling +postcopy, migration may fail even not use multifd during postcopy. The +reason is the pages is not properly cleared and *old* target page will +continue to be transferred. + +After clean pages, migration succeeds. + +Signed-off-by: Wei Yang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit eab54aa78ffd9fb7895b20fc2761ee998479489b) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 5078f94..65580e3 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -944,10 +944,10 @@ static int multifd_send_pages(RAMState *rs) + } + qemu_mutex_unlock(&p->mutex); + } +- p->pages->used = 0; ++ assert(!p->pages->used); ++ assert(!p->pages->block); + + p->packet_num = multifd_send_state->packet_num++; +- p->pages->block = NULL; + multifd_send_state->pages = p->pages; + p->pages = pages; + transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; +@@ -1129,6 +1129,8 @@ static void *multifd_send_thread(void *opaque) + p->flags = 0; + p->num_packets++; + p->num_pages += used; ++ p->pages->used = 0; ++ p->pages->block = NULL; + qemu_mutex_unlock(&p->mutex); + + trace_multifd_send(p->id, packet_num, used, flags, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch b/SOURCES/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch new file mode 100644 index 0000000..0c5fe80 --- /dev/null +++ b/SOURCES/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch @@ -0,0 +1,77 @@ +From 2c14a6831954a59256cc8d1980da0ad705a3a3fa Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:37 +0000 +Subject: [PATCH 05/18] migration/multifd: fix destroyed mutex access in + terminating multifd threads + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-5-quintela@redhat.com> +Patchwork-id: 94119 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 04/10] migration/multifd: fix destroyed mutex access in terminating multifd threads +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Jiahui Cen + +One multifd will lock all the other multifds' IOChannel mutex to inform them +to quit by setting p->quit or shutting down p->c. 
In this senario, if some +multifds had already been terminated and multifd_load_cleanup/multifd_save_cleanup +had destroyed their mutex, it could cause destroyed mutex access when trying +lock their mutex. + +Here is the coredump stack: + #0 0x00007f81a2794437 in raise () from /usr/lib64/libc.so.6 + #1 0x00007f81a2795b28 in abort () from /usr/lib64/libc.so.6 + #2 0x00007f81a278d1b6 in __assert_fail_base () from /usr/lib64/libc.so.6 + #3 0x00007f81a278d262 in __assert_fail () from /usr/lib64/libc.so.6 + #4 0x000055eb1bfadbd3 in qemu_mutex_lock_impl (mutex=0x55eb1e2d1988, file=, line=) at util/qemu-thread-posix.c:64 + #5 0x000055eb1bb4564a in multifd_send_terminate_threads (err=) at migration/ram.c:1015 + #6 0x000055eb1bb4bb7f in multifd_send_thread (opaque=0x55eb1e2d19f8) at migration/ram.c:1171 + #7 0x000055eb1bfad628 in qemu_thread_start (args=0x55eb1e170450) at util/qemu-thread-posix.c:502 + #8 0x00007f81a2b36df5 in start_thread () from /usr/lib64/libpthread.so.0 + #9 0x00007f81a286048d in clone () from /usr/lib64/libc.so.6 + +To fix it up, let's destroy the mutex after all the other multifd threads had +been terminated. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 9560a48ecc0c20d87bc458a6db77fba651605819) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 860f781..6c55c5d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1052,6 +1052,10 @@ void multifd_save_cleanup(void) + if (p->running) { + qemu_thread_join(&p->thread); + } ++ } ++ for (i = 0; i < migrate_multifd_channels(); i++) { ++ MultiFDSendParams *p = &multifd_send_state->params[i]; ++ + socket_send_channel_destroy(p->c); + p->c = NULL; + qemu_mutex_destroy(&p->mutex); +@@ -1335,6 +1339,10 @@ int multifd_load_cleanup(Error **errp) + qemu_sem_post(&p->sem_sync); + qemu_thread_join(&p->thread); + } ++ } ++ for (i = 0; i < migrate_multifd_channels(); i++) { ++ MultiFDRecvParams *p = &multifd_recv_state->params[i]; ++ + object_unref(OBJECT(p->c)); + p->c = NULL; + qemu_mutex_destroy(&p->mutex); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch b/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch new file mode 100644 index 0000000..9e9683c --- /dev/null +++ b/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch @@ -0,0 +1,75 @@ +From 517a99c5fba163bf684978fe3d9476b619481391 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:42 +0000 +Subject: [PATCH 10/18] migration/multifd: fix nullptr access in + multifd_send_terminate_threads + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-10-quintela@redhat.com> +Patchwork-id: 94117 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 09/10] migration/multifd: fix nullptr access in multifd_send_terminate_threads +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Zhimin Feng + +If the multifd_send_threads is not created when migration is failed, +multifd_save_cleanup would be called twice. 
In this senario, the +multifd_send_state is accessed after it has been released, the result +is that the source VM is crashing down. + +Here is the coredump stack: + Program received signal SIGSEGV, Segmentation fault. + 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 + 1012 MultiFDSendParams *p = &multifd_send_state->params[i]; + #0 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 + #1 0x00005629333ab8a9 in multifd_save_cleanup () at migration/ram.c:1028 + #2 0x00005629333abaea in multifd_new_send_channel_async (task=0x562935450e70, opaque=) at migration/ram.c:1202 + #3 0x000056293373a562 in qio_task_complete (task=task@entry=0x562935450e70) at io/task.c:196 + #4 0x000056293373a6e0 in qio_task_thread_result (opaque=0x562935450e70) at io/task.c:111 + #5 0x00007f475d4d75a7 in g_idle_dispatch () from /usr/lib64/libglib-2.0.so.0 + #6 0x00007f475d4da9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 + #7 0x0000562933785b33 in glib_pollfds_poll () at util/main-loop.c:219 + #8 os_host_main_loop_wait (timeout=) at util/main-loop.c:242 + #9 main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:518 + #10 0x00005629334c5acf in main_loop () at vl.c:1810 + #11 0x000056293334d7bb in main (argc=, argv=, envp=) at vl.c:4471 + +If the multifd_send_threads is not created when migration is failed. +In this senario, we don't call multifd_save_cleanup in multifd_new_send_channel_async. + +Signed-off-by: Zhimin Feng +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 9c4d333c092e9c26d38f740ff3616deb42f21681) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 902c56c..3891eff 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1229,7 +1229,15 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + trace_multifd_new_send_channel_async(p->id); + if (qio_task_propagate_error(task, &local_err)) { + migrate_set_error(migrate_get_current(), local_err); +- multifd_save_cleanup(); ++ /* Error happen, we need to tell who pay attention to me */ ++ qemu_sem_post(&multifd_send_state->channels_ready); ++ qemu_sem_post(&p->sem_sync); ++ /* ++ * Although multifd_send_thread is not created, but main migration ++ * thread neet to judge whether it is running, so we need to mark ++ * its status. ++ */ ++ p->quit = true; + } else { + p->c = QIO_CHANNEL(sioc); + qio_channel_set_delay(p->c, false); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch b/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch new file mode 100644 index 0000000..e780698 --- /dev/null +++ b/SOURCES/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch @@ -0,0 +1,68 @@ +From 7f664fe26ff67f8131faa7a81a388b8a5b51403f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:36 +0000 +Subject: [PATCH 04/18] migration/multifd: fix nullptr access in terminating + multifd threads + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-4-quintela@redhat.com> +Patchwork-id: 94110 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 03/10] migration/multifd: fix nullptr access in terminating multifd threads +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Jiahui Cen + +One multifd channel will shutdown all the other multifd's IOChannel when it +fails to receive an IOChannel. 
In this senario, if some multifds had not +received its IOChannel yet, it would try to shutdown its IOChannel which could +cause nullptr access at qio_channel_shutdown. + +Here is the coredump stack: + #0 object_get_class (obj=obj@entry=0x0) at qom/object.c:908 + #1 0x00005563fdbb8f4a in qio_channel_shutdown (ioc=0x0, how=QIO_CHANNEL_SHUTDOWN_BOTH, errp=0x0) at io/channel.c:355 + #2 0x00005563fd7b4c5f in multifd_recv_terminate_threads (err=) at migration/ram.c:1280 + #3 0x00005563fd7bc019 in multifd_recv_new_channel (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce00) at migration/ram.c:1478 + #4 0x00005563fda82177 in migration_ioc_process_incoming (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce30) at migration/migration.c:605 + #5 0x00005563fda8567d in migration_channel_process_incoming (ioc=0x556400255610) at migration/channel.c:44 + #6 0x00005563fda83ee0 in socket_accept_incoming_migration (listener=0x5563fff6b920, cioc=0x556400255610, opaque=) at migration/socket.c:166 + #7 0x00005563fdbc25cd in qio_net_listener_channel_func (ioc=, condition=, opaque=) at io/net-listener.c:54 + #8 0x00007f895b6fe9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 + #9 0x00005563fdc18136 in glib_pollfds_poll () at util/main-loop.c:218 + #10 0x00005563fdc181b5 in os_host_main_loop_wait (timeout=1000000000) at util/main-loop.c:241 + #11 0x00005563fdc183a2 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:517 + #12 0x00005563fd8edb37 in main_loop () at vl.c:1791 + #13 0x00005563fd74fd45 in main (argc=, argv=, envp=) at vl.c:4473 + +To fix it up, let's check p->c before calling qio_channel_shutdown. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit f76e32eb05041ab001184ab16afb56524adccd0c) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 8c783b3..860f781 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1307,7 +1307,9 @@ static void multifd_recv_terminate_threads(Error *err) + - normal quit, i.e. everything went fine, just finished + - error quit: We close the channels so the channel threads + finish the qio_channel_read_all_eof() */ +- qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); ++ if (p->c) { ++ qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); ++ } + qemu_mutex_unlock(&p->mutex); + } + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch b/SOURCES/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch new file mode 100644 index 0000000..c20cb6c --- /dev/null +++ b/SOURCES/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch @@ -0,0 +1,123 @@ +From 261ee33e0e6711fadd3049e4640bb731ee3d44ff Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 24 Feb 2020 16:57:10 +0000 +Subject: [PATCH 9/9] mirror: Don't let an operation wait for itself + +RH-Author: Kevin Wolf +Message-id: <20200224165710.4830-3-kwolf@redhat.com> +Patchwork-id: 94045 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] mirror: Don't let an operation wait for itself +Bugzilla: 1794692 +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +mirror_wait_for_free_in_flight_slot() just picks a random operation to +wait for. However, when mirror_co_read() waits for free slots, its +MirrorOp is already in s->ops_in_flight, so if not enough slots are +immediately available, an operation can end up waiting for itself to +complete, which results in a hang. + +Fix this by passing the current MirrorOp and skipping this operation +when picking an operation to wait for. 
+ +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +(cherry picked from commit 7e6c4ff792734e196c8ca82564c56b5e7c6288ca) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/mirror.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 8959e42..cacbc70 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -283,11 +283,14 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, + } + + static inline void coroutine_fn +-mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) ++mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) + { + MirrorOp *op; + + QTAILQ_FOREACH(op, &s->ops_in_flight, next) { ++ if (self == op) { ++ continue; ++ } + /* Do not wait on pseudo ops, because it may in turn wait on + * some other operation to start, which may in fact be the + * caller of this function. Since there is only one pseudo op +@@ -302,10 +305,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) + } + + static inline void coroutine_fn +-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) ++mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self) + { + /* Only non-active operations use up in-flight slots */ +- mirror_wait_for_any_operation(s, false); ++ mirror_wait_for_any_operation(s, self, false); + } + + /* Perform a mirror copy operation. 
+@@ -348,7 +351,7 @@ static void coroutine_fn mirror_co_read(void *opaque) + + while (s->buf_free_count < nb_chunks) { + trace_mirror_yield_in_flight(s, op->offset, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, op); + } + + /* Now make a QEMUIOVector taking enough granularity-sized chunks +@@ -555,7 +558,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) + + while (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield_in_flight(s, offset, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, pseudo_op); + } + + if (s->ret < 0) { +@@ -609,7 +612,7 @@ static void mirror_free_init(MirrorBlockJob *s) + static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) + { + while (s->in_flight > 0) { +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, NULL); + } + } + +@@ -794,7 +797,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) + if (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, + s->in_flight); +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, NULL); + continue; + } + +@@ -947,7 +950,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + /* Do not start passive operations while there are active + * writes in progress */ + while (s->in_active_write_counter) { +- mirror_wait_for_any_operation(s, true); ++ mirror_wait_for_any_operation(s, NULL, true); + } + + if (s->ret < 0) { +@@ -973,7 +976,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || + (cnt == 0 && s->in_flight > 0)) { + trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, NULL); + continue; + } else if (cnt != 0) { + delay_ns = mirror_iteration(s); +-- +1.8.3.1 + diff --git 
a/SOURCES/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch b/SOURCES/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch new file mode 100644 index 0000000..67f3e54 --- /dev/null +++ b/SOURCES/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch @@ -0,0 +1,51 @@ +From 27fe3b8d42a2c99de01ce20e4b0727079c12da65 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 24 Feb 2020 16:57:09 +0000 +Subject: [PATCH 8/9] mirror: Store MirrorOp.co for debuggability + +RH-Author: Kevin Wolf +Message-id: <20200224165710.4830-2-kwolf@redhat.com> +Patchwork-id: 94044 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] mirror: Store MirrorOp.co for debuggability +Bugzilla: 1794692 +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +If a coroutine is launched, but the coroutine pointer isn't stored +anywhere, debugging any problems inside the coroutine is quite hard. +Let's store the coroutine pointer of a mirror operation in MirrorOp to +have it available in the debugger. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +(cherry picked from commit eed325b92c3e68417121ea23f96e33af6a4654ed) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/mirror.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/block/mirror.c b/block/mirror.c +index f0f2d9d..8959e42 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -103,6 +103,7 @@ struct MirrorOp { + bool is_pseudo_op; + bool is_active_write; + CoQueue waiting_requests; ++ Coroutine *co; + + QTAILQ_ENTRY(MirrorOp) next; + }; +@@ -429,6 +430,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, + default: + abort(); + } ++ op->co = co; + + QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); + qemu_coroutine_enter(co); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-mirror-Wait-only-for-in-flight-operations.patch b/SOURCES/kvm-mirror-Wait-only-for-in-flight-operations.patch new file mode 100644 index 0000000..a06d30e --- /dev/null +++ b/SOURCES/kvm-mirror-Wait-only-for-in-flight-operations.patch @@ -0,0 +1,95 @@ +From bddf389330e11fb0ce17413c1bfa2264a281ded2 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 30 Mar 2020 11:19:24 +0100 +Subject: [PATCH 4/4] mirror: Wait only for in-flight operations + +RH-Author: Kevin Wolf +Message-id: <20200330111924.22938-3-kwolf@redhat.com> +Patchwork-id: 94463 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] mirror: Wait only for in-flight operations +Bugzilla: 1794692 +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +mirror_wait_for_free_in_flight_slot() just picks a random operation to +wait for. However, a MirrorOp is already in s->ops_in_flight when +mirror_co_read() waits for free slots, so if not enough slots are +immediately available, an operation can end up waiting for itself, or +two or more operations can wait for each other to complete, which +results in a hang. + +Fix this by adding a flag to MirrorOp that tells us if the request is +already in flight (and therefore occupies slots that it will later +free), and picking only such operations for waiting. 
+ +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 +Signed-off-by: Kevin Wolf +Message-Id: <20200326153628.4869-3-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit ce8cabbd17cf738ddfc68384440c38e5dd2fdf97) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/mirror.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 8959e42..5e5a521 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -102,6 +102,7 @@ struct MirrorOp { + + bool is_pseudo_op; + bool is_active_write; ++ bool is_in_flight; + CoQueue waiting_requests; + Coroutine *co; + +@@ -293,7 +294,9 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) + * caller of this function. Since there is only one pseudo op + * at any given time, we will always find some real operation + * to wait on. */ +- if (!op->is_pseudo_op && op->is_active_write == active) { ++ if (!op->is_pseudo_op && op->is_in_flight && ++ op->is_active_write == active) ++ { + qemu_co_queue_wait(&op->waiting_requests, NULL); + return; + } +@@ -367,6 +370,7 @@ static void coroutine_fn mirror_co_read(void *opaque) + /* Copy the dirty cluster. */ + s->in_flight++; + s->bytes_in_flight += op->bytes; ++ op->is_in_flight = true; + trace_mirror_one_iteration(s, op->offset, op->bytes); + + ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes, +@@ -382,6 +386,7 @@ static void coroutine_fn mirror_co_zero(void *opaque) + op->s->in_flight++; + op->s->bytes_in_flight += op->bytes; + *op->bytes_handled = op->bytes; ++ op->is_in_flight = true; + + ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, + op->s->unmap ? 
BDRV_REQ_MAY_UNMAP : 0); +@@ -396,6 +401,7 @@ static void coroutine_fn mirror_co_discard(void *opaque) + op->s->in_flight++; + op->s->bytes_in_flight += op->bytes; + *op->bytes_handled = op->bytes; ++ op->is_in_flight = true; + + ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes); + mirror_write_complete(op, ret); +@@ -1306,6 +1312,7 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, + .offset = offset, + .bytes = bytes, + .is_active_write = true, ++ .is_in_flight = true, + }; + qemu_co_queue_init(&op->waiting_requests); + QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch b/SOURCES/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch new file mode 100644 index 0000000..bca0b4c --- /dev/null +++ b/SOURCES/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch @@ -0,0 +1,74 @@ +From 78c7fb5afcb298631df47f6b71cf764f921c15f4 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:38 +0000 +Subject: [PATCH 06/18] multifd: Make sure that we don't do any IO after an + error + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-6-quintela@redhat.com> +Patchwork-id: 94118 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 05/10] multifd: Make sure that we don't do any IO after an error +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 3d4095b222d97393b1c2c6e514951ec7798f1c43) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 22 +++++++++++++--------- + 1 file changed, 13 insertions(+), 9 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 6c55c5d..a0257ee 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3440,7 +3440,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + { + RAMState **temp = opaque; + RAMState *rs = *temp; +- int ret; ++ int ret = 0; + int i; + int64_t t0; + int done = 0; +@@ -3511,12 +3511,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_ROUND); + + out: +- multifd_send_sync_main(rs); +- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +- qemu_fflush(f); +- ram_counters.transferred += 8; ++ if (ret >= 0) { ++ multifd_send_sync_main(rs); ++ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); ++ qemu_fflush(f); ++ ram_counters.transferred += 8; + +- ret = qemu_file_get_error(f); ++ ret = qemu_file_get_error(f); ++ } + if (ret < 0) { + return ret; + } +@@ -3568,9 +3570,11 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_FINISH); + } + +- multifd_send_sync_main(rs); +- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +- qemu_fflush(f); ++ if (ret >= 0) { ++ multifd_send_sync_main(rs); ++ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); ++ qemu_fflush(f); ++ } + + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch b/SOURCES/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch new file mode 100644 index 0000000..2dbdb16 --- /dev/null +++ b/SOURCES/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch @@ -0,0 +1,107 @@ +From 22fc9bd7e7ae0b72c6f6e483eb66cf996f519766 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 21 Jan 2020 05:16:11 +0000 +Subject: [PATCH 01/15] ppc: Deassert the external interrupt pin in KVM on + reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: 
<20200121051613.388295-2-dgibson@redhat.com> +Patchwork-id: 93429 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 1/3] ppc: Deassert the external interrupt pin in KVM on reset +Bugzilla: 1776638 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth + +From: Greg Kurz + +When a CPU is reset, QEMU makes sure no interrupt is pending by clearing +CPUPPCstate::pending_interrupts in ppc_cpu_reset(). In the case of a +complete machine emulation, eg. a sPAPR machine, an external interrupt +request could still be pending in KVM though, eg. an IPI. It will be +eventually presented to the guest, which is supposed to acknowledge it at +the interrupt controller. If the interrupt controller is emulated in QEMU, +either XICS or XIVE, ppc_set_irq() won't deassert the external interrupt +pin in KVM since it isn't pending anymore for QEMU. When the vCPU re-enters +the guest, the interrupt request is still pending and the vCPU will try +again to acknowledge it. This causes an infinite loop and eventually hangs +the guest. + +The code has been broken since the beginning. The issue wasn't hit before +because accel=kvm,kernel-irqchip=off is an awkward setup that never got +used until recently with the LC92x IBM systems (aka, Boston). + +Add a ppc_irq_reset() function to do the necessary cleanup, ie. deassert +the IRQ pins of the CPU in QEMU and most importantly the external interrupt +pin for this vCPU in KVM. + +Reported-by: Satheesh Rajendran +Signed-off-by: Greg Kurz +Message-Id: <157548861740.3650476.16879693165328764758.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit 401774387aeb37f2ada9bb18f7c7e307b21a3e93) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/ppc.c | 8 ++++++++ + include/hw/ppc/ppc.h | 2 ++ + target/ppc/translate_init.inc.c | 1 + + 3 files changed, 11 insertions(+) + +diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c +index 52a18eb..d554b64 100644 +--- a/hw/ppc/ppc.c ++++ b/hw/ppc/ppc.c +@@ -1510,3 +1510,11 @@ PowerPCCPU *ppc_get_vcpu_by_pir(int pir) + + return NULL; + } ++ ++void ppc_irq_reset(PowerPCCPU *cpu) ++{ ++ CPUPPCState *env = &cpu->env; ++ ++ env->irq_input_state = 0; ++ kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0); ++} +diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h +index 4bdcb8b..5dd7531 100644 +--- a/include/hw/ppc/ppc.h ++++ b/include/hw/ppc/ppc.h +@@ -76,6 +76,7 @@ static inline void ppc970_irq_init(PowerPCCPU *cpu) {} + static inline void ppcPOWER7_irq_init(PowerPCCPU *cpu) {} + static inline void ppcPOWER9_irq_init(PowerPCCPU *cpu) {} + static inline void ppce500_irq_init(PowerPCCPU *cpu) {} ++static inline void ppc_irq_reset(PowerPCCPU *cpu) {} + #else + void ppc40x_irq_init(PowerPCCPU *cpu); + void ppce500_irq_init(PowerPCCPU *cpu); +@@ -83,6 +84,7 @@ void ppc6xx_irq_init(PowerPCCPU *cpu); + void ppc970_irq_init(PowerPCCPU *cpu); + void ppcPOWER7_irq_init(PowerPCCPU *cpu); + void ppcPOWER9_irq_init(PowerPCCPU *cpu); ++void ppc_irq_reset(PowerPCCPU *cpu); + #endif + + /* PPC machines for OpenBIOS */ +diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c +index ba726de..64a8380 100644 +--- a/target/ppc/translate_init.inc.c ++++ b/target/ppc/translate_init.inc.c +@@ -10461,6 +10461,7 @@ static void ppc_cpu_reset(CPUState *s) + env->pending_interrupts = 0; + s->exception_index = POWERPC_EXCP_NONE; + env->error_code = 0; ++ ppc_irq_reset(cpu); + + /* tininess for underflow is detected before rounding */ + set_float_detect_tininess(float_tininess_before_rounding, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch b/SOURCES/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch new 
file mode 100644 index 0000000..457d149 --- /dev/null +++ b/SOURCES/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch @@ -0,0 +1,112 @@ +From f2f57c1ed926384e074d2048cdbdc30ee2f426eb Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 21 Jan 2020 05:16:13 +0000 +Subject: [PATCH 03/15] ppc: Don't use CPUPPCState::irq_input_state with modern + Book3s CPU models +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20200121051613.388295-4-dgibson@redhat.com> +Patchwork-id: 93431 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 3/3] ppc: Don't use CPUPPCState::irq_input_state with modern Book3s CPU models +Bugzilla: 1776638 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth + +From: Greg Kurz + +The power7_set_irq() and power9_set_irq() functions set this but it is +never used actually. Modern Book3s compatible CPUs are only supported +by the pnv and spapr machines. They have an interrupt controller, XICS +for POWER7/8 and XIVE for POWER9, whose models don't require to track +IRQ input states at the CPU level. + +Drop these lines to avoid confusion. + +Signed-off-by: Greg Kurz +Message-Id: <157548862861.3650476.16622818876928044450.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit c1ad0b892ce20cf2b5e619c79e8a0c4c66b235dc) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/ppc.c | 16 ++-------------- + target/ppc/cpu.h | 4 +++- + 2 files changed, 5 insertions(+), 15 deletions(-) + +diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c +index d554b64..730a41f 100644 +--- a/hw/ppc/ppc.c ++++ b/hw/ppc/ppc.c +@@ -275,10 +275,9 @@ void ppc970_irq_init(PowerPCCPU *cpu) + static void power7_set_irq(void *opaque, int pin, int level) + { + PowerPCCPU *cpu = opaque; +- CPUPPCState *env = &cpu->env; + + LOG_IRQ("%s: env %p pin %d level %d\n", __func__, +- env, pin, level); ++ &cpu->env, pin, level); + + switch (pin) { + case POWER7_INPUT_INT: +@@ -292,11 +291,6 @@ static void power7_set_irq(void *opaque, int pin, int level) + LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); + return; + } +- if (level) { +- env->irq_input_state |= 1 << pin; +- } else { +- env->irq_input_state &= ~(1 << pin); +- } + } + + void ppcPOWER7_irq_init(PowerPCCPU *cpu) +@@ -311,10 +305,9 @@ void ppcPOWER7_irq_init(PowerPCCPU *cpu) + static void power9_set_irq(void *opaque, int pin, int level) + { + PowerPCCPU *cpu = opaque; +- CPUPPCState *env = &cpu->env; + + LOG_IRQ("%s: env %p pin %d level %d\n", __func__, +- env, pin, level); ++ &cpu->env, pin, level); + + switch (pin) { + case POWER9_INPUT_INT: +@@ -334,11 +327,6 @@ static void power9_set_irq(void *opaque, int pin, int level) + LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); + return; + } +- if (level) { +- env->irq_input_state |= 1 << pin; +- } else { +- env->irq_input_state &= ~(1 << pin); +- } + } + + void ppcPOWER9_irq_init(PowerPCCPU *cpu) +diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h +index 5c53801..8887f76 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1090,7 +1090,9 @@ struct CPUPPCState { + #if !defined(CONFIG_USER_ONLY) + /* + * This is the IRQ controller, which is implementation dependent +- * and only relevant when emulating a complete machine. ++ * and only relevant when emulating a complete machine. 
Note that ++ * this isn't used by recent Book3s compatible CPUs (POWER7 and ++ * newer). + */ + uint32_t irq_input_state; + void **irq_inputs; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch b/SOURCES/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch new file mode 100644 index 0000000..9c25b76 --- /dev/null +++ b/SOURCES/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch @@ -0,0 +1,64 @@ +From 428eb7260718b69b1f3f421d03bce10b8785fc49 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:39 +0000 +Subject: [PATCH 19/20] qapi: Add '@allow-write-only-overlay' feature for + 'blockdev-snapshot' + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-14-kwolf@redhat.com> +Patchwork-id: 94290 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 13/13] qapi: Add '@allow-write-only-overlay' feature for 'blockdev-snapshot' +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +From: Peter Krempa + +Anounce that 'blockdev-snapshot' command's permissions allow changing +of the backing file if the 'consistent_read' permission is not required. + +This is useful for libvirt to allow late opening of the backing chain +during a blockdev-mirror. + +Signed-off-by: Peter Krempa +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-8-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit c6bdc312f30d5c7326aa2fdca3e0f98c15eb541a) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + qapi/block-core.json | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index a1e85b0..a64ad81 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1541,6 +1541,12 @@ + # + # For the arguments, see the documentation of BlockdevSnapshot. 
+ # ++# Features: ++# @allow-write-only-overlay: If present, the check whether this operation is safe ++# was relaxed so that it can be used to change ++# backing file of a destination of a blockdev-mirror. ++# (since 5.0) ++# + # Since: 2.5 + # + # Example: +@@ -1561,7 +1567,8 @@ + # + ## + { 'command': 'blockdev-snapshot', +- 'data': 'BlockdevSnapshot' } ++ 'data': 'BlockdevSnapshot', ++ 'features': [ 'allow-write-only-overlay' ] } + + ## + # @change-backing-file: +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch b/SOURCES/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch new file mode 100644 index 0000000..1a7ace5 --- /dev/null +++ b/SOURCES/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch @@ -0,0 +1,52 @@ +From ecc4fb6e1941035e1d9def1f69b779fbea216caf Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 24 Feb 2020 16:13:07 +0000 +Subject: [PATCH 7/9] qcow2: Fix qcow2_alloc_cluster_abort() for external data + file + +RH-Author: Kevin Wolf +Message-id: <20200224161307.29783-2-kwolf@redhat.com> +Patchwork-id: 94042 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] qcow2: Fix qcow2_alloc_cluster_abort() for external data file +Bugzilla: 1703907 +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +For external data file, cluster allocations return an offset in the data +file and are not refcounted. In this case, there is nothing to do for +qcow2_alloc_cluster_abort(). Freeing the same offset in the qcow2 file +is wrong and causes crashes in the better case or image corruption in +the worse case. + +Signed-off-by: Kevin Wolf +Message-Id: <20200211094900.17315-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit c3b6658c1a5a3fb24d6c27b2594cf86146f75b22) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/qcow2-cluster.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c +index 8982b7b..dc3c270 100644 +--- a/block/qcow2-cluster.c ++++ b/block/qcow2-cluster.c +@@ -1015,8 +1015,11 @@ err: + void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) + { + BDRVQcow2State *s = bs->opaque; +- qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits, +- QCOW2_DISCARD_NEVER); ++ if (!has_data_file(bs)) { ++ qcow2_free_clusters(bs, m->alloc_offset, ++ m->nb_clusters << s->cluster_bits, ++ QCOW2_DISCARD_NEVER); ++ } + } + + /* +-- +1.8.3.1 + diff --git a/SOURCES/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch b/SOURCES/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch new file mode 100644 index 0000000..88a6e31 --- /dev/null +++ b/SOURCES/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch @@ -0,0 +1,92 @@ +From d84814e298e3b05fb5bc61cc8e641a5e104d32d5 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:39 +0000 +Subject: [PATCH 07/18] qemu-file: Don't do IO after shutdown + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-7-quintela@redhat.com> +Patchwork-id: 94116 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 06/10] qemu-file: Don't do IO after shutdown +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +Be sure that we are not doing neither read/write after shutdown of the +QEMUFile. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit a555b8092abc6f1bbe4b64c516679cbd68fcfbd8) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/qemu-file.c | 22 +++++++++++++++++++++- + 1 file changed, 21 insertions(+), 1 deletion(-) + +diff --git a/migration/qemu-file.c b/migration/qemu-file.c +index 26fb25d..bbb2b63 100644 +--- a/migration/qemu-file.c ++++ b/migration/qemu-file.c +@@ -53,6 +53,8 @@ struct QEMUFile { + + int last_error; + Error *last_error_obj; ++ /* has the file has been shutdown */ ++ bool shutdown; + }; + + /* +@@ -61,10 +63,18 @@ struct QEMUFile { + */ + int qemu_file_shutdown(QEMUFile *f) + { ++ int ret; ++ ++ f->shutdown = true; + if (!f->ops->shut_down) { + return -ENOSYS; + } +- return f->ops->shut_down(f->opaque, true, true, NULL); ++ ret = f->ops->shut_down(f->opaque, true, true, NULL); ++ ++ if (!f->last_error) { ++ qemu_file_set_error(f, -EIO); ++ } ++ return ret; + } + + /* +@@ -214,6 +224,9 @@ void qemu_fflush(QEMUFile *f) + return; + } + ++ if (f->shutdown) { ++ return; ++ } + if (f->iovcnt > 0) { + expect = iov_size(f->iov, f->iovcnt); + ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos, +@@ -328,6 +341,10 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) + f->buf_index = 0; + f->buf_size = pending; + ++ if (f->shutdown) { ++ return 0; ++ } ++ + len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, + IO_BUF_SIZE - pending, &local_error); + if (len > 0) { +@@ -642,6 +659,9 @@ int64_t qemu_ftell(QEMUFile *f) + + int qemu_file_rate_limit(QEMUFile *f) + { ++ if (f->shutdown) { ++ return 1; ++ } + if (qemu_file_get_error(f)) { + return 1; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-replication-assert-we-own-context-before-job_cancel_.patch b/SOURCES/kvm-replication-assert-we-own-context-before-job_cancel_.patch new file mode 100644 index 0000000..09ef4de --- /dev/null +++ b/SOURCES/kvm-replication-assert-we-own-context-before-job_cancel_.patch @@ -0,0 +1,57 @@ +From 46887feac666d0d7633ff3f5af5721fe2a80a8ab Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:13 +0100 +Subject: [PATCH 2/6] replication: assert we 
own context before job_cancel_sync + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-3-kwolf@redhat.com> +Patchwork-id: 94595 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/6] replication: assert we own context before job_cancel_sync +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +From: Stefan Reiter + +job_cancel_sync requires the job's lock to be held, all other callers +already do this (replication_stop, drive_backup_abort, +blockdev_backup_abort, job_cancel_sync_all, cancel_common). + +In this case we're in a BlockDriver handler, so we already have a lock, +just assert that it is the same as the one used for the commit_job. + +Signed-off-by: Stefan Reiter +Message-Id: <20200407115651.69472-3-s.reiter@proxmox.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 08558e33257ec796594bd411261028a93414a70c) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/replication.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/block/replication.c b/block/replication.c +index 99532ce..0ce27ee 100644 +--- a/block/replication.c ++++ b/block/replication.c +@@ -144,12 +144,15 @@ fail: + static void replication_close(BlockDriverState *bs) + { + BDRVReplicationState *s = bs->opaque; ++ Job *commit_job; + + if (s->stage == BLOCK_REPLICATION_RUNNING) { + replication_stop(s->rs, false, NULL); + } + if (s->stage == BLOCK_REPLICATION_FAILOVER) { +- job_cancel_sync(&s->commit_job->job); ++ commit_job = &s->commit_job->job; ++ assert(commit_job->aio_context == qemu_get_current_aio_context()); ++ job_cancel_sync(commit_job); + } + + if (s->mode == REPLICATION_MODE_SECONDARY) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-s390x.conf b/SOURCES/kvm-s390x.conf new file mode 100644 index 0000000..d82b818 --- /dev/null +++ b/SOURCES/kvm-s390x.conf @@ -0,0 +1,19 @@ +# User changes in this file are preserved across upgrades. 
+# +# Setting "modprobe kvm nested=1" only enables Nested Virtualization until +# the next reboot or module reload. Uncomment the option below to enable +# the feature permanently. +# +#options kvm nested=1 +# +# +# Setting "modprobe kvm hpage=1" only enables Huge Page Backing (1MB) +# support until the next reboot or module reload. Uncomment the option +# below to enable the feature permanently. +# +# Note: - Incompatible with "nested=1". Loading the module will fail. +# - Dirty page logging will be performed on a 1MB (not 4KB) basis, +# which can result in a lot of data having to be transferred during +# migration, and therefore taking very long to converge. +# +#options kvm hpage=1 diff --git a/SOURCES/kvm-setup b/SOURCES/kvm-setup new file mode 100644 index 0000000..3bfedf6 --- /dev/null +++ b/SOURCES/kvm-setup @@ -0,0 +1,49 @@ +#! /bin/bash + +kvm_setup_powerpc () { + if grep '^platform[[:space:]]*:[[:space:]]*PowerNV' /proc/cpuinfo > /dev/null; then + # PowerNV platform, which is KVM HV capable + + if [ -z "$SUBCORES" ]; then + SUBCORES=1 + fi + + # Step 1. Load the KVM HVmodule + if ! modprobe -b kvm_hv; then + return + fi + + # On POWER8 a host core can only run threads of a single + # guest, meaning that SMT must be disabled on the host in + # order to run KVM guests. (Also applieds to POWER7, but we + # don't support that). + # + # POWER9 doesn't have this limitation (though it will for hash + # guests on radix host when that's implemented). So, only set + # up subcores and disable SMT for POWER*. + if grep '^cpu[[:space:]]*:[[:space:]]*POWER8' /proc/cpuinfo > /dev/null; then + # Step 2. Configure subcore mode + /usr/sbin/ppc64_cpu --subcores-per-core=$SUBCORES + + # Step 3. 
Disable SMT (multithreading) + /usr/sbin/ppc64_cpu --smt=off + fi + fi +} + +kvm_setup_s390x () { + if grep -q "^features.*sie" /proc/cpuinfo; then + modprobe kvm + fi +} + +case $(uname -m) in + ppc64|ppc64le) + kvm_setup_powerpc + ;; + s390x) + kvm_setup_s390x + ;; +esac + +exit 0 diff --git a/SOURCES/kvm-setup.service b/SOURCES/kvm-setup.service new file mode 100644 index 0000000..9c4bf97 --- /dev/null +++ b/SOURCES/kvm-setup.service @@ -0,0 +1,14 @@ +[Unit] +Description=Perform system configuration to prepare system to run KVM guests +# Offlining CPUs can cause irqbalance to throw warnings if it's running +Before=irqbalance.service +# libvirtd reads CPU topology at startup, so change it before +Before=libvirtd.service + +[Service] +Type=oneshot +EnvironmentFile=-/etc/sysconfig/kvm +ExecStart=/usr/lib/systemd/kvm-setup + +[Install] +WantedBy=multi-user.target diff --git a/SOURCES/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch b/SOURCES/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch new file mode 100644 index 0000000..6d8dfe1 --- /dev/null +++ b/SOURCES/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch @@ -0,0 +1,77 @@ +From 0f659af4870f151e25a7d2184b9a383bff58e3ba Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 17 Jan 2020 12:07:57 +0100 +Subject: [PATCH 2/4] slirp: use correct size while emulating IRC commands +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200117120758.1076549-3-marcandre.lureau@redhat.com> +Patchwork-id: 93400 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 2/3] slirp: use correct size while emulating IRC commands +Bugzilla: 1791568 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +From: Prasad J Pandit + +While emulating IRC DCC commands, tcp_emu() uses 'mbuf' size +'m->m_size' to write DCC commands via 
snprintf(3). This may +lead to OOB write access, because 'bptr' points somewhere in +the middle of 'mbuf' buffer, not at the start. Use M_FREEROOM(m) +size to avoid OOB access. + +Reported-by: Vishnu Dev TJ +Signed-off-by: Prasad J Pandit +Reviewed-by: Samuel Thibault +Message-Id: <20200109094228.79764-2-ppandit@redhat.com> + +(cherry picked from libslirp commit ce131029d6d4a405cb7d3ac6716d03e58fb4a5d9) +Signed-off-by: Marc-André Lureau + +Signed-off-by: Miroslav Rezanina +--- + slirp/src/tcp_subr.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +index cbecd64..cedbfb2 100644 +--- a/slirp/src/tcp_subr.c ++++ b/slirp/src/tcp_subr.c +@@ -778,7 +778,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, m->m_size, "DCC CHAT chat %lu %u%c\n", ++ m->m_len += snprintf(bptr, M_FREEROOM(m), ++ "DCC CHAT chat %lu %u%c\n", + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), 1); + } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, +@@ -788,8 +789,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += +- snprintf(bptr, m->m_size, "DCC SEND %s %lu %u %u%c\n", buff, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), ++ "DCC SEND %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); + } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, +@@ -799,8 +800,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += +- snprintf(bptr, m->m_size, "DCC MOVE %s %lu %u %u%c\n", buff, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), ++ "DCC MOVE %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); + } +-- +1.8.3.1 + diff --git 
a/SOURCES/kvm-slirp-use-correct-size-while-emulating-commands.patch b/SOURCES/kvm-slirp-use-correct-size-while-emulating-commands.patch new file mode 100644 index 0000000..fe42f4f --- /dev/null +++ b/SOURCES/kvm-slirp-use-correct-size-while-emulating-commands.patch @@ -0,0 +1,71 @@ +From dfbfcf02738640ab83f7970e636b72b78f166675 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 17 Jan 2020 12:07:58 +0100 +Subject: [PATCH 3/4] slirp: use correct size while emulating commands +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200117120758.1076549-4-marcandre.lureau@redhat.com> +Patchwork-id: 93401 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 3/3] slirp: use correct size while emulating commands +Bugzilla: 1791568 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +From: Prasad J Pandit + +While emulating services in tcp_emu(), it uses 'mbuf' size +'m->m_size' to write commands via snprintf(3). Use M_FREEROOM(m) +size to avoid possible OOB access. + +Signed-off-by: Prasad J Pandit +Signed-off-by: Samuel Thibault +Message-Id: <20200109094228.79764-3-ppandit@redhat.com> + +(cherry picked from commit 82ebe9c370a0e2970fb5695aa19aa5214a6a1c80) +Signed-off-by: Marc-André Lureau +Signed-off-by: Miroslav Rezanina +--- + slirp/src/tcp_subr.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +index cedbfb2..954d1a6 100644 +--- a/slirp/src/tcp_subr.c ++++ b/slirp/src/tcp_subr.c +@@ -696,7 +696,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, m->m_size - m->m_len, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), + "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, + n5, n6, x == 7 ? 
buff : ""); + return 1; +@@ -731,8 +731,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += +- snprintf(bptr, m->m_size - m->m_len, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), + "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", + n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); + +@@ -758,8 +757,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && + (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, + htons(lport), SS_FACCEPTONCE)) != NULL) +- m->m_len = +- snprintf(m->m_data, m->m_size, "%d", ntohs(so->so_fport)) + 1; ++ m->m_len = snprintf(m->m_data, M_ROOM(m), ++ "%d", ntohs(so->so_fport)) + 1; + return 1; + + case EMU_IRC: +-- +1.8.3.1 + diff --git a/SOURCES/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch b/SOURCES/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch new file mode 100644 index 0000000..d934712 --- /dev/null +++ b/SOURCES/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch @@ -0,0 +1,113 @@ +From f2aeed761d2dad14920fa08c977dc45564886d9b Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Fri, 3 Jan 2020 01:15:12 +0000 +Subject: [PATCH 1/5] spapr: Don't trigger a CAS reboot for XICS/XIVE mode + changeover +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20200103011512.49129-2-dgibson@redhat.com> +Patchwork-id: 93261 +O-Subject: [RHEL-AV-4.2 qemu-kvm PATCH 1/1] spapr: Don't trigger a CAS reboot for XICS/XIVE mode changeover +Bugzilla: 1733893 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé + +From: David Gibson + +PAPR allows the interrupt controller used on a POWER9 machine (XICS or +XIVE) to be selected by the guest operating system, by using the +ibm,client-architecture-support (CAS) feature negotiation call. 
+ +Currently, if the guest selects an interrupt controller different from the +one selected at initial boot, this causes the system to be reset with the +new model and the boot starts again. This means we run through the SLOF +boot process twice, as well as any other bootloader (e.g. grub) in use +before the OS calls CAS. This can be confusing and/or inconvenient for +users. + +Thanks to two fairly recent changes, we no longer need this reboot. 1) we +now completely regenerate the device tree when CAS is called (meaning we +don't need special case updates for all the device tree changes caused by +the interrupt controller mode change), 2) we now have explicit code paths +to activate and deactivate the different interrupt controllers, rather than +just implicitly calling those at machine reset time. + +We can therefore eliminate the reboot for changing irq mode, simply by +putting a call to spapr_irq_update_active_intc() before we call +spapr_h_cas_compose_response() (which gives the updated device tree to +the guest firmware and OS). + +Signed-off-by: David Gibson +Reviewed-by: Cedric Le Goater +Reviewed-by: Greg Kurz +(cherry picked from commit 8deb8019d696c75e6ecaee7545026b62aba2f1bb) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1733893 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr_hcall.c | 33 +++++++++++++-------------------- + 1 file changed, 13 insertions(+), 20 deletions(-) + +diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c +index 140f05c..05a7ca2 100644 +--- a/hw/ppc/spapr_hcall.c ++++ b/hw/ppc/spapr_hcall.c +@@ -1767,21 +1767,10 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + } + spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00); + spapr_ovec_cleanup(ov1_guest); +- if (!spapr->cas_reboot) { +- /* If spapr_machine_reset() did not set up a HPT but one is necessary +- * (because the guest isn't going to use radix) then set it up here. 
*/ +- if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { +- /* legacy hash or new hash: */ +- spapr_setup_hpt_and_vrma(spapr); +- } +- spapr->cas_reboot = +- (spapr_h_cas_compose_response(spapr, args[1], args[2], +- ov5_updates) != 0); +- } + + /* +- * Ensure the guest asks for an interrupt mode we support; otherwise +- * terminate the boot. ++ * Ensure the guest asks for an interrupt mode we support; ++ * otherwise terminate the boot. + */ + if (guest_xive) { + if (!spapr->irq->xive) { +@@ -1797,14 +1786,18 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + } + } + +- /* +- * Generate a machine reset when we have an update of the +- * interrupt mode. Only required when the machine supports both +- * modes. +- */ ++ spapr_irq_update_active_intc(spapr); ++ + if (!spapr->cas_reboot) { +- spapr->cas_reboot = spapr_ovec_test(ov5_updates, OV5_XIVE_EXPLOIT) +- && spapr->irq->xics && spapr->irq->xive; ++ /* If spapr_machine_reset() did not set up a HPT but one is necessary ++ * (because the guest isn't going to use radix) then set it up here. 
*/ ++ if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { ++ /* legacy hash or new hash: */ ++ spapr_setup_hpt_and_vrma(spapr); ++ } ++ spapr->cas_reboot = ++ (spapr_h_cas_compose_response(spapr, args[1], args[2], ++ ov5_updates) != 0); + } + + spapr_ovec_cleanup(ov5_updates); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch b/SOURCES/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch new file mode 100644 index 0000000..0aa782b --- /dev/null +++ b/SOURCES/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch @@ -0,0 +1,135 @@ +From eb121ffa97c1c25d7853d51b4c8209c0bb521deb Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Fri, 7 Feb 2020 00:57:04 +0000 +Subject: [PATCH 1/7] spapr: Enable DD2.3 accelerated count cache flush in + pseries-5.0 machine + +RH-Author: David Gibson +Message-id: <20200207005704.194428-1-dgibson@redhat.com> +Patchwork-id: 93737 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCHv2] spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine +Bugzilla: 1796240 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth + +From: David Gibson + +For POWER9 DD2.2 cpus, the best current Spectre v2 indirect branch +mitigation is "count cache disabled", which is configured with: + -machine cap-ibs=fixed-ccd +However, this option isn't available on DD2.3 CPUs with KVM, because they +don't have the count cache disabled. + +For POWER9 DD2.3 cpus, it is "count cache flush with assist", configured +with: + -machine cap-ibs=workaround,cap-ccf-assist=on +However this option isn't available on DD2.2 CPUs with KVM, because they +don't have the special CCF assist instruction this relies on. + +On current machine types, we default to "count cache flush w/o assist", +that is: + -machine cap-ibs=workaround,cap-ccf-assist=off +This runs, with mitigation on both DD2.2 and DD2.3 host cpus, but has a +fairly significant performance impact. 
+ +It turns out we can do better. The special instruction that CCF assist +uses to trigger a count cache flush is a no-op on earlier CPUs, rather than +trapping or causing other badness. It doesn't, of itself, implement the +mitigation, but *if* we have count-cache-disabled, then the count cache +flush is unnecessary, and so using the count cache flush mitigation is +harmless. + +Therefore for the new pseries-5.0 machine type, enable cap-ccf-assist by +default. Along with that, suppress throwing an error if cap-ccf-assist +is selected but KVM doesn't support it, as long as KVM *is* giving us +count-cache-disabled. To allow TCG to work out of the box, even though it +doesn't implement the ccf flush assist, downgrade the error in that case to +a warning. This matches several Spectre mitigations where we allow TCG +to operate for debugging, since we don't really make guarantees about TCG +security properties anyway. + +While we're there, make the TCG warning for this case match that for other +mitigations. + +Signed-off-by: David Gibson +Tested-by: Michael Ellerman +(cherry picked from commit 37965dfe4dffa3ac49438337417608e7f346b58a) +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + hw/ppc/spapr.c + +Adjusted machine version compatibility code to the RHEL machine types +rather than the upstream machine types. + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1796240 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=26285002 +Branch: rhel-av-8.2.0 +Upstream: Merged for qemu-5.0 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr.c | 4 +++- + hw/ppc/spapr_caps.c | 21 +++++++++++++++++---- + 2 files changed, 20 insertions(+), 5 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index c12862d..a330f03 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4440,7 +4440,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ + smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF; + smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; +- smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; ++ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON; + spapr_caps_add_properties(smc, &error_abort); + smc->irq = &spapr_irq_dual; + smc->dr_phb_enabled = true; +@@ -4904,6 +4904,8 @@ static void spapr_machine_rhel810_class_options(MachineClass *mc) + hw_compat_rhel_8_1_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; + } + + DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); +diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c +index 805f385..6e6fb28 100644 +--- a/hw/ppc/spapr_caps.c ++++ b/hw/ppc/spapr_caps.c +@@ -492,11 +492,24 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val, + uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist(); + + if (tcg_enabled() && val) { +- /* TODO - for now only allow broken for TCG */ +- error_setg(errp, +-"Requested count cache flush assist capability level not supported by tcg," +- " try appending -machine cap-ccf-assist=off"); ++ /* TCG doesn't implement anything here, but allow with a warning */ ++ warn_report("TCG doesn't support requested feature, cap-ccf-assist=on"); + } else if (kvm_enabled() && (val > kvm_val)) { ++ uint8_t kvm_ibs = kvmppc_get_cap_safe_indirect_branch(); ++ ++ if (kvm_ibs == SPAPR_CAP_FIXED_CCD) { ++ /* ++ * If we don't have CCF 
assist on the host, the assist ++ * instruction is a harmless no-op. It won't correctly ++ * implement the cache count flush *but* if we have ++ * count-cache-disabled in the host, that flush is ++ * unnecessary. So, specifically allow this case. This ++ * allows us to have better performance on POWER9 DD2.3, ++ * while still working on POWER9 DD2.2 and POWER8 host ++ * cpus. ++ */ ++ return; ++ } + error_setg(errp, + "Requested count cache flush assist capability level not supported by kvm," + " try appending -machine cap-ccf-assist=off"); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-arm-arch_dump-Add-SVE-notes.patch b/SOURCES/kvm-target-arm-arch_dump-Add-SVE-notes.patch new file mode 100644 index 0000000..febea10 --- /dev/null +++ b/SOURCES/kvm-target-arm-arch_dump-Add-SVE-notes.patch @@ -0,0 +1,298 @@ +From d8871ae2842531130c9b333e7c06a6a5d1561286 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 24 Jan 2020 09:14:34 +0100 +Subject: [PATCH 001/116] target/arm/arch_dump: Add SVE notes + +RH-Author: Andrew Jones +Message-id: <20200124091434.15021-2-drjones@redhat.com> +Patchwork-id: 93443 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] target/arm/arch_dump: Add SVE notes +Bugzilla: 1725084 +RH-Acked-by: Auger Eric +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1725084 + +Author: Andrew Jones +Date: Thu, 23 Jan 2020 15:22:40 +0000 + + target/arm/arch_dump: Add SVE notes + + When dumping a guest with dump-guest-memory also dump the SVE + registers if they are in use. 
+ + Signed-off-by: Andrew Jones + Reviewed-by: Richard Henderson + Message-id: 20200120101832.18781-1-drjones@redhat.com + [PMM: fixed checkpatch nits] + Signed-off-by: Peter Maydell + +(cherry picked from commit 538baab245ca881e6a6ff720b5133f3ad1fcaafc) +Signed-off-by: Miroslav Rezanina +--- + include/elf.h | 1 + + target/arm/arch_dump.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++- + target/arm/cpu.h | 25 ++++++++++ + target/arm/kvm64.c | 24 ---------- + 4 files changed, 148 insertions(+), 26 deletions(-) + +diff --git a/include/elf.h b/include/elf.h +index 3501e0c..8fbfe60 100644 +--- a/include/elf.h ++++ b/include/elf.h +@@ -1650,6 +1650,7 @@ typedef struct elf64_shdr { + #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ + #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ + #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ ++#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension regs */ + + /* + * Physical entry point into the kernel. 
+diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c +index 26a2c09..2345dec 100644 +--- a/target/arm/arch_dump.c ++++ b/target/arm/arch_dump.c +@@ -62,12 +62,23 @@ struct aarch64_user_vfp_state { + + QEMU_BUILD_BUG_ON(sizeof(struct aarch64_user_vfp_state) != 528); + ++/* struct user_sve_header from arch/arm64/include/uapi/asm/ptrace.h */ ++struct aarch64_user_sve_header { ++ uint32_t size; ++ uint32_t max_size; ++ uint16_t vl; ++ uint16_t max_vl; ++ uint16_t flags; ++ uint16_t reserved; ++} QEMU_PACKED; ++ + struct aarch64_note { + Elf64_Nhdr hdr; + char name[8]; /* align_up(sizeof("CORE"), 4) */ + union { + struct aarch64_elf_prstatus prstatus; + struct aarch64_user_vfp_state vfp; ++ struct aarch64_user_sve_header sve; + }; + } QEMU_PACKED; + +@@ -76,6 +87,8 @@ struct aarch64_note { + (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_elf_prstatus)) + #define AARCH64_PRFPREG_NOTE_SIZE \ + (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_user_vfp_state)) ++#define AARCH64_SVE_NOTE_SIZE(env) \ ++ (AARCH64_NOTE_HEADER_SIZE + sve_size(env)) + + static void aarch64_note_init(struct aarch64_note *note, DumpState *s, + const char *name, Elf64_Word namesz, +@@ -128,11 +141,102 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f, + return 0; + } + ++#ifdef TARGET_AARCH64 ++static off_t sve_zreg_offset(uint32_t vq, int n) ++{ ++ off_t off = sizeof(struct aarch64_user_sve_header); ++ return ROUND_UP(off, 16) + vq * 16 * n; ++} ++ ++static off_t sve_preg_offset(uint32_t vq, int n) ++{ ++ return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n; ++} ++ ++static off_t sve_fpsr_offset(uint32_t vq) ++{ ++ off_t off = sve_preg_offset(vq, 17); ++ return ROUND_UP(off, 16); ++} ++ ++static off_t sve_fpcr_offset(uint32_t vq) ++{ ++ return sve_fpsr_offset(vq) + sizeof(uint32_t); ++} ++ ++static uint32_t sve_current_vq(CPUARMState *env) ++{ ++ return sve_zcr_len_for_el(env, arm_current_el(env)) + 1; ++} ++ ++static size_t sve_size_vq(uint32_t vq) ++{ ++ off_t off = 
sve_fpcr_offset(vq) + sizeof(uint32_t); ++ return ROUND_UP(off, 16); ++} ++ ++static size_t sve_size(CPUARMState *env) ++{ ++ return sve_size_vq(sve_current_vq(env)); ++} ++ ++static int aarch64_write_elf64_sve(WriteCoreDumpFunction f, ++ CPUARMState *env, int cpuid, ++ DumpState *s) ++{ ++ struct aarch64_note *note; ++ ARMCPU *cpu = env_archcpu(env); ++ uint32_t vq = sve_current_vq(env); ++ uint64_t tmp[ARM_MAX_VQ * 2], *r; ++ uint32_t fpr; ++ uint8_t *buf; ++ int ret, i; ++ ++ note = g_malloc0(AARCH64_SVE_NOTE_SIZE(env)); ++ buf = (uint8_t *)&note->sve; ++ ++ aarch64_note_init(note, s, "LINUX", 6, NT_ARM_SVE, sve_size_vq(vq)); ++ ++ note->sve.size = cpu_to_dump32(s, sve_size_vq(vq)); ++ note->sve.max_size = cpu_to_dump32(s, sve_size_vq(cpu->sve_max_vq)); ++ note->sve.vl = cpu_to_dump16(s, vq * 16); ++ note->sve.max_vl = cpu_to_dump16(s, cpu->sve_max_vq * 16); ++ note->sve.flags = cpu_to_dump16(s, 1); ++ ++ for (i = 0; i < 32; ++i) { ++ r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2); ++ memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16); ++ } ++ ++ for (i = 0; i < 17; ++i) { ++ r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0], ++ DIV_ROUND_UP(vq * 2, 8)); ++ memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8); ++ } ++ ++ fpr = cpu_to_dump32(s, vfp_get_fpsr(env)); ++ memcpy(&buf[sve_fpsr_offset(vq)], &fpr, sizeof(uint32_t)); ++ ++ fpr = cpu_to_dump32(s, vfp_get_fpcr(env)); ++ memcpy(&buf[sve_fpcr_offset(vq)], &fpr, sizeof(uint32_t)); ++ ++ ret = f(note, AARCH64_SVE_NOTE_SIZE(env), s); ++ g_free(note); ++ ++ if (ret < 0) { ++ return -1; ++ } ++ ++ return 0; ++} ++#endif ++ + int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + int cpuid, void *opaque) + { + struct aarch64_note note; +- CPUARMState *env = &ARM_CPU(cs)->env; ++ ARMCPU *cpu = ARM_CPU(cs); ++ CPUARMState *env = &cpu->env; + DumpState *s = opaque; + uint64_t pstate, sp; + int ret, i; +@@ -163,7 +267,18 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + return -1; 
+ } + +- return aarch64_write_elf64_prfpreg(f, env, cpuid, s); ++ ret = aarch64_write_elf64_prfpreg(f, env, cpuid, s); ++ if (ret) { ++ return ret; ++ } ++ ++#ifdef TARGET_AARCH64 ++ if (cpu_isar_feature(aa64_sve, cpu)) { ++ ret = aarch64_write_elf64_sve(f, env, cpuid, s); ++ } ++#endif ++ ++ return ret; + } + + /* struct pt_regs from arch/arm/include/asm/ptrace.h */ +@@ -335,6 +450,11 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) + if (class == ELFCLASS64) { + note_size = AARCH64_PRSTATUS_NOTE_SIZE; + note_size += AARCH64_PRFPREG_NOTE_SIZE; ++#ifdef TARGET_AARCH64 ++ if (cpu_isar_feature(aa64_sve, cpu)) { ++ note_size += AARCH64_SVE_NOTE_SIZE(env); ++ } ++#endif + } else { + note_size = ARM_PRSTATUS_NOTE_SIZE; + if (arm_feature(env, ARM_FEATURE_VFP)) { +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 83a809d..82dd3cc 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -975,6 +975,31 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq); + void aarch64_sve_change_el(CPUARMState *env, int old_el, + int new_el, bool el0_a64); + void aarch64_add_sve_properties(Object *obj); ++ ++/* ++ * SVE registers are encoded in KVM's memory in an endianness-invariant format. ++ * The byte at offset i from the start of the in-memory representation contains ++ * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the ++ * lowest offsets are stored in the lowest memory addresses, then that nearly ++ * matches QEMU's representation, which is to use an array of host-endian ++ * uint64_t's, where the lower offsets are at the lower indices. To complete ++ * the translation we just need to byte swap the uint64_t's on big-endian hosts. 
++ */ ++static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) ++{ ++#ifdef HOST_WORDS_BIGENDIAN ++ int i; ++ ++ for (i = 0; i < nr; ++i) { ++ dst[i] = bswap64(src[i]); ++ } ++ ++ return dst; ++#else ++ return src; ++#endif ++} ++ + #else + static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { } + static inline void aarch64_sve_change_el(CPUARMState *env, int o, +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 876184b..e2da756 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -877,30 +877,6 @@ static int kvm_arch_put_fpsimd(CPUState *cs) + } + + /* +- * SVE registers are encoded in KVM's memory in an endianness-invariant format. +- * The byte at offset i from the start of the in-memory representation contains +- * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the +- * lowest offsets are stored in the lowest memory addresses, then that nearly +- * matches QEMU's representation, which is to use an array of host-endian +- * uint64_t's, where the lower offsets are at the lower indices. To complete +- * the translation we just need to byte swap the uint64_t's on big-endian hosts. +- */ +-static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) +-{ +-#ifdef HOST_WORDS_BIGENDIAN +- int i; +- +- for (i = 0; i < nr; ++i) { +- dst[i] = bswap64(src[i]); +- } +- +- return dst; +-#else +- return src; +-#endif +-} +- +-/* + * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits + * and PREGS and the FFR have a slice size of 256 bits. 
However we simply hard + * code the slice index to zero for now as it's unlikely we'll need more than +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch b/SOURCES/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch new file mode 100644 index 0000000..601b8c4 --- /dev/null +++ b/SOURCES/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch @@ -0,0 +1,281 @@ +From 730f72105b478553c4f22555c29b0f64224ff914 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:14 +0000 +Subject: [PATCH 12/15] target/arm/cpu: Add the kvm-no-adjvtime CPU property +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-6-drjones@redhat.com> +Patchwork-id: 93623 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/5] target/arm/cpu: Add the kvm-no-adjvtime CPU property +Bugzilla: 1647366 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:06 +0000 + + target/arm/cpu: Add the kvm-no-adjvtime CPU property + + kvm-no-adjvtime is a KVM specific CPU property and a first of its + kind. To accommodate it we also add kvm_arm_add_vcpu_properties() + and a KVM specific CPU properties description to the CPU features + document. + + Signed-off-by: Andrew Jones + Message-id: 20200120101023.16030-7-drjones@redhat.com + Reviewed-by: Peter Maydell + Signed-off-by: Peter Maydell + +(cherry picked from commit dea101a1ae9968c9fec6ab0291489dad7c49f36f) +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + Dropped the second hunk of the hw/arm/virt.c changes + as they would patch dead code. + +Signed-off-by: Danilo C. L. 
de Paula +--- + docs/arm-cpu-features.rst | 37 ++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 5 +++++ + include/hw/arm/virt.h | 1 + + target/arm/cpu.c | 2 ++ + target/arm/cpu64.c | 1 + + target/arm/kvm.c | 28 ++++++++++++++++++++++++++++ + target/arm/kvm_arm.h | 11 +++++++++++ + target/arm/monitor.c | 1 + + tests/arm-cpu-features.c | 4 ++++ + 9 files changed, 89 insertions(+), 1 deletion(-) + +diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst +index 1b367e2..45d1eb6 100644 +--- a/docs/arm-cpu-features.rst ++++ b/docs/arm-cpu-features.rst +@@ -31,7 +31,9 @@ supporting the feature or only supporting the feature under certain + configurations. For example, the `aarch64` CPU feature, which, when + disabled, enables the optional AArch32 CPU feature, is only supported + when using the KVM accelerator and when running on a host CPU type that +-supports the feature. ++supports the feature. While `aarch64` currently only works with KVM, ++it could work with TCG. CPU features that are specific to KVM are ++prefixed with "kvm-" and are described in "KVM VCPU Features". + + CPU Feature Probing + =================== +@@ -171,6 +173,39 @@ disabling many SVE vector lengths would be quite verbose, the `sve` CPU + properties have special semantics (see "SVE CPU Property Parsing + Semantics"). + ++KVM VCPU Features ++================= ++ ++KVM VCPU features are CPU features that are specific to KVM, such as ++paravirt features or features that enable CPU virtualization extensions. ++The features' CPU properties are only available when KVM is enabled and ++are named with the prefix "kvm-". KVM VCPU features may be probed, ++enabled, and disabled in the same way as other CPU features. Below is ++the list of KVM VCPU features and their descriptions. ++ ++ kvm-no-adjvtime By default kvm-no-adjvtime is disabled. This ++ means that by default the virtual time ++ adjustment is enabled (vtime is *not not* ++ adjusted). 
++ ++ When virtual time adjustment is enabled each ++ time the VM transitions back to running state ++ the VCPU's virtual counter is updated to ensure ++ stopped time is not counted. This avoids time ++ jumps surprising guest OSes and applications, ++ as long as they use the virtual counter for ++ timekeeping. However it has the side effect of ++ the virtual and physical counters diverging. ++ All timekeeping based on the virtual counter ++ will appear to lag behind any timekeeping that ++ does not subtract VM stopped time. The guest ++ may resynchronize its virtual counter with ++ other time sources as needed. ++ ++ Enable kvm-no-adjvtime to disable virtual time ++ adjustment, also restoring the legacy (pre-5.0) ++ behavior. ++ + SVE CPU Properties + ================== + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e108391..d30d38c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1707,6 +1707,11 @@ static void machvirt_init(MachineState *machine) + } + } + ++ if (vmc->kvm_no_adjvtime && ++ object_property_find(cpuobj, "kvm-no-adjvtime", NULL)) { ++ object_property_set_bool(cpuobj, true, "kvm-no-adjvtime", NULL); ++ } ++ + if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) { + object_property_set_bool(cpuobj, false, "pmu", NULL); + } +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 53fdf16..77828ce 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -109,6 +109,7 @@ typedef struct { + bool smbios_old_sys_ver; + bool no_highmem_ecam; + bool no_ged; /* Machines < 4.2 has no support for ACPI GED device */ ++ bool kvm_no_adjvtime; + } VirtMachineClass; + + typedef struct { +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 3788fc3..e46efe9 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2482,6 +2482,7 @@ static void arm_max_initfn(Object *obj) + + if (kvm_enabled()) { + kvm_arm_set_cpu_features_from_host(cpu); ++ kvm_arm_add_vcpu_properties(obj); + } else { + cortex_a15_initfn(obj); + +@@ 
-2673,6 +2674,7 @@ static void arm_host_initfn(Object *obj) + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + aarch64_add_sve_properties(obj); + } ++ kvm_arm_add_vcpu_properties(obj); + arm_cpu_post_init(obj); + } + +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index a39d6fc..3cd416d 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -605,6 +605,7 @@ static void aarch64_max_initfn(Object *obj) + + if (kvm_enabled()) { + kvm_arm_set_cpu_features_from_host(cpu); ++ kvm_arm_add_vcpu_properties(obj); + } else { + uint64_t t; + uint32_t u; +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 26d7f8b..4be9497 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -17,6 +17,8 @@ + #include "qemu/timer.h" + #include "qemu/error-report.h" + #include "qemu/main-loop.h" ++#include "qom/object.h" ++#include "qapi/error.h" + #include "sysemu/sysemu.h" + #include "sysemu/kvm.h" + #include "sysemu/kvm_int.h" +@@ -179,6 +181,32 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) + env->features = arm_host_cpu_features.features; + } + ++static bool kvm_no_adjvtime_get(Object *obj, Error **errp) ++{ ++ return !ARM_CPU(obj)->kvm_adjvtime; ++} ++ ++static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp) ++{ ++ ARM_CPU(obj)->kvm_adjvtime = !value; ++} ++ ++/* KVM VCPU properties should be prefixed with "kvm-". */ ++void kvm_arm_add_vcpu_properties(Object *obj) ++{ ++ if (!kvm_enabled()) { ++ return; ++ } ++ ++ ARM_CPU(obj)->kvm_adjvtime = true; ++ object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get, ++ kvm_no_adjvtime_set, &error_abort); ++ object_property_set_description(obj, "kvm-no-adjvtime", ++ "Set on to disable the adjustment of " ++ "the virtual counter. 
VM stopped time " ++ "will be counted.", &error_abort); ++} ++ + bool kvm_arm_pmu_supported(CPUState *cpu) + { + KVMState *s = KVM_STATE(current_machine->accelerator); +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 01a9a18..ae9e075 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -256,6 +256,15 @@ void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map); + void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); + + /** ++ * kvm_arm_add_vcpu_properties: ++ * @obj: The CPU object to add the properties to ++ * ++ * Add all KVM specific CPU properties to the CPU object. These ++ * are the CPU properties with "kvm-" prefixed names. ++ */ ++void kvm_arm_add_vcpu_properties(Object *obj); ++ ++/** + * kvm_arm_aarch32_supported: + * @cs: CPUState + * +@@ -345,6 +354,8 @@ static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) + cpu->host_cpu_probe_failed = true; + } + ++static inline void kvm_arm_add_vcpu_properties(Object *obj) {} ++ + static inline bool kvm_arm_aarch32_supported(CPUState *cs) + { + return false; +diff --git a/target/arm/monitor.c b/target/arm/monitor.c +index fa054f8..9725dff 100644 +--- a/target/arm/monitor.c ++++ b/target/arm/monitor.c +@@ -103,6 +103,7 @@ static const char *cpu_model_advertised_features[] = { + "sve128", "sve256", "sve384", "sve512", + "sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280", + "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048", ++ "kvm-no-adjvtime", + NULL + }; + +diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c +index 89285ca..ba1a6fe 100644 +--- a/tests/arm-cpu-features.c ++++ b/tests/arm-cpu-features.c +@@ -428,6 +428,8 @@ static void test_query_cpu_model_expansion(const void *data) + assert_has_feature_enabled(qts, "cortex-a15", "pmu"); + assert_has_not_feature(qts, "cortex-a15", "aarch64"); + ++ assert_has_not_feature(qts, "max", "kvm-no-adjvtime"); ++ + if (g_str_equal(qtest_get_arch(), "aarch64")) { + 
assert_has_feature_enabled(qts, "max", "aarch64"); + assert_has_feature_enabled(qts, "max", "sve"); +@@ -462,6 +464,8 @@ static void test_query_cpu_model_expansion_kvm(const void *data) + return; + } + ++ assert_has_feature_disabled(qts, "host", "kvm-no-adjvtime"); ++ + if (g_str_equal(qtest_get_arch(), "aarch64")) { + bool kvm_supports_sve; + char max_name[8], name[8]; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch b/SOURCES/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch new file mode 100644 index 0000000..3396a32 --- /dev/null +++ b/SOURCES/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch @@ -0,0 +1,330 @@ +From 5388ea3fc0737d1a659256ff3663057bef484c19 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:13 +0000 +Subject: [PATCH 11/15] target/arm/kvm: Implement virtual time adjustment +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-5-drjones@redhat.com> +Patchwork-id: 93622 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/5] target/arm/kvm: Implement virtual time adjustment +Bugzilla: 1647366 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:06 +0000 + + target/arm/kvm: Implement virtual time adjustment + + When a VM is stopped (such as when it's paused) guest virtual time + should stop counting. Otherwise, when the VM is resumed it will + experience time jumps and its kernel may report soft lockups. Not + counting virtual time while the VM is stopped has the side effect + of making the guest's time appear to lag when compared with real + time, and even with time derived from the physical counter. 
For + this reason, this change, which is enabled by default, comes with + a KVM CPU feature allowing it to be disabled, restoring legacy + behavior. + + This patch only provides the implementation of the virtual time + adjustment. A subsequent patch will provide the CPU property + allowing the change to be enabled and disabled. + + Reported-by: Bijan Mottahedeh + Signed-off-by: Andrew Jones + Message-id: 20200120101023.16030-6-drjones@redhat.com + Reviewed-by: Peter Maydell + Signed-off-by: Peter Maydell + +(cherry picked from commit e5ac4200b4cddf44df9adbef677af0d1f1c579c6) +Signed-off-by: Danilo C. L. de Paula +--- + target/arm/cpu.h | 7 ++++ + target/arm/kvm.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + target/arm/kvm32.c | 3 ++ + target/arm/kvm64.c | 3 ++ + target/arm/kvm_arm.h | 38 ++++++++++++++++++++++ + target/arm/machine.c | 7 ++++ + 6 files changed, 150 insertions(+) + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 82dd3cc..fbd8ea0 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -821,6 +821,13 @@ struct ARMCPU { + /* KVM init features for this CPU */ + uint32_t kvm_init_features[7]; + ++ /* KVM CPU state */ ++ ++ /* KVM virtual time adjustment */ ++ bool kvm_adjvtime; ++ bool kvm_vtime_dirty; ++ uint64_t kvm_vtime; ++ + /* Uniprocessor system with MP extensions */ + bool mp_is_up; + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 5b82cef..26d7f8b 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -359,6 +359,22 @@ static int compare_u64(const void *a, const void *b) + return 0; + } + ++/* ++ * cpreg_values are sorted in ascending order by KVM register ID ++ * (see kvm_arm_init_cpreg_list). This allows us to cheaply find ++ * the storage for a KVM register by ID with a binary search. 
++ */ ++static uint64_t *kvm_arm_get_cpreg_ptr(ARMCPU *cpu, uint64_t regidx) ++{ ++ uint64_t *res; ++ ++ res = bsearch(&regidx, cpu->cpreg_indexes, cpu->cpreg_array_len, ++ sizeof(uint64_t), compare_u64); ++ assert(res); ++ ++ return &cpu->cpreg_values[res - cpu->cpreg_indexes]; ++} ++ + /* Initialize the ARMCPU cpreg list according to the kernel's + * definition of what CPU registers it knows about (and throw away + * the previous TCG-created cpreg list). +@@ -512,6 +528,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) + return ok; + } + ++void kvm_arm_cpu_pre_save(ARMCPU *cpu) ++{ ++ /* KVM virtual time adjustment */ ++ if (cpu->kvm_vtime_dirty) { ++ *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT) = cpu->kvm_vtime; ++ } ++} ++ ++void kvm_arm_cpu_post_load(ARMCPU *cpu) ++{ ++ /* KVM virtual time adjustment */ ++ if (cpu->kvm_adjvtime) { ++ cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT); ++ cpu->kvm_vtime_dirty = true; ++ } ++} ++ + void kvm_arm_reset_vcpu(ARMCPU *cpu) + { + int ret; +@@ -579,6 +612,50 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) + return 0; + } + ++void kvm_arm_get_virtual_time(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ struct kvm_one_reg reg = { ++ .id = KVM_REG_ARM_TIMER_CNT, ++ .addr = (uintptr_t)&cpu->kvm_vtime, ++ }; ++ int ret; ++ ++ if (cpu->kvm_vtime_dirty) { ++ return; ++ } ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); ++ if (ret) { ++ error_report("Failed to get KVM_REG_ARM_TIMER_CNT"); ++ abort(); ++ } ++ ++ cpu->kvm_vtime_dirty = true; ++} ++ ++void kvm_arm_put_virtual_time(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ struct kvm_one_reg reg = { ++ .id = KVM_REG_ARM_TIMER_CNT, ++ .addr = (uintptr_t)&cpu->kvm_vtime, ++ }; ++ int ret; ++ ++ if (!cpu->kvm_vtime_dirty) { ++ return; ++ } ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); ++ if (ret) { ++ error_report("Failed to set KVM_REG_ARM_TIMER_CNT"); ++ abort(); ++ } ++ ++ cpu->kvm_vtime_dirty = false; ++} ++ + int 
kvm_put_vcpu_events(ARMCPU *cpu) + { + CPUARMState *env = &cpu->env; +@@ -690,6 +767,21 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) + return MEMTXATTRS_UNSPECIFIED; + } + ++void kvm_arm_vm_state_change(void *opaque, int running, RunState state) ++{ ++ CPUState *cs = opaque; ++ ARMCPU *cpu = ARM_CPU(cs); ++ ++ if (running) { ++ if (cpu->kvm_adjvtime) { ++ kvm_arm_put_virtual_time(cs); ++ } ++ } else { ++ if (cpu->kvm_adjvtime) { ++ kvm_arm_get_virtual_time(cs); ++ } ++ } ++} + + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { +diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c +index 32bf8d6..3a8b437 100644 +--- a/target/arm/kvm32.c ++++ b/target/arm/kvm32.c +@@ -16,6 +16,7 @@ + #include "qemu-common.h" + #include "cpu.h" + #include "qemu/timer.h" ++#include "sysemu/runstate.h" + #include "sysemu/kvm.h" + #include "kvm_arm.h" + #include "internals.h" +@@ -198,6 +199,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + return -EINVAL; + } + ++ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); ++ + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); + if (cpu->start_powered_off) { +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 666a81a..d368189 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -23,6 +23,7 @@ + #include "qemu/host-utils.h" + #include "qemu/main-loop.h" + #include "exec/gdbstub.h" ++#include "sysemu/runstate.h" + #include "sysemu/kvm.h" + #include "sysemu/kvm_int.h" + #include "kvm_arm.h" +@@ -735,6 +736,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + return -EINVAL; + } + ++ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); ++ + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); + if (cpu->start_powered_off) { +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index b48a9c9..01a9a18 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h 
+@@ -128,6 +128,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level); + bool write_kvmstate_to_list(ARMCPU *cpu); + + /** ++ * kvm_arm_cpu_pre_save: ++ * @cpu: ARMCPU ++ * ++ * Called after write_kvmstate_to_list() from cpu_pre_save() to update ++ * the cpreg list with KVM CPU state. ++ */ ++void kvm_arm_cpu_pre_save(ARMCPU *cpu); ++ ++/** ++ * kvm_arm_cpu_post_load: ++ * @cpu: ARMCPU ++ * ++ * Called from cpu_post_load() to update KVM CPU state from the cpreg list. ++ */ ++void kvm_arm_cpu_post_load(ARMCPU *cpu); ++ ++/** + * kvm_arm_reset_vcpu: + * @cpu: ARMCPU + * +@@ -292,6 +309,24 @@ int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); + */ + int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); + ++/** ++ * kvm_arm_get_virtual_time: ++ * @cs: CPUState ++ * ++ * Gets the VCPU's virtual counter and stores it in the KVM CPU state. ++ */ ++void kvm_arm_get_virtual_time(CPUState *cs); ++ ++/** ++ * kvm_arm_put_virtual_time: ++ * @cs: CPUState ++ * ++ * Sets the VCPU's virtual counter to the value stored in the KVM CPU state. 
++ */ ++void kvm_arm_put_virtual_time(CPUState *cs); ++ ++void kvm_arm_vm_state_change(void *opaque, int running, RunState state); ++ + int kvm_arm_vgic_probe(void); + + void kvm_arm_pmu_set_irq(CPUState *cs, int irq); +@@ -339,6 +374,9 @@ static inline void kvm_arm_pmu_set_irq(CPUState *cs, int irq) {} + static inline void kvm_arm_pmu_init(CPUState *cs) {} + + static inline void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map) {} ++ ++static inline void kvm_arm_get_virtual_time(CPUState *cs) {} ++static inline void kvm_arm_put_virtual_time(CPUState *cs) {} + #endif + + static inline const char *gic_class_name(void) +diff --git a/target/arm/machine.c b/target/arm/machine.c +index eb28b23..241890a 100644 +--- a/target/arm/machine.c ++++ b/target/arm/machine.c +@@ -642,6 +642,12 @@ static int cpu_pre_save(void *opaque) + /* This should never fail */ + abort(); + } ++ ++ /* ++ * kvm_arm_cpu_pre_save() must be called after ++ * write_kvmstate_to_list() ++ */ ++ kvm_arm_cpu_pre_save(cpu); + } else { + if (!write_cpustate_to_list(cpu, false)) { + /* This should never fail. */ +@@ -744,6 +750,7 @@ static int cpu_post_load(void *opaque, int version_id) + * we're using it. 
+ */ + write_list_to_cpustate(cpu); ++ kvm_arm_cpu_post_load(cpu); + } else { + if (!write_list_to_cpustate(cpu)) { + return -1; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch b/SOURCES/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch new file mode 100644 index 0000000..8cdc867 --- /dev/null +++ b/SOURCES/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch @@ -0,0 +1,197 @@ +From 11cb9cb7b1b56d5c9723e9c50bc2903281893bcc Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:10 +0000 +Subject: [PATCH 08/15] target/arm/kvm: trivial: Clean up header documentation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-2-drjones@redhat.com> +Patchwork-id: 93625 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/5] target/arm/kvm: trivial: Clean up header documentation +Bugzilla: 1647366 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:05 +0000 + + target/arm/kvm: trivial: Clean up header documentation + + Signed-off-by: Andrew Jones + Message-id: 20200120101023.16030-2-drjones@redhat.com + Reviewed-by: Peter Maydell + Signed-off-by: Peter Maydell + +(cherry picked from commit d1ebbc9d16297b54b153ee33abe05eb4f1df0c66) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/arm/kvm_arm.h | 46 +++++++++++++++++++++++++++------------------- + 1 file changed, 27 insertions(+), 19 deletions(-) + +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 8e14d40..b48a9c9 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -28,9 +28,9 @@ + int kvm_arm_vcpu_init(CPUState *cs); + + /** +- * kvm_arm_vcpu_finalize ++ * kvm_arm_vcpu_finalize: + * @cs: CPUState +- * @feature: int ++ * @feature: feature to finalize + * + * Finalizes the configuration of the specified VCPU feature by + * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring +@@ -75,8 +75,8 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, + int kvm_arm_init_cpreg_list(ARMCPU *cpu); + + /** +- * kvm_arm_reg_syncs_via_cpreg_list +- * regidx: KVM register index ++ * kvm_arm_reg_syncs_via_cpreg_list: ++ * @regidx: KVM register index + * + * Return true if this KVM register should be synchronized via the + * cpreg list of arbitrary system registers, false if it is synchronized +@@ -85,8 +85,8 @@ int kvm_arm_init_cpreg_list(ARMCPU *cpu); + bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx); + + /** +- * kvm_arm_cpreg_level +- * regidx: KVM register index ++ * kvm_arm_cpreg_level: ++ * @regidx: KVM register index + * + * Return the level of this coprocessor/system register. Return value is + * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE. +@@ -148,6 +148,8 @@ void kvm_arm_init_serror_injection(CPUState *cs); + * @cpu: ARMCPU + * + * Get VCPU related state from kvm. ++ * ++ * Returns: 0 if success else < 0 error code + */ + int kvm_get_vcpu_events(ARMCPU *cpu); + +@@ -156,6 +158,8 @@ int kvm_get_vcpu_events(ARMCPU *cpu); + * @cpu: ARMCPU + * + * Put VCPU related state to kvm. 
++ * ++ * Returns: 0 if success else < 0 error code + */ + int kvm_put_vcpu_events(ARMCPU *cpu); + +@@ -205,10 +209,12 @@ typedef struct ARMHostCPUFeatures { + + /** + * kvm_arm_get_host_cpu_features: +- * @ahcc: ARMHostCPUClass to fill in ++ * @ahcf: ARMHostCPUClass to fill in + * + * Probe the capabilities of the host kernel's preferred CPU and fill + * in the ARMHostCPUClass struct accordingly. ++ * ++ * Returns true on success and false otherwise. + */ + bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf); + +@@ -242,7 +248,7 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); + bool kvm_arm_aarch32_supported(CPUState *cs); + + /** +- * bool kvm_arm_pmu_supported: ++ * kvm_arm_pmu_supported: + * @cs: CPUState + * + * Returns: true if the KVM VCPU can enable its PMU +@@ -251,7 +257,7 @@ bool kvm_arm_aarch32_supported(CPUState *cs); + bool kvm_arm_pmu_supported(CPUState *cs); + + /** +- * bool kvm_arm_sve_supported: ++ * kvm_arm_sve_supported: + * @cs: CPUState + * + * Returns true if the KVM VCPU can enable SVE and false otherwise. +@@ -259,26 +265,30 @@ bool kvm_arm_pmu_supported(CPUState *cs); + bool kvm_arm_sve_supported(CPUState *cs); + + /** +- * kvm_arm_get_max_vm_ipa_size - Returns the number of bits in the +- * IPA address space supported by KVM +- * ++ * kvm_arm_get_max_vm_ipa_size: + * @ms: Machine state handle ++ * ++ * Returns the number of bits in the IPA address space supported by KVM + */ + int kvm_arm_get_max_vm_ipa_size(MachineState *ms); + + /** +- * kvm_arm_sync_mpstate_to_kvm ++ * kvm_arm_sync_mpstate_to_kvm: + * @cpu: ARMCPU + * + * If supported set the KVM MP_STATE based on QEMU's model. ++ * ++ * Returns 0 on success and -1 on failure. + */ + int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); + + /** +- * kvm_arm_sync_mpstate_to_qemu ++ * kvm_arm_sync_mpstate_to_qemu: + * @cpu: ARMCPU + * + * If supported get the MP_STATE from KVM and store in QEMU's model. ++ * ++ * Returns 0 on success and aborts on failure. 
+ */ + int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); + +@@ -292,7 +302,8 @@ int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); + + static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) + { +- /* This should never actually be called in the "not KVM" case, ++ /* ++ * This should never actually be called in the "not KVM" case, + * but set up the fields to indicate an error anyway. + */ + cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE; +@@ -377,23 +388,20 @@ bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit); + * + * Return: TRUE if any hardware breakpoints in use. + */ +- + bool kvm_arm_hw_debug_active(CPUState *cs); + + /** + * kvm_arm_copy_hw_debug_data: +- * + * @ptr: kvm_guest_debug_arch structure + * + * Copy the architecture specific debug registers into the + * kvm_guest_debug ioctl structure. + */ + struct kvm_guest_debug_arch; +- + void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr); + + /** +- * its_class_name ++ * its_class_name: + * + * Return the ITS class name to use depending on whether KVM acceleration + * and KVM CAP_SIGNAL_MSI are supported +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch b/SOURCES/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch new file mode 100644 index 0000000..36c0f1a --- /dev/null +++ b/SOURCES/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch @@ -0,0 +1,60 @@ +From 2740a84fe798ade5c1ce725d65cdaffb255da47c Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:11 +0000 +Subject: [PATCH 09/15] target/arm/kvm64: kvm64 cpus have timer registers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-3-drjones@redhat.com> +Patchwork-id: 93621 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/5] target/arm/kvm64: kvm64 cpus have timer registers +Bugzilla: 1647366 +RH-Acked-by: Philippe 
Mathieu-Daudé +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:06 +0000 + + target/arm/kvm64: kvm64 cpus have timer registers + + Add the missing GENERIC_TIMER feature to kvm64 cpus. + + We don't currently use these registers when KVM is enabled, but it's + probably best we add the feature flag for consistency and potential + future use. There's also precedent, as we add the PMU feature flag to + KVM enabled guests, even though we don't use those registers either. + + This change was originally posted as a hunk of a different, never + merged patch from Bijan Mottahedeh. + + Signed-off-by: Andrew Jones + Reviewed-by: Richard Henderson + Message-id: 20200120101023.16030-4-drjones@redhat.com + Signed-off-by: Peter Maydell + +(cherry picked from commit 65caa415487f4a6e265105446c6ef8f56bb0aa70) +Signed-off-by: Danilo C. L. de Paula +--- + target/arm/kvm64.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index e2da756..666a81a 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -605,6 +605,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + set_feature(&features, ARM_FEATURE_NEON); + set_feature(&features, ARM_FEATURE_AARCH64); + set_feature(&features, ARM_FEATURE_PMU); ++ set_feature(&features, ARM_FEATURE_GENERIC_TIMER); + + ahcf->features = features; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch b/SOURCES/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch new file mode 100644 index 0000000..55f328d --- /dev/null +++ b/SOURCES/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch @@ -0,0 +1,81 @@ +From c82cf5c08617c947b34eb490d1714729103e3379 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Mon, 10 Feb 2020 17:33:57 +0000 +Subject: [PATCH 17/18] target/arm/monitor: query-cpu-model-expansion crashed 
+ qemu when using machine type none +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200210173358.16896-2-drjones@redhat.com> +Patchwork-id: 93773 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] target/arm/monitor: query-cpu-model-expansion crashed qemu when using machine type none +Bugzilla: 1801320 +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan +RH-Acked-by: Philippe Mathieu-Daudé + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1801320 + +Author: Liang Yan +Date: Fri, 07 Feb 2020 14:04:21 +0000 + + target/arm/monitor: query-cpu-model-expansion crashed qemu when using machine type none + + Commit e19afd566781 mentioned that target-arm only supports queryable + cpu models 'max', 'host', and the current type when KVM is in use. + The logic works well until using machine type none. + + For machine type none, cpu_type will be null if cpu option is not + set by command line, strlen(cpu_type) will terminate process. + So We add a check above it. + + This won't affect i386 and s390x since they do not use current_cpu. + + Signed-off-by: Liang Yan + Message-id: 20200203134251.12986-1-lyan@suse.com + Reviewed-by: Andrew Jones + Tested-by: Andrew Jones + Signed-off-by: Peter Maydell + +(cherry picked from commit 0999a4ba8718aa96105b978d3567fc7e90244c7e) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/arm/monitor.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/target/arm/monitor.c b/target/arm/monitor.c +index 9725dff..c2dc790 100644 +--- a/target/arm/monitor.c ++++ b/target/arm/monitor.c +@@ -137,17 +137,20 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, + } + + if (kvm_enabled()) { +- const char *cpu_type = current_machine->cpu_type; +- int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); + bool supported = false; + + if (!strcmp(model->name, "host") || !strcmp(model->name, "max")) { + /* These are kvmarm's recommended cpu types */ + supported = true; +- } else if (strlen(model->name) == len && +- !strncmp(model->name, cpu_type, len)) { +- /* KVM is enabled and we're using this type, so it works. */ +- supported = true; ++ } else if (current_machine->cpu_type) { ++ const char *cpu_type = current_machine->cpu_type; ++ int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); ++ ++ if (strlen(model->name) == len && ++ !strncmp(model->name, cpu_type, len)) { ++ /* KVM is enabled and we're using this type, so it works. 
*/ ++ supported = true; ++ } + } + if (!supported) { + error_setg(errp, "We cannot guarantee the CPU type '%s' works " +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-add-a-ucode-rev-property.patch b/SOURCES/kvm-target-i386-add-a-ucode-rev-property.patch new file mode 100644 index 0000000..5c3c770 --- /dev/null +++ b/SOURCES/kvm-target-i386-add-a-ucode-rev-property.patch @@ -0,0 +1,125 @@ +From 4009f0bcc8004ce481015d088fe335a16b8d7ce1 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:12 +0000 +Subject: [PATCH 2/9] target/i386: add a ucode-rev property + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-3-pbonzini@redhat.com> +Patchwork-id: 93909 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/6] target/i386: add a ucode-rev property +Bugzilla: 1791648 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +Add the property and plumb it in TCG and HVF (the latter of which +tried to support returning a constant value but used the wrong MSR). + +Signed-off-by: Paolo Bonzini +Message-Id: <1579544504-3616-3-git-send-email-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4e45aff398cd1542c2a384a2a3b8600f23337d86) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 10 ++++++++++ + target/i386/cpu.h | 3 +++ + target/i386/hvf/x86_emu.c | 4 +--- + target/i386/misc_helper.c | 4 ++++ + 4 files changed, 18 insertions(+), 3 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 863192c..e505d3e 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6325,6 +6325,15 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + } + } + ++ if (cpu->ucode_rev == 0) { ++ /* The default is the same as KVM's. 
*/ ++ if (IS_AMD_CPU(env)) { ++ cpu->ucode_rev = 0x01000065; ++ } else { ++ cpu->ucode_rev = 0x100000000ULL; ++ } ++ } ++ + /* mwait extended info: needed for Core compatibility */ + /* We always wake on interrupt even if host does not have the capability */ + cpu->mwait.ecx |= CPUID_MWAIT_EMX | CPUID_MWAIT_IBE; +@@ -7008,6 +7017,7 @@ static Property x86_cpu_properties[] = { + DEFINE_PROP_UINT32("min-level", X86CPU, env.cpuid_min_level, 0), + DEFINE_PROP_UINT32("min-xlevel", X86CPU, env.cpuid_min_xlevel, 0), + DEFINE_PROP_UINT32("min-xlevel2", X86CPU, env.cpuid_min_xlevel2, 0), ++ DEFINE_PROP_UINT64("ucode-rev", X86CPU, ucode_rev, 0), + DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), + DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor_id), + DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index cde2a16..4441061 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -348,6 +348,7 @@ typedef enum X86Seg { + #define MSR_IA32_SPEC_CTRL 0x48 + #define MSR_VIRT_SSBD 0xc001011f + #define MSR_IA32_PRED_CMD 0x49 ++#define MSR_IA32_UCODE_REV 0x8b + #define MSR_IA32_CORE_CAPABILITY 0xcf + + #define MSR_IA32_ARCH_CAPABILITIES 0x10a +@@ -1621,6 +1622,8 @@ struct X86CPU { + CPUNegativeOffsetState neg; + CPUX86State env; + ++ uint64_t ucode_rev; ++ + uint32_t hyperv_spinlock_attempts; + char *hyperv_vendor_id; + bool hyperv_synic_kvm_only; +diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c +index 3df7672..92ab815 100644 +--- a/target/i386/hvf/x86_emu.c ++++ b/target/i386/hvf/x86_emu.c +@@ -664,8 +664,6 @@ static void exec_lods(struct CPUX86State *env, struct x86_decode *decode) + RIP(env) += decode->len; + } + +-#define MSR_IA32_UCODE_REV 0x00000017 +- + void simulate_rdmsr(struct CPUState *cpu) + { + X86CPU *x86_cpu = X86_CPU(cpu); +@@ -681,7 +679,7 @@ void simulate_rdmsr(struct CPUState *cpu) + val = cpu_get_apic_base(X86_CPU(cpu)->apic_state); + 
break; + case MSR_IA32_UCODE_REV: +- val = (0x100000000ULL << 32) | 0x100000000ULL; ++ val = x86_cpu->ucode_rev; + break; + case MSR_EFER: + val = rvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER); +diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c +index 3eff688..aed16fe 100644 +--- a/target/i386/misc_helper.c ++++ b/target/i386/misc_helper.c +@@ -229,6 +229,7 @@ void helper_rdmsr(CPUX86State *env) + #else + void helper_wrmsr(CPUX86State *env) + { ++ X86CPU *x86_cpu = env_archcpu(env); + uint64_t val; + + cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); +@@ -371,6 +372,9 @@ void helper_wrmsr(CPUX86State *env) + env->msr_bndcfgs = val; + cpu_sync_bndcs_hflags(env); + break; ++ case MSR_IA32_UCODE_REV: ++ val = x86_cpu->ucode_rev; ++ break; + default: + if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL + && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch b/SOURCES/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch new file mode 100644 index 0000000..a80c9d3 --- /dev/null +++ b/SOURCES/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch @@ -0,0 +1,72 @@ +From 27d7b085f2f568050d638b694ed2f51495db718c Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:15 +0000 +Subject: [PATCH 5/9] target/i386: check for availability of MSR_IA32_UCODE_REV + as an emulated MSR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-6-pbonzini@redhat.com> +Patchwork-id: 93898 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/6] target/i386: check for availability of MSR_IA32_UCODE_REV as an emulated MSR +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. 
David Alan Gilbert + +Even though MSR_IA32_UCODE_REV has been available long before Linux 5.6, +which added it to the emulated MSR list, a bug caused the microcode +version to revert to 0x100000000 on INIT. As a result, processors other +than the bootstrap processor would not see the host microcode revision; +some Windows version complain loudly about this and crash with a +fairly explicit MICROCODE REVISION MISMATCH error. + +[If running 5.6 prereleases, the kernel fix "KVM: x86: do not reset + microcode version on INIT or RESET" should also be applied.] + +Reported-by: Alex Williamson +Message-id: <20200211175516.10716-1-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 6702514814c7e7b4cbf179624539b5f38c72740b) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/kvm.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 6c61aef..99840ca 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -105,6 +105,7 @@ static bool has_msr_smi_count; + static bool has_msr_arch_capabs; + static bool has_msr_core_capabs; + static bool has_msr_vmx_vmfunc; ++static bool has_msr_ucode_rev; + + static uint32_t has_architectural_pmu_version; + static uint32_t num_architectural_pmu_gp_counters; +@@ -2056,6 +2057,9 @@ static int kvm_get_supported_msrs(KVMState *s) + case MSR_IA32_VMX_VMFUNC: + has_msr_vmx_vmfunc = true; + break; ++ case MSR_IA32_UCODE_REV: ++ has_msr_ucode_rev = true; ++ break; + } + } + } +@@ -2696,8 +2700,7 @@ static void kvm_init_msrs(X86CPU *cpu) + env->features[FEAT_CORE_CAPABILITY]); + } + +- if (kvm_arch_get_supported_msr_feature(kvm_state, +- MSR_IA32_UCODE_REV)) { ++ if (has_msr_ucode_rev) { + kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch b/SOURCES/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch new file mode 100644 
index 0000000..4c2362d --- /dev/null +++ b/SOURCES/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch @@ -0,0 +1,112 @@ +From 77cdcccc49ba988e3b5bcb66decdee2e99fdcd72 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Tue, 14 Apr 2020 15:00:36 +0100 +Subject: [PATCH] target/i386: do not set unsupported VMX secondary execution + controls + +RH-Author: Vitaly Kuznetsov +Message-id: <20200414150036.625732-2-vkuznets@redhat.com> +Patchwork-id: 94674 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] target/i386: do not set unsupported VMX secondary execution controls +Bugzilla: 1822682 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Paolo Bonzini + +Commit 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for +secondary execution controls") added a workaround for KVM pre-dating +commit 6defc591846d ("KVM: nVMX: include conditional controls in /dev/kvm +KVM_GET_MSRS") which wasn't setting certain available controls. The +workaround uses generic CPUID feature bits to set missing VMX controls. + +It was found that in some cases it is possible to observe hosts which +have certain CPUID features but lack the corresponding VMX control. + +In particular, it was reported that Azure VMs have RDSEED but lack +VMX_SECONDARY_EXEC_RDSEED_EXITING; attempts to enable this feature +bit result in QEMU abort. + +Resolve the issue but not applying the workaround when we don't have +to. As there is no good way to find out if KVM has the fix itself, use +95c5c7c77c ("KVM: nVMX: list VMX MSRs in KVM_GET_MSR_INDEX_LIST") instead +as these [are supposed to] come together. + +Fixes: 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for secondary execution controls") +Suggested-by: Paolo Bonzini +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20200331162752.1209928-1-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4a910e1f6ab4155ec8b24c49b2585cc486916985) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/kvm.c | 41 ++++++++++++++++++++++++++--------------- + 1 file changed, 26 insertions(+), 15 deletions(-) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 99840ca..fcc8f7d 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -106,6 +106,7 @@ static bool has_msr_arch_capabs; + static bool has_msr_core_capabs; + static bool has_msr_vmx_vmfunc; + static bool has_msr_ucode_rev; ++static bool has_msr_vmx_procbased_ctls2; + + static uint32_t has_architectural_pmu_version; + static uint32_t num_architectural_pmu_gp_counters; +@@ -490,21 +491,28 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) + value = msr_data.entries[0].data; + switch (index) { + case MSR_IA32_VMX_PROCBASED_CTLS2: +- /* KVM forgot to add these bits for some time, do this ourselves. */ +- if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; ++ if (!has_msr_vmx_procbased_ctls2) { ++ /* KVM forgot to add these bits for some time, do this ourselves. 
*/ ++ if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & ++ CPUID_XSAVE_XSAVES) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & ++ CPUID_EXT_RDRAND) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & ++ CPUID_7_0_EBX_INVPCID) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & ++ CPUID_7_0_EBX_RDSEED) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & ++ CPUID_EXT2_RDTSCP) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; ++ } + } + /* fall through */ + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: +@@ -2060,6 +2068,9 @@ static int kvm_get_supported_msrs(KVMState *s) + case MSR_IA32_UCODE_REV: + has_msr_ucode_rev = true; + break; ++ case MSR_IA32_VMX_PROCBASED_CTLS2: ++ has_msr_vmx_procbased_ctls2 = true; ++ break; + } + } + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch b/SOURCES/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch new file mode 100644 index 0000000..47438a3 --- /dev/null +++ b/SOURCES/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch @@ -0,0 +1,49 @@ +From 7b71a7011437ebfa3bc7df9297e892b82293ec98 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:16 +0000 +Subject: [PATCH 6/9] target/i386: enable monitor and ucode revision with -cpu + max +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-7-pbonzini@redhat.com> +Patchwork-id: 93910 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/6] target/i386: enable monitor and ucode revision with -cpu max +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. 
David Alan Gilbert + +These two features were incorrectly tied to host_cpuid_required rather than +cpu->max_features. As a result, -cpu max was not enabling either MONITOR +features or ucode revision. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit be02cda3afde60d219786e23c3f8edb53aec8e17) + +[RHEL7: context, upstream uses g_autofree] + +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 5ac843d..1685a8c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6317,7 +6317,9 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + g_free(name); + goto out; + } ++ } + ++ if (cpu->max_features && accel_uses_host_cpuid()) { + if (enable_cpu_pm) { + host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx, + &cpu->mwait.ecx, &cpu->mwait.edx); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-fix-TCG-UCODE_REV-access.patch b/SOURCES/kvm-target-i386-fix-TCG-UCODE_REV-access.patch new file mode 100644 index 0000000..c7ced8a --- /dev/null +++ b/SOURCES/kvm-target-i386-fix-TCG-UCODE_REV-access.patch @@ -0,0 +1,73 @@ +From 3d16f05359e6277da1f970f71aa9f76337d655dc Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:14 +0000 +Subject: [PATCH 4/9] target/i386: fix TCG UCODE_REV access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-5-pbonzini@redhat.com> +Patchwork-id: 93904 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/6] target/i386: fix TCG UCODE_REV access +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +This was a very interesting semantic conflict that caused git to move +the MSR_IA32_UCODE_REV read to helper_wrmsr. Not a big deal, but +still should be fixed... 
+ +Fixes: 4e45aff398 ("target/i386: add a ucode-rev property", 2020-01-24) +Message-id: <20200206171022.9289-1-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9028c75c9d08be303ccc425bfe3d3b23d8f4cac7) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/misc_helper.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c +index aed16fe..7d61221 100644 +--- a/target/i386/misc_helper.c ++++ b/target/i386/misc_helper.c +@@ -229,7 +229,6 @@ void helper_rdmsr(CPUX86State *env) + #else + void helper_wrmsr(CPUX86State *env) + { +- X86CPU *x86_cpu = env_archcpu(env); + uint64_t val; + + cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); +@@ -372,9 +371,6 @@ void helper_wrmsr(CPUX86State *env) + env->msr_bndcfgs = val; + cpu_sync_bndcs_hflags(env); + break; +- case MSR_IA32_UCODE_REV: +- val = x86_cpu->ucode_rev; +- break; + default: + if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL + && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + +@@ -393,6 +389,7 @@ void helper_wrmsr(CPUX86State *env) + + void helper_rdmsr(CPUX86State *env) + { ++ X86CPU *x86_cpu = env_archcpu(env); + uint64_t val; + + cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 0, GETPC()); +@@ -526,6 +523,9 @@ void helper_rdmsr(CPUX86State *env) + case MSR_IA32_BNDCFGS: + val = env->msr_bndcfgs; + break; ++ case MSR_IA32_UCODE_REV: ++ val = x86_cpu->ucode_rev; ++ break; + default: + if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL + && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch b/SOURCES/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch new file mode 100644 index 0000000..5118aed --- /dev/null +++ b/SOURCES/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch @@ -0,0 +1,178 @@ +From eb0fc0ae2750a0462698d6d21ebb56a4249539f9 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 
Feb 2020 16:23:11 +0000 +Subject: [PATCH 1/9] target/i386: kvm: initialize feature MSRs very early +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-2-pbonzini@redhat.com> +Patchwork-id: 93899 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] target/i386: kvm: initialize feature MSRs very early +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +Some read-only MSRs affect the behavior of ioctls such as +KVM_SET_NESTED_STATE. We can initialize them once and for all +right after the CPU is realized, since they will never be modified +by the guest. + +Reported-by: Qingua Cheng +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +Message-Id: <1579544504-3616-2-git-send-email-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 420ae1fc51c99abfd03b1c590f55617edd2a2bed) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/kvm.c | 81 ++++++++++++++++++++++++++++++-------------------- + target/i386/kvm_i386.h | 1 + + 2 files changed, 49 insertions(+), 33 deletions(-) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 86d9a1f..f41605b 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -67,6 +67,8 @@ + * 255 kvm_msr_entry structs */ + #define MSR_BUF_SIZE 4096 + ++static void kvm_init_msrs(X86CPU *cpu); ++ + const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_INFO(SET_TSS_ADDR), + KVM_CAP_INFO(EXT_CPUID), +@@ -1842,6 +1844,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + has_msr_tsc_aux = false; + } + ++ kvm_init_msrs(cpu); ++ + r = hyperv_init_vcpu(cpu); + if (r) { + goto fail; +@@ -2660,11 +2664,53 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) + VMCS12_MAX_FIELD_INDEX << 1); + } + ++static int kvm_buf_set_msrs(X86CPU *cpu) ++{ ++ int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ if (ret < cpu->kvm_msr_buf->nmsrs) { ++ struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; ++ error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, ++ (uint32_t)e->index, (uint64_t)e->data); ++ } ++ ++ assert(ret == cpu->kvm_msr_buf->nmsrs); ++ return 0; ++} ++ ++static void kvm_init_msrs(X86CPU *cpu) ++{ ++ CPUX86State *env = &cpu->env; ++ ++ kvm_msr_buf_reset(cpu); ++ if (has_msr_arch_capabs) { ++ kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, ++ env->features[FEAT_ARCH_CAPABILITIES]); ++ } ++ ++ if (has_msr_core_capabs) { ++ kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, ++ env->features[FEAT_CORE_CAPABILITY]); ++ } ++ ++ /* ++ * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but ++ * all kernels with MSR features should have them. 
++ */ ++ if (kvm_feature_msrs && cpu_has_vmx(env)) { ++ kvm_msr_entry_add_vmx(cpu, env->features); ++ } ++ ++ assert(kvm_buf_set_msrs(cpu) == 0); ++} ++ + static int kvm_put_msrs(X86CPU *cpu, int level) + { + CPUX86State *env = &cpu->env; + int i; +- int ret; + + kvm_msr_buf_reset(cpu); + +@@ -2722,17 +2768,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + } + #endif + +- /* If host supports feature MSR, write down. */ +- if (has_msr_arch_capabs) { +- kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, +- env->features[FEAT_ARCH_CAPABILITIES]); +- } +- +- if (has_msr_core_capabs) { +- kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, +- env->features[FEAT_CORE_CAPABILITY]); +- } +- + /* + * The following MSRs have side effects on the guest or are too heavy + * for normal writeback. Limit them to reset or full state updates. +@@ -2910,14 +2945,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see + * kvm_put_msr_feature_control. */ +- +- /* +- * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but +- * all kernels with MSR features should have them. 
+- */ +- if (kvm_feature_msrs && cpu_has_vmx(env)) { +- kvm_msr_entry_add_vmx(cpu, env->features); +- } + } + + if (env->mcg_cap) { +@@ -2933,19 +2960,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + } + } + +- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); +- if (ret < 0) { +- return ret; +- } +- +- if (ret < cpu->kvm_msr_buf->nmsrs) { +- struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; +- error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, +- (uint32_t)e->index, (uint64_t)e->data); +- } +- +- assert(ret == cpu->kvm_msr_buf->nmsrs); +- return 0; ++ return kvm_buf_set_msrs(cpu); + } + + +diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h +index 06fe06b..d98c6f6 100644 +--- a/target/i386/kvm_i386.h ++++ b/target/i386/kvm_i386.h +@@ -66,4 +66,5 @@ bool kvm_enable_x2apic(void); + bool kvm_has_x2apic_api(void); + + bool kvm_hv_vpindex_settable(void); ++ + #endif +-- +1.8.3.1 + diff --git a/SOURCES/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch b/SOURCES/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch new file mode 100644 index 0000000..99b18fc --- /dev/null +++ b/SOURCES/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch @@ -0,0 +1,64 @@ +From 8f39b0c9523630efeb451e2298cf64b88cd2ac81 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:13 +0000 +Subject: [PATCH 3/9] target/i386: kvm: initialize microcode revision from KVM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-4-pbonzini@redhat.com> +Patchwork-id: 93897 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/6] target/i386: kvm: initialize microcode revision from KVM +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +KVM can return the host microcode revision as a feature MSR. +Use it as the default value for -cpu host. 
+ +Signed-off-by: Paolo Bonzini +Message-Id: <1579544504-3616-4-git-send-email-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 32c87d70ff55b96741f08c35108935cac6f40fe4) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 4 ++++ + target/i386/kvm.c | 5 +++++ + 2 files changed, 9 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index e505d3e..5ac843d 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6323,6 +6323,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + &cpu->mwait.ecx, &cpu->mwait.edx); + env->features[FEAT_1_ECX] |= CPUID_EXT_MONITOR; + } ++ if (kvm_enabled() && cpu->ucode_rev == 0) { ++ cpu->ucode_rev = kvm_arch_get_supported_msr_feature(kvm_state, ++ MSR_IA32_UCODE_REV); ++ } + } + + if (cpu->ucode_rev == 0) { +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index f41605b..6c61aef 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -2696,6 +2696,11 @@ static void kvm_init_msrs(X86CPU *cpu) + env->features[FEAT_CORE_CAPABILITY]); + } + ++ if (kvm_arch_get_supported_msr_feature(kvm_state, ++ MSR_IA32_UCODE_REV)) { ++ kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); ++ } ++ + /* + * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but + * all kernels with MSR features should have them. 
+-- +1.8.3.1 + diff --git a/SOURCES/kvm-tcp_emu-Fix-oob-access.patch b/SOURCES/kvm-tcp_emu-Fix-oob-access.patch new file mode 100644 index 0000000..e532877 --- /dev/null +++ b/SOURCES/kvm-tcp_emu-Fix-oob-access.patch @@ -0,0 +1,59 @@ +From 5c2c5496083fa549e1dff903413bb6136fc19d8d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 17 Jan 2020 12:07:56 +0100 +Subject: [PATCH 1/4] tcp_emu: Fix oob access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200117120758.1076549-2-marcandre.lureau@redhat.com> +Patchwork-id: 93399 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 1/3] tcp_emu: Fix oob access +Bugzilla: 1791568 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +From: Samuel Thibault + +The main loop only checks for one available byte, while we sometimes +need two bytes. + +[ MA - minor conflict, CHANGELOG.md absent ] +(cherry picked from libslirp commit 2655fffed7a9e765bcb4701dd876e9dab975f289) +Signed-off-by: Marc-André Lureau + +Signed-off-by: Miroslav Rezanina +--- + slirp/src/tcp_subr.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +index d6dd133..cbecd64 100644 +--- a/slirp/src/tcp_subr.c ++++ b/slirp/src/tcp_subr.c +@@ -886,6 +886,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) + break; + + case 5: ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ ++ + /* + * The difference between versions 1.0 and + * 2.0 is here. For future versions of +@@ -901,6 +904,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) + /* This is the field containing the port + * number that RA-player is listening to. 
+ */ ++ ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ ++ + lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1]; + if (lport < 6970) + lport += 256; /* don't know why */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch b/SOURCES/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch new file mode 100644 index 0000000..846da73 --- /dev/null +++ b/SOURCES/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch @@ -0,0 +1,149 @@ +From 9a7810c257711ce02627916d886fc1029f7a8190 Mon Sep 17 00:00:00 2001 +From: jmaloy +Date: Thu, 13 Feb 2020 15:50:49 +0000 +Subject: [PATCH 3/7] tcp_emu: fix unsafe snprintf() usages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: jmaloy +Message-id: <20200213155049.3936-3-jmaloy@redhat.com> +Patchwork-id: 93826 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] tcp_emu: fix unsafe snprintf() usages +Bugzilla: 1798994 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi + +From: Marc-André Lureau + +Various calls to snprintf() assume that snprintf() returns "only" the +number of bytes written (excluding terminating NUL). + +https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 + +"Upon successful completion, the snprintf() function shall return the +number of bytes that would be written to s had n been sufficiently +large excluding the terminating null byte." + +Before patch ce131029, if there isn't enough room in "m_data" for the +"DCC ..." message, we overflow "m_data". + +After the patch, if there isn't enough room for the same, we don't +overflow "m_data", but we set "m_len" out-of-bounds. The next time an +access is bounded by "m_len", we'll have a buffer overflow then. + +Use slirp_fmt*() to fix potential OOB memory access. 
+ +Reported-by: Laszlo Ersek +Signed-off-by: Marc-André Lureau +Reviewed-by: Samuel Thibault +Message-Id: <20200127092414.169796-7-marcandre.lureau@redhat.com> +(cherry picked from libslirp commit 68ccb8021a838066f0951d4b2817eb6b6f10a843) +Signed-off-by: Jon Maloy + +Signed-off-by: Danilo C. L. de Paula +--- + slirp/src/tcp_subr.c | 44 +++++++++++++++++++++----------------------- + 1 file changed, 21 insertions(+), 23 deletions(-) + +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +index 954d1a6..26d4ead 100644 +--- a/slirp/src/tcp_subr.c ++++ b/slirp/src/tcp_subr.c +@@ -655,8 +655,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + NTOHS(n1); + NTOHS(n2); + m_inc(m, snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); +- m->m_len = snprintf(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); +- assert(m->m_len < M_ROOM(m)); ++ m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); + } else { + *eol = '\r'; + } +@@ -696,9 +695,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, +- n5, n6, x == 7 ? buff : ""); ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "ORT %d,%d,%d,%d,%d,%d\r\n%s", ++ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); + return 1; + } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) { + /* +@@ -731,10 +730,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", +- n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); +- ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", ++ n1, n2, n3, n4, n5, n6, x == 7 ? 
buff : ""); + return 1; + } + +@@ -757,8 +755,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && + (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, + htons(lport), SS_FACCEPTONCE)) != NULL) +- m->m_len = snprintf(m->m_data, M_ROOM(m), +- "%d", ntohs(so->so_fport)) + 1; ++ m->m_len = slirp_fmt0(m->m_data, M_ROOM(m), ++ "%d", ntohs(so->so_fport)); + return 1; + + case EMU_IRC: +@@ -777,10 +775,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "DCC CHAT chat %lu %u%c\n", +- (unsigned long)ntohl(so->so_faddr.s_addr), +- ntohs(so->so_fport), 1); ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC CHAT chat %lu %u%c\n", ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), 1); + } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, + &n1) == 4) { + if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), +@@ -788,10 +786,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "DCC SEND %s %lu %u %u%c\n", buff, +- (unsigned long)ntohl(so->so_faddr.s_addr), +- ntohs(so->so_fport), n1, 1); ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC SEND %s %lu %u %u%c\n", buff, ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), n1, 1); + } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, + &n1) == 4) { + if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), +@@ -799,10 +797,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "DCC MOVE %s %lu %u %u%c\n", buff, +- (unsigned long)ntohl(so->so_faddr.s_addr), +- ntohs(so->so_fport), n1, 1); ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC MOVE 
%s %lu %u %u%c\n", buff, ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), n1, 1); + } + return 1; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-tests-arm-cpu-features-Check-feature-default-values.patch b/SOURCES/kvm-tests-arm-cpu-features-Check-feature-default-values.patch new file mode 100644 index 0000000..e8a48bf --- /dev/null +++ b/SOURCES/kvm-tests-arm-cpu-features-Check-feature-default-values.patch @@ -0,0 +1,106 @@ +From 323889aa2182bf39df10f1caf43f22daea2d7d37 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:12 +0000 +Subject: [PATCH 10/15] tests/arm-cpu-features: Check feature default values +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-4-drjones@redhat.com> +Patchwork-id: 93626 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/5] tests/arm-cpu-features: Check feature default values +Bugzilla: 1647366 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:06 +0000 + + tests/arm-cpu-features: Check feature default values + + If we know what the default value should be then we can test for + that as well as the feature existence. + + Signed-off-by: Andrew Jones + Reviewed-by: Richard Henderson + Message-id: 20200120101023.16030-5-drjones@redhat.com + Signed-off-by: Peter Maydell + +(cherry picked from commit 789a35efb583464f9fcd5d871a7fd6164318bb91) +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/arm-cpu-features.c | 37 ++++++++++++++++++++++++++++--------- + 1 file changed, 28 insertions(+), 9 deletions(-) + +diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c +index 6e99aa9..89285ca 100644 +--- a/tests/arm-cpu-features.c ++++ b/tests/arm-cpu-features.c +@@ -159,6 +159,25 @@ static bool resp_get_feature(QDict *resp, const char *feature) + qobject_unref(_resp); \ + }) + ++#define assert_feature(qts, cpu_type, feature, expected_value) \ ++({ \ ++ QDict *_resp, *_props; \ ++ \ ++ _resp = do_query_no_props(qts, cpu_type); \ ++ g_assert(_resp); \ ++ g_assert(resp_has_props(_resp)); \ ++ _props = resp_get_props(_resp); \ ++ g_assert(qdict_get(_props, feature)); \ ++ g_assert(qdict_get_bool(_props, feature) == (expected_value)); \ ++ qobject_unref(_resp); \ ++}) ++ ++#define assert_has_feature_enabled(qts, cpu_type, feature) \ ++ assert_feature(qts, cpu_type, feature, true) ++ ++#define assert_has_feature_disabled(qts, cpu_type, feature) \ ++ assert_feature(qts, cpu_type, feature, false) ++ + static void assert_type_full(QTestState *qts) + { + const char *error; +@@ -405,16 +424,16 @@ static void test_query_cpu_model_expansion(const void *data) + assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); + + /* Test expected feature presence/absence for some cpu types */ +- assert_has_feature(qts, "max", "pmu"); +- assert_has_feature(qts, "cortex-a15", "pmu"); ++ assert_has_feature_enabled(qts, "max", "pmu"); ++ assert_has_feature_enabled(qts, "cortex-a15", "pmu"); + assert_has_not_feature(qts, "cortex-a15", "aarch64"); + + if (g_str_equal(qtest_get_arch(), "aarch64")) { +- assert_has_feature(qts, "max", "aarch64"); +- assert_has_feature(qts, "max", "sve"); +- assert_has_feature(qts, "max", "sve128"); +- assert_has_feature(qts, "cortex-a57", "pmu"); +- assert_has_feature(qts, "cortex-a57", "aarch64"); ++ assert_has_feature_enabled(qts, "max", "aarch64"); ++ assert_has_feature_enabled(qts, "max", "sve"); ++ 
assert_has_feature_enabled(qts, "max", "sve128"); ++ assert_has_feature_enabled(qts, "cortex-a57", "pmu"); ++ assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); + + sve_tests_default(qts, "max"); + +@@ -451,8 +470,8 @@ static void test_query_cpu_model_expansion_kvm(const void *data) + QDict *resp; + char *error; + +- assert_has_feature(qts, "host", "aarch64"); +- assert_has_feature(qts, "host", "pmu"); ++ assert_has_feature_enabled(qts, "host", "aarch64"); ++ assert_has_feature_enabled(qts, "host", "pmu"); + + assert_error(qts, "cortex-a15", + "We cannot guarantee the CPU type 'cortex-a15' works " +-- +1.8.3.1 + diff --git a/SOURCES/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch b/SOURCES/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch new file mode 100644 index 0000000..3efef47 --- /dev/null +++ b/SOURCES/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch @@ -0,0 +1,55 @@ +From e483eea891139ee38138381ba6715b3a2be050cc Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:12 +0000 +Subject: [PATCH 16/18] tools/virtiofsd/fuse_lowlevel: Fix + fuse_out_header::error value +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-6-dgilbert@redhat.com> +Patchwork-id: 94128 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/7] tools/virtiofsd/fuse_lowlevel: Fix fuse_out_header::error value +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: Philippe Mathieu-Daudé + +Fix warning reported by Clang static code analyzer: + + CC tools/virtiofsd/fuse_lowlevel.o + tools/virtiofsd/fuse_lowlevel.c:195:9: warning: Value stored to 'error' is never read + error = -ERANGE; + ^ ~~~~~~~ + +Fixes: 3db2876 +Reported-by: Clang Static Analyzer +Reviewed-by: Ján Tomko +Reviewed-by: Dr. 
David Alan Gilbert +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 09c086b2a144324199f99a7d4de78c3276a486c1) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/fuse_lowlevel.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 704c036..2dd36ec 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -192,7 +192,7 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, + + if (error <= -1000 || error > 0) { + fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); +- error = -ERANGE; ++ out.error = -ERANGE; + } + + iov[0].iov_base = &out; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-tpm-ppi-page-align-PPI-RAM.patch b/SOURCES/kvm-tpm-ppi-page-align-PPI-RAM.patch new file mode 100644 index 0000000..32c971d --- /dev/null +++ b/SOURCES/kvm-tpm-ppi-page-align-PPI-RAM.patch @@ -0,0 +1,58 @@ +From 7cb1c5e1416de9a09180f0930d2a216c77e8cdbd Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Thu, 30 Jan 2020 16:01:10 +0000 +Subject: [PATCH 07/15] tpm-ppi: page-align PPI RAM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200130160110.126086-1-marcandre.lureau@redhat.com> +Patchwork-id: 93600 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] tpm-ppi: page-align PPI RAM +Bugzilla: 1787444 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Philippe Mathieu-Daudé + +post-copy migration fails on destination with error such as: +2019-12-26T10:22:44.714644Z qemu-kvm: ram_block_discard_range: +Unaligned start address: 0x559d2afae9a0 + +Use qemu_memalign() to constrain the PPI RAM memory alignment. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Marc-André Lureau +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Dr. 
David Alan Gilbert +Reviewed-by: Stefan Berger +Signed-off-by: Stefan Berger +Message-id: 20200103074000.1006389-3-marcandre.lureau@redhat.com + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1787444 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=26122940 + +(cherry picked from commit 71e415c8a75c130875f14d6b2136825789feb297) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + hw/tpm/tpm_ppi.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c +index ff31459..6d9c1a3 100644 +--- a/hw/tpm/tpm_ppi.c ++++ b/hw/tpm/tpm_ppi.c +@@ -43,7 +43,8 @@ void tpm_ppi_reset(TPMPPI *tpmppi) + void tpm_ppi_init(TPMPPI *tpmppi, struct MemoryRegion *m, + hwaddr addr, Object *obj) + { +- tpmppi->buf = g_malloc0(HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); ++ tpmppi->buf = qemu_memalign(qemu_real_host_page_size, ++ HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); + memory_region_init_ram_device_ptr(&tpmppi->ram, obj, "tpm-ppi", + TPM_PPI_ADDR_SIZE, tpmppi->buf); + vmstate_register_ram(&tpmppi->ram, DEVICE(obj)); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-trace-update-qemu-trace-stap-to-Python-3.patch b/SOURCES/kvm-trace-update-qemu-trace-stap-to-Python-3.patch new file mode 100644 index 0000000..c49aecd --- /dev/null +++ b/SOURCES/kvm-trace-update-qemu-trace-stap-to-Python-3.patch @@ -0,0 +1,82 @@ +From e7cdcd1e39c4c030a32c9e8ef79316eae8555bc8 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 16 Jan 2020 17:52:48 +0000 +Subject: [PATCH 04/15] trace: update qemu-trace-stap to Python 3 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +Message-id: <20200116175248.286556-2-stefanha@redhat.com> +Patchwork-id: 93365 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] trace: update qemu-trace-stap to Python 3 +Bugzilla: 1787395 +RH-Acked-by: John Snow +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Dr. 
David Alan Gilbert + +qemu-trace-stap does not support Python 3 yet: + + $ scripts/qemu-trace-stap list path/to/qemu-system-x86_64 + Traceback (most recent call last): + File "scripts/qemu-trace-stap", line 175, in + main() + File "scripts/qemu-trace-stap", line 171, in main + args.func(args) + File "scripts/qemu-trace-stap", line 118, in cmd_list + print_probes(args.verbose, "*") + File "scripts/qemu-trace-stap", line 114, in print_probes + if line.startswith(prefix): + TypeError: startswith first arg must be bytes or a tuple of bytes, not str + +Now that QEMU requires Python 3.5 or later we can switch to pure Python +3. Use Popen()'s universal_newlines=True argument to treat stdout as +text instead of binary. + +Fixes: 62dd1048c0bd ("trace: add ability to do simple printf logging via systemtap") +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1787395 +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Message-id: 20200107112438.383958-1-stefanha@redhat.com +Message-Id: <20200107112438.383958-1-stefanha@redhat.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 3f0097169bb60268cc5dda0c5ea47c31ab57b22f) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. de Paula +--- + scripts/qemu-trace-stap | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/scripts/qemu-trace-stap b/scripts/qemu-trace-stap +index 91d1051..90527eb 100755 +--- a/scripts/qemu-trace-stap ++++ b/scripts/qemu-trace-stap +@@ -1,4 +1,4 @@ +-#!/usr/bin/python ++#!/usr/bin/env python3 + # -*- python -*- + # + # Copyright (C) 2019 Red Hat, Inc +@@ -18,8 +18,6 @@ + # You should have received a copy of the GNU General Public License + # along with this program; if not, see . 
+ +-from __future__ import print_function +- + import argparse + import copy + import os.path +@@ -104,7 +102,9 @@ def cmd_list(args): + if verbose: + print("Listing probes with name '%s'" % script) + proc = subprocess.Popen(["stap", "-l", script], +- stdout=subprocess.PIPE, env=tapset_env(tapsets)) ++ stdout=subprocess.PIPE, ++ universal_newlines=True, ++ env=tapset_env(tapsets)) + out, err = proc.communicate() + if proc.returncode != 0: + print("No probes found, are the tapsets installed in %s" % tapset_dir(args.binary)) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch b/SOURCES/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch new file mode 100644 index 0000000..8f08256 --- /dev/null +++ b/SOURCES/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch @@ -0,0 +1,106 @@ +From 8f6311159977b8ee4b78172caa411d3cee4d2ae5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 14 Jan 2020 20:23:30 +0000 +Subject: [PATCH 4/5] usbredir: Prevent recursion in usbredir_write +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200114202331.51831-2-dgilbert@redhat.com> +Patchwork-id: 93344 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] usbredir: Prevent recursion in usbredir_write +Bugzilla: 1790844 +RH-Acked-by: Peter Xu +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Gerd Hoffmann + +From: "Dr. David Alan Gilbert" + +I've got a case where usbredir_write manages to call back into itself +via spice; this patch causes the recursion to fail (0 bytes) the write; +this seems to avoid the deadlock I was previously seeing. 
+ +I can't say I fully understand the interaction of usbredir and spice; +but there are a few similar guards in spice and usbredir +to catch other cases especially onces also related to spice_server_char_device_wakeup + +This case seems to be triggered by repeated migration+repeated +reconnection of the viewer; but my debugging suggests the migration +finished before this hits. + +The backtrace of the hang looks like: + reds_handle_ticket + reds_handle_other_links + reds_channel_do_link + red_channel_connect + spicevmc_connect + usbredir_create_parser + usbredirparser_do_write + usbredir_write + qemu_chr_fe_write + qemu_chr_write + qemu_chr_write_buffer + spice_chr_write + spice_server_char_device_wakeup + red_char_device_wakeup + red_char_device_write_to_device + vmc_write + usbredirparser_do_write + usbredir_write + qemu_chr_fe_write + qemu_chr_write + qemu_chr_write_buffer + qemu_mutex_lock_impl + +and we fail as we land through qemu_chr_write_buffer's lock +twice. + +Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1752320 + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20191218113012.13331-1-dgilbert@redhat.com> +Signed-off-by: Gerd Hoffmann +(cherry picked from commit 394642a8d3742c885e397d5bb5ee0ec40743cdc6) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/usb/redirect.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c +index e0f5ca6..97f2c3a 100644 +--- a/hw/usb/redirect.c ++++ b/hw/usb/redirect.c +@@ -113,6 +113,7 @@ struct USBRedirDevice { + /* Properties */ + CharBackend cs; + bool enable_streams; ++ bool in_write; + uint8_t debug; + int32_t bootindex; + char *filter_str; +@@ -290,6 +291,13 @@ static int usbredir_write(void *priv, uint8_t *data, int count) + return 0; + } + ++ /* Recursion check */ ++ if (dev->in_write) { ++ DPRINTF("usbredir_write recursion\n"); ++ return 0; ++ } ++ dev->in_write = true; ++ + r = qemu_chr_fe_write(&dev->cs, data, count); + if (r < count) { + if (!dev->watch) { +@@ -300,6 +308,7 @@ static int usbredir_write(void *priv, uint8_t *data, int count) + r = 0; + } + } ++ dev->in_write = false; + return r; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-util-add-slirp_fmt-helpers.patch b/SOURCES/kvm-util-add-slirp_fmt-helpers.patch new file mode 100644 index 0000000..31af599 --- /dev/null +++ b/SOURCES/kvm-util-add-slirp_fmt-helpers.patch @@ -0,0 +1,140 @@ +From 5dc50c6bca059a9cda6677b1fd0187df1de78ed7 Mon Sep 17 00:00:00 2001 +From: jmaloy +Date: Thu, 13 Feb 2020 15:50:48 +0000 +Subject: [PATCH 2/7] util: add slirp_fmt() helpers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: jmaloy +Message-id: <20200213155049.3936-2-jmaloy@redhat.com> +Patchwork-id: 93824 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] util: add slirp_fmt() helpers +Bugzilla: 1798994 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi + +From: Marc-André Lureau + +Various calls to snprintf() in libslirp assume that snprintf() returns +"only" the number of bytes written (excluding terminating NUL). 
+ +https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 + +"Upon successful completion, the snprintf() function shall return the +number of bytes that would be written to s had n been sufficiently +large excluding the terminating null byte." + +Introduce slirp_fmt() that handles several pathological cases the +way libslirp usually expect: + +- treat error as fatal (instead of silently returning -1) + +- fmt0() will always \0 end + +- return the number of bytes actually written (instead of what would +have been written, which would usually result in OOB later), including +the ending \0 for fmt0() + +- warn if truncation happened (instead of ignoring) + +Other less common cases can still be handled with strcpy/snprintf() etc. + +Signed-off-by: Marc-André Lureau +Reviewed-by: Samuel Thibault +Message-Id: <20200127092414.169796-2-marcandre.lureau@redhat.com> +(cherry picked from libslirp commit 30648c03b27fb8d9611b723184216cd3174b6775) +Signed-off-by: Jon Maloy + +Signed-off-by: Danilo C. L. de Paula +--- + slirp/src/util.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + slirp/src/util.h | 3 +++ + 2 files changed, 65 insertions(+) + +diff --git a/slirp/src/util.c b/slirp/src/util.c +index e596087..e3b6257 100644 +--- a/slirp/src/util.c ++++ b/slirp/src/util.c +@@ -364,3 +364,65 @@ void slirp_pstrcpy(char *buf, int buf_size, const char *str) + } + *q = '\0'; + } ++ ++static int slirp_vsnprintf(char *str, size_t size, ++ const char *format, va_list args) ++{ ++ int rv = vsnprintf(str, size, format, args); ++ ++ if (rv < 0) { ++ g_error("vsnprintf() failed: %s", g_strerror(errno)); ++ } ++ ++ return rv; ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - returns the number of bytes written (excluding optional \0-ending) ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt(char *str, size_t size, const char *format, ...) 
++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv > size) { ++ g_critical("vsnprintf() truncation"); ++ } ++ ++ return MIN(rv, size); ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - always \0-end (unless size == 0) ++ * - returns the number of bytes actually written, including \0 ending ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt0(char *str, size_t size, const char *format, ...) ++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv >= size) { ++ g_critical("vsnprintf() truncation"); ++ if (size > 0) ++ str[size - 1] = '\0'; ++ rv = size; ++ } else { ++ rv += 1; /* include \0 */ ++ } ++ ++ return rv; ++} +diff --git a/slirp/src/util.h b/slirp/src/util.h +index 3c6223c..0558dfc 100644 +--- a/slirp/src/util.h ++++ b/slirp/src/util.h +@@ -177,4 +177,7 @@ static inline int slirp_socket_set_fast_reuse(int fd) + + void slirp_pstrcpy(char *buf, int buf_size, const char *str); + ++int slirp_fmt(char *str, size_t size, const char *format, ...); ++int slirp_fmt0(char *str, size_t size, const char *format, ...); ++ + #endif +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch b/SOURCES/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch new file mode 100644 index 0000000..d416e0f --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch @@ -0,0 +1,58 @@ +From e4631c00d8e9ee3608ef3196cbe8bec4841ee988 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 8 Jan 2020 15:04:57 +0000 +Subject: [PATCH 2/5] vfio/pci: Don't remove irqchip notifier if not registered + +RH-Author: Peter Xu +Message-id: <20200108150457.12324-2-peterx@redhat.com> +Patchwork-id: 93291 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] vfio/pci: Don't remove irqchip notifier if not registered +Bugzilla: 1782678 
+RH-Acked-by: Alex Williamson +RH-Acked-by: Cornelia Huck +RH-Acked-by: Auger Eric +RH-Acked-by: Jens Freimann + +The kvm irqchip notifier is only registered if the device supports +INTx, however it's unconditionally removed. If the assigned device +does not support INTx, this will cause QEMU to crash when unplugging +the device from the system. Change it to conditionally remove the +notifier only if the notify hook is setup. + +CC: Eduardo Habkost +CC: David Gibson +CC: Alex Williamson +Cc: qemu-stable@nongnu.org # v4.2 +Reported-by: yanghliu@redhat.com +Debugged-by: Eduardo Habkost +Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1782678 +Signed-off-by: Peter Xu +Reviewed-by: David Gibson +Reviewed-by: Greg Kurz +Signed-off-by: Alex Williamson +(cherry picked from commit 0446f8121723b134ca1d1ed0b73e96d4a0a8689d) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. de Paula +--- + hw/vfio/pci.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 309535f..d717520 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3100,7 +3100,9 @@ static void vfio_exitfn(PCIDevice *pdev) + vfio_unregister_req_notifier(vdev); + vfio_unregister_err_notifier(vdev); + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); +- kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); ++ if (vdev->irqchip_change_notifier.notify) { ++ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); ++ } + vfio_disable_interrupts(vdev); + if (vdev->intx.mmap_timer) { + timer_free(vdev->intx.mmap_timer); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vhost-Add-names-to-section-rounded-warning.patch b/SOURCES/kvm-vhost-Add-names-to-section-rounded-warning.patch new file mode 100644 index 0000000..c41a14c --- /dev/null +++ b/SOURCES/kvm-vhost-Add-names-to-section-rounded-warning.patch @@ -0,0 +1,53 @@ +From 
0d545c5850caf76ad3e8dd9bb0fbc9f86b08e220 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 24 Jan 2020 19:46:11 +0100 +Subject: [PATCH 002/116] vhost: Add names to section rounded warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200124194613.41119-2-dgilbert@redhat.com> +Patchwork-id: 93450 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 1/3] vhost: Add names to section rounded warning +Bugzilla: 1779041 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Philippe Mathieu-Daudé + +From: "Dr. David Alan Gilbert" + +Add the memory region names to section rounding/alignment +warnings. + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20200116202414.157959-2-dgilbert@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit ff4776147e960b128ee68f94c728659f662f4378) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 4da0d5a..774d87d 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -590,9 +590,10 @@ static void vhost_region_add_section(struct vhost_dev *dev, + * match up in the same RAMBlock if they do. 
+ */ + if (mrs_gpa < prev_gpa_start) { +- error_report("%s:Section rounded to %"PRIx64 +- " prior to previous %"PRIx64, +- __func__, mrs_gpa, prev_gpa_start); ++ error_report("%s:Section '%s' rounded to %"PRIx64 ++ " prior to previous '%s' %"PRIx64, ++ __func__, section->mr->name, mrs_gpa, ++ prev_sec->mr->name, prev_gpa_start); + /* A way to cleanly fail here would be better */ + return; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vhost-Only-align-sections-for-vhost-user.patch b/SOURCES/kvm-vhost-Only-align-sections-for-vhost-user.patch new file mode 100644 index 0000000..e082ce8 --- /dev/null +++ b/SOURCES/kvm-vhost-Only-align-sections-for-vhost-user.patch @@ -0,0 +1,97 @@ +From c35466c168e5219bf585aa65ac31fc9bdc7cbf36 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 24 Jan 2020 19:46:12 +0100 +Subject: [PATCH 003/116] vhost: Only align sections for vhost-user +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200124194613.41119-3-dgilbert@redhat.com> +Patchwork-id: 93452 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 2/3] vhost: Only align sections for vhost-user +Bugzilla: 1779041 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Philippe Mathieu-Daudé + +From: "Dr. David Alan Gilbert" + +I added hugepage alignment code in c1ece84e7c9 to deal with +vhost-user + postcopy which needs aligned pages when using userfault. +However, on x86 the lower 2MB of address space tends to be shotgun'd +with small fragments around the 512-640k range - e.g. video RAM, and +with HyperV synic pages tend to sit around there - again splitting +it up. The alignment code complains with a 'Section rounded to ...' +error and gives up. + +Since vhost-user already filters out devices without an fd +(see vhost-user.c vhost_user_mem_section_filter) it shouldn't be +affected by those overlaps. 
+ +Turn the alignment off on vhost-kernel so that it doesn't try +and align, and thus won't hit the rounding issues. + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20200116202414.157959-3-dgilbert@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Paolo Bonzini +(cherry picked from commit 76525114736e8f669766e69b715fa59ce8648aae) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost.c | 34 ++++++++++++++++++---------------- + 1 file changed, 18 insertions(+), 16 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 774d87d..25fd469 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -547,26 +547,28 @@ static void vhost_region_add_section(struct vhost_dev *dev, + uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + + section->offset_within_region; + RAMBlock *mrs_rb = section->mr->ram_block; +- size_t mrs_page = qemu_ram_pagesize(mrs_rb); + + trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, + mrs_host); + +- /* Round the section to it's page size */ +- /* First align the start down to a page boundary */ +- uint64_t alignage = mrs_host & (mrs_page - 1); +- if (alignage) { +- mrs_host -= alignage; +- mrs_size += alignage; +- mrs_gpa -= alignage; +- } +- /* Now align the size up to a page boundary */ +- alignage = mrs_size & (mrs_page - 1); +- if (alignage) { +- mrs_size += mrs_page - alignage; +- } +- trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, +- mrs_host); ++ if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { ++ /* Round the section to it's page size */ ++ /* First align the start down to a page boundary */ ++ size_t mrs_page = qemu_ram_pagesize(mrs_rb); ++ uint64_t alignage = mrs_host & (mrs_page - 1); ++ if (alignage) { ++ mrs_host -= alignage; ++ mrs_size += alignage; ++ mrs_gpa -= alignage; ++ } ++ /* Now align the size up to a page boundary */ ++ alignage = mrs_size & (mrs_page - 1); ++ if 
(alignage) { ++ mrs_size += mrs_page - alignage; ++ } ++ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, ++ mrs_host); ++ } + + if (dev->n_tmp_sections) { + /* Since we already have at least one section, lets see if +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vhost-coding-style-fix.patch b/SOURCES/kvm-vhost-coding-style-fix.patch new file mode 100644 index 0000000..4546130 --- /dev/null +++ b/SOURCES/kvm-vhost-coding-style-fix.patch @@ -0,0 +1,56 @@ +From 624d96c456536e1471968a59fbeea206309cc33b Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 24 Jan 2020 19:46:13 +0100 +Subject: [PATCH 004/116] vhost: coding style fix +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200124194613.41119-4-dgilbert@redhat.com> +Patchwork-id: 93453 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 3/3] vhost: coding style fix +Bugzilla: 1779041 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Philippe Mathieu-Daudé + +From: "Michael S. Tsirkin" + +Drop a trailing whitespace. Make line shorter. + +Fixes: 76525114736e8 ("vhost: Only align sections for vhost-user") +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 8347505640238d3b80f9bb7510fdc1bb574bad19) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 25fd469..9edfadc 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -551,7 +551,7 @@ static void vhost_region_add_section(struct vhost_dev *dev, + trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, + mrs_host); + +- if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { ++ if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { + /* Round the section to it's page size */ + /* First align the start down to a page boundary */ + size_t mrs_page = qemu_ram_pagesize(mrs_rb); +@@ -566,8 +566,8 @@ static void vhost_region_add_section(struct vhost_dev *dev, + if (alignage) { + mrs_size += mrs_page - alignage; + } +- trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, +- mrs_host); ++ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, ++ mrs_size, mrs_host); + } + + if (dev->n_tmp_sections) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vhost-user-Print-unexpected-slave-message-types.patch b/SOURCES/kvm-vhost-user-Print-unexpected-slave-message-types.patch new file mode 100644 index 0000000..e5776e7 --- /dev/null +++ b/SOURCES/kvm-vhost-user-Print-unexpected-slave-message-types.patch @@ -0,0 +1,48 @@ +From d6abbdaeb2c35efe6793a599c98116e250b1f179 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:43 +0100 +Subject: [PATCH 072/116] vhost-user: Print unexpected slave message types +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-69-dgilbert@redhat.com> +Patchwork-id: 93519 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 068/112] vhost-user: Print unexpected slave message types +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +When we receive an unexpected message type on the slave fd, print +the type. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 0fdc465d7d5aafeae127eba488f247ac6f58df4c) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost-user.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index 02a9b25..e4f46ec 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -1055,7 +1055,7 @@ static void slave_read(void *opaque) + fd[0]); + break; + default: +- error_report("Received unexpected msg type."); ++ error_report("Received unexpected msg type: %d.", hdr.request); + ret = -EINVAL; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vhost-user-fs-remove-vhostfd-property.patch b/SOURCES/kvm-vhost-user-fs-remove-vhostfd-property.patch new file mode 100644 index 0000000..5904e82 --- /dev/null +++ b/SOURCES/kvm-vhost-user-fs-remove-vhostfd-property.patch @@ -0,0 +1,59 @@ +From 912af6f7c270e2939a91c9b3f62b6ba1202edc43 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:37 +0100 +Subject: [PATCH 006/116] vhost-user-fs: remove "vhostfd" property +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-3-dgilbert@redhat.com> +Patchwork-id: 93458 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 002/112] vhost-user-fs: remove "vhostfd" property +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Marc-André Lureau + +The property doesn't make much sense for a vhost-user device. + +Signed-off-by: Marc-André Lureau +Message-Id: <20191116112016.14872-1-marcandre.lureau@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 703857348724319735d9be7b5b996e6445c6e6b9) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost-user-fs.c | 1 - + include/hw/virtio/vhost-user-fs.h | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index f0df7f4..ca0b7fc 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -263,7 +263,6 @@ static Property vuf_properties[] = { + DEFINE_PROP_UINT16("num-request-queues", VHostUserFS, + conf.num_request_queues, 1), + DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128), +- DEFINE_PROP_STRING("vhostfd", VHostUserFS, conf.vhostfd), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h +index 539885b..9ff1bdb 100644 +--- a/include/hw/virtio/vhost-user-fs.h ++++ b/include/hw/virtio/vhost-user-fs.h +@@ -28,7 +28,6 @@ typedef struct { + char *tag; + uint16_t num_request_queues; + uint16_t queue_size; +- char *vhostfd; + } VHostUserFSConf; + + typedef struct { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch b/SOURCES/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch new file mode 100644 index 0000000..3a50632 --- /dev/null +++ b/SOURCES/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch @@ -0,0 +1,52 @@ +From 044feb40e3041759ee77d08136f334cf3ad67c1e Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?J=C3=A1n=20Tomko?= +Date: Fri, 21 Feb 2020 09:49:23 +0000 +Subject: [PATCH] vhost-user-gpu: Drop trailing json comma +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Ján Tomko +Message-id: <07fed9a38495938a7180819e27f590d80cd6668d.1582278173.git.jtomko@redhat.com> +Patchwork-id: 94019 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] vhost-user-gpu: Drop trailing json comma +Bugzilla: 1805334 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Stefan Hajnoczi + +From: Cole Robinson + +Trailing comma is not valid json: + +$ cat contrib/vhost-user-gpu/50-qemu-gpu.json.in | jq +parse error: Expected another key-value pair at line 5, column 1 + +Signed-off-by: Cole Robinson +Reviewed-by: Marc-André Lureau +Reviewed-by: Li Qiang +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 7f5dd2ac9f3504e2699f23e69bc3d8051b729832.1568925097.git.crobinso@redhat.com +Signed-off-by: Gerd Hoffmann +(cherry picked from commit ca26b032e5a0e8a190c763ce828a8740d24b9b65) +Signed-off-by: Ján Tomko +Signed-off-by: Danilo C. L. 
de Paula +--- + contrib/vhost-user-gpu/50-qemu-gpu.json.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/contrib/vhost-user-gpu/50-qemu-gpu.json.in b/contrib/vhost-user-gpu/50-qemu-gpu.json.in +index 658b545..f5edd09 100644 +--- a/contrib/vhost-user-gpu/50-qemu-gpu.json.in ++++ b/contrib/vhost-user-gpu/50-qemu-gpu.json.in +@@ -1,5 +1,5 @@ + { + "description": "QEMU vhost-user-gpu", + "type": "gpu", +- "binary": "@libexecdir@/vhost-user-gpu", ++ "binary": "@libexecdir@/vhost-user-gpu" + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch b/SOURCES/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch new file mode 100644 index 0000000..ed10701 --- /dev/null +++ b/SOURCES/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch @@ -0,0 +1,80 @@ +From b395ad369278d0923a590975fabbb99ec7716c6b Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Wed, 19 Feb 2020 21:34:28 +0000 +Subject: [PATCH 4/7] virtio: add ability to delete vq through a pointer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Julia Suvorova +Message-id: <20200219213431.11913-2-jusual@redhat.com> +Patchwork-id: 93980 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/4] virtio: add ability to delete vq through a pointer +Bugzilla: 1791590 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Michael S. Tsirkin + +From: "Michael S. Tsirkin" + +Devices tend to maintain vq pointers, allow deleting them through a vq pointer. + +Signed-off-by: Michael S. Tsirkin +Reviewed-by: David Hildenbrand +Reviewed-by: David Hildenbrand +(cherry picked from commit 722f8c51d8af223751dfb1d02de40043e8ba067e) +Signed-off-by: Danilo C. L.
de Paula +--- + hw/virtio/virtio.c | 15 ++++++++++----- + include/hw/virtio/virtio.h | 2 ++ + 2 files changed, 12 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 3211135..d63a369 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2335,17 +2335,22 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + return &vdev->vq[i]; + } + ++void virtio_delete_queue(VirtQueue *vq) ++{ ++ vq->vring.num = 0; ++ vq->vring.num_default = 0; ++ vq->handle_output = NULL; ++ vq->handle_aio_output = NULL; ++ g_free(vq->used_elems); ++} ++ + void virtio_del_queue(VirtIODevice *vdev, int n) + { + if (n < 0 || n >= VIRTIO_QUEUE_MAX) { + abort(); + } + +- vdev->vq[n].vring.num = 0; +- vdev->vq[n].vring.num_default = 0; +- vdev->vq[n].handle_output = NULL; +- vdev->vq[n].handle_aio_output = NULL; +- g_free(vdev->vq[n].used_elems); ++ virtio_delete_queue(&vdev->vq[n]); + } + + static void virtio_set_isr(VirtIODevice *vdev, int value) +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 6a20442..91167f6 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -183,6 +183,8 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + + void virtio_del_queue(VirtIODevice *vdev, int n); + ++void virtio_delete_queue(VirtQueue *vq); ++ + void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len); + void virtqueue_flush(VirtQueue *vq, unsigned int count); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtio-don-t-enable-notifications-during-polling.patch b/SOURCES/kvm-virtio-don-t-enable-notifications-during-polling.patch new file mode 100644 index 0000000..2dffc01 --- /dev/null +++ b/SOURCES/kvm-virtio-don-t-enable-notifications-during-polling.patch @@ -0,0 +1,158 @@ +From 351dd07d7b5e69cdf47260c9ea848c0c93cd2c8a Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 9 Jan 2020 11:13:25 +0000 +Subject: [PATCH 3/5] virtio: don't enable notifications 
during polling + +RH-Author: Stefan Hajnoczi +Message-id: <20200109111325.559557-2-stefanha@redhat.com> +Patchwork-id: 93298 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] virtio: don't enable notifications during polling +Bugzilla: 1789301 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Michael S. Tsirkin + +Virtqueue notifications are not necessary during polling, so we disable +them. This allows the guest driver to avoid MMIO vmexits. +Unfortunately the virtio-blk and virtio-scsi handler functions re-enable +notifications, defeating this optimization. + +Fix virtio-blk and virtio-scsi emulation so they leave notifications +disabled. The key thing to remember for correctness is that polling +always checks one last time after ending its loop, therefore it's safe +to lose the race when re-enabling notifications at the end of polling. + +There is a measurable performance improvement of 5-10% with the null-co +block driver. Real-life storage configurations will see a smaller +improvement because the MMIO vmexit overhead contributes less to +latency. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20191209210957.65087-1-stefanha@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit d0435bc513e23a4961b6af20164d1c6c219eb4ea) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/block/virtio-blk.c | 9 +++++++-- + hw/scsi/virtio-scsi.c | 9 +++++++-- + hw/virtio/virtio.c | 12 ++++++------ + include/hw/virtio/virtio.h | 1 + + 4 files changed, 21 insertions(+), 10 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 4c357d2..c4e55fb 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -764,13 +764,16 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) + { + VirtIOBlockReq *req; + MultiReqBuffer mrb = {}; ++ bool suppress_notifications = virtio_queue_get_notification(vq); + bool progress = false; + + aio_context_acquire(blk_get_aio_context(s->blk)); + blk_io_plug(s->blk); + + do { +- virtio_queue_set_notification(vq, 0); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 0); ++ } + + while ((req = virtio_blk_get_request(s, vq))) { + progress = true; +@@ -781,7 +784,9 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) + } + } + +- virtio_queue_set_notification(vq, 1); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 1); ++ } + } while (!virtio_queue_empty(vq)); + + if (mrb.num_reqs) { +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 54108c0..e2cd1df 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -597,12 +597,15 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + { + VirtIOSCSIReq *req, *next; + int ret = 0; ++ bool suppress_notifications = virtio_queue_get_notification(vq); + bool progress = false; + + QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); + + do { +- virtio_queue_set_notification(vq, 0); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 0); ++ } + + while ((req = virtio_scsi_pop_req(s, vq))) { + progress = true; +@@ -622,7 +625,9 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + } + } + +- virtio_queue_set_notification(vq, 1); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 1); ++ } + } 
while (ret != -EINVAL && !virtio_queue_empty(vq)); + + QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 04716b5..3211135 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -432,6 +432,11 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable) + } + } + ++bool virtio_queue_get_notification(VirtQueue *vq) ++{ ++ return vq->notification; ++} ++ + void virtio_queue_set_notification(VirtQueue *vq, int enable) + { + vq->notification = enable; +@@ -3384,17 +3389,12 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque) + { + EventNotifier *n = opaque; + VirtQueue *vq = container_of(n, VirtQueue, host_notifier); +- bool progress; + + if (!vq->vring.desc || virtio_queue_empty(vq)) { + return false; + } + +- progress = virtio_queue_notify_aio_vq(vq); +- +- /* In case the handler function re-enabled notifications */ +- virtio_queue_set_notification(vq, 0); +- return progress; ++ return virtio_queue_notify_aio_vq(vq); + } + + static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index c32a815..6a20442 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -224,6 +224,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id); + + void virtio_notify_config(VirtIODevice *vdev); + ++bool virtio_queue_get_notification(VirtQueue *vq); + void virtio_queue_set_notification(VirtQueue *vq, int enable); + + int virtio_queue_ready(VirtQueue *vq); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch b/SOURCES/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch new file mode 100644 index 0000000..9a69ed1 --- /dev/null +++ b/SOURCES/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch @@ -0,0 +1,60 @@ +From c0cf6d8a1d3b9bf3928f37fcfd5aa8ae6f1338ca Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:36 +0100 +Subject: [PATCH 005/116] virtio-fs: fix MSI-X nvectors calculation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-2-dgilbert@redhat.com> +Patchwork-id: 93455 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 001/112] virtio-fs: fix MSI-X nvectors calculation +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +The following MSI-X vectors are required: + * VIRTIO Configuration Change + * hiprio virtqueue + * requests virtqueues + +Fix the calculation to reserve enough MSI-X vectors. Otherwise guest +drivers fall back to a sub-optimal configuration where all virtqueues +share a single vector. + +This change does not break live migration compatibility since +vhost-user-fs-pci devices are not migratable yet. + +Reported-by: Vivek Goyal +Signed-off-by: Stefan Hajnoczi +Message-Id: <20191209110759.35227-1-stefanha@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Dr.
David Alan Gilbert +(cherry picked from commit 366844f3d1329c6423dd752891a28ccb3ee8fddd) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost-user-fs-pci.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c +index 933a3f2..e3a649d 100644 +--- a/hw/virtio/vhost-user-fs-pci.c ++++ b/hw/virtio/vhost-user-fs-pci.c +@@ -40,7 +40,8 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + DeviceState *vdev = DEVICE(&dev->vdev); + + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { +- vpci_dev->nvectors = dev->vdev.conf.num_request_queues + 1; ++ /* Also reserve config change and hiprio queue vectors */ ++ vpci_dev->nvectors = dev->vdev.conf.num_request_queues + 2; + } + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtio-make-virtio_delete_queue-idempotent.patch b/SOURCES/kvm-virtio-make-virtio_delete_queue-idempotent.patch new file mode 100644 index 0000000..16eb1da --- /dev/null +++ b/SOURCES/kvm-virtio-make-virtio_delete_queue-idempotent.patch @@ -0,0 +1,42 @@ +From 901e65fa6ccbadeacd6c585cf49a0a7cdafb4737 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Wed, 19 Feb 2020 21:34:29 +0000 +Subject: [PATCH 5/7] virtio: make virtio_delete_queue idempotent + +RH-Author: Julia Suvorova +Message-id: <20200219213431.11913-3-jusual@redhat.com> +Patchwork-id: 93981 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/4] virtio: make virtio_delete_queue idempotent +Bugzilla: 1791590 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Michael S. Tsirkin + +From: "Michael S. Tsirkin" + +Let's make sure calling this twice is harmless - +no known instances, but seems safer. + +Suggested-by: Pan Nengyuan +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 8cd353ea0fbf0e334e015d833f612799be642296) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/virtio/virtio.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index d63a369..e6a9ba4 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2342,6 +2342,7 @@ void virtio_delete_queue(VirtQueue *vq) + vq->handle_output = NULL; + vq->handle_aio_output = NULL; + g_free(vq->used_elems); ++ vq->used_elems = NULL; + } + + void virtio_del_queue(VirtIODevice *vdev, int n) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch b/SOURCES/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch new file mode 100644 index 0000000..c21c699 --- /dev/null +++ b/SOURCES/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch @@ -0,0 +1,49 @@ +From 2f494c41715193522c52eafc6af2a5e33f88ceb9 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Wed, 19 Feb 2020 21:34:31 +0000 +Subject: [PATCH 7/7] virtio-net: delete also control queue when TX/RX deleted + +RH-Author: Julia Suvorova +Message-id: <20200219213431.11913-5-jusual@redhat.com> +Patchwork-id: 93983 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/4] virtio-net: delete also control queue when TX/RX deleted +Bugzilla: 1791590 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Michael S. Tsirkin + +From: Yuri Benditovich + +https://bugzilla.redhat.com/show_bug.cgi?id=1708480 +If the control queue is not deleted together with TX/RX, it +later will be ignored in freeing cache resources and hot +unplug will not be completed. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Yuri Benditovich +Message-Id: <20191226043649.14481-3-yuri.benditovich@daynix.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit d945d9f1731244ef341f74ede93120fc9de35913) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index db3d7c3..f325440 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3101,7 +3101,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) + for (i = 0; i < max_queues; i++) { + virtio_net_del_queue(n, i); + } +- ++ /* delete also control vq */ ++ virtio_del_queue(vdev, max_queues * 2); + qemu_announce_timer_del(&n->announce_timer, false); + g_free(n->vqs); + qemu_del_nic(n->nic); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch b/SOURCES/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch new file mode 100644 index 0000000..c9f1086 --- /dev/null +++ b/SOURCES/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch @@ -0,0 +1,46 @@ +From 8bf4f561262d9282cebdb3418cdb9a69c92216a0 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Wed, 19 Feb 2020 21:34:30 +0000 +Subject: [PATCH 6/7] virtio: reset region cache when on queue deletion + +RH-Author: Julia Suvorova +Message-id: <20200219213431.11913-4-jusual@redhat.com> +Patchwork-id: 93982 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/4] virtio: reset region cache when on queue deletion +Bugzilla: 1791590 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Michael S. Tsirkin + +From: Yuri Benditovich + +https://bugzilla.redhat.com/show_bug.cgi?id=1708480 +Fix leak of region reference that prevents complete +device deletion on hot unplug. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Yuri Benditovich +Message-Id: <20191226043649.14481-2-yuri.benditovich@daynix.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 421afd2fe8dd4603216cbf36081877c391f5a2a4) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/virtio/virtio.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index e6a9ba4..f644d9a 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2343,6 +2343,7 @@ void virtio_delete_queue(VirtQueue *vq) + vq->handle_aio_output = NULL; + g_free(vq->used_elems); + vq->used_elems = NULL; ++ virtio_virtqueue_reset_region_cache(vq); + } + + void virtio_del_queue(VirtIODevice *vdev, int n) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofs-Add-maintainers-entry.patch b/SOURCES/kvm-virtiofs-Add-maintainers-entry.patch new file mode 100644 index 0000000..fec9371 --- /dev/null +++ b/SOURCES/kvm-virtiofs-Add-maintainers-entry.patch @@ -0,0 +1,52 @@ +From f4144443eacceb04823ee72cb2d4f9f841f05495 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:11 +0100 +Subject: [PATCH 040/116] virtiofs: Add maintainers entry +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-37-dgilbert@redhat.com> +Patchwork-id: 93491 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 036/112] virtiofs: Add maintainers entry +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit bad7d2c3ad1af9344df035aedaf8e0967a543070) +Signed-off-by: Miroslav Rezanina +--- + MAINTAINERS | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/MAINTAINERS b/MAINTAINERS +index 5e5e3e5..d1b3e26 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -1575,6 +1575,14 @@ T: git https://github.com/cohuck/qemu.git s390-next + T: git https://github.com/borntraeger/qemu.git s390-next + L: qemu-s390x@nongnu.org + ++virtiofs ++M: Dr. David Alan Gilbert ++M: Stefan Hajnoczi ++S: Supported ++F: tools/virtiofsd/* ++F: hw/virtio/vhost-user-fs* ++F: include/hw/virtio/vhost-user-fs.h ++ + virtio-input + M: Gerd Hoffmann + S: Maintained +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch b/SOURCES/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch new file mode 100644 index 0000000..a2b91be --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch @@ -0,0 +1,86 @@ +From 4d9106acfd7ed9e4d197ddf9f22b79ba6c8afdd8 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:38 +0100 +Subject: [PATCH 067/116] virtiofsd: Add ID to the log with FUSE_LOG_DEBUG + level +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-64-dgilbert@redhat.com> +Patchwork-id: 93514 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 063/112] virtiofsd: Add ID to the log with FUSE_LOG_DEBUG level +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Masayoshi Mizuma + +virtiofsd has some threads, so we see a lot of logs with debug option. +It would be useful for debugging if we can identify the specific thread +from the log. + +Add ID, which is got by gettid(), to the log with FUSE_LOG_DEBUG level +so that we can grep the specific thread. 
+ +The log is like as: + + ]# ./virtiofsd -d -o vhost_user_socket=/tmp/vhostqemu0 -o source=/tmp/share0 -o cache=auto + ... + [ID: 00000097] unique: 12696, success, outsize: 120 + [ID: 00000097] virtio_send_msg: elem 18: with 2 in desc of length 120 + [ID: 00000003] fv_queue_thread: Got queue event on Queue 1 + [ID: 00000003] fv_queue_thread: Queue 1 gave evalue: 1 available: in: 65552 out: 80 + [ID: 00000003] fv_queue_thread: Waiting for Queue 1 event + [ID: 00000071] fv_queue_worker: elem 33: with 2 out desc of length 80 bad_in_num=0 bad_out_num=0 + [ID: 00000071] unique: 12694, opcode: READ (15), nodeid: 2, insize: 80, pid: 2014 + [ID: 00000071] lo_read(ino=2, size=65536, off=131072) + +Signed-off-by: Masayoshi Mizuma + +Signed-off-by: Dr. David Alan Gilbert + added rework as suggested by Daniel P. Berrangé during review +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 36f3846902bd41413f6c0bf797dee509028c29f4) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index ff6910f..f08324f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -43,6 +43,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -2268,10 +2269,17 @@ static void setup_nofile_rlimit(void) + + static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + { ++ g_autofree char *localfmt = NULL; ++ + if (current_log_level < level) { + return; + } + ++ if (current_log_level == FUSE_LOG_DEBUG) { ++ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt); ++ fmt = localfmt; ++ } ++ + if (use_syslog) { + int priority = LOG_ERR; + switch (level) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch 
b/SOURCES/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch new file mode 100644 index 0000000..b017bf4 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch @@ -0,0 +1,106 @@ +From 709408de33112d32b7c6675f8c9320b8bebccd58 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:05 +0100 +Subject: [PATCH 034/116] virtiofsd: Add Makefile wiring for virtiofsd contrib +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-31-dgilbert@redhat.com> +Patchwork-id: 93482 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 030/112] virtiofsd: Add Makefile wiring for virtiofsd contrib +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Wire up the building of the virtiofsd in tools. + +virtiofsd relies on Linux-specific system calls and seccomp. Anyone +wishing to port it to other host operating systems should do so +carefully and without reducing security. + +Only allow building on Linux hosts. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Liam Merwick +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 81bfc42dcf473bc8d3790622633410da72d8e622) +Signed-off-by: Miroslav Rezanina +--- + Makefile | 10 ++++++++++ + Makefile.objs | 1 + + tools/virtiofsd/Makefile.objs | 9 +++++++++ + 3 files changed, 20 insertions(+) + create mode 100644 tools/virtiofsd/Makefile.objs + +diff --git a/Makefile b/Makefile +index 4254950..1526775 100644 +--- a/Makefile ++++ b/Makefile +@@ -330,6 +330,10 @@ endif + endif + endif + ++ifdef CONFIG_LINUX ++HELPERS-y += virtiofsd$(EXESUF) ++endif ++ + # Sphinx does not allow building manuals into the same directory as + # the source files, so if we're doing an in-tree QEMU build we must + # build the manuals into a subdirectory (and then install them from +@@ -430,6 +434,7 @@ dummy := $(call unnest-vars,, \ + elf2dmp-obj-y \ + ivshmem-client-obj-y \ + ivshmem-server-obj-y \ ++ virtiofsd-obj-y \ + rdmacm-mux-obj-y \ + libvhost-user-obj-y \ + vhost-user-scsi-obj-y \ +@@ -675,6 +680,11 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad" + rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) + $(call LINK, $^) + ++ifdef CONFIG_LINUX # relies on Linux-specific syscalls ++virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) ++ $(call LINK, $^) ++endif ++ + vhost-user-gpu$(EXESUF): $(vhost-user-gpu-obj-y) $(libvhost-user-obj-y) libqemuutil.a libqemustub.a + $(call LINK, $^) + +diff --git a/Makefile.objs b/Makefile.objs +index fcf63e1..1a8f288 100644 +--- a/Makefile.objs ++++ b/Makefile.objs +@@ -125,6 +125,7 @@ vhost-user-blk-obj-y = contrib/vhost-user-blk/ + rdmacm-mux-obj-y = contrib/rdmacm-mux/ + vhost-user-input-obj-y = contrib/vhost-user-input/ + vhost-user-gpu-obj-y = contrib/vhost-user-gpu/ ++virtiofsd-obj-y = tools/virtiofsd/ + + ###################################################################### + trace-events-subdirs = +diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs +new file mode 100644 +index 0000000..45a8075 +--- /dev/null ++++ 
b/tools/virtiofsd/Makefile.objs +@@ -0,0 +1,9 @@ ++virtiofsd-obj-y = buffer.o \ ++ fuse_opt.o \ ++ fuse_log.o \ ++ fuse_lowlevel.o \ ++ fuse_signals.o \ ++ fuse_virtio.o \ ++ helper.o \ ++ passthrough_ll.o ++ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-auxiliary-.c-s.patch b/SOURCES/kvm-virtiofsd-Add-auxiliary-.c-s.patch new file mode 100644 index 0000000..90150d9 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-auxiliary-.c-s.patch @@ -0,0 +1,1387 @@ +From 55b4059d6399c212109c758190e15b574accdd07 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:41 +0100 +Subject: [PATCH 010/116] virtiofsd: Add auxiliary .c's +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-7-dgilbert@redhat.com> +Patchwork-id: 93461 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 006/112] virtiofsd: Add auxiliary .c's +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Add most of the non-main .c files we need from upstream fuse-3.8.0 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit ffcf8d9f8649c6e56b1193bbbc9c9f7388920043) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 321 ++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_log.c | 40 ++++ + tools/virtiofsd/fuse_opt.c | 423 +++++++++++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_signals.c | 91 +++++++++ + tools/virtiofsd/helper.c | 440 +++++++++++++++++++++++++++++++++++++++++ + 5 files changed, 1315 insertions(+) + create mode 100644 tools/virtiofsd/buffer.c + create mode 100644 tools/virtiofsd/fuse_log.c + create mode 100644 tools/virtiofsd/fuse_opt.c + create mode 100644 tools/virtiofsd/fuse_signals.c + create mode 100644 tools/virtiofsd/helper.c + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +new file mode 100644 +index 0000000..5ab9b87 +--- /dev/null ++++ b/tools/virtiofsd/buffer.c +@@ -0,0 +1,321 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2010 Miklos Szeredi ++ ++ Functions for dealing with `struct fuse_buf` and `struct ++ fuse_bufvec`. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB ++*/ ++ ++#define _GNU_SOURCE ++ ++#include "config.h" ++#include "fuse_i.h" ++#include "fuse_lowlevel.h" ++#include ++#include ++#include ++#include ++ ++size_t fuse_buf_size(const struct fuse_bufvec *bufv) ++{ ++ size_t i; ++ size_t size = 0; ++ ++ for (i = 0; i < bufv->count; i++) { ++ if (bufv->buf[i].size == SIZE_MAX) ++ size = SIZE_MAX; ++ else ++ size += bufv->buf[i].size; ++ } ++ ++ return size; ++} ++ ++static size_t min_size(size_t s1, size_t s2) ++{ ++ return s1 < s2 ? 
s1 : s2; ++} ++ ++static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) ++{ ++ ssize_t res = 0; ++ size_t copied = 0; ++ ++ while (len) { ++ if (dst->flags & FUSE_BUF_FD_SEEK) { ++ res = pwrite(dst->fd, (char *)src->mem + src_off, len, ++ dst->pos + dst_off); ++ } else { ++ res = write(dst->fd, (char *)src->mem + src_off, len); ++ } ++ if (res == -1) { ++ if (!copied) ++ return -errno; ++ break; ++ } ++ if (res == 0) ++ break; ++ ++ copied += res; ++ if (!(dst->flags & FUSE_BUF_FD_RETRY)) ++ break; ++ ++ src_off += res; ++ dst_off += res; ++ len -= res; ++ } ++ ++ return copied; ++} ++ ++static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) ++{ ++ ssize_t res = 0; ++ size_t copied = 0; ++ ++ while (len) { ++ if (src->flags & FUSE_BUF_FD_SEEK) { ++ res = pread(src->fd, (char *)dst->mem + dst_off, len, ++ src->pos + src_off); ++ } else { ++ res = read(src->fd, (char *)dst->mem + dst_off, len); ++ } ++ if (res == -1) { ++ if (!copied) ++ return -errno; ++ break; ++ } ++ if (res == 0) ++ break; ++ ++ copied += res; ++ if (!(src->flags & FUSE_BUF_FD_RETRY)) ++ break; ++ ++ dst_off += res; ++ src_off += res; ++ len -= res; ++ } ++ ++ return copied; ++} ++ ++static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) ++{ ++ char buf[4096]; ++ struct fuse_buf tmp = { ++ .size = sizeof(buf), ++ .flags = 0, ++ }; ++ ssize_t res; ++ size_t copied = 0; ++ ++ tmp.mem = buf; ++ ++ while (len) { ++ size_t this_len = min_size(tmp.size, len); ++ size_t read_len; ++ ++ res = fuse_buf_read(&tmp, 0, src, src_off, this_len); ++ if (res < 0) { ++ if (!copied) ++ return res; ++ break; ++ } ++ if (res == 0) ++ break; ++ ++ read_len = res; ++ res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); ++ if (res < 0) { ++ if (!copied) ++ return res; ++ 
break; ++ } ++ if (res == 0) ++ break; ++ ++ copied += res; ++ ++ if (res < this_len) ++ break; ++ ++ dst_off += res; ++ src_off += res; ++ len -= res; ++ } ++ ++ return copied; ++} ++ ++#ifdef HAVE_SPLICE ++static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len, enum fuse_buf_copy_flags flags) ++{ ++ int splice_flags = 0; ++ off_t *srcpos = NULL; ++ off_t *dstpos = NULL; ++ off_t srcpos_val; ++ off_t dstpos_val; ++ ssize_t res; ++ size_t copied = 0; ++ ++ if (flags & FUSE_BUF_SPLICE_MOVE) ++ splice_flags |= SPLICE_F_MOVE; ++ if (flags & FUSE_BUF_SPLICE_NONBLOCK) ++ splice_flags |= SPLICE_F_NONBLOCK; ++ ++ if (src->flags & FUSE_BUF_FD_SEEK) { ++ srcpos_val = src->pos + src_off; ++ srcpos = &srcpos_val; ++ } ++ if (dst->flags & FUSE_BUF_FD_SEEK) { ++ dstpos_val = dst->pos + dst_off; ++ dstpos = &dstpos_val; ++ } ++ ++ while (len) { ++ res = splice(src->fd, srcpos, dst->fd, dstpos, len, ++ splice_flags); ++ if (res == -1) { ++ if (copied) ++ break; ++ ++ if (errno != EINVAL || (flags & FUSE_BUF_FORCE_SPLICE)) ++ return -errno; ++ ++ /* Maybe splice is not supported for this combination */ ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, ++ len); ++ } ++ if (res == 0) ++ break; ++ ++ copied += res; ++ if (!(src->flags & FUSE_BUF_FD_RETRY) && ++ !(dst->flags & FUSE_BUF_FD_RETRY)) { ++ break; ++ } ++ ++ len -= res; ++ } ++ ++ return copied; ++} ++#else ++static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len, enum fuse_buf_copy_flags flags) ++{ ++ (void) flags; ++ ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); ++} ++#endif ++ ++ ++static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len, enum fuse_buf_copy_flags flags) ++{ ++ int src_is_fd = src->flags & FUSE_BUF_IS_FD; ++ int dst_is_fd = dst->flags & 
FUSE_BUF_IS_FD; ++ ++ if (!src_is_fd && !dst_is_fd) { ++ char *dstmem = (char *)dst->mem + dst_off; ++ char *srcmem = (char *)src->mem + src_off; ++ ++ if (dstmem != srcmem) { ++ if (dstmem + len <= srcmem || srcmem + len <= dstmem) ++ memcpy(dstmem, srcmem, len); ++ else ++ memmove(dstmem, srcmem, len); ++ } ++ ++ return len; ++ } else if (!src_is_fd) { ++ return fuse_buf_write(dst, dst_off, src, src_off, len); ++ } else if (!dst_is_fd) { ++ return fuse_buf_read(dst, dst_off, src, src_off, len); ++ } else if (flags & FUSE_BUF_NO_SPLICE) { ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); ++ } else { ++ return fuse_buf_splice(dst, dst_off, src, src_off, len, flags); ++ } ++} ++ ++static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv) ++{ ++ if (bufv->idx < bufv->count) ++ return &bufv->buf[bufv->idx]; ++ else ++ return NULL; ++} ++ ++static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) ++{ ++ const struct fuse_buf *buf = fuse_bufvec_current(bufv); ++ ++ bufv->off += len; ++ assert(bufv->off <= buf->size); ++ if (bufv->off == buf->size) { ++ assert(bufv->idx < bufv->count); ++ bufv->idx++; ++ if (bufv->idx == bufv->count) ++ return 0; ++ bufv->off = 0; ++ } ++ return 1; ++} ++ ++ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, ++ enum fuse_buf_copy_flags flags) ++{ ++ size_t copied = 0; ++ ++ if (dstv == srcv) ++ return fuse_buf_size(dstv); ++ ++ for (;;) { ++ const struct fuse_buf *src = fuse_bufvec_current(srcv); ++ const struct fuse_buf *dst = fuse_bufvec_current(dstv); ++ size_t src_len; ++ size_t dst_len; ++ size_t len; ++ ssize_t res; ++ ++ if (src == NULL || dst == NULL) ++ break; ++ ++ src_len = src->size - srcv->off; ++ dst_len = dst->size - dstv->off; ++ len = min_size(src_len, dst_len); ++ ++ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); ++ if (res < 0) { ++ if (!copied) ++ return res; ++ break; ++ } ++ copied += res; ++ ++ if (!fuse_bufvec_advance(srcv, res) 
|| ++ !fuse_bufvec_advance(dstv, res)) ++ break; ++ ++ if (res < len) ++ break; ++ } ++ ++ return copied; ++} +diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c +new file mode 100644 +index 0000000..0d268ab +--- /dev/null ++++ b/tools/virtiofsd/fuse_log.c +@@ -0,0 +1,40 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2019 Red Hat, Inc. ++ ++ Logging API. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB ++*/ ++ ++#include "fuse_log.h" ++ ++#include ++#include ++ ++static void default_log_func( ++ __attribute__(( unused )) enum fuse_log_level level, ++ const char *fmt, va_list ap) ++{ ++ vfprintf(stderr, fmt, ap); ++} ++ ++static fuse_log_func_t log_func = default_log_func; ++ ++void fuse_set_log_func(fuse_log_func_t func) ++{ ++ if (!func) ++ func = default_log_func; ++ ++ log_func = func; ++} ++ ++void fuse_log(enum fuse_log_level level, const char *fmt, ...) ++{ ++ va_list ap; ++ ++ va_start(ap, fmt); ++ log_func(level, fmt, ap); ++ va_end(ap); ++} +diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c +new file mode 100644 +index 0000000..93066b9 +--- /dev/null ++++ b/tools/virtiofsd/fuse_opt.c +@@ -0,0 +1,423 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ Implementation of option parsing routines (dealing with `struct ++ fuse_args`). ++ ++ This program can be distributed under the terms of the GNU LGPLv2. 
++ See the file COPYING.LIB ++*/ ++ ++#include "config.h" ++#include "fuse_i.h" ++#include "fuse_opt.h" ++#include "fuse_misc.h" ++ ++#include ++#include ++#include ++#include ++ ++struct fuse_opt_context { ++ void *data; ++ const struct fuse_opt *opt; ++ fuse_opt_proc_t proc; ++ int argctr; ++ int argc; ++ char **argv; ++ struct fuse_args outargs; ++ char *opts; ++ int nonopt; ++}; ++ ++void fuse_opt_free_args(struct fuse_args *args) ++{ ++ if (args) { ++ if (args->argv && args->allocated) { ++ int i; ++ for (i = 0; i < args->argc; i++) ++ free(args->argv[i]); ++ free(args->argv); ++ } ++ args->argc = 0; ++ args->argv = NULL; ++ args->allocated = 0; ++ } ++} ++ ++static int alloc_failed(void) ++{ ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; ++} ++ ++int fuse_opt_add_arg(struct fuse_args *args, const char *arg) ++{ ++ char **newargv; ++ char *newarg; ++ ++ assert(!args->argv || args->allocated); ++ ++ newarg = strdup(arg); ++ if (!newarg) ++ return alloc_failed(); ++ ++ newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *)); ++ if (!newargv) { ++ free(newarg); ++ return alloc_failed(); ++ } ++ ++ args->argv = newargv; ++ args->allocated = 1; ++ args->argv[args->argc++] = newarg; ++ args->argv[args->argc] = NULL; ++ return 0; ++} ++ ++static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos, ++ const char *arg) ++{ ++ assert(pos <= args->argc); ++ if (fuse_opt_add_arg(args, arg) == -1) ++ return -1; ++ ++ if (pos != args->argc - 1) { ++ char *newarg = args->argv[args->argc - 1]; ++ memmove(&args->argv[pos + 1], &args->argv[pos], ++ sizeof(char *) * (args->argc - pos - 1)); ++ args->argv[pos] = newarg; ++ } ++ return 0; ++} ++ ++int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg) ++{ ++ return fuse_opt_insert_arg_common(args, pos, arg); ++} ++ ++static int next_arg(struct fuse_opt_context *ctx, const char *opt) ++{ ++ if (ctx->argctr + 1 >= ctx->argc) { ++ fuse_log(FUSE_LOG_ERR, "fuse: 
missing argument after `%s'\n", opt); ++ return -1; ++ } ++ ctx->argctr++; ++ return 0; ++} ++ ++static int add_arg(struct fuse_opt_context *ctx, const char *arg) ++{ ++ return fuse_opt_add_arg(&ctx->outargs, arg); ++} ++ ++static int add_opt_common(char **opts, const char *opt, int esc) ++{ ++ unsigned oldlen = *opts ? strlen(*opts) : 0; ++ char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); ++ ++ if (!d) ++ return alloc_failed(); ++ ++ *opts = d; ++ if (oldlen) { ++ d += oldlen; ++ *d++ = ','; ++ } ++ ++ for (; *opt; opt++) { ++ if (esc && (*opt == ',' || *opt == '\\')) ++ *d++ = '\\'; ++ *d++ = *opt; ++ } ++ *d = '\0'; ++ ++ return 0; ++} ++ ++int fuse_opt_add_opt(char **opts, const char *opt) ++{ ++ return add_opt_common(opts, opt, 0); ++} ++ ++int fuse_opt_add_opt_escaped(char **opts, const char *opt) ++{ ++ return add_opt_common(opts, opt, 1); ++} ++ ++static int add_opt(struct fuse_opt_context *ctx, const char *opt) ++{ ++ return add_opt_common(&ctx->opts, opt, 1); ++} ++ ++static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key, ++ int iso) ++{ ++ if (key == FUSE_OPT_KEY_DISCARD) ++ return 0; ++ ++ if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { ++ int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); ++ if (res == -1 || !res) ++ return res; ++ } ++ if (iso) ++ return add_opt(ctx, arg); ++ else ++ return add_arg(ctx, arg); ++} ++ ++static int match_template(const char *t, const char *arg, unsigned *sepp) ++{ ++ int arglen = strlen(arg); ++ const char *sep = strchr(t, '='); ++ sep = sep ? 
sep : strchr(t, ' '); ++ if (sep && (!sep[1] || sep[1] == '%')) { ++ int tlen = sep - t; ++ if (sep[0] == '=') ++ tlen ++; ++ if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { ++ *sepp = sep - t; ++ return 1; ++ } ++ } ++ if (strcmp(t, arg) == 0) { ++ *sepp = 0; ++ return 1; ++ } ++ return 0; ++} ++ ++static const struct fuse_opt *find_opt(const struct fuse_opt *opt, ++ const char *arg, unsigned *sepp) ++{ ++ for (; opt && opt->templ; opt++) ++ if (match_template(opt->templ, arg, sepp)) ++ return opt; ++ return NULL; ++} ++ ++int fuse_opt_match(const struct fuse_opt *opts, const char *opt) ++{ ++ unsigned dummy; ++ return find_opt(opts, opt, &dummy) ? 1 : 0; ++} ++ ++static int process_opt_param(void *var, const char *format, const char *param, ++ const char *arg) ++{ ++ assert(format[0] == '%'); ++ if (format[1] == 's') { ++ char **s = var; ++ char *copy = strdup(param); ++ if (!copy) ++ return alloc_failed(); ++ ++ free(*s); ++ *s = copy; ++ } else { ++ if (sscanf(param, format, var) != 1) { ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", arg); ++ return -1; ++ } ++ } ++ return 0; ++} ++ ++static int process_opt(struct fuse_opt_context *ctx, ++ const struct fuse_opt *opt, unsigned sep, ++ const char *arg, int iso) ++{ ++ if (opt->offset == -1U) { ++ if (call_proc(ctx, arg, opt->value, iso) == -1) ++ return -1; ++ } else { ++ void *var = (char *)ctx->data + opt->offset; ++ if (sep && opt->templ[sep + 1]) { ++ const char *param = arg + sep; ++ if (opt->templ[sep] == '=') ++ param ++; ++ if (process_opt_param(var, opt->templ + sep + 1, ++ param, arg) == -1) ++ return -1; ++ } else ++ *(int *)var = opt->value; ++ } ++ return 0; ++} ++ ++static int process_opt_sep_arg(struct fuse_opt_context *ctx, ++ const struct fuse_opt *opt, unsigned sep, ++ const char *arg, int iso) ++{ ++ int res; ++ char *newarg; ++ char *param; ++ ++ if (next_arg(ctx, arg) == -1) ++ return -1; ++ ++ param = ctx->argv[ctx->argctr]; ++ newarg = malloc(sep + strlen(param) + 
1); ++ if (!newarg) ++ return alloc_failed(); ++ ++ memcpy(newarg, arg, sep); ++ strcpy(newarg + sep, param); ++ res = process_opt(ctx, opt, sep, newarg, iso); ++ free(newarg); ++ ++ return res; ++} ++ ++static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso) ++{ ++ unsigned sep; ++ const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); ++ if (opt) { ++ for (; opt; opt = find_opt(opt + 1, arg, &sep)) { ++ int res; ++ if (sep && opt->templ[sep] == ' ' && !arg[sep]) ++ res = process_opt_sep_arg(ctx, opt, sep, arg, ++ iso); ++ else ++ res = process_opt(ctx, opt, sep, arg, iso); ++ if (res == -1) ++ return -1; ++ } ++ return 0; ++ } else ++ return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); ++} ++ ++static int process_real_option_group(struct fuse_opt_context *ctx, char *opts) ++{ ++ char *s = opts; ++ char *d = s; ++ int end = 0; ++ ++ while (!end) { ++ if (*s == '\0') ++ end = 1; ++ if (*s == ',' || end) { ++ int res; ++ ++ *d = '\0'; ++ res = process_gopt(ctx, opts, 1); ++ if (res == -1) ++ return -1; ++ d = opts; ++ } else { ++ if (s[0] == '\\' && s[1] != '\0') { ++ s++; ++ if (s[0] >= '0' && s[0] <= '3' && ++ s[1] >= '0' && s[1] <= '7' && ++ s[2] >= '0' && s[2] <= '7') { ++ *d++ = (s[0] - '0') * 0100 + ++ (s[1] - '0') * 0010 + ++ (s[2] - '0'); ++ s += 2; ++ } else { ++ *d++ = *s; ++ } ++ } else { ++ *d++ = *s; ++ } ++ } ++ s++; ++ } ++ ++ return 0; ++} ++ ++static int process_option_group(struct fuse_opt_context *ctx, const char *opts) ++{ ++ int res; ++ char *copy = strdup(opts); ++ ++ if (!copy) { ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; ++ } ++ res = process_real_option_group(ctx, copy); ++ free(copy); ++ return res; ++} ++ ++static int process_one(struct fuse_opt_context *ctx, const char *arg) ++{ ++ if (ctx->nonopt || arg[0] != '-') ++ return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); ++ else if (arg[1] == 'o') { ++ if (arg[2]) ++ return process_option_group(ctx, arg + 2); ++ else { ++ if 
(next_arg(ctx, arg) == -1) ++ return -1; ++ ++ return process_option_group(ctx, ++ ctx->argv[ctx->argctr]); ++ } ++ } else if (arg[1] == '-' && !arg[2]) { ++ if (add_arg(ctx, arg) == -1) ++ return -1; ++ ctx->nonopt = ctx->outargs.argc; ++ return 0; ++ } else ++ return process_gopt(ctx, arg, 0); ++} ++ ++static int opt_parse(struct fuse_opt_context *ctx) ++{ ++ if (ctx->argc) { ++ if (add_arg(ctx, ctx->argv[0]) == -1) ++ return -1; ++ } ++ ++ for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) ++ if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) ++ return -1; ++ ++ if (ctx->opts) { ++ if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || ++ fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) ++ return -1; ++ } ++ ++ /* If option separator ("--") is the last argument, remove it */ ++ if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && ++ strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { ++ free(ctx->outargs.argv[ctx->outargs.argc - 1]); ++ ctx->outargs.argv[--ctx->outargs.argc] = NULL; ++ } ++ ++ return 0; ++} ++ ++int fuse_opt_parse(struct fuse_args *args, void *data, ++ const struct fuse_opt opts[], fuse_opt_proc_t proc) ++{ ++ int res; ++ struct fuse_opt_context ctx = { ++ .data = data, ++ .opt = opts, ++ .proc = proc, ++ }; ++ ++ if (!args || !args->argv || !args->argc) ++ return 0; ++ ++ ctx.argc = args->argc; ++ ctx.argv = args->argv; ++ ++ res = opt_parse(&ctx); ++ if (res != -1) { ++ struct fuse_args tmp = *args; ++ *args = ctx.outargs; ++ ctx.outargs = tmp; ++ } ++ free(ctx.opts); ++ fuse_opt_free_args(&ctx.outargs); ++ return res; ++} +diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c +new file mode 100644 +index 0000000..4271947 +--- /dev/null ++++ b/tools/virtiofsd/fuse_signals.c +@@ -0,0 +1,91 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ Utility functions for setting signal handlers. 
++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB ++*/ ++ ++#include "config.h" ++#include "fuse_lowlevel.h" ++#include "fuse_i.h" ++ ++#include ++#include ++#include ++#include ++ ++static struct fuse_session *fuse_instance; ++ ++static void exit_handler(int sig) ++{ ++ if (fuse_instance) { ++ fuse_session_exit(fuse_instance); ++ if(sig <= 0) { ++ fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); ++ abort(); ++ } ++ fuse_instance->error = sig; ++ } ++} ++ ++static void do_nothing(int sig) ++{ ++ (void) sig; ++} ++ ++static int set_one_signal_handler(int sig, void (*handler)(int), int remove) ++{ ++ struct sigaction sa; ++ struct sigaction old_sa; ++ ++ memset(&sa, 0, sizeof(struct sigaction)); ++ sa.sa_handler = remove ? SIG_DFL : handler; ++ sigemptyset(&(sa.sa_mask)); ++ sa.sa_flags = 0; ++ ++ if (sigaction(sig, NULL, &old_sa) == -1) { ++ perror("fuse: cannot get old signal handler"); ++ return -1; ++ } ++ ++ if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && ++ sigaction(sig, &sa, NULL) == -1) { ++ perror("fuse: cannot set signal handler"); ++ return -1; ++ } ++ return 0; ++} ++ ++int fuse_set_signal_handlers(struct fuse_session *se) ++{ ++ /* If we used SIG_IGN instead of the do_nothing function, ++ then we would be unable to tell if we set SIG_IGN (and ++ thus should reset to SIG_DFL in fuse_remove_signal_handlers) ++ or if it was already set to SIG_IGN (and should be left ++ untouched. 
*/ ++ if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) ++ return -1; ++ ++ fuse_instance = se; ++ return 0; ++} ++ ++void fuse_remove_signal_handlers(struct fuse_session *se) ++{ ++ if (fuse_instance != se) ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: fuse_remove_signal_handlers: unknown session\n"); ++ else ++ fuse_instance = NULL; ++ ++ set_one_signal_handler(SIGHUP, exit_handler, 1); ++ set_one_signal_handler(SIGINT, exit_handler, 1); ++ set_one_signal_handler(SIGTERM, exit_handler, 1); ++ set_one_signal_handler(SIGPIPE, do_nothing, 1); ++} +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +new file mode 100644 +index 0000000..64ff7ad +--- /dev/null ++++ b/tools/virtiofsd/helper.c +@@ -0,0 +1,440 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ Helper functions to create (simple) standalone programs. With the ++ aid of these functions it should be possible to create full FUSE ++ file system by implementing nothing but the request handlers. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. 
++*/ ++ ++#include "config.h" ++#include "fuse_i.h" ++#include "fuse_misc.h" ++#include "fuse_opt.h" ++#include "fuse_lowlevel.h" ++#include "mount_util.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define FUSE_HELPER_OPT(t, p) \ ++ { t, offsetof(struct fuse_cmdline_opts, p), 1 } ++ ++static const struct fuse_opt fuse_helper_opts[] = { ++ FUSE_HELPER_OPT("-h", show_help), ++ FUSE_HELPER_OPT("--help", show_help), ++ FUSE_HELPER_OPT("-V", show_version), ++ FUSE_HELPER_OPT("--version", show_version), ++ FUSE_HELPER_OPT("-d", debug), ++ FUSE_HELPER_OPT("debug", debug), ++ FUSE_HELPER_OPT("-d", foreground), ++ FUSE_HELPER_OPT("debug", foreground), ++ FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), ++ FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), ++ FUSE_HELPER_OPT("-f", foreground), ++ FUSE_HELPER_OPT("-s", singlethread), ++ FUSE_HELPER_OPT("fsname=", nodefault_subtype), ++ FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), ++#ifndef __FreeBSD__ ++ FUSE_HELPER_OPT("subtype=", nodefault_subtype), ++ FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), ++#endif ++ FUSE_HELPER_OPT("clone_fd", clone_fd), ++ FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), ++ FUSE_OPT_END ++}; ++ ++struct fuse_conn_info_opts { ++ int atomic_o_trunc; ++ int no_remote_posix_lock; ++ int no_remote_flock; ++ int splice_write; ++ int splice_move; ++ int splice_read; ++ int no_splice_write; ++ int no_splice_move; ++ int no_splice_read; ++ int auto_inval_data; ++ int no_auto_inval_data; ++ int no_readdirplus; ++ int no_readdirplus_auto; ++ int async_dio; ++ int no_async_dio; ++ int writeback_cache; ++ int no_writeback_cache; ++ int async_read; ++ int sync_read; ++ unsigned max_write; ++ unsigned max_readahead; ++ unsigned max_background; ++ unsigned congestion_threshold; ++ unsigned time_gran; ++ int set_max_write; ++ int set_max_readahead; ++ int set_max_background; ++ int set_congestion_threshold; ++ int set_time_gran; ++}; ++ ++#define CONN_OPTION(t, p, v) \ ++ 
{ t, offsetof(struct fuse_conn_info_opts, p), v } ++static const struct fuse_opt conn_info_opt_spec[] = { ++ CONN_OPTION("max_write=%u", max_write, 0), ++ CONN_OPTION("max_write=", set_max_write, 1), ++ CONN_OPTION("max_readahead=%u", max_readahead, 0), ++ CONN_OPTION("max_readahead=", set_max_readahead, 1), ++ CONN_OPTION("max_background=%u", max_background, 0), ++ CONN_OPTION("max_background=", set_max_background, 1), ++ CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), ++ CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), ++ CONN_OPTION("sync_read", sync_read, 1), ++ CONN_OPTION("async_read", async_read, 1), ++ CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), ++ CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), ++ CONN_OPTION("no_remote_lock", no_remote_flock, 1), ++ CONN_OPTION("no_remote_flock", no_remote_flock, 1), ++ CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), ++ CONN_OPTION("splice_write", splice_write, 1), ++ CONN_OPTION("no_splice_write", no_splice_write, 1), ++ CONN_OPTION("splice_move", splice_move, 1), ++ CONN_OPTION("no_splice_move", no_splice_move, 1), ++ CONN_OPTION("splice_read", splice_read, 1), ++ CONN_OPTION("no_splice_read", no_splice_read, 1), ++ CONN_OPTION("auto_inval_data", auto_inval_data, 1), ++ CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), ++ CONN_OPTION("readdirplus=no", no_readdirplus, 1), ++ CONN_OPTION("readdirplus=yes", no_readdirplus, 0), ++ CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), ++ CONN_OPTION("readdirplus=auto", no_readdirplus, 0), ++ CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), ++ CONN_OPTION("async_dio", async_dio, 1), ++ CONN_OPTION("no_async_dio", no_async_dio, 1), ++ CONN_OPTION("writeback_cache", writeback_cache, 1), ++ CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), ++ CONN_OPTION("time_gran=%u", time_gran, 0), ++ CONN_OPTION("time_gran=", set_time_gran, 1), ++ FUSE_OPT_END ++}; ++ ++ ++void fuse_cmdline_help(void) 
++{ ++ printf(" -h --help print help\n" ++ " -V --version print version\n" ++ " -d -o debug enable debug output (implies -f)\n" ++ " -f foreground operation\n" ++ " -s disable multi-threaded operation\n" ++ " -o clone_fd use separate fuse device fd for each thread\n" ++ " (may improve performance)\n" ++ " -o max_idle_threads the maximum number of idle worker threads\n" ++ " allowed (default: 10)\n"); ++} ++ ++static int fuse_helper_opt_proc(void *data, const char *arg, int key, ++ struct fuse_args *outargs) ++{ ++ (void) outargs; ++ struct fuse_cmdline_opts *opts = data; ++ ++ switch (key) { ++ case FUSE_OPT_KEY_NONOPT: ++ if (!opts->mountpoint) { ++ if (fuse_mnt_parse_fuse_fd(arg) != -1) { ++ return fuse_opt_add_opt(&opts->mountpoint, arg); ++ } ++ ++ char mountpoint[PATH_MAX] = ""; ++ if (realpath(arg, mountpoint) == NULL) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: bad mount point `%s': %s\n", ++ arg, strerror(errno)); ++ return -1; ++ } ++ return fuse_opt_add_opt(&opts->mountpoint, mountpoint); ++ } else { ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); ++ return -1; ++ } ++ ++ default: ++ /* Pass through unknown options */ ++ return 1; ++ } ++} ++ ++/* Under FreeBSD, there is no subtype option so this ++ function actually sets the fsname */ ++static int add_default_subtype(const char *progname, struct fuse_args *args) ++{ ++ int res; ++ char *subtype_opt; ++ ++ const char *basename = strrchr(progname, '/'); ++ if (basename == NULL) ++ basename = progname; ++ else if (basename[1] != '\0') ++ basename++; ++ ++ subtype_opt = (char *) malloc(strlen(basename) + 64); ++ if (subtype_opt == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; ++ } ++#ifdef __FreeBSD__ ++ sprintf(subtype_opt, "-ofsname=%s", basename); ++#else ++ sprintf(subtype_opt, "-osubtype=%s", basename); ++#endif ++ res = fuse_opt_add_arg(args, subtype_opt); ++ free(subtype_opt); ++ return res; ++} ++ ++int fuse_parse_cmdline(struct fuse_args *args, ++ 
struct fuse_cmdline_opts *opts) ++{ ++ memset(opts, 0, sizeof(struct fuse_cmdline_opts)); ++ ++ opts->max_idle_threads = 10; ++ ++ if (fuse_opt_parse(args, opts, fuse_helper_opts, ++ fuse_helper_opt_proc) == -1) ++ return -1; ++ ++ /* *Linux*: if neither -o subtype nor -o fsname are specified, ++ set subtype to program's basename. ++ *FreeBSD*: if fsname is not specified, set to program's ++ basename. */ ++ if (!opts->nodefault_subtype) ++ if (add_default_subtype(args->argv[0], args) == -1) ++ return -1; ++ ++ return 0; ++} ++ ++ ++int fuse_daemonize(int foreground) ++{ ++ if (!foreground) { ++ int nullfd; ++ int waiter[2]; ++ char completed; ++ ++ if (pipe(waiter)) { ++ perror("fuse_daemonize: pipe"); ++ return -1; ++ } ++ ++ /* ++ * demonize current process by forking it and killing the ++ * parent. This makes current process as a child of 'init'. ++ */ ++ switch(fork()) { ++ case -1: ++ perror("fuse_daemonize: fork"); ++ return -1; ++ case 0: ++ break; ++ default: ++ (void) read(waiter[0], &completed, sizeof(completed)); ++ _exit(0); ++ } ++ ++ if (setsid() == -1) { ++ perror("fuse_daemonize: setsid"); ++ return -1; ++ } ++ ++ (void) chdir("/"); ++ ++ nullfd = open("/dev/null", O_RDWR, 0); ++ if (nullfd != -1) { ++ (void) dup2(nullfd, 0); ++ (void) dup2(nullfd, 1); ++ (void) dup2(nullfd, 2); ++ if (nullfd > 2) ++ close(nullfd); ++ } ++ ++ /* Propagate completion of daemon initialization */ ++ completed = 1; ++ (void) write(waiter[1], &completed, sizeof(completed)); ++ close(waiter[0]); ++ close(waiter[1]); ++ } else { ++ (void) chdir("/"); ++ } ++ return 0; ++} ++ ++int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, ++ size_t op_size, void *user_data) ++{ ++ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); ++ struct fuse *fuse; ++ struct fuse_cmdline_opts opts; ++ int res; ++ ++ if (fuse_parse_cmdline(&args, &opts) != 0) ++ return 1; ++ ++ if (opts.show_version) { ++ printf("FUSE library version %s\n", PACKAGE_VERSION); ++ 
fuse_lowlevel_version(); ++ res = 0; ++ goto out1; ++ } ++ ++ if (opts.show_help) { ++ if(args.argv[0][0] != '\0') ++ printf("usage: %s [options] \n\n", ++ args.argv[0]); ++ printf("FUSE options:\n"); ++ fuse_cmdline_help(); ++ fuse_lib_help(&args); ++ res = 0; ++ goto out1; ++ } ++ ++ if (!opts.show_help && ++ !opts.mountpoint) { ++ fuse_log(FUSE_LOG_ERR, "error: no mountpoint specified\n"); ++ res = 2; ++ goto out1; ++ } ++ ++ ++ fuse = fuse_new_31(&args, op, op_size, user_data); ++ if (fuse == NULL) { ++ res = 3; ++ goto out1; ++ } ++ ++ if (fuse_mount(fuse,opts.mountpoint) != 0) { ++ res = 4; ++ goto out2; ++ } ++ ++ if (fuse_daemonize(opts.foreground) != 0) { ++ res = 5; ++ goto out3; ++ } ++ ++ struct fuse_session *se = fuse_get_session(fuse); ++ if (fuse_set_signal_handlers(se) != 0) { ++ res = 6; ++ goto out3; ++ } ++ ++ if (opts.singlethread) ++ res = fuse_loop(fuse); ++ else { ++ struct fuse_loop_config loop_config; ++ loop_config.clone_fd = opts.clone_fd; ++ loop_config.max_idle_threads = opts.max_idle_threads; ++ res = fuse_loop_mt_32(fuse, &loop_config); ++ } ++ if (res) ++ res = 7; ++ ++ fuse_remove_signal_handlers(se); ++out3: ++ fuse_unmount(fuse); ++out2: ++ fuse_destroy(fuse); ++out1: ++ free(opts.mountpoint); ++ fuse_opt_free_args(&args); ++ return res; ++} ++ ++ ++void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, ++ struct fuse_conn_info *conn) ++{ ++ if(opts->set_max_write) ++ conn->max_write = opts->max_write; ++ if(opts->set_max_background) ++ conn->max_background = opts->max_background; ++ if(opts->set_congestion_threshold) ++ conn->congestion_threshold = opts->congestion_threshold; ++ if(opts->set_time_gran) ++ conn->time_gran = opts->time_gran; ++ if(opts->set_max_readahead) ++ conn->max_readahead = opts->max_readahead; ++ ++#define LL_ENABLE(cond,cap) \ ++ if (cond) conn->want |= (cap) ++#define LL_DISABLE(cond,cap) \ ++ if (cond) conn->want &= ~(cap) ++ ++ LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); ++ 
LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); ++ ++ LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); ++ LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); ++ ++ LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); ++ LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); ++ ++ LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); ++ LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); ++ ++ LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); ++ LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO); ++ ++ LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); ++ LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); ++ ++ LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); ++ LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); ++ ++ LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); ++ LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); ++ ++ LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); ++ LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); ++} ++ ++struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) ++{ ++ struct fuse_conn_info_opts *opts; ++ ++ opts = calloc(1, sizeof(struct fuse_conn_info_opts)); ++ if(opts == NULL) { ++ fuse_log(FUSE_LOG_ERR, "calloc failed\n"); ++ return NULL; ++ } ++ if(fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { ++ free(opts); ++ return NULL; ++ } ++ return opts; ++} ++ ++int fuse_open_channel(const char *mountpoint, const char* options) ++{ ++ struct mount_opts *opts = NULL; ++ int fd = -1; ++ const char *argv[] = { "", "-o", options }; ++ int argc = sizeof(argv) / sizeof(argv[0]); ++ struct fuse_args args = FUSE_ARGS_INIT(argc, (char**) argv); ++ ++ opts = parse_mount_opts(&args); ++ if (opts == NULL) ++ return -1; ++ ++ fd = fuse_kern_mount(mountpoint, opts); ++ destroy_mount_opts(opts); ++ ++ return fd; ++} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-fuse_lowlevel.c.patch 
b/SOURCES/kvm-virtiofsd-Add-fuse_lowlevel.c.patch new file mode 100644 index 0000000..1318fef --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-fuse_lowlevel.c.patch @@ -0,0 +1,3172 @@ +From f6c6830f772e8060255323d2a458cd0e774d9654 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:42 +0100 +Subject: [PATCH 011/116] virtiofsd: Add fuse_lowlevel.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-8-dgilbert@redhat.com> +Patchwork-id: 93456 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 007/112] virtiofsd: Add fuse_lowlevel.c +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +fuse_lowlevel is one of the largest files from the library +and does most of the work. Add it separately to keep the diff +sizes small. +Again this is from upstream fuse-3.8.0 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 2de121f01e37e2fe98a4362f4abf7c0848697f76) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 3129 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 3129 insertions(+) + create mode 100644 tools/virtiofsd/fuse_lowlevel.c + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +new file mode 100644 +index 0000000..f2d7038 +--- /dev/null ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -0,0 +1,3129 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ Implementation of (most of) the low-level FUSE API. The session loop ++ functions are implemented in separate files. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. 
++ See the file COPYING.LIB ++*/ ++ ++#define _GNU_SOURCE ++ ++#include "config.h" ++#include "fuse_i.h" ++#include "fuse_kernel.h" ++#include "fuse_opt.h" ++#include "fuse_misc.h" ++#include "mount_util.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifndef F_LINUX_SPECIFIC_BASE ++#define F_LINUX_SPECIFIC_BASE 1024 ++#endif ++#ifndef F_SETPIPE_SZ ++#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) ++#endif ++ ++ ++#define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) ++#define OFFSET_MAX 0x7fffffffffffffffLL ++ ++#define container_of(ptr, type, member) ({ \ ++ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ ++ (type *)( (char *)__mptr - offsetof(type,member) );}) ++ ++struct fuse_pollhandle { ++ uint64_t kh; ++ struct fuse_session *se; ++}; ++ ++static size_t pagesize; ++ ++static __attribute__((constructor)) void fuse_ll_init_pagesize(void) ++{ ++ pagesize = getpagesize(); ++} ++ ++static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) ++{ ++ attr->ino = stbuf->st_ino; ++ attr->mode = stbuf->st_mode; ++ attr->nlink = stbuf->st_nlink; ++ attr->uid = stbuf->st_uid; ++ attr->gid = stbuf->st_gid; ++ attr->rdev = stbuf->st_rdev; ++ attr->size = stbuf->st_size; ++ attr->blksize = stbuf->st_blksize; ++ attr->blocks = stbuf->st_blocks; ++ attr->atime = stbuf->st_atime; ++ attr->mtime = stbuf->st_mtime; ++ attr->ctime = stbuf->st_ctime; ++ attr->atimensec = ST_ATIM_NSEC(stbuf); ++ attr->mtimensec = ST_MTIM_NSEC(stbuf); ++ attr->ctimensec = ST_CTIM_NSEC(stbuf); ++} ++ ++static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) ++{ ++ stbuf->st_mode = attr->mode; ++ stbuf->st_uid = attr->uid; ++ stbuf->st_gid = attr->gid; ++ stbuf->st_size = attr->size; ++ stbuf->st_atime = attr->atime; ++ stbuf->st_mtime = attr->mtime; ++ stbuf->st_ctime = attr->ctime; ++ ST_ATIM_NSEC_SET(stbuf, attr->atimensec); ++ ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); ++ 
ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); ++} ++ ++static size_t iov_length(const struct iovec *iov, size_t count) ++{ ++ size_t seg; ++ size_t ret = 0; ++ ++ for (seg = 0; seg < count; seg++) ++ ret += iov[seg].iov_len; ++ return ret; ++} ++ ++static void list_init_req(struct fuse_req *req) ++{ ++ req->next = req; ++ req->prev = req; ++} ++ ++static void list_del_req(struct fuse_req *req) ++{ ++ struct fuse_req *prev = req->prev; ++ struct fuse_req *next = req->next; ++ prev->next = next; ++ next->prev = prev; ++} ++ ++static void list_add_req(struct fuse_req *req, struct fuse_req *next) ++{ ++ struct fuse_req *prev = next->prev; ++ req->next = next; ++ req->prev = prev; ++ prev->next = req; ++ next->prev = req; ++} ++ ++static void destroy_req(fuse_req_t req) ++{ ++ pthread_mutex_destroy(&req->lock); ++ free(req); ++} ++ ++void fuse_free_req(fuse_req_t req) ++{ ++ int ctr; ++ struct fuse_session *se = req->se; ++ ++ pthread_mutex_lock(&se->lock); ++ req->u.ni.func = NULL; ++ req->u.ni.data = NULL; ++ list_del_req(req); ++ ctr = --req->ctr; ++ fuse_chan_put(req->ch); ++ req->ch = NULL; ++ pthread_mutex_unlock(&se->lock); ++ if (!ctr) ++ destroy_req(req); ++} ++ ++static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se) ++{ ++ struct fuse_req *req; ++ ++ req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req)); ++ if (req == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); ++ } else { ++ req->se = se; ++ req->ctr = 1; ++ list_init_req(req); ++ fuse_mutex_init(&req->lock); ++ } ++ ++ return req; ++} ++ ++/* Send data. 
If *ch* is NULL, send via session master fd */ ++static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count) ++{ ++ struct fuse_out_header *out = iov[0].iov_base; ++ ++ out->len = iov_length(iov, count); ++ if (se->debug) { ++ if (out->unique == 0) { ++ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", ++ out->error, out->len); ++ } else if (out->error) { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, error: %i (%s), outsize: %i\n", ++ (unsigned long long) out->unique, out->error, ++ strerror(-out->error), out->len); ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, success, outsize: %i\n", ++ (unsigned long long) out->unique, out->len); ++ } ++ } ++ ++ ssize_t res = writev(ch ? ch->fd : se->fd, ++ iov, count); ++ int err = errno; ++ ++ if (res == -1) { ++ assert(se != NULL); ++ ++ /* ENOENT means the operation was interrupted */ ++ if (!fuse_session_exited(se) && err != ENOENT) ++ perror("fuse: writing device"); ++ return -err; ++ } ++ ++ return 0; ++} ++ ++ ++int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, ++ int count) ++{ ++ struct fuse_out_header out; ++ ++ if (error <= -1000 || error > 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); ++ error = -ERANGE; ++ } ++ ++ out.unique = req->unique; ++ out.error = error; ++ ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); ++ ++ return fuse_send_msg(req->se, req->ch, iov, count); ++} ++ ++static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, ++ int count) ++{ ++ int res; ++ ++ res = fuse_send_reply_iov_nofree(req, error, iov, count); ++ fuse_free_req(req); ++ return res; ++} ++ ++static int send_reply(fuse_req_t req, int error, const void *arg, ++ size_t argsize) ++{ ++ struct iovec iov[2]; ++ int count = 1; ++ if (argsize) { ++ iov[1].iov_base = (void *) arg; ++ iov[1].iov_len = argsize; ++ count++; ++ } ++ return send_reply_iov(req, error, iov, count); ++} ++ 
++int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count) ++{ ++ int res; ++ struct iovec *padded_iov; ++ ++ padded_iov = malloc((count + 1) * sizeof(struct iovec)); ++ if (padded_iov == NULL) ++ return fuse_reply_err(req, ENOMEM); ++ ++ memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); ++ count++; ++ ++ res = send_reply_iov(req, 0, padded_iov, count); ++ free(padded_iov); ++ ++ return res; ++} ++ ++ ++/* `buf` is allowed to be empty so that the proper size may be ++ allocated by the caller */ ++size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, ++ const char *name, const struct stat *stbuf, off_t off) ++{ ++ (void)req; ++ size_t namelen; ++ size_t entlen; ++ size_t entlen_padded; ++ struct fuse_dirent *dirent; ++ ++ namelen = strlen(name); ++ entlen = FUSE_NAME_OFFSET + namelen; ++ entlen_padded = FUSE_DIRENT_ALIGN(entlen); ++ ++ if ((buf == NULL) || (entlen_padded > bufsize)) ++ return entlen_padded; ++ ++ dirent = (struct fuse_dirent*) buf; ++ dirent->ino = stbuf->st_ino; ++ dirent->off = off; ++ dirent->namelen = namelen; ++ dirent->type = (stbuf->st_mode & S_IFMT) >> 12; ++ memcpy(dirent->name, name, namelen); ++ memset(dirent->name + namelen, 0, entlen_padded - entlen); ++ ++ return entlen_padded; ++} ++ ++static void convert_statfs(const struct statvfs *stbuf, ++ struct fuse_kstatfs *kstatfs) ++{ ++ kstatfs->bsize = stbuf->f_bsize; ++ kstatfs->frsize = stbuf->f_frsize; ++ kstatfs->blocks = stbuf->f_blocks; ++ kstatfs->bfree = stbuf->f_bfree; ++ kstatfs->bavail = stbuf->f_bavail; ++ kstatfs->files = stbuf->f_files; ++ kstatfs->ffree = stbuf->f_ffree; ++ kstatfs->namelen = stbuf->f_namemax; ++} ++ ++static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) ++{ ++ return send_reply(req, 0, arg, argsize); ++} ++ ++int fuse_reply_err(fuse_req_t req, int err) ++{ ++ return send_reply(req, -err, NULL, 0); ++} ++ ++void fuse_reply_none(fuse_req_t req) ++{ ++ fuse_free_req(req); ++} ++ ++static unsigned long 
calc_timeout_sec(double t) ++{ ++ if (t > (double) ULONG_MAX) ++ return ULONG_MAX; ++ else if (t < 0.0) ++ return 0; ++ else ++ return (unsigned long) t; ++} ++ ++static unsigned int calc_timeout_nsec(double t) ++{ ++ double f = t - (double) calc_timeout_sec(t); ++ if (f < 0.0) ++ return 0; ++ else if (f >= 0.999999999) ++ return 999999999; ++ else ++ return (unsigned int) (f * 1.0e9); ++} ++ ++static void fill_entry(struct fuse_entry_out *arg, ++ const struct fuse_entry_param *e) ++{ ++ arg->nodeid = e->ino; ++ arg->generation = e->generation; ++ arg->entry_valid = calc_timeout_sec(e->entry_timeout); ++ arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); ++ arg->attr_valid = calc_timeout_sec(e->attr_timeout); ++ arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); ++ convert_stat(&e->attr, &arg->attr); ++} ++ ++/* `buf` is allowed to be empty so that the proper size may be ++ allocated by the caller */ ++size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, ++ const char *name, ++ const struct fuse_entry_param *e, off_t off) ++{ ++ (void)req; ++ size_t namelen; ++ size_t entlen; ++ size_t entlen_padded; ++ ++ namelen = strlen(name); ++ entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; ++ entlen_padded = FUSE_DIRENT_ALIGN(entlen); ++ if ((buf == NULL) || (entlen_padded > bufsize)) ++ return entlen_padded; ++ ++ struct fuse_direntplus *dp = (struct fuse_direntplus *) buf; ++ memset(&dp->entry_out, 0, sizeof(dp->entry_out)); ++ fill_entry(&dp->entry_out, e); ++ ++ struct fuse_dirent *dirent = &dp->dirent; ++ dirent->ino = e->attr.st_ino; ++ dirent->off = off; ++ dirent->namelen = namelen; ++ dirent->type = (e->attr.st_mode & S_IFMT) >> 12; ++ memcpy(dirent->name, name, namelen); ++ memset(dirent->name + namelen, 0, entlen_padded - entlen); ++ ++ return entlen_padded; ++} ++ ++static void fill_open(struct fuse_open_out *arg, ++ const struct fuse_file_info *f) ++{ ++ arg->fh = f->fh; ++ if (f->direct_io) ++ arg->open_flags |= 
FOPEN_DIRECT_IO; ++ if (f->keep_cache) ++ arg->open_flags |= FOPEN_KEEP_CACHE; ++ if (f->cache_readdir) ++ arg->open_flags |= FOPEN_CACHE_DIR; ++ if (f->nonseekable) ++ arg->open_flags |= FOPEN_NONSEEKABLE; ++} ++ ++int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) ++{ ++ struct fuse_entry_out arg; ++ size_t size = req->se->conn.proto_minor < 9 ? ++ FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(arg); ++ ++ /* before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant ++ negative entry */ ++ if (!e->ino && req->se->conn.proto_minor < 4) ++ return fuse_reply_err(req, ENOENT); ++ ++ memset(&arg, 0, sizeof(arg)); ++ fill_entry(&arg, e); ++ return send_reply_ok(req, &arg, size); ++} ++ ++int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, ++ const struct fuse_file_info *f) ++{ ++ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; ++ size_t entrysize = req->se->conn.proto_minor < 9 ? ++ FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(struct fuse_entry_out); ++ struct fuse_entry_out *earg = (struct fuse_entry_out *) buf; ++ struct fuse_open_out *oarg = (struct fuse_open_out *) (buf + entrysize); ++ ++ memset(buf, 0, sizeof(buf)); ++ fill_entry(earg, e); ++ fill_open(oarg, f); ++ return send_reply_ok(req, buf, ++ entrysize + sizeof(struct fuse_open_out)); ++} ++ ++int fuse_reply_attr(fuse_req_t req, const struct stat *attr, ++ double attr_timeout) ++{ ++ struct fuse_attr_out arg; ++ size_t size = req->se->conn.proto_minor < 9 ? 
++ FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.attr_valid = calc_timeout_sec(attr_timeout); ++ arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); ++ convert_stat(attr, &arg.attr); ++ ++ return send_reply_ok(req, &arg, size); ++} ++ ++int fuse_reply_readlink(fuse_req_t req, const char *linkname) ++{ ++ return send_reply_ok(req, linkname, strlen(linkname)); ++} ++ ++int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f) ++{ ++ struct fuse_open_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ fill_open(&arg, f); ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_write(fuse_req_t req, size_t count) ++{ ++ struct fuse_write_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.size = count; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size) ++{ ++ return send_reply_ok(req, buf, size); ++} ++ ++static int fuse_send_data_iov_fallback(struct fuse_session *se, ++ struct fuse_chan *ch, ++ struct iovec *iov, int iov_count, ++ struct fuse_bufvec *buf, ++ size_t len) ++{ ++ struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); ++ void *mbuf; ++ int res; ++ ++ /* Optimize common case */ ++ if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && ++ !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { ++ /* FIXME: also avoid memory copy if there are multiple buffers ++ but none of them contain an fd */ ++ ++ iov[iov_count].iov_base = buf->buf[0].mem; ++ iov[iov_count].iov_len = len; ++ iov_count++; ++ return fuse_send_msg(se, ch, iov, iov_count); ++ } ++ ++ res = posix_memalign(&mbuf, pagesize, len); ++ if (res != 0) ++ return res; ++ ++ mem_buf.buf[0].mem = mbuf; ++ res = fuse_buf_copy(&mem_buf, buf, 0); ++ if (res < 0) { ++ free(mbuf); ++ return -res; ++ } ++ len = res; ++ ++ iov[iov_count].iov_base = mbuf; ++ iov[iov_count].iov_len = len; ++ iov_count++; ++ res = fuse_send_msg(se, ch, iov, iov_count); ++ free(mbuf); ++ ++ return res; ++} ++ 
++struct fuse_ll_pipe { ++ size_t size; ++ int can_grow; ++ int pipe[2]; ++}; ++ ++static void fuse_ll_pipe_free(struct fuse_ll_pipe *llp) ++{ ++ close(llp->pipe[0]); ++ close(llp->pipe[1]); ++ free(llp); ++} ++ ++#ifdef HAVE_SPLICE ++#if !defined(HAVE_PIPE2) || !defined(O_CLOEXEC) ++static int fuse_pipe(int fds[2]) ++{ ++ int rv = pipe(fds); ++ ++ if (rv == -1) ++ return rv; ++ ++ if (fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1 || ++ fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1 || ++ fcntl(fds[0], F_SETFD, FD_CLOEXEC) == -1 || ++ fcntl(fds[1], F_SETFD, FD_CLOEXEC) == -1) { ++ close(fds[0]); ++ close(fds[1]); ++ rv = -1; ++ } ++ return rv; ++} ++#else ++static int fuse_pipe(int fds[2]) ++{ ++ return pipe2(fds, O_CLOEXEC | O_NONBLOCK); ++} ++#endif ++ ++static struct fuse_ll_pipe *fuse_ll_get_pipe(struct fuse_session *se) ++{ ++ struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); ++ if (llp == NULL) { ++ int res; ++ ++ llp = malloc(sizeof(struct fuse_ll_pipe)); ++ if (llp == NULL) ++ return NULL; ++ ++ res = fuse_pipe(llp->pipe); ++ if (res == -1) { ++ free(llp); ++ return NULL; ++ } ++ ++ /* ++ *the default size is 16 pages on linux ++ */ ++ llp->size = pagesize * 16; ++ llp->can_grow = 1; ++ ++ pthread_setspecific(se->pipe_key, llp); ++ } ++ ++ return llp; ++} ++#endif ++ ++static void fuse_ll_clear_pipe(struct fuse_session *se) ++{ ++ struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); ++ if (llp) { ++ pthread_setspecific(se->pipe_key, NULL); ++ fuse_ll_pipe_free(llp); ++ } ++} ++ ++#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) ++static int read_back(int fd, char *buf, size_t len) ++{ ++ int res; ++ ++ res = read(fd, buf, len); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); ++ return -EIO; ++ } ++ if (res != len) { ++ fuse_log(FUSE_LOG_ERR, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); ++ return -EIO; ++ } ++ return 0; ++} ++ ++static int 
grow_pipe_to_max(int pipefd) ++{ ++ int max; ++ int res; ++ int maxfd; ++ char buf[32]; ++ ++ maxfd = open("/proc/sys/fs/pipe-max-size", O_RDONLY); ++ if (maxfd < 0) ++ return -errno; ++ ++ res = read(maxfd, buf, sizeof(buf) - 1); ++ if (res < 0) { ++ int saved_errno; ++ ++ saved_errno = errno; ++ close(maxfd); ++ return -saved_errno; ++ } ++ close(maxfd); ++ buf[res] = '\0'; ++ ++ max = atoi(buf); ++ res = fcntl(pipefd, F_SETPIPE_SZ, max); ++ if (res < 0) ++ return -errno; ++ return max; ++} ++ ++static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int iov_count, ++ struct fuse_bufvec *buf, unsigned int flags) ++{ ++ int res; ++ size_t len = fuse_buf_size(buf); ++ struct fuse_out_header *out = iov[0].iov_base; ++ struct fuse_ll_pipe *llp; ++ int splice_flags; ++ size_t pipesize; ++ size_t total_fd_size; ++ size_t idx; ++ size_t headerlen; ++ struct fuse_bufvec pipe_buf = FUSE_BUFVEC_INIT(len); ++ ++ if (se->broken_splice_nonblock) ++ goto fallback; ++ ++ if (flags & FUSE_BUF_NO_SPLICE) ++ goto fallback; ++ ++ total_fd_size = 0; ++ for (idx = buf->idx; idx < buf->count; idx++) { ++ if (buf->buf[idx].flags & FUSE_BUF_IS_FD) { ++ total_fd_size = buf->buf[idx].size; ++ if (idx == buf->idx) ++ total_fd_size -= buf->off; ++ } ++ } ++ if (total_fd_size < 2 * pagesize) ++ goto fallback; ++ ++ if (se->conn.proto_minor < 14 || ++ !(se->conn.want & FUSE_CAP_SPLICE_WRITE)) ++ goto fallback; ++ ++ llp = fuse_ll_get_pipe(se); ++ if (llp == NULL) ++ goto fallback; ++ ++ ++ headerlen = iov_length(iov, iov_count); ++ ++ out->len = headerlen + len; ++ ++ /* ++ * Heuristic for the required pipe size, does not work if the ++ * source contains less than page size fragments ++ */ ++ pipesize = pagesize * (iov_count + buf->count + 1) + out->len; ++ ++ if (llp->size < pipesize) { ++ if (llp->can_grow) { ++ res = fcntl(llp->pipe[0], F_SETPIPE_SZ, pipesize); ++ if (res == -1) { ++ res = grow_pipe_to_max(llp->pipe[0]); ++ if (res > 0) ++ 
llp->size = res; ++ llp->can_grow = 0; ++ goto fallback; ++ } ++ llp->size = res; ++ } ++ if (llp->size < pipesize) ++ goto fallback; ++ } ++ ++ ++ res = vmsplice(llp->pipe[1], iov, iov_count, SPLICE_F_NONBLOCK); ++ if (res == -1) ++ goto fallback; ++ ++ if (res != headerlen) { ++ res = -EIO; ++ fuse_log(FUSE_LOG_ERR, "fuse: short vmsplice to pipe: %u/%zu\n", res, ++ headerlen); ++ goto clear_pipe; ++ } ++ ++ pipe_buf.buf[0].flags = FUSE_BUF_IS_FD; ++ pipe_buf.buf[0].fd = llp->pipe[1]; ++ ++ res = fuse_buf_copy(&pipe_buf, buf, ++ FUSE_BUF_FORCE_SPLICE | FUSE_BUF_SPLICE_NONBLOCK); ++ if (res < 0) { ++ if (res == -EAGAIN || res == -EINVAL) { ++ /* ++ * Should only get EAGAIN on kernels with ++ * broken SPLICE_F_NONBLOCK support (<= ++ * 2.6.35) where this error or a short read is ++ * returned even if the pipe itself is not ++ * full ++ * ++ * EINVAL might mean that splice can't handle ++ * this combination of input and output. ++ */ ++ if (res == -EAGAIN) ++ se->broken_splice_nonblock = 1; ++ ++ pthread_setspecific(se->pipe_key, NULL); ++ fuse_ll_pipe_free(llp); ++ goto fallback; ++ } ++ res = -res; ++ goto clear_pipe; ++ } ++ ++ if (res != 0 && res < len) { ++ struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); ++ void *mbuf; ++ size_t now_len = res; ++ /* ++ * For regular files a short count is either ++ * 1) due to EOF, or ++ * 2) because of broken SPLICE_F_NONBLOCK (see above) ++ * ++ * For other inputs it's possible that we overflowed ++ * the pipe because of small buffer fragments. ++ */ ++ ++ res = posix_memalign(&mbuf, pagesize, len); ++ if (res != 0) ++ goto clear_pipe; ++ ++ mem_buf.buf[0].mem = mbuf; ++ mem_buf.off = now_len; ++ res = fuse_buf_copy(&mem_buf, buf, 0); ++ if (res > 0) { ++ char *tmpbuf; ++ size_t extra_len = res; ++ /* ++ * Trickiest case: got more data. Need to get ++ * back the data from the pipe and then fall ++ * back to regular write. 
++ */ ++ tmpbuf = malloc(headerlen); ++ if (tmpbuf == NULL) { ++ free(mbuf); ++ res = ENOMEM; ++ goto clear_pipe; ++ } ++ res = read_back(llp->pipe[0], tmpbuf, headerlen); ++ free(tmpbuf); ++ if (res != 0) { ++ free(mbuf); ++ goto clear_pipe; ++ } ++ res = read_back(llp->pipe[0], mbuf, now_len); ++ if (res != 0) { ++ free(mbuf); ++ goto clear_pipe; ++ } ++ len = now_len + extra_len; ++ iov[iov_count].iov_base = mbuf; ++ iov[iov_count].iov_len = len; ++ iov_count++; ++ res = fuse_send_msg(se, ch, iov, iov_count); ++ free(mbuf); ++ return res; ++ } ++ free(mbuf); ++ res = now_len; ++ } ++ len = res; ++ out->len = headerlen + len; ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, success, outsize: %i (splice)\n", ++ (unsigned long long) out->unique, out->len); ++ } ++ ++ splice_flags = 0; ++ if ((flags & FUSE_BUF_SPLICE_MOVE) && ++ (se->conn.want & FUSE_CAP_SPLICE_MOVE)) ++ splice_flags |= SPLICE_F_MOVE; ++ ++ res = splice(llp->pipe[0], NULL, ch ? ch->fd : se->fd, ++ NULL, out->len, splice_flags); ++ if (res == -1) { ++ res = -errno; ++ perror("fuse: splice from pipe"); ++ goto clear_pipe; ++ } ++ if (res != out->len) { ++ res = -EIO; ++ fuse_log(FUSE_LOG_ERR, "fuse: short splice from pipe: %u/%u\n", ++ res, out->len); ++ goto clear_pipe; ++ } ++ return 0; ++ ++clear_pipe: ++ fuse_ll_clear_pipe(se); ++ return res; ++ ++fallback: ++ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); ++} ++#else ++static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int iov_count, ++ struct fuse_bufvec *buf, unsigned int flags) ++{ ++ size_t len = fuse_buf_size(buf); ++ (void) flags; ++ ++ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); ++} ++#endif ++ ++int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags) ++{ ++ struct iovec iov[2]; ++ struct fuse_out_header out; ++ int res; ++ ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct 
fuse_out_header); ++ ++ out.unique = req->unique; ++ out.error = 0; ++ ++ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); ++ if (res <= 0) { ++ fuse_free_req(req); ++ return res; ++ } else { ++ return fuse_reply_err(req, res); ++ } ++} ++ ++int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) ++{ ++ struct fuse_statfs_out arg; ++ size_t size = req->se->conn.proto_minor < 4 ? ++ FUSE_COMPAT_STATFS_SIZE : sizeof(arg); ++ ++ memset(&arg, 0, sizeof(arg)); ++ convert_statfs(stbuf, &arg.st); ++ ++ return send_reply_ok(req, &arg, size); ++} ++ ++int fuse_reply_xattr(fuse_req_t req, size_t count) ++{ ++ struct fuse_getxattr_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.size = count; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_lock(fuse_req_t req, const struct flock *lock) ++{ ++ struct fuse_lk_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.lk.type = lock->l_type; ++ if (lock->l_type != F_UNLCK) { ++ arg.lk.start = lock->l_start; ++ if (lock->l_len == 0) ++ arg.lk.end = OFFSET_MAX; ++ else ++ arg.lk.end = lock->l_start + lock->l_len - 1; ++ } ++ arg.lk.pid = lock->l_pid; ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_bmap(fuse_req_t req, uint64_t idx) ++{ ++ struct fuse_bmap_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.block = idx; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov, ++ size_t count) ++{ ++ struct fuse_ioctl_iovec *fiov; ++ size_t i; ++ ++ fiov = malloc(sizeof(fiov[0]) * count); ++ if (!fiov) ++ return NULL; ++ ++ for (i = 0; i < count; i++) { ++ fiov[i].base = (uintptr_t) iov[i].iov_base; ++ fiov[i].len = iov[i].iov_len; ++ } ++ ++ return fiov; ++} ++ ++int fuse_reply_ioctl_retry(fuse_req_t req, ++ const struct iovec *in_iov, size_t in_count, ++ const struct iovec *out_iov, size_t out_count) ++{ ++ struct fuse_ioctl_out arg; ++ struct fuse_ioctl_iovec *in_fiov = 
NULL; ++ struct fuse_ioctl_iovec *out_fiov = NULL; ++ struct iovec iov[4]; ++ size_t count = 1; ++ int res; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.flags |= FUSE_IOCTL_RETRY; ++ arg.in_iovs = in_count; ++ arg.out_iovs = out_count; ++ iov[count].iov_base = &arg; ++ iov[count].iov_len = sizeof(arg); ++ count++; ++ ++ if (req->se->conn.proto_minor < 16) { ++ if (in_count) { ++ iov[count].iov_base = (void *)in_iov; ++ iov[count].iov_len = sizeof(in_iov[0]) * in_count; ++ count++; ++ } ++ ++ if (out_count) { ++ iov[count].iov_base = (void *)out_iov; ++ iov[count].iov_len = sizeof(out_iov[0]) * out_count; ++ count++; ++ } ++ } else { ++ /* Can't handle non-compat 64bit ioctls on 32bit */ ++ if (sizeof(void *) == 4 && req->ioctl_64bit) { ++ res = fuse_reply_err(req, EINVAL); ++ goto out; ++ } ++ ++ if (in_count) { ++ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); ++ if (!in_fiov) ++ goto enomem; ++ ++ iov[count].iov_base = (void *)in_fiov; ++ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; ++ count++; ++ } ++ if (out_count) { ++ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); ++ if (!out_fiov) ++ goto enomem; ++ ++ iov[count].iov_base = (void *)out_fiov; ++ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; ++ count++; ++ } ++ } ++ ++ res = send_reply_iov(req, 0, iov, count); ++out: ++ free(in_fiov); ++ free(out_fiov); ++ ++ return res; ++ ++enomem: ++ res = fuse_reply_err(req, ENOMEM); ++ goto out; ++} ++ ++int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size) ++{ ++ struct fuse_ioctl_out arg; ++ struct iovec iov[3]; ++ size_t count = 1; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.result = result; ++ iov[count].iov_base = &arg; ++ iov[count].iov_len = sizeof(arg); ++ count++; ++ ++ if (size) { ++ iov[count].iov_base = (char *) buf; ++ iov[count].iov_len = size; ++ count++; ++ } ++ ++ return send_reply_iov(req, 0, iov, count); ++} ++ ++int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, ++ int count) 
++{ ++ struct iovec *padded_iov; ++ struct fuse_ioctl_out arg; ++ int res; ++ ++ padded_iov = malloc((count + 2) * sizeof(struct iovec)); ++ if (padded_iov == NULL) ++ return fuse_reply_err(req, ENOMEM); ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.result = result; ++ padded_iov[1].iov_base = &arg; ++ padded_iov[1].iov_len = sizeof(arg); ++ ++ memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); ++ ++ res = send_reply_iov(req, 0, padded_iov, count + 2); ++ free(padded_iov); ++ ++ return res; ++} ++ ++int fuse_reply_poll(fuse_req_t req, unsigned revents) ++{ ++ struct fuse_poll_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.revents = revents; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_lseek(fuse_req_t req, off_t off) ++{ ++ struct fuse_lseek_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.offset = off; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ ++ if (req->se->op.lookup) ++ req->se->op.lookup(req, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_forget_in *arg = (struct fuse_forget_in *) inarg; ++ ++ if (req->se->op.forget) ++ req->se->op.forget(req, nodeid, arg->nlookup); ++ else ++ fuse_reply_none(req); ++} ++ ++static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, ++ const void *inarg) ++{ ++ struct fuse_batch_forget_in *arg = (void *) inarg; ++ struct fuse_forget_one *param = (void *) PARAM(arg); ++ unsigned int i; ++ ++ (void) nodeid; ++ ++ if (req->se->op.forget_multi) { ++ req->se->op.forget_multi(req, arg->count, ++ (struct fuse_forget_data *) param); ++ } else if (req->se->op.forget) { ++ for (i = 0; i < arg->count; i++) { ++ struct fuse_forget_one *forget = &param[i]; ++ struct fuse_req *dummy_req; ++ ++ dummy_req = fuse_ll_alloc_req(req->se); ++ if (dummy_req == 
NULL) ++ break; ++ ++ dummy_req->unique = req->unique; ++ dummy_req->ctx = req->ctx; ++ dummy_req->ch = NULL; ++ ++ req->se->op.forget(dummy_req, forget->nodeid, ++ forget->nlookup); ++ } ++ fuse_reply_none(req); ++ } else { ++ fuse_reply_none(req); ++ } ++} ++ ++static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_file_info *fip = NULL; ++ struct fuse_file_info fi; ++ ++ if (req->se->conn.proto_minor >= 9) { ++ struct fuse_getattr_in *arg = (struct fuse_getattr_in *) inarg; ++ ++ if (arg->getattr_flags & FUSE_GETATTR_FH) { ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fip = &fi; ++ } ++ } ++ ++ if (req->se->op.getattr) ++ req->se->op.getattr(req, nodeid, fip); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_setattr_in *arg = (struct fuse_setattr_in *) inarg; ++ ++ if (req->se->op.setattr) { ++ struct fuse_file_info *fi = NULL; ++ struct fuse_file_info fi_store; ++ struct stat stbuf; ++ memset(&stbuf, 0, sizeof(stbuf)); ++ convert_attr(arg, &stbuf); ++ if (arg->valid & FATTR_FH) { ++ arg->valid &= ~FATTR_FH; ++ memset(&fi_store, 0, sizeof(fi_store)); ++ fi = &fi_store; ++ fi->fh = arg->fh; ++ } ++ arg->valid &= ++ FUSE_SET_ATTR_MODE | ++ FUSE_SET_ATTR_UID | ++ FUSE_SET_ATTR_GID | ++ FUSE_SET_ATTR_SIZE | ++ FUSE_SET_ATTR_ATIME | ++ FUSE_SET_ATTR_MTIME | ++ FUSE_SET_ATTR_ATIME_NOW | ++ FUSE_SET_ATTR_MTIME_NOW | ++ FUSE_SET_ATTR_CTIME; ++ ++ req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); ++ } else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_access_in *arg = (struct fuse_access_in *) inarg; ++ ++ if (req->se->op.access) ++ req->se->op.access(req, nodeid, arg->mask); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ (void) inarg; ++ ++ if 
(req->se->op.readlink) ++ req->se->op.readlink(req, nodeid); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inarg; ++ char *name = PARAM(arg); ++ ++ if (req->se->conn.proto_minor >= 12) ++ req->ctx.umask = arg->umask; ++ else ++ name = (char *) inarg + FUSE_COMPAT_MKNOD_IN_SIZE; ++ ++ if (req->se->op.mknod) ++ req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inarg; ++ ++ if (req->se->conn.proto_minor >= 12) ++ req->ctx.umask = arg->umask; ++ ++ if (req->se->op.mkdir) ++ req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ ++ if (req->se->op.unlink) ++ req->se->op.unlink(req, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ ++ if (req->se->op.rmdir) ++ req->se->op.rmdir(req, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ char *linkname = ((char *) inarg) + strlen((char *) inarg) + 1; ++ ++ if (req->se->op.symlink) ++ req->se->op.symlink(req, linkname, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_rename_in *arg = (struct fuse_rename_in *) inarg; ++ char *oldname = PARAM(arg); ++ char *newname = oldname + strlen(oldname) + 1; ++ ++ if (req->se->op.rename) ++ req->se->op.rename(req, nodeid, oldname, arg->newdir, 
newname, ++ 0); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_rename2_in *arg = (struct fuse_rename2_in *) inarg; ++ char *oldname = PARAM(arg); ++ char *newname = oldname + strlen(oldname) + 1; ++ ++ if (req->se->op.rename) ++ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, ++ arg->flags); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_link_in *arg = (struct fuse_link_in *) inarg; ++ ++ if (req->se->op.link) ++ req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_create_in *arg = (struct fuse_create_in *) inarg; ++ ++ if (req->se->op.create) { ++ struct fuse_file_info fi; ++ char *name = PARAM(arg); ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ ++ if (req->se->conn.proto_minor >= 12) ++ req->ctx.umask = arg->umask; ++ else ++ name = (char *) inarg + sizeof(struct fuse_open_in); ++ ++ req->se->op.create(req, nodeid, name, arg->mode, &fi); ++ } else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_open_in *arg = (struct fuse_open_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ ++ if (req->se->op.open) ++ req->se->op.open(req, nodeid, &fi); ++ else ++ fuse_reply_open(req, &fi); ++} ++ ++static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; ++ ++ if (req->se->op.read) { ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ if (req->se->conn.proto_minor >= 9) { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ } ++ 
req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); ++ } else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_write_in *arg = (struct fuse_write_in *) inarg; ++ struct fuse_file_info fi; ++ char *param; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; ++ ++ if (req->se->conn.proto_minor < 9) { ++ param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; ++ } else { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ param = PARAM(arg); ++ } ++ ++ if (req->se->op.write) ++ req->se->op.write(req, nodeid, param, arg->size, ++ arg->offset, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, ++ const struct fuse_buf *ibuf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_bufvec bufv = { ++ .buf[0] = *ibuf, ++ .count = 1, ++ }; ++ struct fuse_write_in *arg = (struct fuse_write_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; ++ ++ if (se->conn.proto_minor < 9) { ++ bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; ++ bufv.buf[0].size -= sizeof(struct fuse_in_header) + ++ FUSE_COMPAT_WRITE_IN_SIZE; ++ assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); ++ } else { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) ++ bufv.buf[0].mem = PARAM(arg); ++ ++ bufv.buf[0].size -= sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in); ++ } ++ if (bufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); ++ fuse_reply_err(req, EIO); ++ goto out; ++ } ++ bufv.buf[0].size = arg->size; ++ ++ se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); ++ ++out: ++ /* Need to reset the pipe if ->write_buf() didn't 
consume all data */ ++ if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) ++ fuse_ll_clear_pipe(se); ++} ++ ++static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.flush = 1; ++ if (req->se->conn.proto_minor >= 7) ++ fi.lock_owner = arg->lock_owner; ++ ++ if (req->se->op.flush) ++ req->se->op.flush(req, nodeid, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_release_in *arg = (struct fuse_release_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ fi.fh = arg->fh; ++ if (req->se->conn.proto_minor >= 8) { ++ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; ++ fi.lock_owner = arg->lock_owner; ++ } ++ if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { ++ fi.flock_release = 1; ++ fi.lock_owner = arg->lock_owner; ++ } ++ ++ if (req->se->op.release) ++ req->se->op.release(req, nodeid, &fi); ++ else ++ fuse_reply_err(req, 0); ++} ++ ++static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; ++ struct fuse_file_info fi; ++ int datasync = arg->fsync_flags & 1; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.fsync) ++ req->se->op.fsync(req, nodeid, datasync, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_open_in *arg = (struct fuse_open_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ ++ if (req->se->op.opendir) ++ req->se->op.opendir(req, nodeid, &fi); ++ else ++ fuse_reply_open(req, &fi); ++} ++ ++static void do_readdir(fuse_req_t req, fuse_ino_t 
nodeid, const void *inarg) ++{ ++ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.readdir) ++ req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.readdirplus) ++ req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_release_in *arg = (struct fuse_release_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ fi.fh = arg->fh; ++ ++ if (req->se->op.releasedir) ++ req->se->op.releasedir(req, nodeid, &fi); ++ else ++ fuse_reply_err(req, 0); ++} ++ ++static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; ++ struct fuse_file_info fi; ++ int datasync = arg->fsync_flags & 1; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.fsyncdir) ++ req->se->op.fsyncdir(req, nodeid, datasync, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ (void) nodeid; ++ (void) inarg; ++ ++ if (req->se->op.statfs) ++ req->se->op.statfs(req, nodeid); ++ else { ++ struct statvfs buf = { ++ .f_namemax = 255, ++ .f_bsize = 512, ++ }; ++ fuse_reply_statfs(req, &buf); ++ } ++} ++ ++static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg; ++ char *name = PARAM(arg); ++ char *value = name 
+ strlen(name) + 1; ++ ++ if (req->se->op.setxattr) ++ req->se->op.setxattr(req, nodeid, name, value, arg->size, ++ arg->flags); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; ++ ++ if (req->se->op.getxattr) ++ req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; ++ ++ if (req->se->op.listxattr) ++ req->se->op.listxattr(req, nodeid, arg->size); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ ++ if (req->se->op.removexattr) ++ req->se->op.removexattr(req, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void convert_fuse_file_lock(struct fuse_file_lock *fl, ++ struct flock *flock) ++{ ++ memset(flock, 0, sizeof(struct flock)); ++ flock->l_type = fl->type; ++ flock->l_whence = SEEK_SET; ++ flock->l_start = fl->start; ++ if (fl->end == OFFSET_MAX) ++ flock->l_len = 0; ++ else ++ flock->l_len = fl->end - fl->start + 1; ++ flock->l_pid = fl->pid; ++} ++ ++static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; ++ struct fuse_file_info fi; ++ struct flock flock; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.lock_owner = arg->owner; ++ ++ convert_fuse_file_lock(&arg->lk, &flock); ++ if (req->se->op.getlk) ++ req->se->op.getlk(req, nodeid, &fi, &flock); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, ++ const void *inarg, int sleep) ++{ ++ struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; ++ struct fuse_file_info fi; ++ 
struct flock flock; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.lock_owner = arg->owner; ++ ++ if (arg->lk_flags & FUSE_LK_FLOCK) { ++ int op = 0; ++ ++ switch (arg->lk.type) { ++ case F_RDLCK: ++ op = LOCK_SH; ++ break; ++ case F_WRLCK: ++ op = LOCK_EX; ++ break; ++ case F_UNLCK: ++ op = LOCK_UN; ++ break; ++ } ++ if (!sleep) ++ op |= LOCK_NB; ++ ++ if (req->se->op.flock) ++ req->se->op.flock(req, nodeid, &fi, op); ++ else ++ fuse_reply_err(req, ENOSYS); ++ } else { ++ convert_fuse_file_lock(&arg->lk, &flock); ++ if (req->se->op.setlk) ++ req->se->op.setlk(req, nodeid, &fi, &flock, sleep); ++ else ++ fuse_reply_err(req, ENOSYS); ++ } ++} ++ ++static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ do_setlk_common(req, nodeid, inarg, 0); ++} ++ ++static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ do_setlk_common(req, nodeid, inarg, 1); ++} ++ ++static int find_interrupted(struct fuse_session *se, struct fuse_req *req) ++{ ++ struct fuse_req *curr; ++ ++ for (curr = se->list.next; curr != &se->list; curr = curr->next) { ++ if (curr->unique == req->u.i.unique) { ++ fuse_interrupt_func_t func; ++ void *data; ++ ++ curr->ctr++; ++ pthread_mutex_unlock(&se->lock); ++ ++ /* Ugh, ugly locking */ ++ pthread_mutex_lock(&curr->lock); ++ pthread_mutex_lock(&se->lock); ++ curr->interrupted = 1; ++ func = curr->u.ni.func; ++ data = curr->u.ni.data; ++ pthread_mutex_unlock(&se->lock); ++ if (func) ++ func(curr, data); ++ pthread_mutex_unlock(&curr->lock); ++ ++ pthread_mutex_lock(&se->lock); ++ curr->ctr--; ++ if (!curr->ctr) ++ destroy_req(curr); ++ ++ return 1; ++ } ++ } ++ for (curr = se->interrupts.next; curr != &se->interrupts; ++ curr = curr->next) { ++ if (curr->u.i.unique == req->u.i.unique) ++ return 1; ++ } ++ return 0; ++} ++ ++static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *) inarg; ++ struct 
fuse_session *se = req->se; ++ ++ (void) nodeid; ++ if (se->debug) ++ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", ++ (unsigned long long) arg->unique); ++ ++ req->u.i.unique = arg->unique; ++ ++ pthread_mutex_lock(&se->lock); ++ if (find_interrupted(se, req)) ++ destroy_req(req); ++ else ++ list_add_req(req, &se->interrupts); ++ pthread_mutex_unlock(&se->lock); ++} ++ ++static struct fuse_req *check_interrupt(struct fuse_session *se, ++ struct fuse_req *req) ++{ ++ struct fuse_req *curr; ++ ++ for (curr = se->interrupts.next; curr != &se->interrupts; ++ curr = curr->next) { ++ if (curr->u.i.unique == req->unique) { ++ req->interrupted = 1; ++ list_del_req(curr); ++ free(curr); ++ return NULL; ++ } ++ } ++ curr = se->interrupts.next; ++ if (curr != &se->interrupts) { ++ list_del_req(curr); ++ list_init_req(curr); ++ return curr; ++ } else ++ return NULL; ++} ++ ++static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg; ++ ++ if (req->se->op.bmap) ++ req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *) inarg; ++ unsigned int flags = arg->flags; ++ void *in_buf = arg->in_size ? 
PARAM(arg) : NULL; ++ struct fuse_file_info fi; ++ ++ if (flags & FUSE_IOCTL_DIR && ++ !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { ++ fuse_reply_err(req, ENOTTY); ++ return; ++ } ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && ++ !(flags & FUSE_IOCTL_32BIT)) { ++ req->ioctl_64bit = 1; ++ } ++ ++ if (req->se->op.ioctl) ++ req->se->op.ioctl(req, nodeid, arg->cmd, ++ (void *)(uintptr_t)arg->arg, &fi, flags, ++ in_buf, arg->in_size, arg->out_size); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) ++{ ++ free(ph); ++} ++ ++static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_poll_in *arg = (struct fuse_poll_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.poll_events = arg->events; ++ ++ if (req->se->op.poll) { ++ struct fuse_pollhandle *ph = NULL; ++ ++ if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { ++ ph = malloc(sizeof(struct fuse_pollhandle)); ++ if (ph == NULL) { ++ fuse_reply_err(req, ENOMEM); ++ return; ++ } ++ ph->kh = arg->kh; ++ ph->se = req->se; ++ } ++ ++ req->se->op.poll(req, nodeid, &fi, ph); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } ++} ++ ++static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.fallocate) ++ req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, const void *inarg) ++{ ++ struct fuse_copy_file_range_in *arg = (struct fuse_copy_file_range_in *) inarg; ++ struct fuse_file_info fi_in, fi_out; ++ ++ memset(&fi_in, 0, sizeof(fi_in)); ++ fi_in.fh = arg->fh_in; ++ ++ 
memset(&fi_out, 0, sizeof(fi_out)); ++ fi_out.fh = arg->fh_out; ++ ++ ++ if (req->se->op.copy_file_range) ++ req->se->op.copy_file_range(req, nodeid_in, arg->off_in, ++ &fi_in, arg->nodeid_out, ++ arg->off_out, &fi_out, arg->len, ++ arg->flags); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_lseek_in *arg = (struct fuse_lseek_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.lseek) ++ req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_init_in *arg = (struct fuse_init_in *) inarg; ++ struct fuse_init_out outarg; ++ struct fuse_session *se = req->se; ++ size_t bufsize = se->bufsize; ++ size_t outargsize = sizeof(outarg); ++ ++ (void) nodeid; ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); ++ if (arg->major == 7 && arg->minor >= 6) { ++ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); ++ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", ++ arg->max_readahead); ++ } ++ } ++ se->conn.proto_major = arg->major; ++ se->conn.proto_minor = arg->minor; ++ se->conn.capable = 0; ++ se->conn.want = 0; ++ ++ memset(&outarg, 0, sizeof(outarg)); ++ outarg.major = FUSE_KERNEL_VERSION; ++ outarg.minor = FUSE_KERNEL_MINOR_VERSION; ++ ++ if (arg->major < 7) { ++ fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", ++ arg->major, arg->minor); ++ fuse_reply_err(req, EPROTO); ++ return; ++ } ++ ++ if (arg->major > 7) { ++ /* Wait for a second INIT request with a 7.X version */ ++ send_reply_ok(req, &outarg, sizeof(outarg)); ++ return; ++ } ++ ++ if (arg->minor >= 6) { ++ if (arg->max_readahead < se->conn.max_readahead) ++ se->conn.max_readahead = arg->max_readahead; ++ if (arg->flags & FUSE_ASYNC_READ) ++ 
se->conn.capable |= FUSE_CAP_ASYNC_READ; ++ if (arg->flags & FUSE_POSIX_LOCKS) ++ se->conn.capable |= FUSE_CAP_POSIX_LOCKS; ++ if (arg->flags & FUSE_ATOMIC_O_TRUNC) ++ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; ++ if (arg->flags & FUSE_EXPORT_SUPPORT) ++ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; ++ if (arg->flags & FUSE_DONT_MASK) ++ se->conn.capable |= FUSE_CAP_DONT_MASK; ++ if (arg->flags & FUSE_FLOCK_LOCKS) ++ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; ++ if (arg->flags & FUSE_AUTO_INVAL_DATA) ++ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; ++ if (arg->flags & FUSE_DO_READDIRPLUS) ++ se->conn.capable |= FUSE_CAP_READDIRPLUS; ++ if (arg->flags & FUSE_READDIRPLUS_AUTO) ++ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; ++ if (arg->flags & FUSE_ASYNC_DIO) ++ se->conn.capable |= FUSE_CAP_ASYNC_DIO; ++ if (arg->flags & FUSE_WRITEBACK_CACHE) ++ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; ++ if (arg->flags & FUSE_NO_OPEN_SUPPORT) ++ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; ++ if (arg->flags & FUSE_PARALLEL_DIROPS) ++ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; ++ if (arg->flags & FUSE_POSIX_ACL) ++ se->conn.capable |= FUSE_CAP_POSIX_ACL; ++ if (arg->flags & FUSE_HANDLE_KILLPRIV) ++ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; ++ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) ++ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; ++ if (!(arg->flags & FUSE_MAX_PAGES)) { ++ size_t max_bufsize = ++ FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() ++ + FUSE_BUFFER_HEADER_SIZE; ++ if (bufsize > max_bufsize) { ++ bufsize = max_bufsize; ++ } ++ } ++ } else { ++ se->conn.max_readahead = 0; ++ } ++ ++ if (se->conn.proto_minor >= 14) { ++#ifdef HAVE_SPLICE ++#ifdef HAVE_VMSPLICE ++ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; ++#endif ++ se->conn.capable |= FUSE_CAP_SPLICE_READ; ++#endif ++ } ++ if (se->conn.proto_minor >= 18) ++ se->conn.capable |= FUSE_CAP_IOCTL_DIR; ++ ++ /* Default settings for modern filesystems. 
++ * ++ * Most of these capabilities were disabled by default in ++ * libfuse2 for backwards compatibility reasons. In libfuse3, ++ * we can finally enable them by default (as long as they're ++ * supported by the kernel). ++ */ ++#define LL_SET_DEFAULT(cond, cap) \ ++ if ((cond) && (se->conn.capable & (cap))) \ ++ se->conn.want |= (cap) ++ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); ++ LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); ++ LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); ++ LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); ++ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); ++ LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); ++ LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); ++ LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); ++ LL_SET_DEFAULT(se->op.getlk && se->op.setlk, ++ FUSE_CAP_POSIX_LOCKS); ++ LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); ++ LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); ++ LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, ++ FUSE_CAP_READDIRPLUS_AUTO); ++ se->conn.time_gran = 1; ++ ++ if (bufsize < FUSE_MIN_READ_BUFFER) { ++ fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", ++ bufsize); ++ bufsize = FUSE_MIN_READ_BUFFER; ++ } ++ se->bufsize = bufsize; ++ ++ if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) ++ se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; ++ ++ se->got_init = 1; ++ if (se->op.init) ++ se->op.init(se->userdata, &se->conn); ++ ++ if (se->conn.want & (~se->conn.capable)) { ++ fuse_log(FUSE_LOG_ERR, "fuse: error: filesystem requested capabilities " ++ "0x%x that are not supported by kernel, aborting.\n", ++ se->conn.want & (~se->conn.capable)); ++ fuse_reply_err(req, EPROTO); ++ se->error = -EPROTO; ++ fuse_session_exit(se); ++ return; ++ } ++ ++ unsigned max_read_mo = get_max_read(se->mo); ++ if (se->conn.max_read != max_read_mo) { ++ fuse_log(FUSE_LOG_ERR, "fuse: error: init() and fuse_session_new() " ++ "requested different maximum read size (%u vs %u)\n", ++ se->conn.max_read, 
max_read_mo); ++ fuse_reply_err(req, EPROTO); ++ se->error = -EPROTO; ++ fuse_session_exit(se); ++ return; ++ } ++ ++ if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { ++ se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; ++ } ++ if (arg->flags & FUSE_MAX_PAGES) { ++ outarg.flags |= FUSE_MAX_PAGES; ++ outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; ++ } ++ ++ /* Always enable big writes, this is superseded ++ by the max_write option */ ++ outarg.flags |= FUSE_BIG_WRITES; ++ ++ if (se->conn.want & FUSE_CAP_ASYNC_READ) ++ outarg.flags |= FUSE_ASYNC_READ; ++ if (se->conn.want & FUSE_CAP_POSIX_LOCKS) ++ outarg.flags |= FUSE_POSIX_LOCKS; ++ if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) ++ outarg.flags |= FUSE_ATOMIC_O_TRUNC; ++ if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) ++ outarg.flags |= FUSE_EXPORT_SUPPORT; ++ if (se->conn.want & FUSE_CAP_DONT_MASK) ++ outarg.flags |= FUSE_DONT_MASK; ++ if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) ++ outarg.flags |= FUSE_FLOCK_LOCKS; ++ if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) ++ outarg.flags |= FUSE_AUTO_INVAL_DATA; ++ if (se->conn.want & FUSE_CAP_READDIRPLUS) ++ outarg.flags |= FUSE_DO_READDIRPLUS; ++ if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) ++ outarg.flags |= FUSE_READDIRPLUS_AUTO; ++ if (se->conn.want & FUSE_CAP_ASYNC_DIO) ++ outarg.flags |= FUSE_ASYNC_DIO; ++ if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) ++ outarg.flags |= FUSE_WRITEBACK_CACHE; ++ if (se->conn.want & FUSE_CAP_POSIX_ACL) ++ outarg.flags |= FUSE_POSIX_ACL; ++ outarg.max_readahead = se->conn.max_readahead; ++ outarg.max_write = se->conn.max_write; ++ if (se->conn.proto_minor >= 13) { ++ if (se->conn.max_background >= (1 << 16)) ++ se->conn.max_background = (1 << 16) - 1; ++ if (se->conn.congestion_threshold > se->conn.max_background) ++ se->conn.congestion_threshold = se->conn.max_background; ++ if (!se->conn.congestion_threshold) { ++ se->conn.congestion_threshold = ++ se->conn.max_background * 3 / 4; ++ } ++ ++ 
outarg.max_background = se->conn.max_background; ++ outarg.congestion_threshold = se->conn.congestion_threshold; ++ } ++ if (se->conn.proto_minor >= 23) ++ outarg.time_gran = se->conn.time_gran; ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); ++ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); ++ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", ++ outarg.max_readahead); ++ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); ++ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", ++ outarg.max_background); ++ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", ++ outarg.congestion_threshold); ++ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", ++ outarg.time_gran); ++ } ++ if (arg->minor < 5) ++ outargsize = FUSE_COMPAT_INIT_OUT_SIZE; ++ else if (arg->minor < 23) ++ outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; ++ ++ send_reply_ok(req, &outarg, outargsize); ++} ++ ++static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_session *se = req->se; ++ ++ (void) nodeid; ++ (void) inarg; ++ ++ se->got_destroy = 1; ++ if (se->op.destroy) ++ se->op.destroy(se->userdata); ++ ++ send_reply_ok(req, NULL, 0); ++} ++ ++static void list_del_nreq(struct fuse_notify_req *nreq) ++{ ++ struct fuse_notify_req *prev = nreq->prev; ++ struct fuse_notify_req *next = nreq->next; ++ prev->next = next; ++ next->prev = prev; ++} ++ ++static void list_add_nreq(struct fuse_notify_req *nreq, ++ struct fuse_notify_req *next) ++{ ++ struct fuse_notify_req *prev = next->prev; ++ nreq->next = next; ++ nreq->prev = prev; ++ prev->next = nreq; ++ next->prev = nreq; ++} ++ ++static void list_init_nreq(struct fuse_notify_req *nreq) ++{ ++ nreq->next = nreq; ++ nreq->prev = nreq; ++} ++ ++static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, ++ const void *inarg, const struct fuse_buf *buf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_notify_req *nreq; ++ struct fuse_notify_req 
*head; ++ ++ pthread_mutex_lock(&se->lock); ++ head = &se->notify_list; ++ for (nreq = head->next; nreq != head; nreq = nreq->next) { ++ if (nreq->unique == req->unique) { ++ list_del_nreq(nreq); ++ break; ++ } ++ } ++ pthread_mutex_unlock(&se->lock); ++ ++ if (nreq != head) ++ nreq->reply(nreq, req, nodeid, inarg, buf); ++} ++ ++static int send_notify_iov(struct fuse_session *se, int notify_code, ++ struct iovec *iov, int count) ++{ ++ struct fuse_out_header out; ++ ++ if (!se->got_init) ++ return -ENOTCONN; ++ ++ out.unique = 0; ++ out.error = notify_code; ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); ++ ++ return fuse_send_msg(se, NULL, iov, count); ++} ++ ++int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) ++{ ++ if (ph != NULL) { ++ struct fuse_notify_poll_wakeup_out outarg; ++ struct iovec iov[2]; ++ ++ outarg.kh = ph->kh; ++ ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ ++ return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); ++ } else { ++ return 0; ++ } ++} ++ ++int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, ++ off_t off, off_t len) ++{ ++ struct fuse_notify_inval_inode_out outarg; ++ struct iovec iov[2]; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) ++ return -ENOSYS; ++ ++ outarg.ino = ino; ++ outarg.off = off; ++ outarg.len = len; ++ ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ ++ return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); ++} ++ ++int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, ++ const char *name, size_t namelen) ++{ ++ struct fuse_notify_inval_entry_out outarg; ++ struct iovec iov[3]; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) ++ return -ENOSYS; ++ ++ outarg.parent = parent; ++ outarg.namelen = namelen; ++ outarg.padding = 0; ++ ++ iov[1].iov_base = &outarg; ++ 
iov[1].iov_len = sizeof(outarg); ++ iov[2].iov_base = (void *)name; ++ iov[2].iov_len = namelen + 1; ++ ++ return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); ++} ++ ++int fuse_lowlevel_notify_delete(struct fuse_session *se, ++ fuse_ino_t parent, fuse_ino_t child, ++ const char *name, size_t namelen) ++{ ++ struct fuse_notify_delete_out outarg; ++ struct iovec iov[3]; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) ++ return -ENOSYS; ++ ++ outarg.parent = parent; ++ outarg.child = child; ++ outarg.namelen = namelen; ++ outarg.padding = 0; ++ ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ iov[2].iov_base = (void *)name; ++ iov[2].iov_len = namelen + 1; ++ ++ return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); ++} ++ ++int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, ++ off_t offset, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags) ++{ ++ struct fuse_out_header out; ++ struct fuse_notify_store_out outarg; ++ struct iovec iov[3]; ++ size_t size = fuse_buf_size(bufv); ++ int res; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) ++ return -ENOSYS; ++ ++ out.unique = 0; ++ out.error = FUSE_NOTIFY_STORE; ++ ++ outarg.nodeid = ino; ++ outarg.offset = offset; ++ outarg.size = size; ++ outarg.padding = 0; ++ ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(out); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ ++ res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); ++ if (res > 0) ++ res = -res; ++ ++ return res; ++} ++ ++struct fuse_retrieve_req { ++ struct fuse_notify_req nreq; ++ void *cookie; ++}; ++ ++static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, ++ fuse_req_t req, fuse_ino_t ino, ++ const void *inarg, ++ const struct fuse_buf *ibuf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_retrieve_req *rreq = ++ container_of(nreq, struct 
fuse_retrieve_req, nreq); ++ const struct fuse_notify_retrieve_in *arg = inarg; ++ struct fuse_bufvec bufv = { ++ .buf[0] = *ibuf, ++ .count = 1, ++ }; ++ ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) ++ bufv.buf[0].mem = PARAM(arg); ++ ++ bufv.buf[0].size -= sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_notify_retrieve_in); ++ ++ if (bufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); ++ fuse_reply_none(req); ++ goto out; ++ } ++ bufv.buf[0].size = arg->size; ++ ++ if (se->op.retrieve_reply) { ++ se->op.retrieve_reply(req, rreq->cookie, ino, ++ arg->offset, &bufv); ++ } else { ++ fuse_reply_none(req); ++ } ++out: ++ free(rreq); ++ if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) ++ fuse_ll_clear_pipe(se); ++} ++ ++int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, ++ size_t size, off_t offset, void *cookie) ++{ ++ struct fuse_notify_retrieve_out outarg; ++ struct iovec iov[2]; ++ struct fuse_retrieve_req *rreq; ++ int err; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) ++ return -ENOSYS; ++ ++ rreq = malloc(sizeof(*rreq)); ++ if (rreq == NULL) ++ return -ENOMEM; ++ ++ pthread_mutex_lock(&se->lock); ++ rreq->cookie = cookie; ++ rreq->nreq.unique = se->notify_ctr++; ++ rreq->nreq.reply = fuse_ll_retrieve_reply; ++ list_add_nreq(&rreq->nreq, &se->notify_list); ++ pthread_mutex_unlock(&se->lock); ++ ++ outarg.notify_unique = rreq->nreq.unique; ++ outarg.nodeid = ino; ++ outarg.offset = offset; ++ outarg.size = size; ++ outarg.padding = 0; ++ ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ ++ err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); ++ if (err) { ++ pthread_mutex_lock(&se->lock); ++ list_del_nreq(&rreq->nreq); ++ pthread_mutex_unlock(&se->lock); ++ free(rreq); ++ } ++ ++ return err; ++} ++ ++void *fuse_req_userdata(fuse_req_t req) ++{ ++ return req->se->userdata; ++} ++ ++const 
struct fuse_ctx *fuse_req_ctx(fuse_req_t req) ++{ ++ return &req->ctx; ++} ++ ++void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, ++ void *data) ++{ ++ pthread_mutex_lock(&req->lock); ++ pthread_mutex_lock(&req->se->lock); ++ req->u.ni.func = func; ++ req->u.ni.data = data; ++ pthread_mutex_unlock(&req->se->lock); ++ if (req->interrupted && func) ++ func(req, data); ++ pthread_mutex_unlock(&req->lock); ++} ++ ++int fuse_req_interrupted(fuse_req_t req) ++{ ++ int interrupted; ++ ++ pthread_mutex_lock(&req->se->lock); ++ interrupted = req->interrupted; ++ pthread_mutex_unlock(&req->se->lock); ++ ++ return interrupted; ++} ++ ++static struct { ++ void (*func)(fuse_req_t, fuse_ino_t, const void *); ++ const char *name; ++} fuse_ll_ops[] = { ++ [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, ++ [FUSE_FORGET] = { do_forget, "FORGET" }, ++ [FUSE_GETATTR] = { do_getattr, "GETATTR" }, ++ [FUSE_SETATTR] = { do_setattr, "SETATTR" }, ++ [FUSE_READLINK] = { do_readlink, "READLINK" }, ++ [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, ++ [FUSE_MKNOD] = { do_mknod, "MKNOD" }, ++ [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, ++ [FUSE_UNLINK] = { do_unlink, "UNLINK" }, ++ [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, ++ [FUSE_RENAME] = { do_rename, "RENAME" }, ++ [FUSE_LINK] = { do_link, "LINK" }, ++ [FUSE_OPEN] = { do_open, "OPEN" }, ++ [FUSE_READ] = { do_read, "READ" }, ++ [FUSE_WRITE] = { do_write, "WRITE" }, ++ [FUSE_STATFS] = { do_statfs, "STATFS" }, ++ [FUSE_RELEASE] = { do_release, "RELEASE" }, ++ [FUSE_FSYNC] = { do_fsync, "FSYNC" }, ++ [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, ++ [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, ++ [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, ++ [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, ++ [FUSE_FLUSH] = { do_flush, "FLUSH" }, ++ [FUSE_INIT] = { do_init, "INIT" }, ++ [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, ++ [FUSE_READDIR] = { do_readdir, "READDIR" }, ++ [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, ++ 
[FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, ++ [FUSE_GETLK] = { do_getlk, "GETLK" }, ++ [FUSE_SETLK] = { do_setlk, "SETLK" }, ++ [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, ++ [FUSE_ACCESS] = { do_access, "ACCESS" }, ++ [FUSE_CREATE] = { do_create, "CREATE" }, ++ [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, ++ [FUSE_BMAP] = { do_bmap, "BMAP" }, ++ [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, ++ [FUSE_POLL] = { do_poll, "POLL" }, ++ [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, ++ [FUSE_DESTROY] = { do_destroy, "DESTROY" }, ++ [FUSE_NOTIFY_REPLY] = { (void *) 1, "NOTIFY_REPLY" }, ++ [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, ++ [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS"}, ++ [FUSE_RENAME2] = { do_rename2, "RENAME2" }, ++ [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, ++ [FUSE_LSEEK] = { do_lseek, "LSEEK" }, ++ [CUSE_INIT] = { cuse_lowlevel_init, "CUSE_INIT" }, ++}; ++ ++#define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) ++ ++static const char *opname(enum fuse_opcode opcode) ++{ ++ if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) ++ return "???"; ++ else ++ return fuse_ll_ops[opcode].name; ++} ++ ++static int fuse_ll_copy_from_pipe(struct fuse_bufvec *dst, ++ struct fuse_bufvec *src) ++{ ++ ssize_t res = fuse_buf_copy(dst, src, 0); ++ if (res < 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", strerror(-res)); ++ return res; ++ } ++ if ((size_t)res < fuse_buf_size(dst)) { ++ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); ++ return -1; ++ } ++ return 0; ++} ++ ++void fuse_session_process_buf(struct fuse_session *se, ++ const struct fuse_buf *buf) ++{ ++ fuse_session_process_buf_int(se, buf, NULL); ++} ++ ++void fuse_session_process_buf_int(struct fuse_session *se, ++ const struct fuse_buf *buf, struct fuse_chan *ch) ++{ ++ const size_t write_header_size = sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in); ++ struct fuse_bufvec bufv = { .buf[0] = *buf, .count 
= 1 }; ++ struct fuse_bufvec tmpbuf = FUSE_BUFVEC_INIT(write_header_size); ++ struct fuse_in_header *in; ++ const void *inarg; ++ struct fuse_req *req; ++ void *mbuf = NULL; ++ int err; ++ int res; ++ ++ if (buf->flags & FUSE_BUF_IS_FD) { ++ if (buf->size < tmpbuf.buf[0].size) ++ tmpbuf.buf[0].size = buf->size; ++ ++ mbuf = malloc(tmpbuf.buf[0].size); ++ if (mbuf == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate header\n"); ++ goto clear_pipe; ++ } ++ tmpbuf.buf[0].mem = mbuf; ++ ++ res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); ++ if (res < 0) ++ goto clear_pipe; ++ ++ in = mbuf; ++ } else { ++ in = buf->mem; ++ } ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", ++ (unsigned long long) in->unique, ++ opname((enum fuse_opcode) in->opcode), in->opcode, ++ (unsigned long long) in->nodeid, buf->size, in->pid); ++ } ++ ++ req = fuse_ll_alloc_req(se); ++ if (req == NULL) { ++ struct fuse_out_header out = { ++ .unique = in->unique, ++ .error = -ENOMEM, ++ }; ++ struct iovec iov = { ++ .iov_base = &out, ++ .iov_len = sizeof(struct fuse_out_header), ++ }; ++ ++ fuse_send_msg(se, ch, &iov, 1); ++ goto clear_pipe; ++ } ++ ++ req->unique = in->unique; ++ req->ctx.uid = in->uid; ++ req->ctx.gid = in->gid; ++ req->ctx.pid = in->pid; ++ req->ch = ch ? fuse_chan_get(ch) : NULL; ++ ++ err = EIO; ++ if (!se->got_init) { ++ enum fuse_opcode expected; ++ ++ expected = se->cuse_data ? 
CUSE_INIT : FUSE_INIT; ++ if (in->opcode != expected) ++ goto reply_err; ++ } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) ++ goto reply_err; ++ ++ err = EACCES; ++ /* Implement -o allow_root */ ++ if (se->deny_others && in->uid != se->owner && in->uid != 0 && ++ in->opcode != FUSE_INIT && in->opcode != FUSE_READ && ++ in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && ++ in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && ++ in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && ++ in->opcode != FUSE_NOTIFY_REPLY && ++ in->opcode != FUSE_READDIRPLUS) ++ goto reply_err; ++ ++ err = ENOSYS; ++ if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) ++ goto reply_err; ++ if (in->opcode != FUSE_INTERRUPT) { ++ struct fuse_req *intr; ++ pthread_mutex_lock(&se->lock); ++ intr = check_interrupt(se, req); ++ list_add_req(req, &se->list); ++ pthread_mutex_unlock(&se->lock); ++ if (intr) ++ fuse_reply_err(intr, EAGAIN); ++ } ++ ++ if ((buf->flags & FUSE_BUF_IS_FD) && write_header_size < buf->size && ++ (in->opcode != FUSE_WRITE || !se->op.write_buf) && ++ in->opcode != FUSE_NOTIFY_REPLY) { ++ void *newmbuf; ++ ++ err = ENOMEM; ++ newmbuf = realloc(mbuf, buf->size); ++ if (newmbuf == NULL) ++ goto reply_err; ++ mbuf = newmbuf; ++ ++ tmpbuf = FUSE_BUFVEC_INIT(buf->size - write_header_size); ++ tmpbuf.buf[0].mem = (char *)mbuf + write_header_size; ++ ++ res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); ++ err = -res; ++ if (res < 0) ++ goto reply_err; ++ ++ in = mbuf; ++ } ++ ++ inarg = (void *) &in[1]; ++ if (in->opcode == FUSE_WRITE && se->op.write_buf) ++ do_write_buf(req, in->nodeid, inarg, buf); ++ else if (in->opcode == FUSE_NOTIFY_REPLY) ++ do_notify_reply(req, in->nodeid, inarg, buf); ++ else ++ fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); ++ ++out_free: ++ free(mbuf); ++ return; ++ ++reply_err: ++ fuse_reply_err(req, err); ++clear_pipe: ++ if (buf->flags & FUSE_BUF_IS_FD) ++ fuse_ll_clear_pipe(se); ++ goto out_free; 
++} ++ ++#define LL_OPTION(n,o,v) \ ++ { n, offsetof(struct fuse_session, o), v } ++ ++static const struct fuse_opt fuse_ll_opts[] = { ++ LL_OPTION("debug", debug, 1), ++ LL_OPTION("-d", debug, 1), ++ LL_OPTION("--debug", debug, 1), ++ LL_OPTION("allow_root", deny_others, 1), ++ FUSE_OPT_END ++}; ++ ++void fuse_lowlevel_version(void) ++{ ++ printf("using FUSE kernel interface version %i.%i\n", ++ FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); ++ fuse_mount_version(); ++} ++ ++void fuse_lowlevel_help(void) ++{ ++ /* These are not all options, but the ones that are ++ potentially of interest to an end-user */ ++ printf( ++" -o allow_other allow access by all users\n" ++" -o allow_root allow access by root\n" ++" -o auto_unmount auto unmount on process termination\n"); ++} ++ ++void fuse_session_destroy(struct fuse_session *se) ++{ ++ struct fuse_ll_pipe *llp; ++ ++ if (se->got_init && !se->got_destroy) { ++ if (se->op.destroy) ++ se->op.destroy(se->userdata); ++ } ++ llp = pthread_getspecific(se->pipe_key); ++ if (llp != NULL) ++ fuse_ll_pipe_free(llp); ++ pthread_key_delete(se->pipe_key); ++ pthread_mutex_destroy(&se->lock); ++ free(se->cuse_data); ++ if (se->fd != -1) ++ close(se->fd); ++ destroy_mount_opts(se->mo); ++ free(se); ++} ++ ++ ++static void fuse_ll_pipe_destructor(void *data) ++{ ++ struct fuse_ll_pipe *llp = data; ++ fuse_ll_pipe_free(llp); ++} ++ ++int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf) ++{ ++ return fuse_session_receive_buf_int(se, buf, NULL); ++} ++ ++int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, ++ struct fuse_chan *ch) ++{ ++ int err; ++ ssize_t res; ++#ifdef HAVE_SPLICE ++ size_t bufsize = se->bufsize; ++ struct fuse_ll_pipe *llp; ++ struct fuse_buf tmpbuf; ++ ++ if (se->conn.proto_minor < 14 || !(se->conn.want & FUSE_CAP_SPLICE_READ)) ++ goto fallback; ++ ++ llp = fuse_ll_get_pipe(se); ++ if (llp == NULL) ++ goto fallback; ++ ++ if (llp->size < bufsize) { ++ if 
(llp->can_grow) { ++ res = fcntl(llp->pipe[0], F_SETPIPE_SZ, bufsize); ++ if (res == -1) { ++ llp->can_grow = 0; ++ res = grow_pipe_to_max(llp->pipe[0]); ++ if (res > 0) ++ llp->size = res; ++ goto fallback; ++ } ++ llp->size = res; ++ } ++ if (llp->size < bufsize) ++ goto fallback; ++ } ++ ++ res = splice(ch ? ch->fd : se->fd, ++ NULL, llp->pipe[1], NULL, bufsize, 0); ++ err = errno; ++ ++ if (fuse_session_exited(se)) ++ return 0; ++ ++ if (res == -1) { ++ if (err == ENODEV) { ++ /* Filesystem was unmounted, or connection was aborted ++ via /sys/fs/fuse/connections */ ++ fuse_session_exit(se); ++ return 0; ++ } ++ if (err != EINTR && err != EAGAIN) ++ perror("fuse: splice from device"); ++ return -err; ++ } ++ ++ if (res < sizeof(struct fuse_in_header)) { ++ fuse_log(FUSE_LOG_ERR, "short splice from fuse device\n"); ++ return -EIO; ++ } ++ ++ tmpbuf = (struct fuse_buf) { ++ .size = res, ++ .flags = FUSE_BUF_IS_FD, ++ .fd = llp->pipe[0], ++ }; ++ ++ /* ++ * Don't bother with zero copy for small requests. ++ * fuse_loop_mt() needs to check for FORGET so this more than ++ * just an optimization. 
++ */ ++ if (res < sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in) + pagesize) { ++ struct fuse_bufvec src = { .buf[0] = tmpbuf, .count = 1 }; ++ struct fuse_bufvec dst = { .count = 1 }; ++ ++ if (!buf->mem) { ++ buf->mem = malloc(se->bufsize); ++ if (!buf->mem) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: failed to allocate read buffer\n"); ++ return -ENOMEM; ++ } ++ } ++ buf->size = se->bufsize; ++ buf->flags = 0; ++ dst.buf[0] = *buf; ++ ++ res = fuse_buf_copy(&dst, &src, 0); ++ if (res < 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", ++ strerror(-res)); ++ fuse_ll_clear_pipe(se); ++ return res; ++ } ++ if (res < tmpbuf.size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); ++ fuse_ll_clear_pipe(se); ++ return -EIO; ++ } ++ assert(res == tmpbuf.size); ++ ++ } else { ++ /* Don't overwrite buf->mem, as that would cause a leak */ ++ buf->fd = tmpbuf.fd; ++ buf->flags = tmpbuf.flags; ++ } ++ buf->size = tmpbuf.size; ++ ++ return res; ++ ++fallback: ++#endif ++ if (!buf->mem) { ++ buf->mem = malloc(se->bufsize); ++ if (!buf->mem) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: failed to allocate read buffer\n"); ++ return -ENOMEM; ++ } ++ } ++ ++restart: ++ res = read(ch ? 
ch->fd : se->fd, buf->mem, se->bufsize); ++ err = errno; ++ ++ if (fuse_session_exited(se)) ++ return 0; ++ if (res == -1) { ++ /* ENOENT means the operation was interrupted, it's safe ++ to restart */ ++ if (err == ENOENT) ++ goto restart; ++ ++ if (err == ENODEV) { ++ /* Filesystem was unmounted, or connection was aborted ++ via /sys/fs/fuse/connections */ ++ fuse_session_exit(se); ++ return 0; ++ } ++ /* Errors occurring during normal operation: EINTR (read ++ interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem ++ umounted) */ ++ if (err != EINTR && err != EAGAIN) ++ perror("fuse: reading device"); ++ return -err; ++ } ++ if ((size_t) res < sizeof(struct fuse_in_header)) { ++ fuse_log(FUSE_LOG_ERR, "short read on fuse device\n"); ++ return -EIO; ++ } ++ ++ buf->size = res; ++ ++ return res; ++} ++ ++struct fuse_session *fuse_session_new(struct fuse_args *args, ++ const struct fuse_lowlevel_ops *op, ++ size_t op_size, void *userdata) ++{ ++ int err; ++ struct fuse_session *se; ++ struct mount_opts *mo; ++ ++ if (sizeof(struct fuse_lowlevel_ops) < op_size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); ++ op_size = sizeof(struct fuse_lowlevel_ops); ++ } ++ ++ if (args->argc == 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: empty argv passed to fuse_session_new().\n"); ++ return NULL; ++ } ++ ++ se = (struct fuse_session *) calloc(1, sizeof(struct fuse_session)); ++ if (se == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n"); ++ goto out1; ++ } ++ se->fd = -1; ++ se->conn.max_write = UINT_MAX; ++ se->conn.max_readahead = UINT_MAX; ++ ++ /* Parse options */ ++ if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) ++ goto out2; ++ if(se->deny_others) { ++ /* Allowing access only by root is done by instructing ++ * kernel to allow access by everyone, and then restricting ++ * access to root and mountpoint owner in libfuse. 
++ */ ++ // We may be adding the option a second time, but ++ // that doesn't hurt. ++ if(fuse_opt_add_arg(args, "-oallow_other") == -1) ++ goto out2; ++ } ++ mo = parse_mount_opts(args); ++ if (mo == NULL) ++ goto out3; ++ ++ if(args->argc == 1 && ++ args->argv[0][0] == '-') { ++ fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " ++ "will be ignored\n"); ++ } else if (args->argc != 1) { ++ int i; ++ fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); ++ for(i = 1; i < args->argc-1; i++) ++ fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); ++ fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); ++ goto out4; ++ } ++ ++ if (se->debug) ++ fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION); ++ ++ se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + ++ FUSE_BUFFER_HEADER_SIZE; ++ ++ list_init_req(&se->list); ++ list_init_req(&se->interrupts); ++ list_init_nreq(&se->notify_list); ++ se->notify_ctr = 1; ++ fuse_mutex_init(&se->lock); ++ ++ err = pthread_key_create(&se->pipe_key, fuse_ll_pipe_destructor); ++ if (err) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to create thread specific key: %s\n", ++ strerror(err)); ++ goto out5; ++ } ++ ++ memcpy(&se->op, op, op_size); ++ se->owner = getuid(); ++ se->userdata = userdata; ++ ++ se->mo = mo; ++ return se; ++ ++out5: ++ pthread_mutex_destroy(&se->lock); ++out4: ++ fuse_opt_free_args(args); ++out3: ++ free(mo); ++out2: ++ free(se); ++out1: ++ return NULL; ++} ++ ++int fuse_session_mount(struct fuse_session *se, const char *mountpoint) ++{ ++ int fd; ++ ++ /* ++ * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos ++ * would ensue. ++ */ ++ do { ++ fd = open("/dev/null", O_RDWR); ++ if (fd > 2) ++ close(fd); ++ } while (fd >= 0 && fd <= 2); ++ ++ /* ++ * To allow FUSE daemons to run without privileges, the caller may open ++ * /dev/fuse before launching the file system and pass on the file ++ * descriptor by specifying /dev/fd/N as the mount point. 
Note that the ++ * parent process takes care of performing the mount in this case. ++ */ ++ fd = fuse_mnt_parse_fuse_fd(mountpoint); ++ if (fd != -1) { ++ if (fcntl(fd, F_GETFD) == -1) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: Invalid file descriptor /dev/fd/%u\n", ++ fd); ++ return -1; ++ } ++ se->fd = fd; ++ return 0; ++ } ++ ++ /* Open channel */ ++ fd = fuse_kern_mount(mountpoint, se->mo); ++ if (fd == -1) ++ return -1; ++ se->fd = fd; ++ ++ /* Save mountpoint */ ++ se->mountpoint = strdup(mountpoint); ++ if (se->mountpoint == NULL) ++ goto error_out; ++ ++ return 0; ++ ++error_out: ++ fuse_kern_unmount(mountpoint, fd); ++ return -1; ++} ++ ++int fuse_session_fd(struct fuse_session *se) ++{ ++ return se->fd; ++} ++ ++void fuse_session_unmount(struct fuse_session *se) ++{ ++ if (se->mountpoint != NULL) { ++ fuse_kern_unmount(se->mountpoint, se->fd); ++ free(se->mountpoint); ++ se->mountpoint = NULL; ++ } ++} ++ ++#ifdef linux ++int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) ++{ ++ char *buf; ++ size_t bufsize = 1024; ++ char path[128]; ++ int ret; ++ int fd; ++ unsigned long pid = req->ctx.pid; ++ char *s; ++ ++ sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); ++ ++retry: ++ buf = malloc(bufsize); ++ if (buf == NULL) ++ return -ENOMEM; ++ ++ ret = -EIO; ++ fd = open(path, O_RDONLY); ++ if (fd == -1) ++ goto out_free; ++ ++ ret = read(fd, buf, bufsize); ++ close(fd); ++ if (ret < 0) { ++ ret = -EIO; ++ goto out_free; ++ } ++ ++ if ((size_t)ret == bufsize) { ++ free(buf); ++ bufsize *= 4; ++ goto retry; ++ } ++ ++ ret = -EIO; ++ s = strstr(buf, "\nGroups:"); ++ if (s == NULL) ++ goto out_free; ++ ++ s += 8; ++ ret = 0; ++ while (1) { ++ char *end; ++ unsigned long val = strtoul(s, &end, 0); ++ if (end == s) ++ break; ++ ++ s = end; ++ if (ret < size) ++ list[ret] = val; ++ ret++; ++ } ++ ++out_free: ++ free(buf); ++ return ret; ++} ++#else /* linux */ ++/* ++ * This is currently not implemented on other than Linux... 
++ */ ++int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) ++{ ++ (void) req; (void) size; (void) list; ++ return -ENOSYS; ++} ++#endif ++ ++void fuse_session_exit(struct fuse_session *se) ++{ ++ se->exited = 1; ++} ++ ++void fuse_session_reset(struct fuse_session *se) ++{ ++ se->exited = 0; ++ se->error = 0; ++} ++ ++int fuse_session_exited(struct fuse_session *se) ++{ ++ return se->exited; ++} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-main-virtio-loop.patch b/SOURCES/kvm-virtiofsd-Add-main-virtio-loop.patch new file mode 100644 index 0000000..c0ba96a --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-main-virtio-loop.patch @@ -0,0 +1,105 @@ +From 6f413d8b76ff38e5bc01f36515ca71d7fd6e6144 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:58 +0100 +Subject: [PATCH 027/116] virtiofsd: Add main virtio loop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-24-dgilbert@redhat.com> +Patchwork-id: 93475 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 023/112] virtiofsd: Add main virtio loop +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Processes incoming requests on the vhost-user fd. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 204d8ae57b3c57098642c79b3c03d42495149c09) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 42 +++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 39 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 2ae3c76..1928a20 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -11,12 +11,14 @@ + * See the file COPYING.LIB + */ + ++#include "fuse_virtio.h" + #include "fuse_i.h" + #include "standard-headers/linux/fuse.h" + #include "fuse_misc.h" + #include "fuse_opt.h" +-#include "fuse_virtio.h" + ++#include ++#include + #include + #include + #include +@@ -80,15 +82,49 @@ static const VuDevIface fv_iface = { + .queue_is_processed_in_order = fv_queue_order, + }; + ++/* ++ * Main loop; this mostly deals with events on the vhost-user ++ * socket itself, and not actual fuse data. ++ */ + int virtio_loop(struct fuse_session *se) + { + fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__); + +- while (1) { +- /* TODO: Add stuffing */ ++ while (!fuse_session_exited(se)) { ++ struct pollfd pf[1]; ++ pf[0].fd = se->vu_socketfd; ++ pf[0].events = POLLIN; ++ pf[0].revents = 0; ++ ++ fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for VU event\n", __func__); ++ int poll_res = ppoll(pf, 1, NULL, NULL); ++ ++ if (poll_res == -1) { ++ if (errno == EINTR) { ++ fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n", ++ __func__); ++ continue; ++ } ++ fuse_log(FUSE_LOG_ERR, "virtio_loop ppoll: %m\n"); ++ break; ++ } ++ assert(poll_res == 1); ++ if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { ++ fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", __func__, ++ pf[0].revents); ++ break; ++ } ++ assert(pf[0].revents & POLLIN); ++ fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__); ++ if (!vu_dispatch(&se->virtio_dev->dev)) { ++ fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__); ++ break; ++ } + } + + 
fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); ++ ++ return 0; + } + + int virtio_session_mount(struct fuse_session *se) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-options-for-virtio.patch b/SOURCES/kvm-virtiofsd-Add-options-for-virtio.patch new file mode 100644 index 0000000..8ac7fa7 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-options-for-virtio.patch @@ -0,0 +1,103 @@ +From 9c1bbe327cf8f88ffc78eed0fce8cdd6f3f006ef Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:54 +0100 +Subject: [PATCH 023/116] virtiofsd: Add options for virtio +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-20-dgilbert@redhat.com> +Patchwork-id: 93473 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 019/112] virtiofsd: Add options for virtio +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Add options to specify parameters for virtio-fs paths, i.e. + + ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 205de006aab8dcbe546a7e3a51d295c2d05e654b) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 1 + + tools/virtiofsd/fuse_lowlevel.c | 11 ++++++++--- + tools/virtiofsd/helper.c | 14 +++++++------- + 3 files changed, 16 insertions(+), 10 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index bae0699..26b1a7d 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -63,6 +63,7 @@ struct fuse_session { + struct fuse_notify_req notify_list; + size_t bufsize; + int error; ++ char *vu_socket_path; + }; + + struct fuse_chan { +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 8552cfb..17e8718 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2115,8 +2115,11 @@ reply_err: + } + + static const struct fuse_opt fuse_ll_opts[] = { +- LL_OPTION("debug", debug, 1), LL_OPTION("-d", debug, 1), +- LL_OPTION("--debug", debug, 1), LL_OPTION("allow_root", deny_others, 1), ++ LL_OPTION("debug", debug, 1), ++ LL_OPTION("-d", debug, 1), ++ LL_OPTION("--debug", debug, 1), ++ LL_OPTION("allow_root", deny_others, 1), ++ LL_OPTION("--socket-path=%s", vu_socket_path, 0), + FUSE_OPT_END + }; + +@@ -2132,7 +2135,9 @@ void fuse_lowlevel_help(void) + * These are not all options, but the ones that are + * potentially of interest to an end-user + */ +- printf(" -o allow_root allow access by root\n"); ++ printf( ++ " -o allow_root allow access by root\n" ++ " --socket-path=PATH path for the vhost-user socket\n"); + } + + void fuse_session_destroy(struct fuse_session *se) +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 9333691..676032e 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -127,13 +127,13 @@ static const struct fuse_opt conn_info_opt_spec[] = { + + void fuse_cmdline_help(void) + { +- printf( +- " -h --help print help\n" +- " -V --version print version\n" 
+- " -d -o debug enable debug output (implies -f)\n" +- " -f foreground operation\n" +- " -o max_idle_threads the maximum number of idle worker threads\n" +- " allowed (default: 10)\n"); ++ printf(" -h --help print help\n" ++ " -V --version print version\n" ++ " -d -o debug enable debug output (implies -f)\n" ++ " -f foreground operation\n" ++ " -o max_idle_threads the maximum number of idle worker " ++ "threads\n" ++ " allowed (default: 10)\n"); + } + + static int fuse_helper_opt_proc(void *data, const char *arg, int key, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-passthrough_ll.patch b/SOURCES/kvm-virtiofsd-Add-passthrough_ll.patch new file mode 100644 index 0000000..2510551 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-passthrough_ll.patch @@ -0,0 +1,1387 @@ +From 18ef831cac81a6bd2336c73dda357d9d69f8fd25 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:43 +0100 +Subject: [PATCH 012/116] virtiofsd: Add passthrough_ll +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-9-dgilbert@redhat.com> +Patchwork-id: 93462 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 008/112] virtiofsd: Add passthrough_ll +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +passthrough_ll is one of the examples in the upstream fuse project +and is the main part of our daemon here. It passes through requests +from fuse to the underlying filesystem, using syscalls as directly +as possible. + +>From libfuse fuse-3.8.0 + +Signed-off-by: Dr. David Alan Gilbert + Fixed up 'GPL' to 'GPLv2' as per Dan's comments and consistent + with the 'LICENSE' file in libfuse; patch sent to libfuse to fix + it upstream. +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 7c6b66027241f41720240fc6ee1021cdbd975b2e) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 1338 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 1338 insertions(+) + create mode 100644 tools/virtiofsd/passthrough_ll.c + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +new file mode 100644 +index 0000000..e1a6056 +--- /dev/null ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -0,0 +1,1338 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU GPLv2. ++ See the file COPYING. ++*/ ++ ++/** @file ++ * ++ * This file system mirrors the existing file system hierarchy of the ++ * system, starting at the root file system. This is implemented by ++ * just "passing through" all requests to the corresponding user-space ++ * libc functions. In contrast to passthrough.c and passthrough_fh.c, ++ * this implementation uses the low-level API. Its performance should ++ * be the least bad among the three, but many operations are not ++ * implemented. In particular, it is not possible to remove files (or ++ * directories) because the code necessary to defer actual removal ++ * until the file is not opened anymore would make the example much ++ * more complicated. ++ * ++ * When writeback caching is enabled (-o writeback mount option), it ++ * is only possible to write to files for which the mounting user has ++ * read permissions. This is because the writeback cache requires the ++ * kernel to be able to issue read requests for all files (which the ++ * passthrough filesystem cannot satisfy if it can't read the file in ++ * the underlying filesystem). 
++ * ++ * Compile with: ++ * ++ * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o passthrough_ll ++ * ++ * ## Source code ## ++ * \include passthrough_ll.c ++ */ ++ ++#define _GNU_SOURCE ++#define FUSE_USE_VERSION 31 ++ ++#include "config.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "passthrough_helpers.h" ++ ++/* We are re-using pointers to our `struct lo_inode` and `struct ++ lo_dirp` elements as inodes. This means that we must be able to ++ store uintptr_t values in a fuse_ino_t variable. The following ++ incantation checks this condition at compile time. */ ++#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus ++_Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), ++ "fuse_ino_t too small to hold uintptr_t values!"); ++#else ++struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct \ ++ { unsigned _uintptr_to_must_hold_fuse_ino_t: ++ ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 
1 : -1); }; ++#endif ++ ++struct lo_inode { ++ struct lo_inode *next; /* protected by lo->mutex */ ++ struct lo_inode *prev; /* protected by lo->mutex */ ++ int fd; ++ bool is_symlink; ++ ino_t ino; ++ dev_t dev; ++ uint64_t refcount; /* protected by lo->mutex */ ++}; ++ ++enum { ++ CACHE_NEVER, ++ CACHE_NORMAL, ++ CACHE_ALWAYS, ++}; ++ ++struct lo_data { ++ pthread_mutex_t mutex; ++ int debug; ++ int writeback; ++ int flock; ++ int xattr; ++ const char *source; ++ double timeout; ++ int cache; ++ int timeout_set; ++ struct lo_inode root; /* protected by lo->mutex */ ++}; ++ ++static const struct fuse_opt lo_opts[] = { ++ { "writeback", ++ offsetof(struct lo_data, writeback), 1 }, ++ { "no_writeback", ++ offsetof(struct lo_data, writeback), 0 }, ++ { "source=%s", ++ offsetof(struct lo_data, source), 0 }, ++ { "flock", ++ offsetof(struct lo_data, flock), 1 }, ++ { "no_flock", ++ offsetof(struct lo_data, flock), 0 }, ++ { "xattr", ++ offsetof(struct lo_data, xattr), 1 }, ++ { "no_xattr", ++ offsetof(struct lo_data, xattr), 0 }, ++ { "timeout=%lf", ++ offsetof(struct lo_data, timeout), 0 }, ++ { "timeout=", ++ offsetof(struct lo_data, timeout_set), 1 }, ++ { "cache=never", ++ offsetof(struct lo_data, cache), CACHE_NEVER }, ++ { "cache=auto", ++ offsetof(struct lo_data, cache), CACHE_NORMAL }, ++ { "cache=always", ++ offsetof(struct lo_data, cache), CACHE_ALWAYS }, ++ ++ FUSE_OPT_END ++}; ++ ++static struct lo_data *lo_data(fuse_req_t req) ++{ ++ return (struct lo_data *) fuse_req_userdata(req); ++} ++ ++static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) ++{ ++ if (ino == FUSE_ROOT_ID) ++ return &lo_data(req)->root; ++ else ++ return (struct lo_inode *) (uintptr_t) ino; ++} ++ ++static int lo_fd(fuse_req_t req, fuse_ino_t ino) ++{ ++ return lo_inode(req, ino)->fd; ++} ++ ++static bool lo_debug(fuse_req_t req) ++{ ++ return lo_data(req)->debug != 0; ++} ++ ++static void lo_init(void *userdata, ++ struct fuse_conn_info *conn) ++{ ++ struct lo_data *lo = 
(struct lo_data*) userdata; ++ ++ if(conn->capable & FUSE_CAP_EXPORT_SUPPORT) ++ conn->want |= FUSE_CAP_EXPORT_SUPPORT; ++ ++ if (lo->writeback && ++ conn->capable & FUSE_CAP_WRITEBACK_CACHE) { ++ if (lo->debug) ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); ++ conn->want |= FUSE_CAP_WRITEBACK_CACHE; ++ } ++ if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { ++ if (lo->debug) ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); ++ conn->want |= FUSE_CAP_FLOCK_LOCKS; ++ } ++} ++ ++static void lo_getattr(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi) ++{ ++ int res; ++ struct stat buf; ++ struct lo_data *lo = lo_data(req); ++ ++ (void) fi; ++ ++ res = fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) ++ return (void) fuse_reply_err(req, errno); ++ ++ fuse_reply_attr(req, &buf, lo->timeout); ++} ++ ++static int utimensat_empty_nofollow(struct lo_inode *inode, ++ const struct timespec *tv) ++{ ++ int res; ++ char procname[64]; ++ ++ if (inode->is_symlink) { ++ res = utimensat(inode->fd, "", tv, ++ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1 && errno == EINVAL) { ++ /* Sorry, no race free way to set times on symlink. */ ++ errno = EPERM; ++ } ++ return res; ++ } ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ return utimensat(AT_FDCWD, procname, tv, 0); ++} ++ ++static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, ++ int valid, struct fuse_file_info *fi) ++{ ++ int saverr; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ int ifd = inode->fd; ++ int res; ++ ++ if (valid & FUSE_SET_ATTR_MODE) { ++ if (fi) { ++ res = fchmod(fi->fh, attr->st_mode); ++ } else { ++ sprintf(procname, "/proc/self/fd/%i", ifd); ++ res = chmod(procname, attr->st_mode); ++ } ++ if (res == -1) ++ goto out_err; ++ } ++ if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { ++ uid_t uid = (valid & FUSE_SET_ATTR_UID) ? 
++ attr->st_uid : (uid_t) -1; ++ gid_t gid = (valid & FUSE_SET_ATTR_GID) ? ++ attr->st_gid : (gid_t) -1; ++ ++ res = fchownat(ifd, "", uid, gid, ++ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) ++ goto out_err; ++ } ++ if (valid & FUSE_SET_ATTR_SIZE) { ++ if (fi) { ++ res = ftruncate(fi->fh, attr->st_size); ++ } else { ++ sprintf(procname, "/proc/self/fd/%i", ifd); ++ res = truncate(procname, attr->st_size); ++ } ++ if (res == -1) ++ goto out_err; ++ } ++ if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { ++ struct timespec tv[2]; ++ ++ tv[0].tv_sec = 0; ++ tv[1].tv_sec = 0; ++ tv[0].tv_nsec = UTIME_OMIT; ++ tv[1].tv_nsec = UTIME_OMIT; ++ ++ if (valid & FUSE_SET_ATTR_ATIME_NOW) ++ tv[0].tv_nsec = UTIME_NOW; ++ else if (valid & FUSE_SET_ATTR_ATIME) ++ tv[0] = attr->st_atim; ++ ++ if (valid & FUSE_SET_ATTR_MTIME_NOW) ++ tv[1].tv_nsec = UTIME_NOW; ++ else if (valid & FUSE_SET_ATTR_MTIME) ++ tv[1] = attr->st_mtim; ++ ++ if (fi) ++ res = futimens(fi->fh, tv); ++ else ++ res = utimensat_empty_nofollow(inode, tv); ++ if (res == -1) ++ goto out_err; ++ } ++ ++ return lo_getattr(req, ino, fi); ++ ++out_err: ++ saverr = errno; ++ fuse_reply_err(req, saverr); ++} ++ ++static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) ++{ ++ struct lo_inode *p; ++ struct lo_inode *ret = NULL; ++ ++ pthread_mutex_lock(&lo->mutex); ++ for (p = lo->root.next; p != &lo->root; p = p->next) { ++ if (p->ino == st->st_ino && p->dev == st->st_dev) { ++ assert(p->refcount > 0); ++ ret = p; ++ ret->refcount++; ++ break; ++ } ++ } ++ pthread_mutex_unlock(&lo->mutex); ++ return ret; ++} ++ ++static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, ++ struct fuse_entry_param *e) ++{ ++ int newfd; ++ int res; ++ int saverr; ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode; ++ ++ memset(e, 0, sizeof(*e)); ++ e->attr_timeout = lo->timeout; ++ e->entry_timeout = lo->timeout; ++ ++ newfd = openat(lo_fd(req, parent), name, O_PATH | 
O_NOFOLLOW); ++ if (newfd == -1) ++ goto out_err; ++ ++ res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) ++ goto out_err; ++ ++ inode = lo_find(lo_data(req), &e->attr); ++ if (inode) { ++ close(newfd); ++ newfd = -1; ++ } else { ++ struct lo_inode *prev, *next; ++ ++ saverr = ENOMEM; ++ inode = calloc(1, sizeof(struct lo_inode)); ++ if (!inode) ++ goto out_err; ++ ++ inode->is_symlink = S_ISLNK(e->attr.st_mode); ++ inode->refcount = 1; ++ inode->fd = newfd; ++ inode->ino = e->attr.st_ino; ++ inode->dev = e->attr.st_dev; ++ ++ pthread_mutex_lock(&lo->mutex); ++ prev = &lo->root; ++ next = prev->next; ++ next->prev = inode; ++ inode->next = next; ++ inode->prev = prev; ++ prev->next = inode; ++ pthread_mutex_unlock(&lo->mutex); ++ } ++ e->ino = (uintptr_t) inode; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long) parent, name, (unsigned long long) e->ino); ++ ++ return 0; ++ ++out_err: ++ saverr = errno; ++ if (newfd != -1) ++ close(newfd); ++ return saverr; ++} ++ ++static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) ++{ ++ struct fuse_entry_param e; ++ int err; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", ++ parent, name); ++ ++ err = lo_do_lookup(req, parent, name, &e); ++ if (err) ++ fuse_reply_err(req, err); ++ else ++ fuse_reply_entry(req, &e); ++} ++ ++static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, ++ const char *name, mode_t mode, dev_t rdev, ++ const char *link) ++{ ++ int res; ++ int saverr; ++ struct lo_inode *dir = lo_inode(req, parent); ++ struct fuse_entry_param e; ++ ++ saverr = ENOMEM; ++ ++ res = mknod_wrapper(dir->fd, name, link, mode, rdev); ++ ++ saverr = errno; ++ if (res == -1) ++ goto out; ++ ++ saverr = lo_do_lookup(req, parent, name, &e); ++ if (saverr) ++ goto out; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long) 
parent, name, (unsigned long long) e.ino); ++ ++ fuse_reply_entry(req, &e); ++ return; ++ ++out: ++ fuse_reply_err(req, saverr); ++} ++ ++static void lo_mknod(fuse_req_t req, fuse_ino_t parent, ++ const char *name, mode_t mode, dev_t rdev) ++{ ++ lo_mknod_symlink(req, parent, name, mode, rdev, NULL); ++} ++ ++static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode) ++{ ++ lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); ++} ++ ++static void lo_symlink(fuse_req_t req, const char *link, ++ fuse_ino_t parent, const char *name) ++{ ++ lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); ++} ++ ++static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, ++ const char *name) ++{ ++ int res; ++ char procname[64]; ++ ++ if (inode->is_symlink) { ++ res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); ++ if (res == -1 && (errno == ENOENT || errno == EINVAL)) { ++ /* Sorry, no race free way to hard-link a symlink. */ ++ errno = EPERM; ++ } ++ return res; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); ++} ++ ++static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, ++ const char *name) ++{ ++ int res; ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); ++ struct fuse_entry_param e; ++ int saverr; ++ ++ memset(&e, 0, sizeof(struct fuse_entry_param)); ++ e.attr_timeout = lo->timeout; ++ e.entry_timeout = lo->timeout; ++ ++ res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); ++ if (res == -1) ++ goto out_err; ++ ++ res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) ++ goto out_err; ++ ++ pthread_mutex_lock(&lo->mutex); ++ inode->refcount++; ++ pthread_mutex_unlock(&lo->mutex); ++ e.ino = (uintptr_t) inode; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long) parent, name, ++ (unsigned long 
long) e.ino); ++ ++ fuse_reply_entry(req, &e); ++ return; ++ ++out_err: ++ saverr = errno; ++ fuse_reply_err(req, saverr); ++} ++ ++static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) ++{ ++ int res; ++ ++ res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); ++ ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, ++ fuse_ino_t newparent, const char *newname, ++ unsigned int flags) ++{ ++ int res; ++ ++ if (flags) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ res = renameat(lo_fd(req, parent), name, ++ lo_fd(req, newparent), newname); ++ ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) ++{ ++ int res; ++ ++ res = unlinkat(lo_fd(req, parent), name, 0); ++ ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) ++{ ++ if (!inode) ++ return; ++ ++ pthread_mutex_lock(&lo->mutex); ++ assert(inode->refcount >= n); ++ inode->refcount -= n; ++ if (!inode->refcount) { ++ struct lo_inode *prev, *next; ++ ++ prev = inode->prev; ++ next = inode->next; ++ next->prev = prev; ++ prev->next = next; ++ ++ pthread_mutex_unlock(&lo->mutex); ++ close(inode->fd); ++ free(inode); ++ ++ } else { ++ pthread_mutex_unlock(&lo->mutex); ++ } ++} ++ ++static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) ++{ ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", ++ (unsigned long long) ino, ++ (unsigned long long) inode->refcount, ++ (unsigned long long) nlookup); ++ } ++ ++ unref_inode(lo, inode, nlookup); ++} ++ ++static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) ++{ ++ lo_forget_one(req, ino, nlookup); ++ fuse_reply_none(req); ++} ++ ++static void 
lo_forget_multi(fuse_req_t req, size_t count, ++ struct fuse_forget_data *forgets) ++{ ++ int i; ++ ++ for (i = 0; i < count; i++) ++ lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); ++ fuse_reply_none(req); ++} ++ ++static void lo_readlink(fuse_req_t req, fuse_ino_t ino) ++{ ++ char buf[PATH_MAX + 1]; ++ int res; ++ ++ res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); ++ if (res == -1) ++ return (void) fuse_reply_err(req, errno); ++ ++ if (res == sizeof(buf)) ++ return (void) fuse_reply_err(req, ENAMETOOLONG); ++ ++ buf[res] = '\0'; ++ ++ fuse_reply_readlink(req, buf); ++} ++ ++struct lo_dirp { ++ DIR *dp; ++ struct dirent *entry; ++ off_t offset; ++}; ++ ++static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) ++{ ++ return (struct lo_dirp *) (uintptr_t) fi->fh; ++} ++ ++static void lo_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ int error = ENOMEM; ++ struct lo_data *lo = lo_data(req); ++ struct lo_dirp *d; ++ int fd; ++ ++ d = calloc(1, sizeof(struct lo_dirp)); ++ if (d == NULL) ++ goto out_err; ++ ++ fd = openat(lo_fd(req, ino), ".", O_RDONLY); ++ if (fd == -1) ++ goto out_errno; ++ ++ d->dp = fdopendir(fd); ++ if (d->dp == NULL) ++ goto out_errno; ++ ++ d->offset = 0; ++ d->entry = NULL; ++ ++ fi->fh = (uintptr_t) d; ++ if (lo->cache == CACHE_ALWAYS) ++ fi->keep_cache = 1; ++ fuse_reply_open(req, fi); ++ return; ++ ++out_errno: ++ error = errno; ++out_err: ++ if (d) { ++ if (fd != -1) ++ close(fd); ++ free(d); ++ } ++ fuse_reply_err(req, error); ++} ++ ++static int is_dot_or_dotdot(const char *name) ++{ ++ return name[0] == '.' && (name[1] == '\0' || ++ (name[1] == '.' 
&& name[2] == '\0')); ++} ++ ++static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, ++ off_t offset, struct fuse_file_info *fi, int plus) ++{ ++ struct lo_dirp *d = lo_dirp(fi); ++ char *buf; ++ char *p; ++ size_t rem = size; ++ int err; ++ ++ (void) ino; ++ ++ buf = calloc(1, size); ++ if (!buf) { ++ err = ENOMEM; ++ goto error; ++ } ++ p = buf; ++ ++ if (offset != d->offset) { ++ seekdir(d->dp, offset); ++ d->entry = NULL; ++ d->offset = offset; ++ } ++ while (1) { ++ size_t entsize; ++ off_t nextoff; ++ const char *name; ++ ++ if (!d->entry) { ++ errno = 0; ++ d->entry = readdir(d->dp); ++ if (!d->entry) { ++ if (errno) { // Error ++ err = errno; ++ goto error; ++ } else { // End of stream ++ break; ++ } ++ } ++ } ++ nextoff = d->entry->d_off; ++ name = d->entry->d_name; ++ fuse_ino_t entry_ino = 0; ++ if (plus) { ++ struct fuse_entry_param e; ++ if (is_dot_or_dotdot(name)) { ++ e = (struct fuse_entry_param) { ++ .attr.st_ino = d->entry->d_ino, ++ .attr.st_mode = d->entry->d_type << 12, ++ }; ++ } else { ++ err = lo_do_lookup(req, ino, name, &e); ++ if (err) ++ goto error; ++ entry_ino = e.ino; ++ } ++ ++ entsize = fuse_add_direntry_plus(req, p, rem, name, ++ &e, nextoff); ++ } else { ++ struct stat st = { ++ .st_ino = d->entry->d_ino, ++ .st_mode = d->entry->d_type << 12, ++ }; ++ entsize = fuse_add_direntry(req, p, rem, name, ++ &st, nextoff); ++ } ++ if (entsize > rem) { ++ if (entry_ino != 0) ++ lo_forget_one(req, entry_ino, 1); ++ break; ++ } ++ ++ p += entsize; ++ rem -= entsize; ++ ++ d->entry = NULL; ++ d->offset = nextoff; ++ } ++ ++ err = 0; ++error: ++ // If there's an error, we can only signal it if we haven't stored ++ // any entries yet - otherwise we'd end up with wrong lookup ++ // counts for the entries that are already in the buffer. So we ++ // return what we've collected until that point. 
++ if (err && rem == size) ++ fuse_reply_err(req, err); ++ else ++ fuse_reply_buf(req, buf, size - rem); ++ free(buf); ++} ++ ++static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, ++ off_t offset, struct fuse_file_info *fi) ++{ ++ lo_do_readdir(req, ino, size, offset, fi, 0); ++} ++ ++static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, ++ off_t offset, struct fuse_file_info *fi) ++{ ++ lo_do_readdir(req, ino, size, offset, fi, 1); ++} ++ ++static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ struct lo_dirp *d = lo_dirp(fi); ++ (void) ino; ++ closedir(d->dp); ++ free(d); ++ fuse_reply_err(req, 0); ++} ++ ++static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, struct fuse_file_info *fi) ++{ ++ int fd; ++ struct lo_data *lo = lo_data(req); ++ struct fuse_entry_param e; ++ int err; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", ++ parent, name); ++ ++ fd = openat(lo_fd(req, parent), name, ++ (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); ++ if (fd == -1) ++ return (void) fuse_reply_err(req, errno); ++ ++ fi->fh = fd; ++ if (lo->cache == CACHE_NEVER) ++ fi->direct_io = 1; ++ else if (lo->cache == CACHE_ALWAYS) ++ fi->keep_cache = 1; ++ ++ err = lo_do_lookup(req, parent, name, &e); ++ if (err) ++ fuse_reply_err(req, err); ++ else ++ fuse_reply_create(req, &e, fi); ++} ++ ++static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi) ++{ ++ int res; ++ int fd = dirfd(lo_dirp(fi)->dp); ++ (void) ino; ++ if (datasync) ++ res = fdatasync(fd); ++ else ++ res = fsync(fd); ++ fuse_reply_err(req, res == -1 ? 
errno : 0); ++} ++ ++static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ int fd; ++ char buf[64]; ++ struct lo_data *lo = lo_data(req); ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ++ ino, fi->flags); ++ ++ /* With writeback cache, kernel may send read requests even ++ when userspace opened write-only */ ++ if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { ++ fi->flags &= ~O_ACCMODE; ++ fi->flags |= O_RDWR; ++ } ++ ++ /* With writeback cache, O_APPEND is handled by the kernel. ++ This breaks atomicity (since the file may change in the ++ underlying filesystem, so that the kernel's idea of the ++ end of the file isn't accurate anymore). In this example, ++ we just accept that. A more rigorous filesystem may want ++ to return an error here */ ++ if (lo->writeback && (fi->flags & O_APPEND)) ++ fi->flags &= ~O_APPEND; ++ ++ sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); ++ fd = open(buf, fi->flags & ~O_NOFOLLOW); ++ if (fd == -1) ++ return (void) fuse_reply_err(req, errno); ++ ++ fi->fh = fd; ++ if (lo->cache == CACHE_NEVER) ++ fi->direct_io = 1; ++ else if (lo->cache == CACHE_ALWAYS) ++ fi->keep_cache = 1; ++ fuse_reply_open(req, fi); ++} ++ ++static void lo_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ (void) ino; ++ ++ close(fi->fh); ++ fuse_reply_err(req, 0); ++} ++ ++static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ int res; ++ (void) ino; ++ res = close(dup(fi->fh)); ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi) ++{ ++ int res; ++ (void) ino; ++ if (datasync) ++ res = fdatasync(fi->fh); ++ else ++ res = fsync(fi->fh); ++ fuse_reply_err(req, res == -1 ? 
errno : 0); ++} ++ ++static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, ++ off_t offset, struct fuse_file_info *fi) ++{ ++ struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_read(ino=%" PRIu64 ", size=%zd, " ++ "off=%lu)\n", ino, size, (unsigned long) offset); ++ ++ buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; ++ buf.buf[0].fd = fi->fh; ++ buf.buf[0].pos = offset; ++ ++ fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); ++} ++ ++static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_bufvec *in_buf, off_t off, ++ struct fuse_file_info *fi) ++{ ++ (void) ino; ++ ssize_t res; ++ struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); ++ ++ out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; ++ out_buf.buf[0].fd = fi->fh; ++ out_buf.buf[0].pos = off; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ++ ino, out_buf.buf[0].size, (unsigned long) off); ++ ++ res = fuse_buf_copy(&out_buf, in_buf, 0); ++ if(res < 0) ++ fuse_reply_err(req, -res); ++ else ++ fuse_reply_write(req, (size_t) res); ++} ++ ++static void lo_statfs(fuse_req_t req, fuse_ino_t ino) ++{ ++ int res; ++ struct statvfs stbuf; ++ ++ res = fstatvfs(lo_fd(req, ino), &stbuf); ++ if (res == -1) ++ fuse_reply_err(req, errno); ++ else ++ fuse_reply_statfs(req, &stbuf); ++} ++ ++static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, ++ off_t offset, off_t length, struct fuse_file_info *fi) ++{ ++ int err = EOPNOTSUPP; ++ (void) ino; ++ ++#ifdef HAVE_FALLOCATE ++ err = fallocate(fi->fh, mode, offset, length); ++ if (err < 0) ++ err = errno; ++ ++#elif defined(HAVE_POSIX_FALLOCATE) ++ if (mode) { ++ fuse_reply_err(req, EOPNOTSUPP); ++ return; ++ } ++ ++ err = posix_fallocate(fi->fh, offset, length); ++#endif ++ ++ fuse_reply_err(req, err); ++} ++ ++static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ int op) 
++{ ++ int res; ++ (void) ino; ++ ++ res = flock(fi->fh, op); ++ ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, ++ size_t size) ++{ ++ char *value = NULL; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) ++ goto out; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ++ ino, name, size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to getxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ if (size) { ++ value = malloc(size); ++ if (!value) ++ goto out_err; ++ ++ ret = getxattr(procname, name, value, size); ++ if (ret == -1) ++ goto out_err; ++ saverr = 0; ++ if (ret == 0) ++ goto out; ++ ++ fuse_reply_buf(req, value, ret); ++ } else { ++ ret = getxattr(procname, name, NULL, 0); ++ if (ret == -1) ++ goto out_err; ++ ++ fuse_reply_xattr(req, ret); ++ } ++out_free: ++ free(value); ++ return; ++ ++out_err: ++ saverr = errno; ++out: ++ fuse_reply_err(req, saverr); ++ goto out_free; ++} ++ ++static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) ++{ ++ char *value = NULL; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) ++ goto out; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ++ ino, size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to listxattr on symlink. 
*/ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ if (size) { ++ value = malloc(size); ++ if (!value) ++ goto out_err; ++ ++ ret = listxattr(procname, value, size); ++ if (ret == -1) ++ goto out_err; ++ saverr = 0; ++ if (ret == 0) ++ goto out; ++ ++ fuse_reply_buf(req, value, ret); ++ } else { ++ ret = listxattr(procname, NULL, 0); ++ if (ret == -1) ++ goto out_err; ++ ++ fuse_reply_xattr(req, ret); ++ } ++out_free: ++ free(value); ++ return; ++ ++out_err: ++ saverr = errno; ++out: ++ fuse_reply_err(req, saverr); ++ goto out_free; ++} ++ ++static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, ++ const char *value, size_t size, int flags) ++{ ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) ++ goto out; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", ++ ino, name, value, size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to setxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ ret = setxattr(procname, name, value, size, flags); ++ saverr = ret == -1 ? errno : 0; ++ ++out: ++ fuse_reply_err(req, saverr); ++} ++ ++static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) ++{ ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) ++ goto out; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ++ ino, name); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to setxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ ret = removexattr(procname, name); ++ saverr = ret == -1 ? 
errno : 0; ++ ++out: ++ fuse_reply_err(req, saverr); ++} ++ ++#ifdef HAVE_COPY_FILE_RANGE ++static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, ++ struct fuse_file_info *fi_in, ++ fuse_ino_t ino_out, off_t off_out, ++ struct fuse_file_info *fi_out, size_t len, ++ int flags) ++{ ++ ssize_t res; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " ++ "off=%lu, ino=%" PRIu64 "/fd=%lu, " ++ "off=%lu, size=%zd, flags=0x%x)\n", ++ ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, ++ len, flags); ++ ++ res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, ++ flags); ++ if (res < 0) ++ fuse_reply_err(req, -errno); ++ else ++ fuse_reply_write(req, res); ++} ++#endif ++ ++static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, ++ struct fuse_file_info *fi) ++{ ++ off_t res; ++ ++ (void)ino; ++ res = lseek(fi->fh, off, whence); ++ if (res != -1) ++ fuse_reply_lseek(req, res); ++ else ++ fuse_reply_err(req, errno); ++} ++ ++static struct fuse_lowlevel_ops lo_oper = { ++ .init = lo_init, ++ .lookup = lo_lookup, ++ .mkdir = lo_mkdir, ++ .mknod = lo_mknod, ++ .symlink = lo_symlink, ++ .link = lo_link, ++ .unlink = lo_unlink, ++ .rmdir = lo_rmdir, ++ .rename = lo_rename, ++ .forget = lo_forget, ++ .forget_multi = lo_forget_multi, ++ .getattr = lo_getattr, ++ .setattr = lo_setattr, ++ .readlink = lo_readlink, ++ .opendir = lo_opendir, ++ .readdir = lo_readdir, ++ .readdirplus = lo_readdirplus, ++ .releasedir = lo_releasedir, ++ .fsyncdir = lo_fsyncdir, ++ .create = lo_create, ++ .open = lo_open, ++ .release = lo_release, ++ .flush = lo_flush, ++ .fsync = lo_fsync, ++ .read = lo_read, ++ .write_buf = lo_write_buf, ++ .statfs = lo_statfs, ++ .fallocate = lo_fallocate, ++ .flock = lo_flock, ++ .getxattr = lo_getxattr, ++ .listxattr = lo_listxattr, ++ .setxattr = lo_setxattr, ++ .removexattr = lo_removexattr, ++#ifdef HAVE_COPY_FILE_RANGE ++ .copy_file_range = 
lo_copy_file_range, ++#endif ++ .lseek = lo_lseek, ++}; ++ ++int main(int argc, char *argv[]) ++{ ++ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); ++ struct fuse_session *se; ++ struct fuse_cmdline_opts opts; ++ struct lo_data lo = { .debug = 0, ++ .writeback = 0 }; ++ int ret = -1; ++ ++ /* Don't mask creation mode, kernel already did that */ ++ umask(0); ++ ++ pthread_mutex_init(&lo.mutex, NULL); ++ lo.root.next = lo.root.prev = &lo.root; ++ lo.root.fd = -1; ++ lo.cache = CACHE_NORMAL; ++ ++ if (fuse_parse_cmdline(&args, &opts) != 0) ++ return 1; ++ if (opts.show_help) { ++ printf("usage: %s [options] \n\n", argv[0]); ++ fuse_cmdline_help(); ++ fuse_lowlevel_help(); ++ ret = 0; ++ goto err_out1; ++ } else if (opts.show_version) { ++ printf("FUSE library version %s\n", fuse_pkgversion()); ++ fuse_lowlevel_version(); ++ ret = 0; ++ goto err_out1; ++ } ++ ++ if(opts.mountpoint == NULL) { ++ printf("usage: %s [options] \n", argv[0]); ++ printf(" %s --help\n", argv[0]); ++ ret = 1; ++ goto err_out1; ++ } ++ ++ if (fuse_opt_parse(&args, &lo, lo_opts, NULL)== -1) ++ return 1; ++ ++ lo.debug = opts.debug; ++ lo.root.refcount = 2; ++ if (lo.source) { ++ struct stat stat; ++ int res; ++ ++ res = lstat(lo.source, &stat); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", ++ lo.source); ++ exit(1); ++ } ++ if (!S_ISDIR(stat.st_mode)) { ++ fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); ++ exit(1); ++ } ++ ++ } else { ++ lo.source = "/"; ++ } ++ lo.root.is_symlink = false; ++ if (!lo.timeout_set) { ++ switch (lo.cache) { ++ case CACHE_NEVER: ++ lo.timeout = 0.0; ++ break; ++ ++ case CACHE_NORMAL: ++ lo.timeout = 1.0; ++ break; ++ ++ case CACHE_ALWAYS: ++ lo.timeout = 86400.0; ++ break; ++ } ++ } else if (lo.timeout < 0) { ++ fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", ++ lo.timeout); ++ exit(1); ++ } ++ ++ lo.root.fd = open(lo.source, O_PATH); ++ if (lo.root.fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): 
%m\n", ++ lo.source); ++ exit(1); ++ } ++ ++ se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); ++ if (se == NULL) ++ goto err_out1; ++ ++ if (fuse_set_signal_handlers(se) != 0) ++ goto err_out2; ++ ++ if (fuse_session_mount(se, opts.mountpoint) != 0) ++ goto err_out3; ++ ++ fuse_daemonize(opts.foreground); ++ ++ /* Block until ctrl+c or fusermount -u */ ++ if (opts.singlethread) ++ ret = fuse_session_loop(se); ++ else ++ ret = fuse_session_loop_mt(se, opts.clone_fd); ++ ++ fuse_session_unmount(se); ++err_out3: ++ fuse_remove_signal_handlers(se); ++err_out2: ++ fuse_session_destroy(se); ++err_out1: ++ free(opts.mountpoint); ++ fuse_opt_free_args(&args); ++ ++ if (lo.root.fd >= 0) ++ close(lo.root.fd); ++ ++ return ret ? 1 : 0; ++} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch b/SOURCES/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch new file mode 100644 index 0000000..cef537a --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch @@ -0,0 +1,73 @@ +From 52e93f2dc499ead339bf808dac3480b369dfadd1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:39 +0100 +Subject: [PATCH 068/116] virtiofsd: Add timestamp to the log with + FUSE_LOG_DEBUG level +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-65-dgilbert@redhat.com> +Patchwork-id: 93517 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 064/112] virtiofsd: Add timestamp to the log with FUSE_LOG_DEBUG level +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Masayoshi Mizuma + +virtiofsd has some threads, so we see a lot of logs with debug option. +It would be useful for debugging if we can see the timestamp. 
+ +Add nano second timestamp, which got by get_clock(), to the log with +FUSE_LOG_DEBUG level if the syslog option isn't set. + +The log is like as: + + # ./virtiofsd -d -o vhost_user_socket=/tmp/vhostqemu0 -o source=/tmp/share0 -o cache=auto + ... + [5365943125463727] [ID: 00000002] fv_queue_thread: Start for queue 0 kick_fd 9 + [5365943125568644] [ID: 00000002] fv_queue_thread: Waiting for Queue 0 event + [5365943125573561] [ID: 00000002] fv_queue_thread: Got queue event on Queue 0 + +Signed-off-by: Masayoshi Mizuma +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 50fb955aa0e6ede929422146936cf68bf1ca876f) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index f08324f..98114a3 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -36,6 +36,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/timer.h" + #include "fuse_virtio.h" + #include "fuse_log.h" + #include "fuse_lowlevel.h" +@@ -2276,7 +2277,13 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + } + + if (current_log_level == FUSE_LOG_DEBUG) { +- localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt); ++ if (!use_syslog) { ++ localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s", ++ get_clock(), syscall(__NR_gettid), fmt); ++ } else { ++ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), ++ fmt); ++ } + fmt = localfmt; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch b/SOURCES/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch new file mode 100644 index 0000000..4713a0d --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch @@ -0,0 +1,85 @@ +From 2b921f7162b53204051955228bf99bbed55d2457 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:53 +0100 +Subject: [PATCH 082/116] virtiofsd: Clean up inodes on destroy +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-79-dgilbert@redhat.com> +Patchwork-id: 93532 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 078/112] virtiofsd: Clean up inodes on destroy +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Clear out our inodes and fd's on a 'destroy' - so we get rid +of them if we reboot the guest. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 771b01eb76ff480fee984bd1d21727147cc3e702) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index b176a31..9ed77a1 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1169,6 +1169,25 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + } + } + ++static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) ++{ ++ struct lo_inode *inode = value; ++ struct lo_data *lo = user_data; ++ ++ inode->refcount = 0; ++ lo_map_remove(&lo->ino_map, inode->fuse_ino); ++ close(inode->fd); ++ ++ return TRUE; ++} ++ ++static void unref_all_inodes(struct lo_data *lo) ++{ ++ pthread_mutex_lock(&lo->mutex); ++ g_hash_table_foreach_remove(lo->inodes, unref_all_inodes_cb, lo); ++ pthread_mutex_unlock(&lo->mutex); ++} ++ + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { + struct lo_data *lo = lo_data(req); +@@ -2035,6 +2054,12 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int 
whence, + } + } + ++static void lo_destroy(void *userdata) ++{ ++ struct lo_data *lo = (struct lo_data *)userdata; ++ unref_all_inodes(lo); ++} ++ + static struct fuse_lowlevel_ops lo_oper = { + .init = lo_init, + .lookup = lo_lookup, +@@ -2073,6 +2098,7 @@ static struct fuse_lowlevel_ops lo_oper = { + .copy_file_range = lo_copy_file_range, + #endif + .lseek = lo_lseek, ++ .destroy = lo_destroy, + }; + + /* Print vhost-user.json backend program capabilities */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch b/SOURCES/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch new file mode 100644 index 0000000..c421365 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch @@ -0,0 +1,112 @@ +From 24f91062f571ad2dd2ac22db3b7d456a2c8bd2cb Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:23 +0100 +Subject: [PATCH 112/116] virtiofsd: Convert lo_destroy to take the lo->mutex + lock itself +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-109-dgilbert@redhat.com> +Patchwork-id: 93563 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 108/112] virtiofsd: Convert lo_destroy to take the lo->mutex lock itself +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +lo_destroy was relying on some implicit knowledge of the locking; +we can avoid this if we create an unref_inode that doesn't take +the lock and then grab it for the whole of the lo_destroy. + +Suggested-by: Vivek Goyal +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit fe4c15798a48143dd6b1f58d2d3cad12206ce211) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 31 +++++++++++++++++-------------- + 1 file changed, 17 insertions(+), 14 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index eb001b9..fc15d61 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1344,14 +1344,13 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + lo_inode_put(lo, &inode); + } + +-static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, +- uint64_t n) ++/* To be called with lo->mutex held */ ++static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) + { + if (!inode) { + return; + } + +- pthread_mutex_lock(&lo->mutex); + assert(inode->nlookup >= n); + inode->nlookup -= n; + if (!inode->nlookup) { +@@ -1362,15 +1361,24 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + } + g_hash_table_destroy(inode->posix_locks); + pthread_mutex_destroy(&inode->plock_mutex); +- pthread_mutex_unlock(&lo->mutex); + + /* Drop our refcount from lo_do_lookup() */ + lo_inode_put(lo, &inode); +- } else { +- pthread_mutex_unlock(&lo->mutex); + } + } + ++static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, ++ uint64_t n) ++{ ++ if (!inode) { ++ return; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ unref_inode(lo, inode, n); ++ pthread_mutex_unlock(&lo->mutex); ++} ++ + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { + struct lo_data *lo = lo_data(req); +@@ -2458,13 +2466,7 @@ static void lo_destroy(void *userdata) + { + struct lo_data *lo = (struct lo_data *)userdata; + +- /* +- * Normally lo->mutex must be taken when traversing lo->inodes but +- * lo_destroy() is a serialized request so no races are possible here. 
+- * +- * In addition, we cannot acquire lo->mutex since unref_inode() takes it +- * too and this would result in a recursive lock. +- */ ++ pthread_mutex_lock(&lo->mutex); + while (true) { + GHashTableIter iter; + gpointer key, value; +@@ -2475,8 +2477,9 @@ static void lo_destroy(void *userdata) + } + + struct lo_inode *inode = value; +- unref_inode_lolocked(lo, inode, inode->nlookup); ++ unref_inode(lo, inode, inode->nlookup); + } ++ pthread_mutex_unlock(&lo->mutex); + } + + static struct fuse_lowlevel_ops lo_oper = { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch b/SOURCES/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch new file mode 100644 index 0000000..9f198c2 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch @@ -0,0 +1,176 @@ +From e217ab392e0d4c770ec18dbfbe986771773cb557 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:33 +0100 +Subject: [PATCH 062/116] virtiofsd: Drop CAP_FSETID if client asked for it +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-59-dgilbert@redhat.com> +Patchwork-id: 93513 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 058/112] virtiofsd: Drop CAP_FSETID if client asked for it +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +If client requested killing setuid/setgid bits on file being written, drop +CAP_FSETID capability so that setuid/setgid bits are cleared upon write +automatically. + +pjdfstest chown/12.t needs this. + +Signed-off-by: Vivek Goyal + dgilbert: reworked for libcap-ng +Reviewed-by: Misono Tomohiro +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit ee88465224b3aed2596049caa28f86cbe0d5a3d0) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 105 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 97e7c75..d53cb1e 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -201,6 +201,91 @@ static int load_capng(void) + return 0; + } + ++/* ++ * Helpers for dropping and regaining effective capabilities. Returns 0 ++ * on success, error otherwise ++ */ ++static int drop_effective_cap(const char *cap_name, bool *cap_dropped) ++{ ++ int cap, ret; ++ ++ cap = capng_name_to_capability(cap_name); ++ if (cap < 0) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", ++ cap_name, strerror(errno)); ++ goto out; ++ } ++ ++ if (load_capng()) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); ++ goto out; ++ } ++ ++ /* We dont have this capability in effective set already. 
*/ ++ if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) { ++ ret = 0; ++ goto out; ++ } ++ ++ if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n"); ++ goto out; ++ } ++ ++ if (capng_apply(CAPNG_SELECT_CAPS)) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n"); ++ goto out; ++ } ++ ++ ret = 0; ++ if (cap_dropped) { ++ *cap_dropped = true; ++ } ++ ++out: ++ return ret; ++} ++ ++static int gain_effective_cap(const char *cap_name) ++{ ++ int cap; ++ int ret = 0; ++ ++ cap = capng_name_to_capability(cap_name); ++ if (cap < 0) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", ++ cap_name, strerror(errno)); ++ goto out; ++ } ++ ++ if (load_capng()) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); ++ goto out; ++ } ++ ++ if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n"); ++ goto out; ++ } ++ ++ if (capng_apply(CAPNG_SELECT_CAPS)) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n"); ++ goto out; ++ } ++ ret = 0; ++ ++out: ++ return ret; ++} ++ + static void lo_map_init(struct lo_map *map) + { + map->elems = NULL; +@@ -1577,6 +1662,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + (void)ino; + ssize_t res; + struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); ++ bool cap_fsetid_dropped = false; + + out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; + out_buf.buf[0].fd = lo_fi_fd(req, fi); +@@ -1588,12 +1674,31 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + out_buf.buf[0].size, (unsigned long)off); + } + ++ /* ++ * If kill_priv is set, drop CAP_FSETID which should lead to kernel ++ * clearing setuid/setgid on file. 
++ */ ++ if (fi->kill_priv) { ++ res = drop_effective_cap("FSETID", &cap_fsetid_dropped); ++ if (res != 0) { ++ fuse_reply_err(req, res); ++ return; ++ } ++ } ++ + res = fuse_buf_copy(&out_buf, in_buf); + if (res < 0) { + fuse_reply_err(req, -res); + } else { + fuse_reply_write(req, (size_t)res); + } ++ ++ if (cap_fsetid_dropped) { ++ res = gain_effective_cap("FSETID"); ++ if (res) { ++ fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n"); ++ } ++ } + } + + static void lo_statfs(fuse_req_t req, fuse_ino_t ino) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Fast-path-for-virtio-read.patch b/SOURCES/kvm-virtiofsd-Fast-path-for-virtio-read.patch new file mode 100644 index 0000000..03874ce --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Fast-path-for-virtio-read.patch @@ -0,0 +1,240 @@ +From 7d2efc3e4af15eff57b0c38cff7c81b371a98303 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:06 +0100 +Subject: [PATCH 035/116] virtiofsd: Fast path for virtio read +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-32-dgilbert@redhat.com> +Patchwork-id: 93480 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 031/112] virtiofsd: Fast path for virtio read +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Readv the data straight into the guests buffer. + +Signed-off-by: Dr. David Alan Gilbert +With fix by: +Signed-off-by: Eryu Guan +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit eb49d187ef5134483a34c970bbfece28aaa686a7) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 5 ++ + tools/virtiofsd/fuse_virtio.c | 162 ++++++++++++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_virtio.h | 4 + + 3 files changed, 171 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 380d93b..4f4684d 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -475,6 +475,11 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, + return fuse_send_msg(se, ch, iov, iov_count); + } + ++ if (fuse_lowlevel_is_virtio(se) && buf->count == 1 && ++ buf->buf[0].flags == (FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK)) { ++ return virtio_send_data_iov(se, ch, iov, iov_count, buf, len); ++ } ++ + abort(); /* Will have taken vhost path */ + return 0; + } +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index f1adeb6..7e2711b 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -230,6 +230,168 @@ err: + return ret; + } + ++/* ++ * Callback from fuse_send_data_iov_* when it's virtio and the buffer ++ * is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK ++ * We need send the iov and then the buffer. ++ * Return 0 on success ++ */ ++int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count, struct fuse_bufvec *buf, ++ size_t len) ++{ ++ int ret = 0; ++ VuVirtqElement *elem; ++ VuVirtq *q; ++ ++ assert(count >= 1); ++ assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); ++ ++ struct fuse_out_header *out = iov[0].iov_base; ++ /* TODO: Endianness! 
*/ ++ ++ size_t iov_len = iov_size(iov, count); ++ size_t tosend_len = iov_len + len; ++ ++ out->len = tosend_len; ++ ++ fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n", __func__, ++ count, len, iov_len); ++ ++ /* unique == 0 is notification which we don't support */ ++ assert(out->unique); ++ ++ /* For virtio we always have ch */ ++ assert(ch); ++ assert(!ch->qi->reply_sent); ++ elem = ch->qi->qe; ++ q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; ++ ++ /* The 'in' part of the elem is to qemu */ ++ unsigned int in_num = elem->in_num; ++ struct iovec *in_sg = elem->in_sg; ++ size_t in_len = iov_size(in_sg, in_num); ++ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", ++ __func__, elem->index, in_num, in_len); ++ ++ /* ++ * The elem should have room for a 'fuse_out_header' (out from fuse) ++ * plus the data based on the len in the header. ++ */ ++ if (in_len < sizeof(struct fuse_out_header)) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", ++ __func__, elem->index); ++ ret = E2BIG; ++ goto err; ++ } ++ if (in_len < tosend_len) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", ++ __func__, elem->index, tosend_len); ++ ret = E2BIG; ++ goto err; ++ } ++ ++ /* TODO: Limit to 'len' */ ++ ++ /* First copy the header data from iov->in_sg */ ++ copy_iov(iov, count, in_sg, in_num, iov_len); ++ ++ /* ++ * Build a copy of the the in_sg iov so we can skip bits in it, ++ * including changing the offsets ++ */ ++ struct iovec *in_sg_cpy = calloc(sizeof(struct iovec), in_num); ++ assert(in_sg_cpy); ++ memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num); ++ /* These get updated as we skip */ ++ struct iovec *in_sg_ptr = in_sg_cpy; ++ int in_sg_cpy_count = in_num; ++ ++ /* skip over parts of in_sg that contained the header iov */ ++ size_t skip_size = iov_len; ++ ++ size_t in_sg_left = 0; ++ do { ++ while (skip_size != 0 && in_sg_cpy_count) { ++ if (skip_size >= in_sg_ptr[0].iov_len) { ++ skip_size -= 
in_sg_ptr[0].iov_len; ++ in_sg_ptr++; ++ in_sg_cpy_count--; ++ } else { ++ in_sg_ptr[0].iov_len -= skip_size; ++ in_sg_ptr[0].iov_base += skip_size; ++ break; ++ } ++ } ++ ++ int i; ++ for (i = 0, in_sg_left = 0; i < in_sg_cpy_count; i++) { ++ in_sg_left += in_sg_ptr[i].iov_len; ++ } ++ fuse_log(FUSE_LOG_DEBUG, ++ "%s: after skip skip_size=%zd in_sg_cpy_count=%d " ++ "in_sg_left=%zd\n", ++ __func__, skip_size, in_sg_cpy_count, in_sg_left); ++ ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count, ++ buf->buf[0].pos); ++ ++ if (ret == -1) { ++ ret = errno; ++ fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n", ++ __func__, len); ++ free(in_sg_cpy); ++ goto err; ++ } ++ fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__, ++ ret, len); ++ if (ret < len && ret) { ++ fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__); ++ /* Skip over this much next time around */ ++ skip_size = ret; ++ buf->buf[0].pos += ret; ++ len -= ret; ++ ++ /* Lets do another read */ ++ continue; ++ } ++ if (!ret) { ++ /* EOF case? 
*/ ++ fuse_log(FUSE_LOG_DEBUG, "%s: !ret in_sg_left=%zd\n", __func__, ++ in_sg_left); ++ break; ++ } ++ if (ret != len) { ++ fuse_log(FUSE_LOG_DEBUG, "%s: ret!=len\n", __func__); ++ ret = EIO; ++ free(in_sg_cpy); ++ goto err; ++ } ++ in_sg_left -= ret; ++ len -= ret; ++ } while (in_sg_left); ++ free(in_sg_cpy); ++ ++ /* Need to fix out->len on EOF */ ++ if (len) { ++ struct fuse_out_header *out_sg = in_sg[0].iov_base; ++ ++ tosend_len -= len; ++ out_sg->len = tosend_len; ++ } ++ ++ ret = 0; ++ ++ vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); ++ vu_queue_notify(&se->virtio_dev->dev, q); ++ ++err: ++ if (ret == 0) { ++ ch->qi->reply_sent = true; ++ } ++ ++ return ret; ++} ++ + /* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +index 135a148..cc676b9 100644 +--- a/tools/virtiofsd/fuse_virtio.h ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -26,4 +26,8 @@ int virtio_loop(struct fuse_session *se); + int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int count); + ++int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count, ++ struct fuse_bufvec *buf, size_t len); ++ + #endif +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch b/SOURCES/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch new file mode 100644 index 0000000..12bb9a2 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch @@ -0,0 +1,164 @@ +From 6d41fc549198e140f38fddcb02975098df040ae1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:50 +0100 +Subject: [PATCH 019/116] virtiofsd: Fix common header and define for QEMU + builds +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-16-dgilbert@redhat.com> +Patchwork-id: 93470 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 015/112] virtiofsd: Fix common header and define for QEMU builds +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +All of the fuse files include config.h and define GNU_SOURCE +where we don't have either under our build - remove them. +Fixup path to the kernel's fuse.h in the QEMUs world. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 09863ebc7e32a107235b3c815ad54d26cc64f07a) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 4 +--- + tools/virtiofsd/fuse_i.h | 3 +++ + tools/virtiofsd/fuse_log.c | 1 + + tools/virtiofsd/fuse_lowlevel.c | 6 ++---- + tools/virtiofsd/fuse_opt.c | 2 +- + tools/virtiofsd/fuse_signals.c | 2 +- + tools/virtiofsd/helper.c | 1 + + tools/virtiofsd/passthrough_ll.c | 8 ++------ + 8 files changed, 12 insertions(+), 15 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 4d507f3..772efa9 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -9,9 +9,7 @@ + * See the file COPYING.LIB + */ + +-#define _GNU_SOURCE +- +-#include "config.h" ++#include "qemu/osdep.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" + #include +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index e63cb58..bae0699 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -6,6 +6,9 @@ + * See the file COPYING.LIB + */ + ++#define FUSE_USE_VERSION 31 ++ ++ + #include "fuse.h" + #include "fuse_lowlevel.h" + +diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c +index 11345f9..c301ff6 100644 +--- a/tools/virtiofsd/fuse_log.c ++++ b/tools/virtiofsd/fuse_log.c +@@ -8,6 +8,7 @@ + 
* See the file COPYING.LIB + */ + ++#include "qemu/osdep.h" + #include "fuse_log.h" + + #include +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 3da80de..07fb8a6 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -9,11 +9,9 @@ + * See the file COPYING.LIB + */ + +-#define _GNU_SOURCE +- +-#include "config.h" ++#include "qemu/osdep.h" + #include "fuse_i.h" +-#include "fuse_kernel.h" ++#include "standard-headers/linux/fuse.h" + #include "fuse_misc.h" + #include "fuse_opt.h" + +diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c +index edd36f4..2892236 100644 +--- a/tools/virtiofsd/fuse_opt.c ++++ b/tools/virtiofsd/fuse_opt.c +@@ -9,8 +9,8 @@ + * See the file COPYING.LIB + */ + ++#include "qemu/osdep.h" + #include "fuse_opt.h" +-#include "config.h" + #include "fuse_i.h" + #include "fuse_misc.h" + +diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c +index 19d6791..dc7c8ac 100644 +--- a/tools/virtiofsd/fuse_signals.c ++++ b/tools/virtiofsd/fuse_signals.c +@@ -8,7 +8,7 @@ + * See the file COPYING.LIB + */ + +-#include "config.h" ++#include "qemu/osdep.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index d9227d7..9333691 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -10,6 +10,7 @@ + * See the file COPYING.LIB. 
+ */ + ++#include "qemu/osdep.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" + #include "fuse_misc.h" +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 126a56c..322a889 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -35,15 +35,11 @@ + * \include passthrough_ll.c + */ + +-#define _GNU_SOURCE +-#define FUSE_USE_VERSION 31 +- +-#include "config.h" +- ++#include "qemu/osdep.h" ++#include "fuse_lowlevel.h" + #include + #include + #include +-#include + #include + #include + #include +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch b/SOURCES/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch new file mode 100644 index 0000000..f929bab --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch @@ -0,0 +1,136 @@ +From 9b5fbc95a287b2ce9448142194b161d8360d5e4e Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:15 +0100 +Subject: [PATCH 104/116] virtiofsd: Fix data corruption with O_APPEND write in + writeback mode +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-101-dgilbert@redhat.com> +Patchwork-id: 93556 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 100/112] virtiofsd: Fix data corruption with O_APPEND write in writeback mode +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Misono Tomohiro + +When writeback mode is enabled (-o writeback), O_APPEND handling is +done in kernel. Therefore virtiofsd clears O_APPEND flag when open. +Otherwise O_APPEND flag takes precedence over pwrite() and write +data may corrupt. + +Currently clearing O_APPEND flag is done in lo_open(), but we also +need the same operation in lo_create(). 
So, factor out the flag +update operation in lo_open() to update_open_flags() and call it +in both lo_open() and lo_create(). + +This fixes the failure of xfstest generic/069 in writeback mode +(which tests O_APPEND write data integrity). + +Signed-off-by: Misono Tomohiro +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 8e4e41e39eac5ee5f378d66f069a2f70a1734317) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 66 ++++++++++++++++++++-------------------- + 1 file changed, 33 insertions(+), 33 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 948cb19..4c61ac5 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1692,6 +1692,37 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, + fuse_reply_err(req, 0); + } + ++static void update_open_flags(int writeback, struct fuse_file_info *fi) ++{ ++ /* ++ * With writeback cache, kernel may send read requests even ++ * when userspace opened write-only ++ */ ++ if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { ++ fi->flags &= ~O_ACCMODE; ++ fi->flags |= O_RDWR; ++ } ++ ++ /* ++ * With writeback cache, O_APPEND is handled by the kernel. ++ * This breaks atomicity (since the file may change in the ++ * underlying filesystem, so that the kernel's idea of the ++ * end of the file isn't accurate anymore). In this example, ++ * we just accept that. A more rigorous filesystem may want ++ * to return an error here ++ */ ++ if (writeback && (fi->flags & O_APPEND)) { ++ fi->flags &= ~O_APPEND; ++ } ++ ++ /* ++ * O_DIRECT in guest should not necessarily mean bypassing page ++ * cache on host as well. If somebody needs that behavior, it ++ * probably should be a configuration knob in daemon. 
++ */ ++ fi->flags &= ~O_DIRECT; ++} ++ + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, struct fuse_file_info *fi) + { +@@ -1721,12 +1752,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out; + } + +- /* +- * O_DIRECT in guest should not necessarily mean bypassing page +- * cache on host as well. If somebody needs that behavior, it +- * probably should be a configuration knob in daemon. +- */ +- fi->flags &= ~O_DIRECT; ++ update_open_flags(lo->writeback, fi); + + fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); +@@ -1936,33 +1962,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, + fi->flags); + +- /* +- * With writeback cache, kernel may send read requests even +- * when userspace opened write-only +- */ +- if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { +- fi->flags &= ~O_ACCMODE; +- fi->flags |= O_RDWR; +- } +- +- /* +- * With writeback cache, O_APPEND is handled by the kernel. +- * This breaks atomicity (since the file may change in the +- * underlying filesystem, so that the kernel's idea of the +- * end of the file isn't accurate anymore). In this example, +- * we just accept that. A more rigorous filesystem may want +- * to return an error here +- */ +- if (lo->writeback && (fi->flags & O_APPEND)) { +- fi->flags &= ~O_APPEND; +- } +- +- /* +- * O_DIRECT in guest should not necessarily mean bypassing page +- * cache on host as well. If somebody needs that behavior, it +- * probably should be a configuration knob in daemon. 
+- */ +- fi->flags &= ~O_DIRECT; ++ update_open_flags(lo->writeback, fi); + + sprintf(buf, "%i", lo_fd(req, ino)); + fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch b/SOURCES/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch new file mode 100644 index 0000000..306c183 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch @@ -0,0 +1,120 @@ +From 9f726593bc3acbc247876dcc4d79fbf046958003 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:49 +0100 +Subject: [PATCH 018/116] virtiofsd: Fix fuse_daemonize ignored return values +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-15-dgilbert@redhat.com> +Patchwork-id: 93469 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 014/112] virtiofsd: Fix fuse_daemonize ignored return values +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +QEMU's compiler enables warnings/errors for ignored values +and the (void) trick used in the fuse code isn't enough. +Turn all the return values into a return value on the function. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 30d8e49760712d65697ea517c53671bd1d214fc7) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 33 ++++++++++++++++++++++----------- + 1 file changed, 22 insertions(+), 11 deletions(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5e6f205..d9227d7 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -10,12 +10,10 @@ + * See the file COPYING.LIB. 
+ */ + +-#include "config.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" + #include "fuse_misc.h" + #include "fuse_opt.h" +-#include "mount_util.h" + + #include + #include +@@ -171,6 +169,7 @@ int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) + + int fuse_daemonize(int foreground) + { ++ int ret = 0, rett; + if (!foreground) { + int nullfd; + int waiter[2]; +@@ -192,8 +191,8 @@ int fuse_daemonize(int foreground) + case 0: + break; + default: +- (void)read(waiter[0], &completed, sizeof(completed)); +- _exit(0); ++ _exit(read(waiter[0], &completed, ++ sizeof(completed) != sizeof(completed))); + } + + if (setsid() == -1) { +@@ -201,13 +200,22 @@ int fuse_daemonize(int foreground) + return -1; + } + +- (void)chdir("/"); ++ ret = chdir("/"); + + nullfd = open("/dev/null", O_RDWR, 0); + if (nullfd != -1) { +- (void)dup2(nullfd, 0); +- (void)dup2(nullfd, 1); +- (void)dup2(nullfd, 2); ++ rett = dup2(nullfd, 0); ++ if (!ret) { ++ ret = rett; ++ } ++ rett = dup2(nullfd, 1); ++ if (!ret) { ++ ret = rett; ++ } ++ rett = dup2(nullfd, 2); ++ if (!ret) { ++ ret = rett; ++ } + if (nullfd > 2) { + close(nullfd); + } +@@ -215,13 +223,16 @@ int fuse_daemonize(int foreground) + + /* Propagate completion of daemon initialization */ + completed = 1; +- (void)write(waiter[1], &completed, sizeof(completed)); ++ rett = write(waiter[1], &completed, sizeof(completed)); ++ if (!ret) { ++ ret = rett; ++ } + close(waiter[0]); + close(waiter[1]); + } else { +- (void)chdir("/"); ++ ret = chdir("/"); + } +- return 0; ++ return ret; + } + + void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Fix-xattr-operations.patch b/SOURCES/kvm-virtiofsd-Fix-xattr-operations.patch new file mode 100644 index 0000000..532948f --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Fix-xattr-operations.patch @@ -0,0 +1,327 @@ +From 8721796f22a8a61d82974088e542377ee6db209e Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:14 +0000 +Subject: [PATCH 18/18] virtiofsd: Fix xattr operations +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-8-dgilbert@redhat.com> +Patchwork-id: 94123 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 7/7] virtiofsd: Fix xattr operations +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: Misono Tomohiro + +Current virtiofsd has problems about xattr operations and +they does not work properly for directory/symlink/special file. + +The fundamental cause is that virtiofsd uses openat() + f...xattr() +systemcalls for xattr operation but we should not open symlink/special +file in the daemon. Therefore the function is restricted. + +Fix this problem by: + 1. during setup of each thread, call unshare(CLONE_FS) + 2. in xattr operations (i.e. lo_getxattr), if inode is not a regular + file or directory, use fchdir(proc_loot_fd) + ...xattr() + + fchdir(root.fd) instead of openat() + f...xattr() + + (Note: for a regular file/directory openat() + f...xattr() + is still used for performance reason) + +With this patch, xfstests generic/062 passes on virtiofs. + +This fix is suggested by Miklos Szeredi and Stefan Hajnoczi. +The original discussion can be found here: + https://www.redhat.com/archives/virtio-fs/2019-October/msg00046.html + +Signed-off-by: Misono Tomohiro +Message-Id: <20200227055927.24566-3-misono.tomohiro@jp.fujitsu.com> +Acked-by: Vivek Goyal +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit bdfd66788349acc43cd3f1298718ad491663cfcc) +Signed-off-by: Danilo C. L. 
de Paula +--- + tools/virtiofsd/fuse_virtio.c | 13 +++++ + tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++++++------------------ + tools/virtiofsd/seccomp.c | 6 +++ + 3 files changed, 77 insertions(+), 47 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index dd1c605..3b6d16a 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -426,6 +426,8 @@ err: + return ret; + } + ++static __thread bool clone_fs_called; ++ + /* Process one FVRequest in a thread pool */ + static void fv_queue_worker(gpointer data, gpointer user_data) + { +@@ -441,6 +443,17 @@ static void fv_queue_worker(gpointer data, gpointer user_data) + + assert(se->bufsize > sizeof(struct fuse_in_header)); + ++ if (!clone_fs_called) { ++ int ret; ++ ++ /* unshare FS for xattr operation */ ++ ret = unshare(CLONE_FS); ++ /* should not fail */ ++ assert(ret == 0); ++ ++ clone_fs_called = true; ++ } ++ + /* + * An element contains one request and the space to send our response + * They're spread over multiple descriptors in a scatter/gather set +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 50c7273..9cba3f1 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -123,7 +123,7 @@ struct lo_inode { + pthread_mutex_t plock_mutex; + GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ + +- bool is_symlink; ++ mode_t filetype; + }; + + struct lo_cred { +@@ -695,7 +695,7 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, + struct lo_inode *parent; + char path[PATH_MAX]; + +- if (inode->is_symlink) { ++ if (S_ISLNK(inode->filetype)) { + res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); + if (res == -1 && errno == EINVAL) { + /* Sorry, no race free way to set times on symlink. 
*/ +@@ -929,7 +929,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out_err; + } + +- inode->is_symlink = S_ISLNK(e->attr.st_mode); ++ /* cache only filetype */ ++ inode->filetype = (e->attr.st_mode & S_IFMT); + + /* + * One for the caller and one for nlookup (released in +@@ -1139,7 +1140,7 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, + struct lo_inode *parent; + char path[PATH_MAX]; + +- if (inode->is_symlink) { ++ if (S_ISLNK(inode->filetype)) { + res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); + if (res == -1 && (errno == ENOENT || errno == EINVAL)) { + /* Sorry, no race free way to hard-link a symlink. */ +@@ -2193,12 +2194,6 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", + ino, name, size); + +- if (inode->is_symlink) { +- /* Sorry, no race free way to getxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } +- + if (size) { + value = malloc(size); + if (!value) { +@@ -2207,12 +2202,25 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + } + + sprintf(procname, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, procname, O_RDONLY); +- if (fd < 0) { +- goto out_err; ++ /* ++ * It is not safe to open() non-regular/non-dir files in file server ++ * unless O_PATH is used, so use that method for regular files/dir ++ * only (as it seems giving less performance overhead). ++ * Otherwise, call fchdir() to avoid open(). 
++ */ ++ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ goto out_err; ++ } ++ ret = fgetxattr(fd, name, value, size); ++ } else { ++ /* fchdir should not fail here */ ++ assert(fchdir(lo->proc_self_fd) == 0); ++ ret = getxattr(procname, name, value, size); ++ assert(fchdir(lo->root.fd) == 0); + } + +- ret = fgetxattr(fd, name, value, size); + if (ret == -1) { + goto out_err; + } +@@ -2266,12 +2274,6 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, + size); + +- if (inode->is_symlink) { +- /* Sorry, no race free way to listxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } +- + if (size) { + value = malloc(size); + if (!value) { +@@ -2280,12 +2282,19 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + } + + sprintf(procname, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, procname, O_RDONLY); +- if (fd < 0) { +- goto out_err; ++ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ goto out_err; ++ } ++ ret = flistxattr(fd, value, size); ++ } else { ++ /* fchdir should not fail here */ ++ assert(fchdir(lo->proc_self_fd) == 0); ++ ret = listxattr(procname, value, size); ++ assert(fchdir(lo->root.fd) == 0); + } + +- ret = flistxattr(fd, value, size); + if (ret == -1) { + goto out_err; + } +@@ -2339,20 +2348,21 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 + ", name=%s value=%s size=%zd)\n", ino, name, value, size); + +- if (inode->is_symlink) { +- /* Sorry, no race free way to setxattr on symlink. 
*/ +- saverr = EPERM; +- goto out; +- } +- + sprintf(procname, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, procname, O_RDWR); +- if (fd < 0) { +- saverr = errno; +- goto out; ++ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ saverr = errno; ++ goto out; ++ } ++ ret = fsetxattr(fd, name, value, size, flags); ++ } else { ++ /* fchdir should not fail here */ ++ assert(fchdir(lo->proc_self_fd) == 0); ++ ret = setxattr(procname, name, value, size, flags); ++ assert(fchdir(lo->root.fd) == 0); + } + +- ret = fsetxattr(fd, name, value, size, flags); + saverr = ret == -1 ? errno : 0; + + out: +@@ -2387,20 +2397,21 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, + name); + +- if (inode->is_symlink) { +- /* Sorry, no race free way to setxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } +- + sprintf(procname, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, procname, O_RDWR); +- if (fd < 0) { +- saverr = errno; +- goto out; ++ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ saverr = errno; ++ goto out; ++ } ++ ret = fremovexattr(fd, name); ++ } else { ++ /* fchdir should not fail here */ ++ assert(fchdir(lo->proc_self_fd) == 0); ++ ret = removexattr(procname, name); ++ assert(fchdir(lo->root.fd) == 0); + } + +- ret = fremovexattr(fd, name); + saverr = ret == -1 ? 
errno : 0; + + out: +@@ -2800,7 +2811,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) + exit(1); + } + +- root->is_symlink = false; ++ root->filetype = S_IFDIR; + root->fd = fd; + root->key.ino = stat.st_ino; + root->key.dev = stat.st_dev; +diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c +index 2d9d4a7..bd9e7b0 100644 +--- a/tools/virtiofsd/seccomp.c ++++ b/tools/virtiofsd/seccomp.c +@@ -41,6 +41,7 @@ static const int syscall_whitelist[] = { + SCMP_SYS(exit), + SCMP_SYS(exit_group), + SCMP_SYS(fallocate), ++ SCMP_SYS(fchdir), + SCMP_SYS(fchmodat), + SCMP_SYS(fchownat), + SCMP_SYS(fcntl), +@@ -62,7 +63,9 @@ static const int syscall_whitelist[] = { + SCMP_SYS(getpid), + SCMP_SYS(gettid), + SCMP_SYS(gettimeofday), ++ SCMP_SYS(getxattr), + SCMP_SYS(linkat), ++ SCMP_SYS(listxattr), + SCMP_SYS(lseek), + SCMP_SYS(madvise), + SCMP_SYS(mkdirat), +@@ -85,6 +88,7 @@ static const int syscall_whitelist[] = { + SCMP_SYS(recvmsg), + SCMP_SYS(renameat), + SCMP_SYS(renameat2), ++ SCMP_SYS(removexattr), + SCMP_SYS(rt_sigaction), + SCMP_SYS(rt_sigprocmask), + SCMP_SYS(rt_sigreturn), +@@ -98,10 +102,12 @@ static const int syscall_whitelist[] = { + SCMP_SYS(setresuid32), + #endif + SCMP_SYS(set_robust_list), ++ SCMP_SYS(setxattr), + SCMP_SYS(symlinkat), + SCMP_SYS(time), /* Rarely needed, except on static builds */ + SCMP_SYS(tgkill), + SCMP_SYS(unlinkat), ++ SCMP_SYS(unshare), + SCMP_SYS(utimensat), + SCMP_SYS(write), + SCMP_SYS(writev), +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch b/SOURCES/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch new file mode 100644 index 0000000..5593a33 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch @@ -0,0 +1,14743 @@ +From e313ab94af558bbc133e7a93b0a6dbff706dd1d8 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:45 +0100 +Subject: [PATCH 014/116] virtiofsd: Format imported files to qemu style +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-11-dgilbert@redhat.com> +Patchwork-id: 93464 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 010/112] virtiofsd: Format imported files to qemu style +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Mostly using a set like: + +indent -nut -i 4 -nlp -br -cs -ce --no-space-after-function-call-names file +clang-format -style=file -i -- file +clang-tidy -fix-errors -checks=readability-braces-around-statements file +clang-format -style=file -i -- file + +With manual cleanups. + +The .clang-format used is below. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed by: Aleksandar Markovic + +Language: Cpp +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false # although we like it, it creates churn +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: true +AlignOperands: true +AlignTrailingComments: false # churn +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account +AlwaysBreakBeforeMultilineStrings: false +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterControlStatement: false + AfterEnum: false + AfterFunction: true + AfterStruct: false + AfterUnion: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeTernaryOperators: false +BreakStringLiterals: true 
+ColumnLimit: 80 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: false +DerivePointerAlignment: false +DisableFormat: false +ForEachMacros: [ + 'CPU_FOREACH', + 'CPU_FOREACH_REVERSE', + 'CPU_FOREACH_SAFE', + 'IOMMU_NOTIFIER_FOREACH', + 'QLIST_FOREACH', + 'QLIST_FOREACH_ENTRY', + 'QLIST_FOREACH_RCU', + 'QLIST_FOREACH_SAFE', + 'QLIST_FOREACH_SAFE_RCU', + 'QSIMPLEQ_FOREACH', + 'QSIMPLEQ_FOREACH_SAFE', + 'QSLIST_FOREACH', + 'QSLIST_FOREACH_SAFE', + 'QTAILQ_FOREACH', + 'QTAILQ_FOREACH_REVERSE', + 'QTAILQ_FOREACH_SAFE', + 'QTAILQ_RAW_FOREACH', + 'RAMBLOCK_FOREACH' +] +IncludeCategories: + - Regex: '^"qemu/osdep.h' + Priority: -3 + - Regex: '^"(block|chardev|crypto|disas|exec|fpu|hw|io|libdecnumber|migration|monitor|net|qapi|qemu|qom|standard-headers|sysemu|ui)/' + Priority: -2 + - Regex: '^"(elf.h|qemu-common.h|glib-compat.h|qemu-io.h|trace-tcg.h)' + Priority: -1 + - Regex: '.*' + Priority: 1 +IncludeIsMainRegex: '$' +IndentCaseLabels: false +IndentWidth: 4 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? +MacroBlockEnd: '.*_END$' +MaxEmptyLinesToKeep: 2 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInContainerLiterals: true +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Auto +UseTab: Never +... + +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 7387863d033e8028aa09a815736617a7c4490827) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 434 ++-- + tools/virtiofsd/fuse.h | 1572 +++++++------- + tools/virtiofsd/fuse_common.h | 730 +++---- + tools/virtiofsd/fuse_i.h | 121 +- + tools/virtiofsd/fuse_log.c | 38 +- + tools/virtiofsd/fuse_log.h | 32 +- + tools/virtiofsd/fuse_lowlevel.c | 3638 +++++++++++++++++---------------- + tools/virtiofsd/fuse_lowlevel.h | 2392 +++++++++++----------- + tools/virtiofsd/fuse_misc.h | 30 +- + tools/virtiofsd/fuse_opt.c | 659 +++--- + tools/virtiofsd/fuse_opt.h | 79 +- + tools/virtiofsd/fuse_signals.c | 118 +- + tools/virtiofsd/helper.c | 506 ++--- + tools/virtiofsd/passthrough_helpers.h | 33 +- + tools/virtiofsd/passthrough_ll.c | 2061 ++++++++++--------- + 15 files changed, 6382 insertions(+), 6061 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index aefb7db..5df946c 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -1,252 +1,272 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2010 Miklos Szeredi +- +- Functions for dealing with `struct fuse_buf` and `struct +- fuse_bufvec`. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2010 Miklos Szeredi ++ * ++ * Functions for dealing with `struct fuse_buf` and `struct ++ * fuse_bufvec`. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + + #define _GNU_SOURCE + + #include "config.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" ++#include ++#include + #include + #include +-#include +-#include + + size_t fuse_buf_size(const struct fuse_bufvec *bufv) + { +- size_t i; +- size_t size = 0; +- +- for (i = 0; i < bufv->count; i++) { +- if (bufv->buf[i].size == SIZE_MAX) +- size = SIZE_MAX; +- else +- size += bufv->buf[i].size; +- } +- +- return size; ++ size_t i; ++ size_t size = 0; ++ ++ for (i = 0; i < bufv->count; i++) { ++ if (bufv->buf[i].size == SIZE_MAX) { ++ size = SIZE_MAX; ++ } else { ++ size += bufv->buf[i].size; ++ } ++ } ++ ++ return size; + } + + static size_t min_size(size_t s1, size_t s2) + { +- return s1 < s2 ? s1 : s2; ++ return s1 < s2 ? s1 : s2; + } + + static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len) ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) + { +- ssize_t res = 0; +- size_t copied = 0; +- +- while (len) { +- if (dst->flags & FUSE_BUF_FD_SEEK) { +- res = pwrite(dst->fd, (char *)src->mem + src_off, len, +- dst->pos + dst_off); +- } else { +- res = write(dst->fd, (char *)src->mem + src_off, len); +- } +- if (res == -1) { +- if (!copied) +- return -errno; +- break; +- } +- if (res == 0) +- break; +- +- copied += res; +- if (!(dst->flags & FUSE_BUF_FD_RETRY)) +- break; +- +- src_off += res; +- dst_off += res; +- len -= res; +- } +- +- return copied; ++ ssize_t res = 0; ++ size_t copied = 0; ++ ++ while (len) { ++ if (dst->flags & FUSE_BUF_FD_SEEK) { ++ res = pwrite(dst->fd, (char *)src->mem + src_off, len, ++ dst->pos + dst_off); ++ } else { ++ res = write(dst->fd, (char *)src->mem + src_off, len); ++ } ++ if (res == -1) { ++ if (!copied) { ++ return -errno; ++ } ++ break; ++ } ++ if (res == 0) { ++ break; ++ } ++ ++ copied += res; ++ if (!(dst->flags & FUSE_BUF_FD_RETRY)) { ++ break; ++ } ++ ++ src_off += res; ++ dst_off += res; ++ len -= 
res; ++ } ++ ++ return copied; + } + + static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len) ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) + { +- ssize_t res = 0; +- size_t copied = 0; +- +- while (len) { +- if (src->flags & FUSE_BUF_FD_SEEK) { +- res = pread(src->fd, (char *)dst->mem + dst_off, len, +- src->pos + src_off); +- } else { +- res = read(src->fd, (char *)dst->mem + dst_off, len); +- } +- if (res == -1) { +- if (!copied) +- return -errno; +- break; +- } +- if (res == 0) +- break; +- +- copied += res; +- if (!(src->flags & FUSE_BUF_FD_RETRY)) +- break; +- +- dst_off += res; +- src_off += res; +- len -= res; +- } +- +- return copied; ++ ssize_t res = 0; ++ size_t copied = 0; ++ ++ while (len) { ++ if (src->flags & FUSE_BUF_FD_SEEK) { ++ res = pread(src->fd, (char *)dst->mem + dst_off, len, ++ src->pos + src_off); ++ } else { ++ res = read(src->fd, (char *)dst->mem + dst_off, len); ++ } ++ if (res == -1) { ++ if (!copied) { ++ return -errno; ++ } ++ break; ++ } ++ if (res == 0) { ++ break; ++ } ++ ++ copied += res; ++ if (!(src->flags & FUSE_BUF_FD_RETRY)) { ++ break; ++ } ++ ++ dst_off += res; ++ src_off += res; ++ len -= res; ++ } ++ ++ return copied; + } + + static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len) ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) + { +- char buf[4096]; +- struct fuse_buf tmp = { +- .size = sizeof(buf), +- .flags = 0, +- }; +- ssize_t res; +- size_t copied = 0; +- +- tmp.mem = buf; +- +- while (len) { +- size_t this_len = min_size(tmp.size, len); +- size_t read_len; +- +- res = fuse_buf_read(&tmp, 0, src, src_off, this_len); +- if (res < 0) { +- if (!copied) +- return res; +- break; +- } +- if (res == 0) +- break; +- +- read_len = res; +- res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); +- if (res < 0) { +- if (!copied) +- 
return res; +- break; +- } +- if (res == 0) +- break; +- +- copied += res; +- +- if (res < this_len) +- break; +- +- dst_off += res; +- src_off += res; +- len -= res; +- } +- +- return copied; ++ char buf[4096]; ++ struct fuse_buf tmp = { ++ .size = sizeof(buf), ++ .flags = 0, ++ }; ++ ssize_t res; ++ size_t copied = 0; ++ ++ tmp.mem = buf; ++ ++ while (len) { ++ size_t this_len = min_size(tmp.size, len); ++ size_t read_len; ++ ++ res = fuse_buf_read(&tmp, 0, src, src_off, this_len); ++ if (res < 0) { ++ if (!copied) { ++ return res; ++ } ++ break; ++ } ++ if (res == 0) { ++ break; ++ } ++ ++ read_len = res; ++ res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); ++ if (res < 0) { ++ if (!copied) { ++ return res; ++ } ++ break; ++ } ++ if (res == 0) { ++ break; ++ } ++ ++ copied += res; ++ ++ if (res < this_len) { ++ break; ++ } ++ ++ dst_off += res; ++ src_off += res; ++ len -= res; ++ } ++ ++ return copied; + } + + static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len, enum fuse_buf_copy_flags flags) ++ const struct fuse_buf *src, size_t src_off, ++ size_t len, enum fuse_buf_copy_flags flags) + { +- int src_is_fd = src->flags & FUSE_BUF_IS_FD; +- int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; +- +- if (!src_is_fd && !dst_is_fd) { +- char *dstmem = (char *)dst->mem + dst_off; +- char *srcmem = (char *)src->mem + src_off; +- +- if (dstmem != srcmem) { +- if (dstmem + len <= srcmem || srcmem + len <= dstmem) +- memcpy(dstmem, srcmem, len); +- else +- memmove(dstmem, srcmem, len); +- } +- +- return len; +- } else if (!src_is_fd) { +- return fuse_buf_write(dst, dst_off, src, src_off, len); +- } else if (!dst_is_fd) { +- return fuse_buf_read(dst, dst_off, src, src_off, len); +- } else { +- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); +- } ++ int src_is_fd = src->flags & FUSE_BUF_IS_FD; ++ int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; ++ ++ if (!src_is_fd && !dst_is_fd) { ++ 
char *dstmem = (char *)dst->mem + dst_off; ++ char *srcmem = (char *)src->mem + src_off; ++ ++ if (dstmem != srcmem) { ++ if (dstmem + len <= srcmem || srcmem + len <= dstmem) { ++ memcpy(dstmem, srcmem, len); ++ } else { ++ memmove(dstmem, srcmem, len); ++ } ++ } ++ ++ return len; ++ } else if (!src_is_fd) { ++ return fuse_buf_write(dst, dst_off, src, src_off, len); ++ } else if (!dst_is_fd) { ++ return fuse_buf_read(dst, dst_off, src, src_off, len); ++ } else { ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); ++ } + } + + static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv) + { +- if (bufv->idx < bufv->count) +- return &bufv->buf[bufv->idx]; +- else +- return NULL; ++ if (bufv->idx < bufv->count) { ++ return &bufv->buf[bufv->idx]; ++ } else { ++ return NULL; ++ } + } + + static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) + { +- const struct fuse_buf *buf = fuse_bufvec_current(bufv); +- +- bufv->off += len; +- assert(bufv->off <= buf->size); +- if (bufv->off == buf->size) { +- assert(bufv->idx < bufv->count); +- bufv->idx++; +- if (bufv->idx == bufv->count) +- return 0; +- bufv->off = 0; +- } +- return 1; ++ const struct fuse_buf *buf = fuse_bufvec_current(bufv); ++ ++ bufv->off += len; ++ assert(bufv->off <= buf->size); ++ if (bufv->off == buf->size) { ++ assert(bufv->idx < bufv->count); ++ bufv->idx++; ++ if (bufv->idx == bufv->count) { ++ return 0; ++ } ++ bufv->off = 0; ++ } ++ return 1; + } + + ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, +- enum fuse_buf_copy_flags flags) ++ enum fuse_buf_copy_flags flags) + { +- size_t copied = 0; +- +- if (dstv == srcv) +- return fuse_buf_size(dstv); +- +- for (;;) { +- const struct fuse_buf *src = fuse_bufvec_current(srcv); +- const struct fuse_buf *dst = fuse_bufvec_current(dstv); +- size_t src_len; +- size_t dst_len; +- size_t len; +- ssize_t res; +- +- if (src == NULL || dst == NULL) +- break; +- +- src_len = src->size - srcv->off; +- 
dst_len = dst->size - dstv->off; +- len = min_size(src_len, dst_len); +- +- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); +- if (res < 0) { +- if (!copied) +- return res; +- break; +- } +- copied += res; +- +- if (!fuse_bufvec_advance(srcv, res) || +- !fuse_bufvec_advance(dstv, res)) +- break; +- +- if (res < len) +- break; +- } +- +- return copied; ++ size_t copied = 0; ++ ++ if (dstv == srcv) { ++ return fuse_buf_size(dstv); ++ } ++ ++ for (;;) { ++ const struct fuse_buf *src = fuse_bufvec_current(srcv); ++ const struct fuse_buf *dst = fuse_bufvec_current(dstv); ++ size_t src_len; ++ size_t dst_len; ++ size_t len; ++ ssize_t res; ++ ++ if (src == NULL || dst == NULL) { ++ break; ++ } ++ ++ src_len = src->size - srcv->off; ++ dst_len = dst->size - dstv->off; ++ len = min_size(src_len, dst_len); ++ ++ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); ++ if (res < 0) { ++ if (!copied) { ++ return res; ++ } ++ break; ++ } ++ copied += res; ++ ++ if (!fuse_bufvec_advance(srcv, res) || ++ !fuse_bufvec_advance(dstv, res)) { ++ break; ++ } ++ ++ if (res < len) { ++ break; ++ } ++ } ++ ++ return copied; + } +diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h +index 3202fba..7a4c713 100644 +--- a/tools/virtiofsd/fuse.h ++++ b/tools/virtiofsd/fuse.h +@@ -1,15 +1,15 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. 
++ */ + + #ifndef FUSE_H_ + #define FUSE_H_ + +-/** @file ++/* + * + * This file defines the library interface of FUSE + * +@@ -19,15 +19,15 @@ + #include "fuse_common.h" + + #include +-#include +-#include + #include + #include ++#include + #include ++#include + +-/* ----------------------------------------------------------- * +- * Basic FUSE API * +- * ----------------------------------------------------------- */ ++/* ++ * Basic FUSE API ++ */ + + /** Handle for a FUSE filesystem */ + struct fuse; +@@ -36,38 +36,39 @@ struct fuse; + * Readdir flags, passed to ->readdir() + */ + enum fuse_readdir_flags { +- /** +- * "Plus" mode. +- * +- * The kernel wants to prefill the inode cache during readdir. The +- * filesystem may honour this by filling in the attributes and setting +- * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also +- * just ignore this flag completely. +- */ +- FUSE_READDIR_PLUS = (1 << 0), ++ /** ++ * "Plus" mode. ++ * ++ * The kernel wants to prefill the inode cache during readdir. The ++ * filesystem may honour this by filling in the attributes and setting ++ * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also ++ * just ignore this flag completely. ++ */ ++ FUSE_READDIR_PLUS = (1 << 0), + }; + + enum fuse_fill_dir_flags { +- /** +- * "Plus" mode: all file attributes are valid +- * +- * The attributes are used by the kernel to prefill the inode cache +- * during a readdir. +- * +- * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set +- * and vice versa. +- */ +- FUSE_FILL_DIR_PLUS = (1 << 1), ++ /** ++ * "Plus" mode: all file attributes are valid ++ * ++ * The attributes are used by the kernel to prefill the inode cache ++ * during a readdir. ++ * ++ * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set ++ * and vice versa. 
++ */ ++ FUSE_FILL_DIR_PLUS = (1 << 1), + }; + +-/** Function to add an entry in a readdir() operation ++/** ++ * Function to add an entry in a readdir() operation + * + * The *off* parameter can be any non-zero value that enables the + * filesystem to identify the current point in the directory + * stream. It does not need to be the actual physical position. A + * value of zero is reserved to indicate that seeking in directories + * is not supported. +- * ++ * + * @param buf the buffer passed to the readdir() operation + * @param name the file name of the directory entry + * @param stat file attributes, can be NULL +@@ -75,9 +76,9 @@ enum fuse_fill_dir_flags { + * @param flags fill flags + * @return 1 if buffer is full, zero otherwise + */ +-typedef int (*fuse_fill_dir_t) (void *buf, const char *name, +- const struct stat *stbuf, off_t off, +- enum fuse_fill_dir_flags flags); ++typedef int (*fuse_fill_dir_t)(void *buf, const char *name, ++ const struct stat *stbuf, off_t off, ++ enum fuse_fill_dir_flags flags); + /** + * Configuration of the high-level API + * +@@ -87,186 +88,186 @@ typedef int (*fuse_fill_dir_t) (void *buf, const char *name, + * file system implementation. + */ + struct fuse_config { +- /** +- * If `set_gid` is non-zero, the st_gid attribute of each file +- * is overwritten with the value of `gid`. +- */ +- int set_gid; +- unsigned int gid; +- +- /** +- * If `set_uid` is non-zero, the st_uid attribute of each file +- * is overwritten with the value of `uid`. +- */ +- int set_uid; +- unsigned int uid; +- +- /** +- * If `set_mode` is non-zero, the any permissions bits set in +- * `umask` are unset in the st_mode attribute of each file. +- */ +- int set_mode; +- unsigned int umask; +- +- /** +- * The timeout in seconds for which name lookups will be +- * cached. +- */ +- double entry_timeout; +- +- /** +- * The timeout in seconds for which a negative lookup will be +- * cached. 
This means, that if file did not exist (lookup +- * retuned ENOENT), the lookup will only be redone after the +- * timeout, and the file/directory will be assumed to not +- * exist until then. A value of zero means that negative +- * lookups are not cached. +- */ +- double negative_timeout; +- +- /** +- * The timeout in seconds for which file/directory attributes +- * (as returned by e.g. the `getattr` handler) are cached. +- */ +- double attr_timeout; +- +- /** +- * Allow requests to be interrupted +- */ +- int intr; +- +- /** +- * Specify which signal number to send to the filesystem when +- * a request is interrupted. The default is hardcoded to +- * USR1. +- */ +- int intr_signal; +- +- /** +- * Normally, FUSE assigns inodes to paths only for as long as +- * the kernel is aware of them. With this option inodes are +- * instead remembered for at least this many seconds. This +- * will require more memory, but may be necessary when using +- * applications that make use of inode numbers. +- * +- * A number of -1 means that inodes will be remembered for the +- * entire life-time of the file-system process. +- */ +- int remember; +- +- /** +- * The default behavior is that if an open file is deleted, +- * the file is renamed to a hidden file (.fuse_hiddenXXX), and +- * only removed when the file is finally released. This +- * relieves the filesystem implementation of having to deal +- * with this problem. This option disables the hiding +- * behavior, and files are removed immediately in an unlink +- * operation (or in a rename operation which overwrites an +- * existing file). +- * +- * It is recommended that you not use the hard_remove +- * option. 
When hard_remove is set, the following libc +- * functions fail on unlinked files (returning errno of +- * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), +- * ftruncate(2), fstat(2), fchmod(2), fchown(2) +- */ +- int hard_remove; +- +- /** +- * Honor the st_ino field in the functions getattr() and +- * fill_dir(). This value is used to fill in the st_ino field +- * in the stat(2), lstat(2), fstat(2) functions and the d_ino +- * field in the readdir(2) function. The filesystem does not +- * have to guarantee uniqueness, however some applications +- * rely on this value being unique for the whole filesystem. +- * +- * Note that this does *not* affect the inode that libfuse +- * and the kernel use internally (also called the "nodeid"). +- */ +- int use_ino; +- +- /** +- * If use_ino option is not given, still try to fill in the +- * d_ino field in readdir(2). If the name was previously +- * looked up, and is still in the cache, the inode number +- * found there will be used. Otherwise it will be set to -1. +- * If use_ino option is given, this option is ignored. +- */ +- int readdir_ino; +- +- /** +- * This option disables the use of page cache (file content cache) +- * in the kernel for this filesystem. This has several affects: +- * +- * 1. Each read(2) or write(2) system call will initiate one +- * or more read or write operations, data will not be +- * cached in the kernel. +- * +- * 2. The return value of the read() and write() system calls +- * will correspond to the return values of the read and +- * write operations. This is useful for example if the +- * file size is not known in advance (before reading it). +- * +- * Internally, enabling this option causes fuse to set the +- * `direct_io` field of `struct fuse_file_info` - overwriting +- * any value that was put there by the file system. +- */ +- int direct_io; +- +- /** +- * This option disables flushing the cache of the file +- * contents on every open(2). 
This should only be enabled on +- * filesystems where the file data is never changed +- * externally (not through the mounted FUSE filesystem). Thus +- * it is not suitable for network filesystems and other +- * intermediate filesystems. +- * +- * NOTE: if this option is not specified (and neither +- * direct_io) data is still cached after the open(2), so a +- * read(2) system call will not always initiate a read +- * operation. +- * +- * Internally, enabling this option causes fuse to set the +- * `keep_cache` field of `struct fuse_file_info` - overwriting +- * any value that was put there by the file system. +- */ +- int kernel_cache; +- +- /** +- * This option is an alternative to `kernel_cache`. Instead of +- * unconditionally keeping cached data, the cached data is +- * invalidated on open(2) if if the modification time or the +- * size of the file has changed since it was last opened. +- */ +- int auto_cache; +- +- /** +- * The timeout in seconds for which file attributes are cached +- * for the purpose of checking if auto_cache should flush the +- * file data on open. +- */ +- int ac_attr_timeout_set; +- double ac_attr_timeout; +- +- /** +- * If this option is given the file-system handlers for the +- * following operations will not receive path information: +- * read, write, flush, release, fsync, readdir, releasedir, +- * fsyncdir, lock, ioctl and poll. +- * +- * For the truncate, getattr, chmod, chown and utimens +- * operations the path will be provided only if the struct +- * fuse_file_info argument is NULL. +- */ +- int nullpath_ok; +- +- /** +- * The remaining options are used by libfuse internally and +- * should not be touched. +- */ +- int show_help; +- char *modules; +- int debug; ++ /** ++ * If `set_gid` is non-zero, the st_gid attribute of each file ++ * is overwritten with the value of `gid`. 
++ */ ++ int set_gid; ++ unsigned int gid; ++ ++ /** ++ * If `set_uid` is non-zero, the st_uid attribute of each file ++ * is overwritten with the value of `uid`. ++ */ ++ int set_uid; ++ unsigned int uid; ++ ++ /** ++ * If `set_mode` is non-zero, the any permissions bits set in ++ * `umask` are unset in the st_mode attribute of each file. ++ */ ++ int set_mode; ++ unsigned int umask; ++ ++ /** ++ * The timeout in seconds for which name lookups will be ++ * cached. ++ */ ++ double entry_timeout; ++ ++ /** ++ * The timeout in seconds for which a negative lookup will be ++ * cached. This means, that if file did not exist (lookup ++ * retuned ENOENT), the lookup will only be redone after the ++ * timeout, and the file/directory will be assumed to not ++ * exist until then. A value of zero means that negative ++ * lookups are not cached. ++ */ ++ double negative_timeout; ++ ++ /** ++ * The timeout in seconds for which file/directory attributes ++ * (as returned by e.g. the `getattr` handler) are cached. ++ */ ++ double attr_timeout; ++ ++ /** ++ * Allow requests to be interrupted ++ */ ++ int intr; ++ ++ /** ++ * Specify which signal number to send to the filesystem when ++ * a request is interrupted. The default is hardcoded to ++ * USR1. ++ */ ++ int intr_signal; ++ ++ /** ++ * Normally, FUSE assigns inodes to paths only for as long as ++ * the kernel is aware of them. With this option inodes are ++ * instead remembered for at least this many seconds. This ++ * will require more memory, but may be necessary when using ++ * applications that make use of inode numbers. ++ * ++ * A number of -1 means that inodes will be remembered for the ++ * entire life-time of the file-system process. ++ */ ++ int remember; ++ ++ /** ++ * The default behavior is that if an open file is deleted, ++ * the file is renamed to a hidden file (.fuse_hiddenXXX), and ++ * only removed when the file is finally released. 
This ++ * relieves the filesystem implementation of having to deal ++ * with this problem. This option disables the hiding ++ * behavior, and files are removed immediately in an unlink ++ * operation (or in a rename operation which overwrites an ++ * existing file). ++ * ++ * It is recommended that you not use the hard_remove ++ * option. When hard_remove is set, the following libc ++ * functions fail on unlinked files (returning errno of ++ * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), ++ * ftruncate(2), fstat(2), fchmod(2), fchown(2) ++ */ ++ int hard_remove; ++ ++ /** ++ * Honor the st_ino field in the functions getattr() and ++ * fill_dir(). This value is used to fill in the st_ino field ++ * in the stat(2), lstat(2), fstat(2) functions and the d_ino ++ * field in the readdir(2) function. The filesystem does not ++ * have to guarantee uniqueness, however some applications ++ * rely on this value being unique for the whole filesystem. ++ * ++ * Note that this does *not* affect the inode that libfuse ++ * and the kernel use internally (also called the "nodeid"). ++ */ ++ int use_ino; ++ ++ /** ++ * If use_ino option is not given, still try to fill in the ++ * d_ino field in readdir(2). If the name was previously ++ * looked up, and is still in the cache, the inode number ++ * found there will be used. Otherwise it will be set to -1. ++ * If use_ino option is given, this option is ignored. ++ */ ++ int readdir_ino; ++ ++ /** ++ * This option disables the use of page cache (file content cache) ++ * in the kernel for this filesystem. This has several affects: ++ * ++ * 1. Each read(2) or write(2) system call will initiate one ++ * or more read or write operations, data will not be ++ * cached in the kernel. ++ * ++ * 2. The return value of the read() and write() system calls ++ * will correspond to the return values of the read and ++ * write operations. This is useful for example if the ++ * file size is not known in advance (before reading it). 
++ * ++ * Internally, enabling this option causes fuse to set the ++ * `direct_io` field of `struct fuse_file_info` - overwriting ++ * any value that was put there by the file system. ++ */ ++ int direct_io; ++ ++ /** ++ * This option disables flushing the cache of the file ++ * contents on every open(2). This should only be enabled on ++ * filesystems where the file data is never changed ++ * externally (not through the mounted FUSE filesystem). Thus ++ * it is not suitable for network filesystems and other ++ * intermediate filesystems. ++ * ++ * NOTE: if this option is not specified (and neither ++ * direct_io) data is still cached after the open(2), so a ++ * read(2) system call will not always initiate a read ++ * operation. ++ * ++ * Internally, enabling this option causes fuse to set the ++ * `keep_cache` field of `struct fuse_file_info` - overwriting ++ * any value that was put there by the file system. ++ */ ++ int kernel_cache; ++ ++ /** ++ * This option is an alternative to `kernel_cache`. Instead of ++ * unconditionally keeping cached data, the cached data is ++ * invalidated on open(2) if if the modification time or the ++ * size of the file has changed since it was last opened. ++ */ ++ int auto_cache; ++ ++ /** ++ * The timeout in seconds for which file attributes are cached ++ * for the purpose of checking if auto_cache should flush the ++ * file data on open. ++ */ ++ int ac_attr_timeout_set; ++ double ac_attr_timeout; ++ ++ /** ++ * If this option is given the file-system handlers for the ++ * following operations will not receive path information: ++ * read, write, flush, release, fsync, readdir, releasedir, ++ * fsyncdir, lock, ioctl and poll. ++ * ++ * For the truncate, getattr, chmod, chown and utimens ++ * operations the path will be provided only if the struct ++ * fuse_file_info argument is NULL. ++ */ ++ int nullpath_ok; ++ ++ /** ++ * The remaining options are used by libfuse internally and ++ * should not be touched. 
++ */ ++ int show_help; ++ char *modules; ++ int debug; + }; + + +@@ -293,515 +294,535 @@ struct fuse_config { + * Almost all operations take a path which can be of any length. + */ + struct fuse_operations { +- /** Get file attributes. +- * +- * Similar to stat(). The 'st_dev' and 'st_blksize' fields are +- * ignored. The 'st_ino' field is ignored except if the 'use_ino' +- * mount option is given. In that case it is passed to userspace, +- * but libfuse and the kernel will still assign a different +- * inode for internal use (called the "nodeid"). +- * +- * `fi` will always be NULL if the file is not currently open, but +- * may also be NULL if the file is open. +- */ +- int (*getattr) (const char *, struct stat *, struct fuse_file_info *fi); +- +- /** Read the target of a symbolic link +- * +- * The buffer should be filled with a null terminated string. The +- * buffer size argument includes the space for the terminating +- * null character. If the linkname is too long to fit in the +- * buffer, it should be truncated. The return value should be 0 +- * for success. +- */ +- int (*readlink) (const char *, char *, size_t); +- +- /** Create a file node +- * +- * This is called for creation of all non-directory, non-symlink +- * nodes. If the filesystem defines a create() method, then for +- * regular files that will be called instead. +- */ +- int (*mknod) (const char *, mode_t, dev_t); +- +- /** Create a directory +- * +- * Note that the mode argument may not have the type specification +- * bits set, i.e. S_ISDIR(mode) can be false. To obtain the +- * correct directory type bits use mode|S_IFDIR +- * */ +- int (*mkdir) (const char *, mode_t); +- +- /** Remove a file */ +- int (*unlink) (const char *); +- +- /** Remove a directory */ +- int (*rmdir) (const char *); +- +- /** Create a symbolic link */ +- int (*symlink) (const char *, const char *); +- +- /** Rename a file +- * +- * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. 
If +- * RENAME_NOREPLACE is specified, the filesystem must not +- * overwrite *newname* if it exists and return an error +- * instead. If `RENAME_EXCHANGE` is specified, the filesystem +- * must atomically exchange the two files, i.e. both must +- * exist and neither may be deleted. +- */ +- int (*rename) (const char *, const char *, unsigned int flags); +- +- /** Create a hard link to a file */ +- int (*link) (const char *, const char *); +- +- /** Change the permission bits of a file +- * +- * `fi` will always be NULL if the file is not currenlty open, but +- * may also be NULL if the file is open. +- */ +- int (*chmod) (const char *, mode_t, struct fuse_file_info *fi); +- +- /** Change the owner and group of a file +- * +- * `fi` will always be NULL if the file is not currenlty open, but +- * may also be NULL if the file is open. +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- */ +- int (*chown) (const char *, uid_t, gid_t, struct fuse_file_info *fi); +- +- /** Change the size of a file +- * +- * `fi` will always be NULL if the file is not currenlty open, but +- * may also be NULL if the file is open. +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- */ +- int (*truncate) (const char *, off_t, struct fuse_file_info *fi); +- +- /** Open a file +- * +- * Open flags are available in fi->flags. The following rules +- * apply. +- * +- * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be +- * filtered out / handled by the kernel. +- * +- * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) +- * should be used by the filesystem to check if the operation is +- * permitted. If the ``-o default_permissions`` mount option is +- * given, this check is already done by the kernel before calling +- * open() and may thus be omitted by the filesystem. 
+- * +- * - When writeback caching is enabled, the kernel may send +- * read requests even for files opened with O_WRONLY. The +- * filesystem should be prepared to handle this. +- * +- * - When writeback caching is disabled, the filesystem is +- * expected to properly handle the O_APPEND flag and ensure +- * that each write is appending to the end of the file. +- * +- * - When writeback caching is enabled, the kernel will +- * handle O_APPEND. However, unless all changes to the file +- * come through the kernel this will not work reliably. The +- * filesystem should thus either ignore the O_APPEND flag +- * (and let the kernel handle it), or return an error +- * (indicating that reliably O_APPEND is not available). +- * +- * Filesystem may store an arbitrary file handle (pointer, +- * index, etc) in fi->fh, and use this in other all other file +- * operations (read, write, flush, release, fsync). +- * +- * Filesystem may also implement stateless file I/O and not store +- * anything in fi->fh. +- * +- * There are also some flags (direct_io, keep_cache) which the +- * filesystem may set in fi, to change the way the file is opened. +- * See fuse_file_info structure in for more details. +- * +- * If this request is answered with an error code of ENOSYS +- * and FUSE_CAP_NO_OPEN_SUPPORT is set in +- * `fuse_conn_info.capable`, this is treated as success and +- * future calls to open will also succeed without being send +- * to the filesystem process. +- * +- */ +- int (*open) (const char *, struct fuse_file_info *); +- +- /** Read data from an open file +- * +- * Read should return exactly the number of bytes requested except +- * on EOF or error, otherwise the rest of the data will be +- * substituted with zeroes. An exception to this is when the +- * 'direct_io' mount option is specified, in which case the return +- * value of the read system call will reflect the return value of +- * this operation. 
+- */ +- int (*read) (const char *, char *, size_t, off_t, +- struct fuse_file_info *); +- +- /** Write data to an open file +- * +- * Write should return exactly the number of bytes requested +- * except on error. An exception to this is when the 'direct_io' +- * mount option is specified (see read operation). +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- */ +- int (*write) (const char *, const char *, size_t, off_t, +- struct fuse_file_info *); +- +- /** Get file system statistics +- * +- * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored +- */ +- int (*statfs) (const char *, struct statvfs *); +- +- /** Possibly flush cached data +- * +- * BIG NOTE: This is not equivalent to fsync(). It's not a +- * request to sync dirty data. +- * +- * Flush is called on each close() of a file descriptor, as opposed to +- * release which is called on the close of the last file descriptor for +- * a file. Under Linux, errors returned by flush() will be passed to +- * userspace as errors from close(), so flush() is a good place to write +- * back any cached dirty data. However, many applications ignore errors +- * on close(), and on non-Linux systems, close() may succeed even if flush() +- * returns an error. For these reasons, filesystems should not assume +- * that errors returned by flush will ever be noticed or even +- * delivered. +- * +- * NOTE: The flush() method may be called more than once for each +- * open(). This happens if more than one file descriptor refers to an +- * open file handle, e.g. due to dup(), dup2() or fork() calls. It is +- * not possible to determine if a flush is final, so each flush should +- * be treated equally. Multiple write-flush sequences are relatively +- * rare, so this shouldn't be a problem. +- * +- * Filesystems shouldn't assume that flush will be called at any +- * particular point. It may be called more times than expected, or not +- * at all. 
+- * +- * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html +- */ +- int (*flush) (const char *, struct fuse_file_info *); +- +- /** Release an open file +- * +- * Release is called when there are no more references to an open +- * file: all file descriptors are closed and all memory mappings +- * are unmapped. +- * +- * For every open() call there will be exactly one release() call +- * with the same flags and file handle. It is possible to +- * have a file opened more than once, in which case only the last +- * release will mean, that no more reads/writes will happen on the +- * file. The return value of release is ignored. +- */ +- int (*release) (const char *, struct fuse_file_info *); +- +- /** Synchronize file contents +- * +- * If the datasync parameter is non-zero, then only the user data +- * should be flushed, not the meta data. +- */ +- int (*fsync) (const char *, int, struct fuse_file_info *); +- +- /** Set extended attributes */ +- int (*setxattr) (const char *, const char *, const char *, size_t, int); +- +- /** Get extended attributes */ +- int (*getxattr) (const char *, const char *, char *, size_t); +- +- /** List extended attributes */ +- int (*listxattr) (const char *, char *, size_t); +- +- /** Remove extended attributes */ +- int (*removexattr) (const char *, const char *); +- +- /** Open directory +- * +- * Unless the 'default_permissions' mount option is given, +- * this method should check if opendir is permitted for this +- * directory. Optionally opendir may also return an arbitrary +- * filehandle in the fuse_file_info structure, which will be +- * passed to readdir, releasedir and fsyncdir. +- */ +- int (*opendir) (const char *, struct fuse_file_info *); +- +- /** Read directory +- * +- * The filesystem may choose between two modes of operation: +- * +- * 1) The readdir implementation ignores the offset parameter, and +- * passes zero to the filler function's offset. 
The filler +- * function will not return '1' (unless an error happens), so the +- * whole directory is read in a single readdir operation. +- * +- * 2) The readdir implementation keeps track of the offsets of the +- * directory entries. It uses the offset parameter and always +- * passes non-zero offset to the filler function. When the buffer +- * is full (or an error happens) the filler function will return +- * '1'. +- */ +- int (*readdir) (const char *, void *, fuse_fill_dir_t, off_t, +- struct fuse_file_info *, enum fuse_readdir_flags); +- +- /** Release directory +- */ +- int (*releasedir) (const char *, struct fuse_file_info *); +- +- /** Synchronize directory contents +- * +- * If the datasync parameter is non-zero, then only the user data +- * should be flushed, not the meta data +- */ +- int (*fsyncdir) (const char *, int, struct fuse_file_info *); +- +- /** +- * Initialize filesystem +- * +- * The return value will passed in the `private_data` field of +- * `struct fuse_context` to all file operations, and as a +- * parameter to the destroy() method. It overrides the initial +- * value provided to fuse_main() / fuse_new(). +- */ +- void *(*init) (struct fuse_conn_info *conn, +- struct fuse_config *cfg); +- +- /** +- * Clean up filesystem +- * +- * Called on filesystem exit. +- */ +- void (*destroy) (void *private_data); +- +- /** +- * Check file access permissions +- * +- * This will be called for the access() system call. If the +- * 'default_permissions' mount option is given, this method is not +- * called. +- * +- * This method is not called under Linux kernel versions 2.4.x +- */ +- int (*access) (const char *, int); +- +- /** +- * Create and open a file +- * +- * If the file does not exist, first create it with the specified +- * mode, and then open it. +- * +- * If this method is not implemented or under Linux kernel +- * versions earlier than 2.6.15, the mknod() and open() methods +- * will be called instead. 
+- */ +- int (*create) (const char *, mode_t, struct fuse_file_info *); +- +- /** +- * Perform POSIX file locking operation +- * +- * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. +- * +- * For the meaning of fields in 'struct flock' see the man page +- * for fcntl(2). The l_whence field will always be set to +- * SEEK_SET. +- * +- * For checking lock ownership, the 'fuse_file_info->owner' +- * argument must be used. +- * +- * For F_GETLK operation, the library will first check currently +- * held locks, and if a conflicting lock is found it will return +- * information without calling this method. This ensures, that +- * for local locks the l_pid field is correctly filled in. The +- * results may not be accurate in case of race conditions and in +- * the presence of hard links, but it's unlikely that an +- * application would rely on accurate GETLK results in these +- * cases. If a conflicting lock is not found, this method will be +- * called, and the filesystem may fill out l_pid by a meaningful +- * value, or it may leave this field zero. +- * +- * For F_SETLK and F_SETLKW the l_pid field will be set to the pid +- * of the process performing the locking operation. +- * +- * Note: if this method is not implemented, the kernel will still +- * allow file locking to work locally. Hence it is only +- * interesting for network filesystems and similar. +- */ +- int (*lock) (const char *, struct fuse_file_info *, int cmd, +- struct flock *); +- +- /** +- * Change the access and modification times of a file with +- * nanosecond resolution +- * +- * This supersedes the old utime() interface. New applications +- * should use this. +- * +- * `fi` will always be NULL if the file is not currenlty open, but +- * may also be NULL if the file is open. +- * +- * See the utimensat(2) man page for details. 
+- */ +- int (*utimens) (const char *, const struct timespec tv[2], +- struct fuse_file_info *fi); +- +- /** +- * Map block index within file to block index within device +- * +- * Note: This makes sense only for block device backed filesystems +- * mounted with the 'blkdev' option +- */ +- int (*bmap) (const char *, size_t blocksize, uint64_t *idx); +- +- /** +- * Ioctl +- * +- * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in +- * 64bit environment. The size and direction of data is +- * determined by _IOC_*() decoding of cmd. For _IOC_NONE, +- * data will be NULL, for _IOC_WRITE data is out area, for +- * _IOC_READ in area and if both are set in/out area. In all +- * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. +- * +- * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a +- * directory file handle. +- * +- * Note : the unsigned long request submitted by the application +- * is truncated to 32 bits. +- */ +- int (*ioctl) (const char *, unsigned int cmd, void *arg, +- struct fuse_file_info *, unsigned int flags, void *data); +- +- /** +- * Poll for IO readiness events +- * +- * Note: If ph is non-NULL, the client should notify +- * when IO readiness events occur by calling +- * fuse_notify_poll() with the specified ph. +- * +- * Regardless of the number of times poll with a non-NULL ph +- * is received, single notification is enough to clear all. +- * Notifying more times incurs overhead but doesn't harm +- * correctness. +- * +- * The callee is responsible for destroying ph with +- * fuse_pollhandle_destroy() when no longer in use. +- */ +- int (*poll) (const char *, struct fuse_file_info *, +- struct fuse_pollhandle *ph, unsigned *reventsp); +- +- /** Write contents of buffer to an open file +- * +- * Similar to the write() method, but data is supplied in a +- * generic buffer. Use fuse_buf_copy() to transfer data to +- * the destination. 
+- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- */ +- int (*write_buf) (const char *, struct fuse_bufvec *buf, off_t off, +- struct fuse_file_info *); +- +- /** Store data from an open file in a buffer +- * +- * Similar to the read() method, but data is stored and +- * returned in a generic buffer. +- * +- * No actual copying of data has to take place, the source +- * file descriptor may simply be stored in the buffer for +- * later data transfer. +- * +- * The buffer must be allocated dynamically and stored at the +- * location pointed to by bufp. If the buffer contains memory +- * regions, they too must be allocated using malloc(). The +- * allocated memory will be freed by the caller. +- */ +- int (*read_buf) (const char *, struct fuse_bufvec **bufp, +- size_t size, off_t off, struct fuse_file_info *); +- /** +- * Perform BSD file locking operation +- * +- * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN +- * +- * Nonblocking requests will be indicated by ORing LOCK_NB to +- * the above operations +- * +- * For more information see the flock(2) manual page. +- * +- * Additionally fi->owner will be set to a value unique to +- * this open file. This same value will be supplied to +- * ->release() when the file is released. +- * +- * Note: if this method is not implemented, the kernel will still +- * allow file locking to work locally. Hence it is only +- * interesting for network filesystems and similar. +- */ +- int (*flock) (const char *, struct fuse_file_info *, int op); +- +- /** +- * Allocates space for an open file +- * +- * This function ensures that required space is allocated for specified +- * file. If this function returns success then any subsequent write +- * request to specified range is guaranteed not to fail because of lack +- * of space on the file system media. 
+- */ +- int (*fallocate) (const char *, int, off_t, off_t, +- struct fuse_file_info *); +- +- /** +- * Copy a range of data from one file to another +- * +- * Performs an optimized copy between two file descriptors without the +- * additional cost of transferring data through the FUSE kernel module +- * to user space (glibc) and then back into the FUSE filesystem again. +- * +- * In case this method is not implemented, glibc falls back to reading +- * data from the source and writing to the destination. Effectively +- * doing an inefficient copy of the data. +- */ +- ssize_t (*copy_file_range) (const char *path_in, +- struct fuse_file_info *fi_in, +- off_t offset_in, const char *path_out, +- struct fuse_file_info *fi_out, +- off_t offset_out, size_t size, int flags); +- +- /** +- * Find next data or hole after the specified offset +- */ +- off_t (*lseek) (const char *, off_t off, int whence, struct fuse_file_info *); ++ /** ++ * Get file attributes. ++ * ++ * Similar to stat(). The 'st_dev' and 'st_blksize' fields are ++ * ignored. The 'st_ino' field is ignored except if the 'use_ino' ++ * mount option is given. In that case it is passed to userspace, ++ * but libfuse and the kernel will still assign a different ++ * inode for internal use (called the "nodeid"). ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ */ ++ int (*getattr)(const char *, struct stat *, struct fuse_file_info *fi); ++ ++ /** ++ * Read the target of a symbolic link ++ * ++ * The buffer should be filled with a null terminated string. The ++ * buffer size argument includes the space for the terminating ++ * null character. If the linkname is too long to fit in the ++ * buffer, it should be truncated. The return value should be 0 ++ * for success. ++ */ ++ int (*readlink)(const char *, char *, size_t); ++ ++ /** ++ * Create a file node ++ * ++ * This is called for creation of all non-directory, non-symlink ++ * nodes. 
If the filesystem defines a create() method, then for ++ * regular files that will be called instead. ++ */ ++ int (*mknod)(const char *, mode_t, dev_t); ++ ++ /** ++ * Create a directory ++ * ++ * Note that the mode argument may not have the type specification ++ * bits set, i.e. S_ISDIR(mode) can be false. To obtain the ++ * correct directory type bits use mode|S_IFDIR ++ */ ++ int (*mkdir)(const char *, mode_t); ++ ++ /** Remove a file */ ++ int (*unlink)(const char *); ++ ++ /** Remove a directory */ ++ int (*rmdir)(const char *); ++ ++ /** Create a symbolic link */ ++ int (*symlink)(const char *, const char *); ++ ++ /** ++ * Rename a file ++ * ++ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If ++ * RENAME_NOREPLACE is specified, the filesystem must not ++ * overwrite *newname* if it exists and return an error ++ * instead. If `RENAME_EXCHANGE` is specified, the filesystem ++ * must atomically exchange the two files, i.e. both must ++ * exist and neither may be deleted. ++ */ ++ int (*rename)(const char *, const char *, unsigned int flags); ++ ++ /** Create a hard link to a file */ ++ int (*link)(const char *, const char *); ++ ++ /** ++ * Change the permission bits of a file ++ * ++ * `fi` will always be NULL if the file is not currenlty open, but ++ * may also be NULL if the file is open. ++ */ ++ int (*chmod)(const char *, mode_t, struct fuse_file_info *fi); ++ ++ /** ++ * Change the owner and group of a file ++ * ++ * `fi` will always be NULL if the file is not currenlty open, but ++ * may also be NULL if the file is open. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*chown)(const char *, uid_t, gid_t, struct fuse_file_info *fi); ++ ++ /** ++ * Change the size of a file ++ * ++ * `fi` will always be NULL if the file is not currenlty open, but ++ * may also be NULL if the file is open. 
++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*truncate)(const char *, off_t, struct fuse_file_info *fi); ++ ++ /** ++ * Open a file ++ * ++ * Open flags are available in fi->flags. The following rules ++ * apply. ++ * ++ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be ++ * filtered out / handled by the kernel. ++ * ++ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) ++ * should be used by the filesystem to check if the operation is ++ * permitted. If the ``-o default_permissions`` mount option is ++ * given, this check is already done by the kernel before calling ++ * open() and may thus be omitted by the filesystem. ++ * ++ * - When writeback caching is enabled, the kernel may send ++ * read requests even for files opened with O_WRONLY. The ++ * filesystem should be prepared to handle this. ++ * ++ * - When writeback caching is disabled, the filesystem is ++ * expected to properly handle the O_APPEND flag and ensure ++ * that each write is appending to the end of the file. ++ * ++ * - When writeback caching is enabled, the kernel will ++ * handle O_APPEND. However, unless all changes to the file ++ * come through the kernel this will not work reliably. The ++ * filesystem should thus either ignore the O_APPEND flag ++ * (and let the kernel handle it), or return an error ++ * (indicating that reliably O_APPEND is not available). ++ * ++ * Filesystem may store an arbitrary file handle (pointer, ++ * index, etc) in fi->fh, and use this in other all other file ++ * operations (read, write, flush, release, fsync). ++ * ++ * Filesystem may also implement stateless file I/O and not store ++ * anything in fi->fh. ++ * ++ * There are also some flags (direct_io, keep_cache) which the ++ * filesystem may set in fi, to change the way the file is opened. ++ * See fuse_file_info structure in for more details. 
++ * ++ * If this request is answered with an error code of ENOSYS ++ * and FUSE_CAP_NO_OPEN_SUPPORT is set in ++ * `fuse_conn_info.capable`, this is treated as success and ++ * future calls to open will also succeed without being send ++ * to the filesystem process. ++ * ++ */ ++ int (*open)(const char *, struct fuse_file_info *); ++ ++ /** ++ * Read data from an open file ++ * ++ * Read should return exactly the number of bytes requested except ++ * on EOF or error, otherwise the rest of the data will be ++ * substituted with zeroes. An exception to this is when the ++ * 'direct_io' mount option is specified, in which case the return ++ * value of the read system call will reflect the return value of ++ * this operation. ++ */ ++ int (*read)(const char *, char *, size_t, off_t, struct fuse_file_info *); ++ ++ /** ++ * Write data to an open file ++ * ++ * Write should return exactly the number of bytes requested ++ * except on error. An exception to this is when the 'direct_io' ++ * mount option is specified (see read operation). ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*write)(const char *, const char *, size_t, off_t, ++ struct fuse_file_info *); ++ ++ /** ++ * Get file system statistics ++ * ++ * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored ++ */ ++ int (*statfs)(const char *, struct statvfs *); ++ ++ /** ++ * Possibly flush cached data ++ * ++ * BIG NOTE: This is not equivalent to fsync(). It's not a ++ * request to sync dirty data. ++ * ++ * Flush is called on each close() of a file descriptor, as opposed to ++ * release which is called on the close of the last file descriptor for ++ * a file. Under Linux, errors returned by flush() will be passed to ++ * userspace as errors from close(), so flush() is a good place to write ++ * back any cached dirty data. 
However, many applications ignore errors ++ * on close(), and on non-Linux systems, close() may succeed even if flush() ++ * returns an error. For these reasons, filesystems should not assume ++ * that errors returned by flush will ever be noticed or even ++ * delivered. ++ * ++ * NOTE: The flush() method may be called more than once for each ++ * open(). This happens if more than one file descriptor refers to an ++ * open file handle, e.g. due to dup(), dup2() or fork() calls. It is ++ * not possible to determine if a flush is final, so each flush should ++ * be treated equally. Multiple write-flush sequences are relatively ++ * rare, so this shouldn't be a problem. ++ * ++ * Filesystems shouldn't assume that flush will be called at any ++ * particular point. It may be called more times than expected, or not ++ * at all. ++ * ++ * [close]: ++ * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html ++ */ ++ int (*flush)(const char *, struct fuse_file_info *); ++ ++ /** ++ * Release an open file ++ * ++ * Release is called when there are no more references to an open ++ * file: all file descriptors are closed and all memory mappings ++ * are unmapped. ++ * ++ * For every open() call there will be exactly one release() call ++ * with the same flags and file handle. It is possible to ++ * have a file opened more than once, in which case only the last ++ * release will mean, that no more reads/writes will happen on the ++ * file. The return value of release is ignored. ++ */ ++ int (*release)(const char *, struct fuse_file_info *); ++ ++ /* ++ * Synchronize file contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data. 
++ */ ++ int (*fsync)(const char *, int, struct fuse_file_info *); ++ ++ /** Set extended attributes */ ++ int (*setxattr)(const char *, const char *, const char *, size_t, int); ++ ++ /** Get extended attributes */ ++ int (*getxattr)(const char *, const char *, char *, size_t); ++ ++ /** List extended attributes */ ++ int (*listxattr)(const char *, char *, size_t); ++ ++ /** Remove extended attributes */ ++ int (*removexattr)(const char *, const char *); ++ ++ /* ++ * Open directory ++ * ++ * Unless the 'default_permissions' mount option is given, ++ * this method should check if opendir is permitted for this ++ * directory. Optionally opendir may also return an arbitrary ++ * filehandle in the fuse_file_info structure, which will be ++ * passed to readdir, releasedir and fsyncdir. ++ */ ++ int (*opendir)(const char *, struct fuse_file_info *); ++ ++ /* ++ * Read directory ++ * ++ * The filesystem may choose between two modes of operation: ++ * ++ * 1) The readdir implementation ignores the offset parameter, and ++ * passes zero to the filler function's offset. The filler ++ * function will not return '1' (unless an error happens), so the ++ * whole directory is read in a single readdir operation. ++ * ++ * 2) The readdir implementation keeps track of the offsets of the ++ * directory entries. It uses the offset parameter and always ++ * passes non-zero offset to the filler function. When the buffer ++ * is full (or an error happens) the filler function will return ++ * '1'. 
++ */ ++ int (*readdir)(const char *, void *, fuse_fill_dir_t, off_t, ++ struct fuse_file_info *, enum fuse_readdir_flags); ++ ++ /** ++ * Release directory ++ */ ++ int (*releasedir)(const char *, struct fuse_file_info *); ++ ++ /** ++ * Synchronize directory contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data ++ */ ++ int (*fsyncdir)(const char *, int, struct fuse_file_info *); ++ ++ /** ++ * Initialize filesystem ++ * ++ * The return value will be passed in the `private_data` field of ++ * `struct fuse_context` to all file operations, and as a ++ * parameter to the destroy() method. It overrides the initial ++ * value provided to fuse_main() / fuse_new(). ++ */ ++ void *(*init)(struct fuse_conn_info *conn, struct fuse_config *cfg); ++ ++ /** ++ * Clean up filesystem ++ * ++ * Called on filesystem exit. ++ */ ++ void (*destroy)(void *private_data); ++ ++ /** ++ * Check file access permissions ++ * ++ * This will be called for the access() system call. If the ++ * 'default_permissions' mount option is given, this method is not ++ * called. ++ * ++ * This method is not called under Linux kernel versions 2.4.x ++ */ ++ int (*access)(const char *, int); ++ ++ /** ++ * Create and open a file ++ * ++ * If the file does not exist, first create it with the specified ++ * mode, and then open it. ++ * ++ * If this method is not implemented or under Linux kernel ++ * versions earlier than 2.6.15, the mknod() and open() methods ++ * will be called instead. ++ */ ++ int (*create)(const char *, mode_t, struct fuse_file_info *); ++ ++ /** ++ * Perform POSIX file locking operation ++ * ++ * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. ++ * ++ * For the meaning of fields in 'struct flock' see the man page ++ * for fcntl(2). The l_whence field will always be set to ++ * SEEK_SET. ++ * ++ * For checking lock ownership, the 'fuse_file_info->owner' ++ * argument must be used.
++ * ++ * For F_GETLK operation, the library will first check currently ++ * held locks, and if a conflicting lock is found it will return ++ * information without calling this method. This ensures that ++ * for local locks the l_pid field is correctly filled in. The ++ * results may not be accurate in case of race conditions and in ++ * the presence of hard links, but it's unlikely that an ++ * application would rely on accurate GETLK results in these ++ * cases. If a conflicting lock is not found, this method will be ++ * called, and the filesystem may fill out l_pid by a meaningful ++ * value, or it may leave this field zero. ++ * ++ * For F_SETLK and F_SETLKW the l_pid field will be set to the pid ++ * of the process performing the locking operation. ++ * ++ * Note: if this method is not implemented, the kernel will still ++ * allow file locking to work locally. Hence it is only ++ * interesting for network filesystems and similar. ++ */ ++ int (*lock)(const char *, struct fuse_file_info *, int cmd, struct flock *); ++ ++ /** ++ * Change the access and modification times of a file with ++ * nanosecond resolution ++ * ++ * This supersedes the old utime() interface. New applications ++ * should use this. ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ * ++ * See the utimensat(2) man page for details. ++ */ ++ int (*utimens)(const char *, const struct timespec tv[2], ++ struct fuse_file_info *fi); ++ ++ /** ++ * Map block index within file to block index within device ++ * ++ * Note: This makes sense only for block device backed filesystems ++ * mounted with the 'blkdev' option ++ */ ++ int (*bmap)(const char *, size_t blocksize, uint64_t *idx); ++ ++ /** ++ * Ioctl ++ * ++ * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in ++ * 64bit environment. The size and direction of data is ++ * determined by _IOC_*() decoding of cmd.
For _IOC_NONE, ++ * data will be NULL, for _IOC_WRITE data is out area, for ++ * _IOC_READ in area and if both are set in/out area. In all ++ * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. ++ * ++ * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a ++ * directory file handle. ++ * ++ * Note : the unsigned long request submitted by the application ++ * is truncated to 32 bits. ++ */ ++ int (*ioctl)(const char *, unsigned int cmd, void *arg, ++ struct fuse_file_info *, unsigned int flags, void *data); ++ ++ /** ++ * Poll for IO readiness events ++ * ++ * Note: If ph is non-NULL, the client should notify ++ * when IO readiness events occur by calling ++ * fuse_notify_poll() with the specified ph. ++ * ++ * Regardless of the number of times poll with a non-NULL ph ++ * is received, single notification is enough to clear all. ++ * Notifying more times incurs overhead but doesn't harm ++ * correctness. ++ * ++ * The callee is responsible for destroying ph with ++ * fuse_pollhandle_destroy() when no longer in use. ++ */ ++ int (*poll)(const char *, struct fuse_file_info *, ++ struct fuse_pollhandle *ph, unsigned *reventsp); ++ ++ /* ++ * Write contents of buffer to an open file ++ * ++ * Similar to the write() method, but data is supplied in a ++ * generic buffer. Use fuse_buf_copy() to transfer data to ++ * the destination. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*write_buf)(const char *, struct fuse_bufvec *buf, off_t off, ++ struct fuse_file_info *); ++ ++ /* ++ * Store data from an open file in a buffer ++ * ++ * Similar to the read() method, but data is stored and ++ * returned in a generic buffer. ++ * ++ * No actual copying of data has to take place, the source ++ * file descriptor may simply be stored in the buffer for ++ * later data transfer. ++ * ++ * The buffer must be allocated dynamically and stored at the ++ * location pointed to by bufp. 
If the buffer contains memory ++ * regions, they too must be allocated using malloc(). The ++ * allocated memory will be freed by the caller. ++ */ ++ int (*read_buf)(const char *, struct fuse_bufvec **bufp, size_t size, ++ off_t off, struct fuse_file_info *); ++ /** ++ * Perform BSD file locking operation ++ * ++ * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN ++ * ++ * Nonblocking requests will be indicated by ORing LOCK_NB to ++ * the above operations ++ * ++ * For more information see the flock(2) manual page. ++ * ++ * Additionally fi->owner will be set to a value unique to ++ * this open file. This same value will be supplied to ++ * ->release() when the file is released. ++ * ++ * Note: if this method is not implemented, the kernel will still ++ * allow file locking to work locally. Hence it is only ++ * interesting for network filesystems and similar. ++ */ ++ int (*flock)(const char *, struct fuse_file_info *, int op); ++ ++ /** ++ * Allocates space for an open file ++ * ++ * This function ensures that required space is allocated for specified ++ * file. If this function returns success then any subsequent write ++ * request to specified range is guaranteed not to fail because of lack ++ * of space on the file system media. ++ */ ++ int (*fallocate)(const char *, int, off_t, off_t, struct fuse_file_info *); ++ ++ /** ++ * Copy a range of data from one file to another ++ * ++ * Performs an optimized copy between two file descriptors without the ++ * additional cost of transferring data through the FUSE kernel module ++ * to user space (glibc) and then back into the FUSE filesystem again. ++ * ++ * In case this method is not implemented, glibc falls back to reading ++ * data from the source and writing to the destination. Effectively ++ * doing an inefficient copy of the data. 
++ */ ++ ssize_t (*copy_file_range)(const char *path_in, ++ struct fuse_file_info *fi_in, off_t offset_in, ++ const char *path_out, ++ struct fuse_file_info *fi_out, off_t offset_out, ++ size_t size, int flags); ++ ++ /** ++ * Find next data or hole after the specified offset ++ */ ++ off_t (*lseek)(const char *, off_t off, int whence, ++ struct fuse_file_info *); + }; + +-/** Extra context that may be needed by some filesystems ++/* ++ * Extra context that may be needed by some filesystems + * + * The uid, gid and pid fields are not filled in case of a writepage + * operation. + */ + struct fuse_context { +- /** Pointer to the fuse object */ +- struct fuse *fuse; ++ /** Pointer to the fuse object */ ++ struct fuse *fuse; + +- /** User ID of the calling process */ +- uid_t uid; ++ /** User ID of the calling process */ ++ uid_t uid; + +- /** Group ID of the calling process */ +- gid_t gid; ++ /** Group ID of the calling process */ ++ gid_t gid; + +- /** Process ID of the calling thread */ +- pid_t pid; ++ /** Process ID of the calling thread */ ++ pid_t pid; + +- /** Private filesystem data */ +- void *private_data; ++ /** Private filesystem data */ ++ void *private_data; + +- /** Umask of the calling process */ +- mode_t umask; ++ /** Umask of the calling process */ ++ mode_t umask; + }; + + /** +@@ -859,15 +880,15 @@ struct fuse_context { + * Example usage, see hello.c + */ + /* +- int fuse_main(int argc, char *argv[], const struct fuse_operations *op, +- void *private_data); +-*/ +-#define fuse_main(argc, argv, op, private_data) \ +- fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) ++ * int fuse_main(int argc, char *argv[], const struct fuse_operations *op, ++ * void *private_data); ++ */ ++#define fuse_main(argc, argv, op, private_data) \ ++ fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) + +-/* ----------------------------------------------------------- * +- * More detailed API * +- * 
----------------------------------------------------------- */ ++/* ++ * More detailed API ++ */ + + /** + * Print available options (high- and low-level) to stdout. This is +@@ -910,12 +931,13 @@ void fuse_lib_help(struct fuse_args *args); + * @return the created FUSE handle + */ + #if FUSE_USE_VERSION == 30 +-struct fuse *fuse_new_30(struct fuse_args *args, const struct fuse_operations *op, +- size_t op_size, void *private_data); ++struct fuse *fuse_new_30(struct fuse_args *args, ++ const struct fuse_operations *op, size_t op_size, ++ void *private_data); + #define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data) + #else + struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op, +- size_t op_size, void *private_data); ++ size_t op_size, void *private_data); + #endif + + /** +@@ -940,7 +962,7 @@ void fuse_unmount(struct fuse *f); + /** + * Destroy the FUSE handle. + * +- * NOTE: This function does not unmount the filesystem. If this is ++ * NOTE: This function does not unmount the filesystem. If this is + * needed, call fuse_unmount() before calling this function. + * + * @param f the FUSE handle +@@ -1030,7 +1052,7 @@ int fuse_invalidate_path(struct fuse *f, const char *path); + * Do not call this directly, use fuse_main() + */ + int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, +- size_t op_size, void *private_data); ++ size_t op_size, void *private_data); + + /** + * Start the cleanup thread when using option "remember". 
+@@ -1081,89 +1103,87 @@ struct fuse_fs; + */ + + int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, +- struct fuse_file_info *fi); +-int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, +- const char *newpath, unsigned int flags); ++ struct fuse_file_info *fi); ++int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, const char *newpath, ++ unsigned int flags); + int fuse_fs_unlink(struct fuse_fs *fs, const char *path); + int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); +-int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, +- const char *path); ++int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, const char *path); + int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); +-int fuse_fs_release(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++int fuse_fs_release(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); + int fuse_fs_open(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, +- off_t off, struct fuse_file_info *fi); ++ off_t off, struct fuse_file_info *fi); + int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, +- struct fuse_bufvec **bufp, size_t size, off_t off, +- struct fuse_file_info *fi); ++ struct fuse_bufvec **bufp, size_t size, off_t off, ++ struct fuse_file_info *fi); + int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, +- size_t size, off_t off, struct fuse_file_info *fi); ++ size_t size, off_t off, struct fuse_file_info *fi); + int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, +- struct fuse_bufvec *buf, off_t off, +- struct fuse_file_info *fi); ++ struct fuse_bufvec *buf, off_t off, ++ struct fuse_file_info *fi); + int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int 
fuse_fs_flush(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf); + int fuse_fs_opendir(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, +- fuse_fill_dir_t filler, off_t off, +- struct fuse_file_info *fi, enum fuse_readdir_flags flags); ++ fuse_fill_dir_t filler, off_t off, ++ struct fuse_file_info *fi, enum fuse_readdir_flags flags); + int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_lock(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi, int cmd, struct flock *lock); ++ struct fuse_file_info *fi, int cmd, struct flock *lock); + int fuse_fs_flock(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi, int op); ++ struct fuse_file_info *fi, int op); + int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_utimens(struct fuse_fs *fs, const char *path, +- const struct timespec tv[2], struct fuse_file_info *fi); ++ const struct timespec tv[2], struct fuse_file_info *fi); + int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask); + int fuse_fs_readlink(struct fuse_fs *fs, 
const char *path, char *buf, +- size_t len); ++ size_t len); + int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, +- dev_t rdev); ++ dev_t rdev); + int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode); + int fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, +- const char *value, size_t size, int flags); ++ const char *value, size_t size, int flags); + int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, +- char *value, size_t size); ++ char *value, size_t size); + int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, +- size_t size); +-int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, +- const char *name); ++ size_t size); ++int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, const char *name); + int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, +- uint64_t *idx); ++ uint64_t *idx); + int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, +- void *arg, struct fuse_file_info *fi, unsigned int flags, +- void *data); ++ void *arg, struct fuse_file_info *fi, unsigned int flags, ++ void *data); + int fuse_fs_poll(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi, struct fuse_pollhandle *ph, +- unsigned *reventsp); ++ struct fuse_file_info *fi, struct fuse_pollhandle *ph, ++ unsigned *reventsp); + int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, +- off_t offset, off_t length, struct fuse_file_info *fi); ++ off_t offset, off_t length, struct fuse_file_info *fi); + ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, +- struct fuse_file_info *fi_in, off_t off_in, +- const char *path_out, +- struct fuse_file_info *fi_out, off_t off_out, +- size_t len, int flags); ++ struct fuse_file_info *fi_in, off_t off_in, ++ const char *path_out, ++ struct fuse_file_info *fi_out, off_t off_out, ++ size_t len, int flags); + off_t fuse_fs_lseek(struct fuse_fs *fs, const 
char *path, off_t off, int whence, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, +- struct fuse_config *cfg); ++ struct fuse_config *cfg); + void fuse_fs_destroy(struct fuse_fs *fs); + + int fuse_notify_poll(struct fuse_pollhandle *ph); +@@ -1182,7 +1202,7 @@ int fuse_notify_poll(struct fuse_pollhandle *ph); + * @return a new filesystem object + */ + struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, +- void *private_data); ++ void *private_data); + + /** + * Factory for creating filesystem objects +@@ -1199,7 +1219,7 @@ struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, + * @return the new filesystem object + */ + typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, +- struct fuse_fs *fs[]); ++ struct fuse_fs *fs[]); + /** + * Register filesystem module + * +@@ -1211,7 +1231,7 @@ typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, + * @param factory_ the factory function for this filesystem module + */ + #define FUSE_REGISTER_MODULE(name_, factory_) \ +- fuse_module_factory_t fuse_module_ ## name_ ## _factory = factory_ ++ fuse_module_factory_t fuse_module_##name_##_factory = factory_ + + /** Get session from fuse object */ + struct fuse_session *fuse_get_session(struct fuse *f); +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index bf8f8cc..bd9bf86 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -1,21 +1,23 @@ +-/* FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++/* ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. 
++ */ + + /** @file */ + + #if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_) +-#error "Never include <fuse_common.h> directly; use <fuse.h> or <fuse_lowlevel.h> instead." ++#error \ ++ "Never include <fuse_common.h> directly; use <fuse.h> or <fuse_lowlevel.h> instead." + #endif + + #ifndef FUSE_COMMON_H_ + #define FUSE_COMMON_H_ + +-#include "fuse_opt.h" + #include "fuse_log.h" ++#include "fuse_opt.h" + #include <stdint.h> + #include <sys/types.h> + +@@ -25,7 +27,7 @@ + /** Minor version of FUSE library interface */ + #define FUSE_MINOR_VERSION 2 + +-#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) ++#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) + #define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) + + /** +@@ -38,67 +40,83 @@ + * descriptors can share a single file handle. + */ + struct fuse_file_info { +- /** Open flags. Available in open() and release() */ +- int flags; +- +- /** In case of a write operation indicates if this was caused +- by a delayed write from the page cache. If so, then the +- context's pid, uid, and gid fields will not be valid, and +- the *fh* value may not match the *fh* value that would +- have been sent with the corresponding individual write +- requests if write caching had been disabled. */ +- unsigned int writepage : 1; +- +- /** Can be filled in by open, to use direct I/O on this file. */ +- unsigned int direct_io : 1; +- +- /** Can be filled in by open. It signals the kernel that any +- currently cached file data (ie., data that the filesystem +- provided the last time the file was open) need not be +- invalidated. Has no effect when set in other contexts (in +- particular it does nothing when set by opendir()). */ +- unsigned int keep_cache : 1; +- +- /** Indicates a flush operation. Set in flush operation, also +- maybe set in highlevel lock operation and lowlevel release +- operation. */ +- unsigned int flush : 1; +- +- /** Can be filled in by open, to indicate that the file is not +- seekable.
*/ +- unsigned int nonseekable : 1; +- +- /* Indicates that flock locks for this file should be +- released. If set, lock_owner shall contain a valid value. +- May only be set in ->release(). */ +- unsigned int flock_release : 1; +- +- /** Can be filled in by opendir. It signals the kernel to +- enable caching of entries returned by readdir(). Has no +- effect when set in other contexts (in particular it does +- nothing when set by open()). */ +- unsigned int cache_readdir : 1; +- +- /** Padding. Reserved for future use*/ +- unsigned int padding : 25; +- unsigned int padding2 : 32; +- +- /** File handle id. May be filled in by filesystem in create, +- * open, and opendir(). Available in most other file operations on the +- * same file handle. */ +- uint64_t fh; +- +- /** Lock owner id. Available in locking operations and flush */ +- uint64_t lock_owner; +- +- /** Requested poll events. Available in ->poll. Only set on kernels +- which support it. If unsupported, this field is set to zero. */ +- uint32_t poll_events; ++ /** Open flags. Available in open() and release() */ ++ int flags; ++ ++ /* ++ * In case of a write operation indicates if this was caused ++ * by a delayed write from the page cache. If so, then the ++ * context's pid, uid, and gid fields will not be valid, and ++ * the *fh* value may not match the *fh* value that would ++ * have been sent with the corresponding individual write ++ * requests if write caching had been disabled. ++ */ ++ unsigned int writepage:1; ++ ++ /** Can be filled in by open, to use direct I/O on this file. */ ++ unsigned int direct_io:1; ++ ++ /* ++ * Can be filled in by open. It signals the kernel that any ++ * currently cached file data (ie., data that the filesystem ++ * provided the last time the file was open) need not be ++ * invalidated. Has no effect when set in other contexts (in ++ * particular it does nothing when set by opendir()). ++ */ ++ unsigned int keep_cache:1; ++ ++ /* ++ * Indicates a flush operation. 
Set in flush operation, also ++ * maybe set in highlevel lock operation and lowlevel release ++ * operation. ++ */ ++ unsigned int flush:1; ++ ++ /* ++ * Can be filled in by open, to indicate that the file is not ++ * seekable. ++ */ ++ unsigned int nonseekable:1; ++ ++ /* ++ * Indicates that flock locks for this file should be ++ * released. If set, lock_owner shall contain a valid value. ++ * May only be set in ->release(). ++ */ ++ unsigned int flock_release:1; ++ ++ /* ++ * Can be filled in by opendir. It signals the kernel to ++ * enable caching of entries returned by readdir(). Has no ++ * effect when set in other contexts (in particular it does ++ * nothing when set by open()). ++ */ ++ unsigned int cache_readdir:1; ++ ++ /** Padding. Reserved for future use*/ ++ unsigned int padding:25; ++ unsigned int padding2:32; ++ ++ /* ++ * File handle id. May be filled in by filesystem in create, ++ * open, and opendir(). Available in most other file operations on the ++ * same file handle. ++ */ ++ uint64_t fh; ++ ++ /** Lock owner id. Available in locking operations and flush */ ++ uint64_t lock_owner; ++ ++ /* ++ * Requested poll events. Available in ->poll. Only set on kernels ++ * which support it. If unsupported, this field is set to zero. ++ */ ++ uint32_t poll_events; + }; + +-/************************************************************************** +- * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * +- **************************************************************************/ ++/* ++ * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' ++ */ + + /** + * Indicates that the filesystem supports asynchronous read requests. +@@ -110,7 +128,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_ASYNC_READ (1 << 0) ++#define FUSE_CAP_ASYNC_READ (1 << 0) + + /** + * Indicates that the filesystem supports "remote" locking. 
+@@ -118,7 +136,7 @@ struct fuse_file_info { + * This feature is enabled by default when supported by the kernel, + * and if getlk() and setlk() handlers are implemented. + */ +-#define FUSE_CAP_POSIX_LOCKS (1 << 1) ++#define FUSE_CAP_POSIX_LOCKS (1 << 1) + + /** + * Indicates that the filesystem supports the O_TRUNC open flag. If +@@ -127,14 +145,14 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) ++#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) + + /** + * Indicates that the filesystem supports lookups of "." and "..". + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) ++#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) + + /** + * Indicates that the kernel should not apply the umask to the +@@ -142,7 +160,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_DONT_MASK (1 << 6) ++#define FUSE_CAP_DONT_MASK (1 << 6) + + /** + * Indicates that libfuse should try to use splice() when writing to +@@ -150,7 +168,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_SPLICE_WRITE (1 << 7) ++#define FUSE_CAP_SPLICE_WRITE (1 << 7) + + /** + * Indicates that libfuse should try to move pages instead of copying when +@@ -158,7 +176,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_SPLICE_MOVE (1 << 8) ++#define FUSE_CAP_SPLICE_MOVE (1 << 8) + + /** + * Indicates that libfuse should try to use splice() when reading from +@@ -167,7 +185,7 @@ struct fuse_file_info { + * This feature is enabled by default when supported by the kernel and + * if the filesystem implements a write_buf() handler. 
+ */ +-#define FUSE_CAP_SPLICE_READ (1 << 9) ++#define FUSE_CAP_SPLICE_READ (1 << 9) + + /** + * If set, the calls to flock(2) will be emulated using POSIX locks and must +@@ -180,14 +198,14 @@ struct fuse_file_info { + * This feature is enabled by default when supported by the kernel and + * if the filesystem implements a flock() handler. + */ +-#define FUSE_CAP_FLOCK_LOCKS (1 << 10) ++#define FUSE_CAP_FLOCK_LOCKS (1 << 10) + + /** + * Indicates that the filesystem supports ioctl's on directories. + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_IOCTL_DIR (1 << 11) ++#define FUSE_CAP_IOCTL_DIR (1 << 11) + + /** + * Traditionally, while a file is open the FUSE kernel module only +@@ -209,7 +227,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) ++#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) + + /** + * Indicates that the filesystem supports readdirplus. +@@ -217,7 +235,7 @@ struct fuse_file_info { + * This feature is enabled by default when supported by the kernel and if the + * filesystem implements a readdirplus() handler. + */ +-#define FUSE_CAP_READDIRPLUS (1 << 13) ++#define FUSE_CAP_READDIRPLUS (1 << 13) + + /** + * Indicates that the filesystem supports adaptive readdirplus. +@@ -245,7 +263,7 @@ struct fuse_file_info { + * if the filesystem implements both a readdirplus() and a readdir() + * handler. + */ +-#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) ++#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) + + /** + * Indicates that the filesystem supports asynchronous direct I/O submission. +@@ -256,7 +274,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_ASYNC_DIO (1 << 15) ++#define FUSE_CAP_ASYNC_DIO (1 << 15) + + /** + * Indicates that writeback caching should be enabled. 
This means that +@@ -265,7 +283,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) ++#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) + + /** + * Indicates support for zero-message opens. If this flag is set in +@@ -278,7 +296,7 @@ struct fuse_file_info { + * Setting (or unsetting) this flag in the `want` field has *no + * effect*. + */ +-#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) ++#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) + + /** + * Indicates support for parallel directory operations. If this flag +@@ -288,7 +306,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) ++#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) + + /** + * Indicates support for POSIX ACLs. +@@ -307,7 +325,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_POSIX_ACL (1 << 19) ++#define FUSE_CAP_POSIX_ACL (1 << 19) + + /** + * Indicates that the filesystem is responsible for unsetting +@@ -316,7 +334,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) ++#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) + + /** + * Indicates support for zero-message opendirs. If this flag is set in +@@ -328,7 +346,7 @@ struct fuse_file_info { + * + * Setting (or unsetting) this flag in the `want` field has *no effect*. 
+ */ +-#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) ++#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) + + /** + * Ioctl flags +@@ -340,12 +358,12 @@ struct fuse_file_info { + * + * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs + */ +-#define FUSE_IOCTL_COMPAT (1 << 0) +-#define FUSE_IOCTL_UNRESTRICTED (1 << 1) +-#define FUSE_IOCTL_RETRY (1 << 2) +-#define FUSE_IOCTL_DIR (1 << 4) ++#define FUSE_IOCTL_COMPAT (1 << 0) ++#define FUSE_IOCTL_UNRESTRICTED (1 << 1) ++#define FUSE_IOCTL_RETRY (1 << 2) ++#define FUSE_IOCTL_DIR (1 << 4) + +-#define FUSE_IOCTL_MAX_IOV 256 ++#define FUSE_IOCTL_MAX_IOV 256 + + /** + * Connection information, passed to the ->init() method +@@ -355,114 +373,114 @@ struct fuse_file_info { + * value must usually be smaller than the indicated value. + */ + struct fuse_conn_info { +- /** +- * Major version of the protocol (read-only) +- */ +- unsigned proto_major; +- +- /** +- * Minor version of the protocol (read-only) +- */ +- unsigned proto_minor; +- +- /** +- * Maximum size of the write buffer +- */ +- unsigned max_write; +- +- /** +- * Maximum size of read requests. A value of zero indicates no +- * limit. However, even if the filesystem does not specify a +- * limit, the maximum size of read requests will still be +- * limited by the kernel. +- * +- * NOTE: For the time being, the maximum size of read requests +- * must be set both here *and* passed to fuse_session_new() +- * using the ``-o max_read=`` mount option. At some point +- * in the future, specifying the mount option will no longer +- * be necessary. +- */ +- unsigned max_read; +- +- /** +- * Maximum readahead +- */ +- unsigned max_readahead; +- +- /** +- * Capability flags that the kernel supports (read-only) +- */ +- unsigned capable; +- +- /** +- * Capability flags that the filesystem wants to enable. +- * +- * libfuse attempts to initialize this field with +- * reasonable default values before calling the init() handler. 
+- */ +- unsigned want; +- +- /** +- * Maximum number of pending "background" requests. A +- * background request is any type of request for which the +- * total number is not limited by other means. As of kernel +- * 4.8, only two types of requests fall into this category: +- * +- * 1. Read-ahead requests +- * 2. Asynchronous direct I/O requests +- * +- * Read-ahead requests are generated (if max_readahead is +- * non-zero) by the kernel to preemptively fill its caches +- * when it anticipates that userspace will soon read more +- * data. +- * +- * Asynchronous direct I/O requests are generated if +- * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large +- * direct I/O request. In this case the kernel will internally +- * split it up into multiple smaller requests and submit them +- * to the filesystem concurrently. +- * +- * Note that the following requests are *not* background +- * requests: writeback requests (limited by the kernel's +- * flusher algorithm), regular (i.e., synchronous and +- * buffered) userspace read/write requests (limited to one per +- * thread), asynchronous read requests (Linux's io_submit(2) +- * call actually blocks, so these are also limited to one per +- * thread). +- */ +- unsigned max_background; +- +- /** +- * Kernel congestion threshold parameter. If the number of pending +- * background requests exceeds this number, the FUSE kernel module will +- * mark the filesystem as "congested". This instructs the kernel to +- * expect that queued requests will take some time to complete, and to +- * adjust its algorithms accordingly (e.g. by putting a waiting thread +- * to sleep instead of using a busy-loop). +- */ +- unsigned congestion_threshold; +- +- /** +- * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible +- * for updating mtime and ctime when write requests are received. The +- * updated values are passed to the filesystem with setattr() requests. 
+- * However, if the filesystem does not support the full resolution of +- * the kernel timestamps (nanoseconds), the mtime and ctime values used +- * by kernel and filesystem will differ (and result in an apparent +- * change of times after a cache flush). +- * +- * To prevent this problem, this variable can be used to inform the +- * kernel about the timestamp granularity supported by the file-system. +- * The value should be power of 10. The default is 1, i.e. full +- * nano-second resolution. Filesystems supporting only second resolution +- * should set this to 1000000000. +- */ +- unsigned time_gran; +- +- /** +- * For future use. +- */ +- unsigned reserved[22]; ++ /** ++ * Major version of the protocol (read-only) ++ */ ++ unsigned proto_major; ++ ++ /** ++ * Minor version of the protocol (read-only) ++ */ ++ unsigned proto_minor; ++ ++ /** ++ * Maximum size of the write buffer ++ */ ++ unsigned max_write; ++ ++ /** ++ * Maximum size of read requests. A value of zero indicates no ++ * limit. However, even if the filesystem does not specify a ++ * limit, the maximum size of read requests will still be ++ * limited by the kernel. ++ * ++ * NOTE: For the time being, the maximum size of read requests ++ * must be set both here *and* passed to fuse_session_new() ++ * using the ``-o max_read=`` mount option. At some point ++ * in the future, specifying the mount option will no longer ++ * be necessary. ++ */ ++ unsigned max_read; ++ ++ /** ++ * Maximum readahead ++ */ ++ unsigned max_readahead; ++ ++ /** ++ * Capability flags that the kernel supports (read-only) ++ */ ++ unsigned capable; ++ ++ /** ++ * Capability flags that the filesystem wants to enable. ++ * ++ * libfuse attempts to initialize this field with ++ * reasonable default values before calling the init() handler. ++ */ ++ unsigned want; ++ ++ /** ++ * Maximum number of pending "background" requests. 
A ++ * background request is any type of request for which the ++ * total number is not limited by other means. As of kernel ++ * 4.8, only two types of requests fall into this category: ++ * ++ * 1. Read-ahead requests ++ * 2. Asynchronous direct I/O requests ++ * ++ * Read-ahead requests are generated (if max_readahead is ++ * non-zero) by the kernel to preemptively fill its caches ++ * when it anticipates that userspace will soon read more ++ * data. ++ * ++ * Asynchronous direct I/O requests are generated if ++ * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large ++ * direct I/O request. In this case the kernel will internally ++ * split it up into multiple smaller requests and submit them ++ * to the filesystem concurrently. ++ * ++ * Note that the following requests are *not* background ++ * requests: writeback requests (limited by the kernel's ++ * flusher algorithm), regular (i.e., synchronous and ++ * buffered) userspace read/write requests (limited to one per ++ * thread), asynchronous read requests (Linux's io_submit(2) ++ * call actually blocks, so these are also limited to one per ++ * thread). ++ */ ++ unsigned max_background; ++ ++ /** ++ * Kernel congestion threshold parameter. If the number of pending ++ * background requests exceeds this number, the FUSE kernel module will ++ * mark the filesystem as "congested". This instructs the kernel to ++ * expect that queued requests will take some time to complete, and to ++ * adjust its algorithms accordingly (e.g. by putting a waiting thread ++ * to sleep instead of using a busy-loop). ++ */ ++ unsigned congestion_threshold; ++ ++ /** ++ * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible ++ * for updating mtime and ctime when write requests are received. The ++ * updated values are passed to the filesystem with setattr() requests. 
++ * However, if the filesystem does not support the full resolution of ++ * the kernel timestamps (nanoseconds), the mtime and ctime values used ++ * by kernel and filesystem will differ (and result in an apparent ++ * change of times after a cache flush). ++ * ++ * To prevent this problem, this variable can be used to inform the ++ * kernel about the timestamp granularity supported by the file-system. ++ * The value should be power of 10. The default is 1, i.e. full ++ * nano-second resolution. Filesystems supporting only second resolution ++ * should set this to 1000000000. ++ */ ++ unsigned time_gran; ++ ++ /** ++ * For future use. ++ */ ++ unsigned reserved[22]; + }; + + struct fuse_session; +@@ -489,21 +507,20 @@ struct fuse_conn_info_opts; + * -o async_read sets FUSE_CAP_ASYNC_READ in conn->want + * -o sync_read unsets FUSE_CAP_ASYNC_READ in conn->want + * -o atomic_o_trunc sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want +- * -o no_remote_lock Equivalent to -o no_remote_flock,no_remote_posix_lock +- * -o no_remote_flock Unsets FUSE_CAP_FLOCK_LOCKS in conn->want +- * -o no_remote_posix_lock Unsets FUSE_CAP_POSIX_LOCKS in conn->want +- * -o [no_]splice_write (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want +- * -o [no_]splice_move (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want +- * -o [no_]splice_read (un-)sets FUSE_CAP_SPLICE_READ in conn->want +- * -o [no_]auto_inval_data (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want +- * -o readdirplus=no unsets FUSE_CAP_READDIRPLUS in conn->want +- * -o readdirplus=yes sets FUSE_CAP_READDIRPLUS and unsets +- * FUSE_CAP_READDIRPLUS_AUTO in conn->want +- * -o readdirplus=auto sets FUSE_CAP_READDIRPLUS and +- * FUSE_CAP_READDIRPLUS_AUTO in conn->want +- * -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in conn->want +- * -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want +- * -o time_gran=N sets conn->time_gran ++ * -o no_remote_lock Equivalent to -o ++ *no_remote_flock,no_remote_posix_lock -o no_remote_flock 
Unsets ++ *FUSE_CAP_FLOCK_LOCKS in conn->want -o no_remote_posix_lock Unsets ++ *FUSE_CAP_POSIX_LOCKS in conn->want -o [no_]splice_write (un-)sets ++ *FUSE_CAP_SPLICE_WRITE in conn->want -o [no_]splice_move (un-)sets ++ *FUSE_CAP_SPLICE_MOVE in conn->want -o [no_]splice_read (un-)sets ++ *FUSE_CAP_SPLICE_READ in conn->want -o [no_]auto_inval_data (un-)sets ++ *FUSE_CAP_AUTO_INVAL_DATA in conn->want -o readdirplus=no unsets ++ *FUSE_CAP_READDIRPLUS in conn->want -o readdirplus=yes sets ++ *FUSE_CAP_READDIRPLUS and unsets FUSE_CAP_READDIRPLUS_AUTO in conn->want -o ++ *readdirplus=auto sets FUSE_CAP_READDIRPLUS and FUSE_CAP_READDIRPLUS_AUTO ++ *in conn->want -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in ++ *conn->want -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in ++ *conn->want -o time_gran=N sets conn->time_gran + * + * Known options will be removed from *args*, unknown options will be + * passed through unchanged. +@@ -511,7 +528,7 @@ struct fuse_conn_info_opts; + * @param args argument vector (input+output) + * @return parsed options + **/ +-struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); ++struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args); + + /** + * This function applies the (parsed) parameters in *opts* to the +@@ -521,7 +538,7 @@ struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); + * option has been explicitly set. 
+ */ + void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, +- struct fuse_conn_info *conn); ++ struct fuse_conn_info *conn); + + /** + * Go into the background +@@ -552,81 +569,81 @@ const char *fuse_pkgversion(void); + */ + void fuse_pollhandle_destroy(struct fuse_pollhandle *ph); + +-/* ----------------------------------------------------------- * +- * Data buffer * +- * ----------------------------------------------------------- */ ++/* ++ * Data buffer ++ */ + + /** + * Buffer flags + */ + enum fuse_buf_flags { +- /** +- * Buffer contains a file descriptor +- * +- * If this flag is set, the .fd field is valid, otherwise the +- * .mem fields is valid. +- */ +- FUSE_BUF_IS_FD = (1 << 1), +- +- /** +- * Seek on the file descriptor +- * +- * If this flag is set then the .pos field is valid and is +- * used to seek to the given offset before performing +- * operation on file descriptor. +- */ +- FUSE_BUF_FD_SEEK = (1 << 2), +- +- /** +- * Retry operation on file descriptor +- * +- * If this flag is set then retry operation on file descriptor +- * until .size bytes have been copied or an error or EOF is +- * detected. +- */ +- FUSE_BUF_FD_RETRY = (1 << 3), ++ /** ++ * Buffer contains a file descriptor ++ * ++ * If this flag is set, the .fd field is valid, otherwise the ++ * .mem fields is valid. ++ */ ++ FUSE_BUF_IS_FD = (1 << 1), ++ ++ /** ++ * Seek on the file descriptor ++ * ++ * If this flag is set then the .pos field is valid and is ++ * used to seek to the given offset before performing ++ * operation on file descriptor. ++ */ ++ FUSE_BUF_FD_SEEK = (1 << 2), ++ ++ /** ++ * Retry operation on file descriptor ++ * ++ * If this flag is set then retry operation on file descriptor ++ * until .size bytes have been copied or an error or EOF is ++ * detected. 
++ */ ++ FUSE_BUF_FD_RETRY = (1 << 3), + }; + + /** + * Buffer copy flags + */ + enum fuse_buf_copy_flags { +- /** +- * Don't use splice(2) +- * +- * Always fall back to using read and write instead of +- * splice(2) to copy data from one file descriptor to another. +- * +- * If this flag is not set, then only fall back if splice is +- * unavailable. +- */ +- FUSE_BUF_NO_SPLICE = (1 << 1), +- +- /** +- * Force splice +- * +- * Always use splice(2) to copy data from one file descriptor +- * to another. If splice is not available, return -EINVAL. +- */ +- FUSE_BUF_FORCE_SPLICE = (1 << 2), +- +- /** +- * Try to move data with splice. +- * +- * If splice is used, try to move pages from the source to the +- * destination instead of copying. See documentation of +- * SPLICE_F_MOVE in splice(2) man page. +- */ +- FUSE_BUF_SPLICE_MOVE = (1 << 3), +- +- /** +- * Don't block on the pipe when copying data with splice +- * +- * Makes the operations on the pipe non-blocking (if the pipe +- * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) +- * man page. +- */ +- FUSE_BUF_SPLICE_NONBLOCK= (1 << 4), ++ /** ++ * Don't use splice(2) ++ * ++ * Always fall back to using read and write instead of ++ * splice(2) to copy data from one file descriptor to another. ++ * ++ * If this flag is not set, then only fall back if splice is ++ * unavailable. ++ */ ++ FUSE_BUF_NO_SPLICE = (1 << 1), ++ ++ /** ++ * Force splice ++ * ++ * Always use splice(2) to copy data from one file descriptor ++ * to another. If splice is not available, return -EINVAL. ++ */ ++ FUSE_BUF_FORCE_SPLICE = (1 << 2), ++ ++ /** ++ * Try to move data with splice. ++ * ++ * If splice is used, try to move pages from the source to the ++ * destination instead of copying. See documentation of ++ * SPLICE_F_MOVE in splice(2) man page. 
++ */ ++ FUSE_BUF_SPLICE_MOVE = (1 << 3), ++ ++ /** ++ * Don't block on the pipe when copying data with splice ++ * ++ * Makes the operations on the pipe non-blocking (if the pipe ++ * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) ++ * man page. ++ */ ++ FUSE_BUF_SPLICE_NONBLOCK = (1 << 4), + }; + + /** +@@ -636,36 +653,36 @@ enum fuse_buf_copy_flags { + * be supplied as a memory pointer or as a file descriptor + */ + struct fuse_buf { +- /** +- * Size of data in bytes +- */ +- size_t size; +- +- /** +- * Buffer flags +- */ +- enum fuse_buf_flags flags; +- +- /** +- * Memory pointer +- * +- * Used unless FUSE_BUF_IS_FD flag is set. +- */ +- void *mem; +- +- /** +- * File descriptor +- * +- * Used if FUSE_BUF_IS_FD flag is set. +- */ +- int fd; +- +- /** +- * File position +- * +- * Used if FUSE_BUF_FD_SEEK flag is set. +- */ +- off_t pos; ++ /** ++ * Size of data in bytes ++ */ ++ size_t size; ++ ++ /** ++ * Buffer flags ++ */ ++ enum fuse_buf_flags flags; ++ ++ /** ++ * Memory pointer ++ * ++ * Used unless FUSE_BUF_IS_FD flag is set. ++ */ ++ void *mem; ++ ++ /** ++ * File descriptor ++ * ++ * Used if FUSE_BUF_IS_FD flag is set. ++ */ ++ int fd; ++ ++ /** ++ * File position ++ * ++ * Used if FUSE_BUF_FD_SEEK flag is set. ++ */ ++ off_t pos; + }; + + /** +@@ -677,41 +694,39 @@ struct fuse_buf { + * Allocate dynamically to add more than one buffer. 
+ */ + struct fuse_bufvec { +- /** +- * Number of buffers in the array +- */ +- size_t count; +- +- /** +- * Index of current buffer within the array +- */ +- size_t idx; +- +- /** +- * Current offset within the current buffer +- */ +- size_t off; +- +- /** +- * Array of buffers +- */ +- struct fuse_buf buf[1]; ++ /** ++ * Number of buffers in the array ++ */ ++ size_t count; ++ ++ /** ++ * Index of current buffer within the array ++ */ ++ size_t idx; ++ ++ /** ++ * Current offset within the current buffer ++ */ ++ size_t off; ++ ++ /** ++ * Array of buffers ++ */ ++ struct fuse_buf buf[1]; + }; + + /* Initialize bufvec with a single buffer of given size */ +-#define FUSE_BUFVEC_INIT(size__) \ +- ((struct fuse_bufvec) { \ +- /* .count= */ 1, \ +- /* .idx = */ 0, \ +- /* .off = */ 0, \ +- /* .buf = */ { /* [0] = */ { \ +- /* .size = */ (size__), \ +- /* .flags = */ (enum fuse_buf_flags) 0, \ +- /* .mem = */ NULL, \ +- /* .fd = */ -1, \ +- /* .pos = */ 0, \ +- } } \ +- } ) ++#define FUSE_BUFVEC_INIT(size__) \ ++ ((struct fuse_bufvec){ /* .count= */ 1, \ ++ /* .idx = */ 0, \ ++ /* .off = */ 0, /* .buf = */ \ ++ { /* [0] = */ { \ ++ /* .size = */ (size__), \ ++ /* .flags = */ (enum fuse_buf_flags)0, \ ++ /* .mem = */ NULL, \ ++ /* .fd = */ -1, \ ++ /* .pos = */ 0, \ ++ } } }) + + /** + * Get total size of data in a fuse buffer vector +@@ -730,16 +745,16 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); + * @return actual number of bytes copied or -errno on error + */ + ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, +- enum fuse_buf_copy_flags flags); ++ enum fuse_buf_copy_flags flags); + +-/* ----------------------------------------------------------- * +- * Signal handling * +- * ----------------------------------------------------------- */ ++/* ++ * Signal handling ++ */ + + /** + * Exit session on HUP, TERM and INT signals and ignore PIPE signal + * +- * Stores session in a global variable. 
May only be called once per ++ * Stores session in a global variable. May only be called once per + * process until fuse_remove_signal_handlers() is called. + * + * Once either of the POSIX signals arrives, the signal handler calls +@@ -766,12 +781,12 @@ int fuse_set_signal_handlers(struct fuse_session *se); + */ + void fuse_remove_signal_handlers(struct fuse_session *se); + +-/* ----------------------------------------------------------- * +- * Compatibility stuff * +- * ----------------------------------------------------------- */ ++/* ++ * Compatibility stuff ++ */ + + #if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30 +-# error only API version 30 or greater is supported ++#error only API version 30 or greater is supported + #endif + + +@@ -781,11 +796,14 @@ void fuse_remove_signal_handlers(struct fuse_session *se); + * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags! + */ + +-#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus ++#if defined(__GNUC__) && \ ++ (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ ++ !defined __cplusplus + _Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit"); + #else +-struct _fuse_off_t_must_be_64bit_dummy_struct \ +- { unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); }; ++struct _fuse_off_t_must_be_64bit_dummy_struct { ++ unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); ++}; + #endif + + #endif /* FUSE_COMMON_H_ */ +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index b39522e..e63cb58 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -1,71 +1,71 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. 
+- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB ++ */ + + #include "fuse.h" + #include "fuse_lowlevel.h" + + struct fuse_req { +- struct fuse_session *se; +- uint64_t unique; +- int ctr; +- pthread_mutex_t lock; +- struct fuse_ctx ctx; +- struct fuse_chan *ch; +- int interrupted; +- unsigned int ioctl_64bit : 1; +- union { +- struct { +- uint64_t unique; +- } i; +- struct { +- fuse_interrupt_func_t func; +- void *data; +- } ni; +- } u; +- struct fuse_req *next; +- struct fuse_req *prev; ++ struct fuse_session *se; ++ uint64_t unique; ++ int ctr; ++ pthread_mutex_t lock; ++ struct fuse_ctx ctx; ++ struct fuse_chan *ch; ++ int interrupted; ++ unsigned int ioctl_64bit:1; ++ union { ++ struct { ++ uint64_t unique; ++ } i; ++ struct { ++ fuse_interrupt_func_t func; ++ void *data; ++ } ni; ++ } u; ++ struct fuse_req *next; ++ struct fuse_req *prev; + }; + + struct fuse_notify_req { +- uint64_t unique; +- void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, +- const void *, const struct fuse_buf *); +- struct fuse_notify_req *next; +- struct fuse_notify_req *prev; ++ uint64_t unique; ++ void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, ++ const void *, const struct fuse_buf *); ++ struct fuse_notify_req *next; ++ struct fuse_notify_req *prev; + }; + + struct fuse_session { +- char *mountpoint; +- volatile int exited; +- int fd; +- int debug; +- int deny_others; +- struct fuse_lowlevel_ops op; +- int got_init; +- struct cuse_data *cuse_data; +- void *userdata; +- uid_t owner; +- struct fuse_conn_info conn; +- struct fuse_req list; +- struct fuse_req interrupts; +- pthread_mutex_t lock; +- int got_destroy; +- int broken_splice_nonblock; +- uint64_t notify_ctr; +- struct fuse_notify_req notify_list; +- size_t bufsize; +- int error; ++ char *mountpoint; ++ volatile int exited; ++ int 
fd; ++ int debug; ++ int deny_others; ++ struct fuse_lowlevel_ops op; ++ int got_init; ++ struct cuse_data *cuse_data; ++ void *userdata; ++ uid_t owner; ++ struct fuse_conn_info conn; ++ struct fuse_req list; ++ struct fuse_req interrupts; ++ pthread_mutex_t lock; ++ int got_destroy; ++ int broken_splice_nonblock; ++ uint64_t notify_ctr; ++ struct fuse_notify_req notify_list; ++ size_t bufsize; ++ int error; + }; + + struct fuse_chan { +- pthread_mutex_t lock; +- int ctr; +- int fd; ++ pthread_mutex_t lock; ++ int ctr; ++ int fd; + }; + + /** +@@ -76,19 +76,20 @@ struct fuse_chan { + * + */ + struct fuse_module { +- char *name; +- fuse_module_factory_t factory; +- struct fuse_module *next; +- struct fusemod_so *so; +- int ctr; ++ char *name; ++ fuse_module_factory_t factory; ++ struct fuse_module *next; ++ struct fusemod_so *so; ++ int ctr; + }; + + int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, +- int count); ++ int count); + void fuse_free_req(fuse_req_t req); + + void fuse_session_process_buf_int(struct fuse_session *se, +- const struct fuse_buf *buf, struct fuse_chan *ch); ++ const struct fuse_buf *buf, ++ struct fuse_chan *ch); + + + #define FUSE_MAX_MAX_PAGES 256 +diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c +index 0d268ab..11345f9 100644 +--- a/tools/virtiofsd/fuse_log.c ++++ b/tools/virtiofsd/fuse_log.c +@@ -1,40 +1,40 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2019 Red Hat, Inc. +- +- Logging API. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2019 Red Hat, Inc. ++ * ++ * Logging API. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + + #include "fuse_log.h" + + #include + #include + +-static void default_log_func( +- __attribute__(( unused )) enum fuse_log_level level, +- const char *fmt, va_list ap) ++static void default_log_func(__attribute__((unused)) enum fuse_log_level level, ++ const char *fmt, va_list ap) + { +- vfprintf(stderr, fmt, ap); ++ vfprintf(stderr, fmt, ap); + } + + static fuse_log_func_t log_func = default_log_func; + + void fuse_set_log_func(fuse_log_func_t func) + { +- if (!func) +- func = default_log_func; ++ if (!func) { ++ func = default_log_func; ++ } + +- log_func = func; ++ log_func = func; + } + + void fuse_log(enum fuse_log_level level, const char *fmt, ...) + { +- va_list ap; ++ va_list ap; + +- va_start(ap, fmt); +- log_func(level, fmt, ap); +- va_end(ap); ++ va_start(ap, fmt); ++ log_func(level, fmt, ap); ++ va_end(ap); + } +diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h +index 0af700d..bf6c11f 100644 +--- a/tools/virtiofsd/fuse_log.h ++++ b/tools/virtiofsd/fuse_log.h +@@ -1,10 +1,10 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2019 Red Hat, Inc. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2019 Red Hat, Inc. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. ++ */ + + #ifndef FUSE_LOG_H_ + #define FUSE_LOG_H_ +@@ -22,14 +22,14 @@ + * These levels correspond to syslog(2) log levels since they are widely used. 
+ */ + enum fuse_log_level { +- FUSE_LOG_EMERG, +- FUSE_LOG_ALERT, +- FUSE_LOG_CRIT, +- FUSE_LOG_ERR, +- FUSE_LOG_WARNING, +- FUSE_LOG_NOTICE, +- FUSE_LOG_INFO, +- FUSE_LOG_DEBUG ++ FUSE_LOG_EMERG, ++ FUSE_LOG_ALERT, ++ FUSE_LOG_CRIT, ++ FUSE_LOG_ERR, ++ FUSE_LOG_WARNING, ++ FUSE_LOG_NOTICE, ++ FUSE_LOG_INFO, ++ FUSE_LOG_DEBUG + }; + + /** +@@ -45,8 +45,8 @@ enum fuse_log_level { + * @param fmt sprintf-style format string including newline + * @param ap format string arguments + */ +-typedef void (*fuse_log_func_t)(enum fuse_log_level level, +- const char *fmt, va_list ap); ++typedef void (*fuse_log_func_t)(enum fuse_log_level level, const char *fmt, ++ va_list ap); + + /** + * Install a custom log handler function. +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index e6fa247..5c9cb52 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1,2380 +1,2515 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- Implementation of (most of) the low-level FUSE API. The session loop +- functions are implemented in separate files. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * Implementation of (most of) the low-level FUSE API. The session loop ++ * functions are implemented in separate files. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + + #define _GNU_SOURCE + + #include "config.h" + #include "fuse_i.h" + #include "fuse_kernel.h" +-#include "fuse_opt.h" + #include "fuse_misc.h" ++#include "fuse_opt.h" + ++#include ++#include ++#include ++#include + #include + #include +-#include + #include +-#include +-#include +-#include +-#include + #include +- ++#include + + + #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) + #define OFFSET_MAX 0x7fffffffffffffffLL + +-#define container_of(ptr, type, member) ({ \ +- const typeof( ((type *)0)->member ) *__mptr = (ptr); \ +- (type *)( (char *)__mptr - offsetof(type,member) );}) ++#define container_of(ptr, type, member) \ ++ ({ \ ++ const typeof(((type *)0)->member) *__mptr = (ptr); \ ++ (type *)((char *)__mptr - offsetof(type, member)); \ ++ }) + + struct fuse_pollhandle { +- uint64_t kh; +- struct fuse_session *se; ++ uint64_t kh; ++ struct fuse_session *se; + }; + + static size_t pagesize; + + static __attribute__((constructor)) void fuse_ll_init_pagesize(void) + { +- pagesize = getpagesize(); ++ pagesize = getpagesize(); + } + + static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) + { +- attr->ino = stbuf->st_ino; +- attr->mode = stbuf->st_mode; +- attr->nlink = stbuf->st_nlink; +- attr->uid = stbuf->st_uid; +- attr->gid = stbuf->st_gid; +- attr->rdev = stbuf->st_rdev; +- attr->size = stbuf->st_size; +- attr->blksize = stbuf->st_blksize; +- attr->blocks = stbuf->st_blocks; +- attr->atime = stbuf->st_atime; +- attr->mtime = stbuf->st_mtime; +- attr->ctime = stbuf->st_ctime; +- attr->atimensec = ST_ATIM_NSEC(stbuf); +- attr->mtimensec = ST_MTIM_NSEC(stbuf); +- attr->ctimensec = ST_CTIM_NSEC(stbuf); ++ attr->ino = stbuf->st_ino; ++ attr->mode = stbuf->st_mode; ++ attr->nlink = stbuf->st_nlink; ++ attr->uid = stbuf->st_uid; ++ attr->gid = stbuf->st_gid; ++ attr->rdev = stbuf->st_rdev; ++ attr->size = stbuf->st_size; ++ attr->blksize = stbuf->st_blksize; ++ attr->blocks = stbuf->st_blocks; ++ 
attr->atime = stbuf->st_atime; ++ attr->mtime = stbuf->st_mtime; ++ attr->ctime = stbuf->st_ctime; ++ attr->atimensec = ST_ATIM_NSEC(stbuf); ++ attr->mtimensec = ST_MTIM_NSEC(stbuf); ++ attr->ctimensec = ST_CTIM_NSEC(stbuf); + } + + static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) + { +- stbuf->st_mode = attr->mode; +- stbuf->st_uid = attr->uid; +- stbuf->st_gid = attr->gid; +- stbuf->st_size = attr->size; +- stbuf->st_atime = attr->atime; +- stbuf->st_mtime = attr->mtime; +- stbuf->st_ctime = attr->ctime; +- ST_ATIM_NSEC_SET(stbuf, attr->atimensec); +- ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); +- ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); ++ stbuf->st_mode = attr->mode; ++ stbuf->st_uid = attr->uid; ++ stbuf->st_gid = attr->gid; ++ stbuf->st_size = attr->size; ++ stbuf->st_atime = attr->atime; ++ stbuf->st_mtime = attr->mtime; ++ stbuf->st_ctime = attr->ctime; ++ ST_ATIM_NSEC_SET(stbuf, attr->atimensec); ++ ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); ++ ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); + } + +-static size_t iov_length(const struct iovec *iov, size_t count) ++static size_t iov_length(const struct iovec *iov, size_t count) + { +- size_t seg; +- size_t ret = 0; ++ size_t seg; ++ size_t ret = 0; + +- for (seg = 0; seg < count; seg++) +- ret += iov[seg].iov_len; +- return ret; ++ for (seg = 0; seg < count; seg++) { ++ ret += iov[seg].iov_len; ++ } ++ return ret; + } + + static void list_init_req(struct fuse_req *req) + { +- req->next = req; +- req->prev = req; ++ req->next = req; ++ req->prev = req; + } + + static void list_del_req(struct fuse_req *req) + { +- struct fuse_req *prev = req->prev; +- struct fuse_req *next = req->next; +- prev->next = next; +- next->prev = prev; ++ struct fuse_req *prev = req->prev; ++ struct fuse_req *next = req->next; ++ prev->next = next; ++ next->prev = prev; + } + + static void list_add_req(struct fuse_req *req, struct fuse_req *next) + { +- struct fuse_req *prev = next->prev; +- req->next = next; 
+- req->prev = prev; +- prev->next = req; +- next->prev = req; ++ struct fuse_req *prev = next->prev; ++ req->next = next; ++ req->prev = prev; ++ prev->next = req; ++ next->prev = req; + } + + static void destroy_req(fuse_req_t req) + { +- pthread_mutex_destroy(&req->lock); +- free(req); ++ pthread_mutex_destroy(&req->lock); ++ free(req); + } + + void fuse_free_req(fuse_req_t req) + { +- int ctr; +- struct fuse_session *se = req->se; ++ int ctr; ++ struct fuse_session *se = req->se; + +- pthread_mutex_lock(&se->lock); +- req->u.ni.func = NULL; +- req->u.ni.data = NULL; +- list_del_req(req); +- ctr = --req->ctr; +- req->ch = NULL; +- pthread_mutex_unlock(&se->lock); +- if (!ctr) +- destroy_req(req); ++ pthread_mutex_lock(&se->lock); ++ req->u.ni.func = NULL; ++ req->u.ni.data = NULL; ++ list_del_req(req); ++ ctr = --req->ctr; ++ req->ch = NULL; ++ pthread_mutex_unlock(&se->lock); ++ if (!ctr) { ++ destroy_req(req); ++ } + } + + static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se) + { +- struct fuse_req *req; ++ struct fuse_req *req; + +- req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req)); +- if (req == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); +- } else { +- req->se = se; +- req->ctr = 1; +- list_init_req(req); +- fuse_mutex_init(&req->lock); +- } ++ req = (struct fuse_req *)calloc(1, sizeof(struct fuse_req)); ++ if (req == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); ++ } else { ++ req->se = se; ++ req->ctr = 1; ++ list_init_req(req); ++ fuse_mutex_init(&req->lock); ++ } + +- return req; ++ return req; + } + + /* Send data. 
If *ch* is NULL, send via session master fd */ + static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, +- struct iovec *iov, int count) ++ struct iovec *iov, int count) + { +- struct fuse_out_header *out = iov[0].iov_base; ++ struct fuse_out_header *out = iov[0].iov_base; + +- out->len = iov_length(iov, count); +- if (se->debug) { +- if (out->unique == 0) { +- fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", +- out->error, out->len); +- } else if (out->error) { +- fuse_log(FUSE_LOG_DEBUG, +- " unique: %llu, error: %i (%s), outsize: %i\n", +- (unsigned long long) out->unique, out->error, +- strerror(-out->error), out->len); +- } else { +- fuse_log(FUSE_LOG_DEBUG, +- " unique: %llu, success, outsize: %i\n", +- (unsigned long long) out->unique, out->len); +- } +- } ++ out->len = iov_length(iov, count); ++ if (se->debug) { ++ if (out->unique == 0) { ++ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, ++ out->len); ++ } else if (out->error) { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, error: %i (%s), outsize: %i\n", ++ (unsigned long long)out->unique, out->error, ++ strerror(-out->error), out->len); ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", ++ (unsigned long long)out->unique, out->len); ++ } ++ } + +- abort(); /* virtio should have taken it before here */ +- return 0; ++ abort(); /* virtio should have taken it before here */ ++ return 0; + } + + + int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, +- int count) ++ int count) + { +- struct fuse_out_header out; ++ struct fuse_out_header out; + +- if (error <= -1000 || error > 0) { +- fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); +- error = -ERANGE; +- } ++ if (error <= -1000 || error > 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); ++ error = -ERANGE; ++ } + +- out.unique = req->unique; +- out.error = error; ++ out.unique = req->unique; ++ out.error = error; + +- 
iov[0].iov_base = &out; +- iov[0].iov_len = sizeof(struct fuse_out_header); ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); + +- return fuse_send_msg(req->se, req->ch, iov, count); ++ return fuse_send_msg(req->se, req->ch, iov, count); + } + + static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, +- int count) ++ int count) + { +- int res; ++ int res; + +- res = fuse_send_reply_iov_nofree(req, error, iov, count); +- fuse_free_req(req); +- return res; ++ res = fuse_send_reply_iov_nofree(req, error, iov, count); ++ fuse_free_req(req); ++ return res; + } + + static int send_reply(fuse_req_t req, int error, const void *arg, +- size_t argsize) ++ size_t argsize) + { +- struct iovec iov[2]; +- int count = 1; +- if (argsize) { +- iov[1].iov_base = (void *) arg; +- iov[1].iov_len = argsize; +- count++; +- } +- return send_reply_iov(req, error, iov, count); ++ struct iovec iov[2]; ++ int count = 1; ++ if (argsize) { ++ iov[1].iov_base = (void *)arg; ++ iov[1].iov_len = argsize; ++ count++; ++ } ++ return send_reply_iov(req, error, iov, count); + } + + int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count) + { +- int res; +- struct iovec *padded_iov; ++ int res; ++ struct iovec *padded_iov; + +- padded_iov = malloc((count + 1) * sizeof(struct iovec)); +- if (padded_iov == NULL) +- return fuse_reply_err(req, ENOMEM); ++ padded_iov = malloc((count + 1) * sizeof(struct iovec)); ++ if (padded_iov == NULL) { ++ return fuse_reply_err(req, ENOMEM); ++ } + +- memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); +- count++; ++ memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); ++ count++; + +- res = send_reply_iov(req, 0, padded_iov, count); +- free(padded_iov); ++ res = send_reply_iov(req, 0, padded_iov, count); ++ free(padded_iov); + +- return res; ++ return res; + } + + +-/* `buf` is allowed to be empty so that the proper size may be +- allocated by the caller */ ++/* ++ * 'buf` is allowed to be empty 
so that the proper size may be ++ * allocated by the caller ++ */ + size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, +- const char *name, const struct stat *stbuf, off_t off) ++ const char *name, const struct stat *stbuf, off_t off) + { +- (void)req; +- size_t namelen; +- size_t entlen; +- size_t entlen_padded; +- struct fuse_dirent *dirent; ++ (void)req; ++ size_t namelen; ++ size_t entlen; ++ size_t entlen_padded; ++ struct fuse_dirent *dirent; + +- namelen = strlen(name); +- entlen = FUSE_NAME_OFFSET + namelen; +- entlen_padded = FUSE_DIRENT_ALIGN(entlen); ++ namelen = strlen(name); ++ entlen = FUSE_NAME_OFFSET + namelen; ++ entlen_padded = FUSE_DIRENT_ALIGN(entlen); + +- if ((buf == NULL) || (entlen_padded > bufsize)) +- return entlen_padded; ++ if ((buf == NULL) || (entlen_padded > bufsize)) { ++ return entlen_padded; ++ } + +- dirent = (struct fuse_dirent*) buf; +- dirent->ino = stbuf->st_ino; +- dirent->off = off; +- dirent->namelen = namelen; +- dirent->type = (stbuf->st_mode & S_IFMT) >> 12; +- memcpy(dirent->name, name, namelen); +- memset(dirent->name + namelen, 0, entlen_padded - entlen); ++ dirent = (struct fuse_dirent *)buf; ++ dirent->ino = stbuf->st_ino; ++ dirent->off = off; ++ dirent->namelen = namelen; ++ dirent->type = (stbuf->st_mode & S_IFMT) >> 12; ++ memcpy(dirent->name, name, namelen); ++ memset(dirent->name + namelen, 0, entlen_padded - entlen); + +- return entlen_padded; ++ return entlen_padded; + } + + static void convert_statfs(const struct statvfs *stbuf, +- struct fuse_kstatfs *kstatfs) ++ struct fuse_kstatfs *kstatfs) + { +- kstatfs->bsize = stbuf->f_bsize; +- kstatfs->frsize = stbuf->f_frsize; +- kstatfs->blocks = stbuf->f_blocks; +- kstatfs->bfree = stbuf->f_bfree; +- kstatfs->bavail = stbuf->f_bavail; +- kstatfs->files = stbuf->f_files; +- kstatfs->ffree = stbuf->f_ffree; +- kstatfs->namelen = stbuf->f_namemax; ++ kstatfs->bsize = stbuf->f_bsize; ++ kstatfs->frsize = stbuf->f_frsize; ++ kstatfs->blocks = 
stbuf->f_blocks; ++ kstatfs->bfree = stbuf->f_bfree; ++ kstatfs->bavail = stbuf->f_bavail; ++ kstatfs->files = stbuf->f_files; ++ kstatfs->ffree = stbuf->f_ffree; ++ kstatfs->namelen = stbuf->f_namemax; + } + + static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) + { +- return send_reply(req, 0, arg, argsize); ++ return send_reply(req, 0, arg, argsize); + } + + int fuse_reply_err(fuse_req_t req, int err) + { +- return send_reply(req, -err, NULL, 0); ++ return send_reply(req, -err, NULL, 0); + } + + void fuse_reply_none(fuse_req_t req) + { +- fuse_free_req(req); ++ fuse_free_req(req); + } + + static unsigned long calc_timeout_sec(double t) + { +- if (t > (double) ULONG_MAX) +- return ULONG_MAX; +- else if (t < 0.0) +- return 0; +- else +- return (unsigned long) t; ++ if (t > (double)ULONG_MAX) { ++ return ULONG_MAX; ++ } else if (t < 0.0) { ++ return 0; ++ } else { ++ return (unsigned long)t; ++ } + } + + static unsigned int calc_timeout_nsec(double t) + { +- double f = t - (double) calc_timeout_sec(t); +- if (f < 0.0) +- return 0; +- else if (f >= 0.999999999) +- return 999999999; +- else +- return (unsigned int) (f * 1.0e9); ++ double f = t - (double)calc_timeout_sec(t); ++ if (f < 0.0) { ++ return 0; ++ } else if (f >= 0.999999999) { ++ return 999999999; ++ } else { ++ return (unsigned int)(f * 1.0e9); ++ } + } + + static void fill_entry(struct fuse_entry_out *arg, +- const struct fuse_entry_param *e) ++ const struct fuse_entry_param *e) + { +- arg->nodeid = e->ino; +- arg->generation = e->generation; +- arg->entry_valid = calc_timeout_sec(e->entry_timeout); +- arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); +- arg->attr_valid = calc_timeout_sec(e->attr_timeout); +- arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); +- convert_stat(&e->attr, &arg->attr); ++ arg->nodeid = e->ino; ++ arg->generation = e->generation; ++ arg->entry_valid = calc_timeout_sec(e->entry_timeout); ++ arg->entry_valid_nsec = 
calc_timeout_nsec(e->entry_timeout); ++ arg->attr_valid = calc_timeout_sec(e->attr_timeout); ++ arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); ++ convert_stat(&e->attr, &arg->attr); + } + +-/* `buf` is allowed to be empty so that the proper size may be +- allocated by the caller */ ++/* ++ * `buf` is allowed to be empty so that the proper size may be ++ * allocated by the caller ++ */ + size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, +- const char *name, +- const struct fuse_entry_param *e, off_t off) +-{ +- (void)req; +- size_t namelen; +- size_t entlen; +- size_t entlen_padded; +- +- namelen = strlen(name); +- entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; +- entlen_padded = FUSE_DIRENT_ALIGN(entlen); +- if ((buf == NULL) || (entlen_padded > bufsize)) +- return entlen_padded; +- +- struct fuse_direntplus *dp = (struct fuse_direntplus *) buf; +- memset(&dp->entry_out, 0, sizeof(dp->entry_out)); +- fill_entry(&dp->entry_out, e); +- +- struct fuse_dirent *dirent = &dp->dirent; +- dirent->ino = e->attr.st_ino; +- dirent->off = off; +- dirent->namelen = namelen; +- dirent->type = (e->attr.st_mode & S_IFMT) >> 12; +- memcpy(dirent->name, name, namelen); +- memset(dirent->name + namelen, 0, entlen_padded - entlen); +- +- return entlen_padded; +-} +- +-static void fill_open(struct fuse_open_out *arg, +- const struct fuse_file_info *f) +-{ +- arg->fh = f->fh; +- if (f->direct_io) +- arg->open_flags |= FOPEN_DIRECT_IO; +- if (f->keep_cache) +- arg->open_flags |= FOPEN_KEEP_CACHE; +- if (f->cache_readdir) +- arg->open_flags |= FOPEN_CACHE_DIR; +- if (f->nonseekable) +- arg->open_flags |= FOPEN_NONSEEKABLE; ++ const char *name, ++ const struct fuse_entry_param *e, off_t off) ++{ ++ (void)req; ++ size_t namelen; ++ size_t entlen; ++ size_t entlen_padded; ++ ++ namelen = strlen(name); ++ entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; ++ entlen_padded = FUSE_DIRENT_ALIGN(entlen); ++ if ((buf == NULL) || (entlen_padded > bufsize)) { ++ 
return entlen_padded; ++ } ++ ++ struct fuse_direntplus *dp = (struct fuse_direntplus *)buf; ++ memset(&dp->entry_out, 0, sizeof(dp->entry_out)); ++ fill_entry(&dp->entry_out, e); ++ ++ struct fuse_dirent *dirent = &dp->dirent; ++ dirent->ino = e->attr.st_ino; ++ dirent->off = off; ++ dirent->namelen = namelen; ++ dirent->type = (e->attr.st_mode & S_IFMT) >> 12; ++ memcpy(dirent->name, name, namelen); ++ memset(dirent->name + namelen, 0, entlen_padded - entlen); ++ ++ return entlen_padded; ++} ++ ++static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) ++{ ++ arg->fh = f->fh; ++ if (f->direct_io) { ++ arg->open_flags |= FOPEN_DIRECT_IO; ++ } ++ if (f->keep_cache) { ++ arg->open_flags |= FOPEN_KEEP_CACHE; ++ } ++ if (f->cache_readdir) { ++ arg->open_flags |= FOPEN_CACHE_DIR; ++ } ++ if (f->nonseekable) { ++ arg->open_flags |= FOPEN_NONSEEKABLE; ++ } + } + + int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) + { +- struct fuse_entry_out arg; +- size_t size = req->se->conn.proto_minor < 9 ? +- FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(arg); ++ struct fuse_entry_out arg; ++ size_t size = req->se->conn.proto_minor < 9 ? 
FUSE_COMPAT_ENTRY_OUT_SIZE : ++ sizeof(arg); + +- /* before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant +- negative entry */ +- if (!e->ino && req->se->conn.proto_minor < 4) +- return fuse_reply_err(req, ENOENT); ++ /* ++ * before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant ++ * negative entry ++ */ ++ if (!e->ino && req->se->conn.proto_minor < 4) { ++ return fuse_reply_err(req, ENOENT); ++ } + +- memset(&arg, 0, sizeof(arg)); +- fill_entry(&arg, e); +- return send_reply_ok(req, &arg, size); ++ memset(&arg, 0, sizeof(arg)); ++ fill_entry(&arg, e); ++ return send_reply_ok(req, &arg, size); + } + + int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, +- const struct fuse_file_info *f) ++ const struct fuse_file_info *f) + { +- char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; +- size_t entrysize = req->se->conn.proto_minor < 9 ? +- FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(struct fuse_entry_out); +- struct fuse_entry_out *earg = (struct fuse_entry_out *) buf; +- struct fuse_open_out *oarg = (struct fuse_open_out *) (buf + entrysize); ++ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; ++ size_t entrysize = req->se->conn.proto_minor < 9 ? ++ FUSE_COMPAT_ENTRY_OUT_SIZE : ++ sizeof(struct fuse_entry_out); ++ struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; ++ struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); + +- memset(buf, 0, sizeof(buf)); +- fill_entry(earg, e); +- fill_open(oarg, f); +- return send_reply_ok(req, buf, +- entrysize + sizeof(struct fuse_open_out)); ++ memset(buf, 0, sizeof(buf)); ++ fill_entry(earg, e); ++ fill_open(oarg, f); ++ return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out)); + } + + int fuse_reply_attr(fuse_req_t req, const struct stat *attr, +- double attr_timeout) ++ double attr_timeout) + { +- struct fuse_attr_out arg; +- size_t size = req->se->conn.proto_minor < 9 ? 
+- FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); ++ struct fuse_attr_out arg; ++ size_t size = ++ req->se->conn.proto_minor < 9 ? FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); + +- memset(&arg, 0, sizeof(arg)); +- arg.attr_valid = calc_timeout_sec(attr_timeout); +- arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); +- convert_stat(attr, &arg.attr); ++ memset(&arg, 0, sizeof(arg)); ++ arg.attr_valid = calc_timeout_sec(attr_timeout); ++ arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); ++ convert_stat(attr, &arg.attr); + +- return send_reply_ok(req, &arg, size); ++ return send_reply_ok(req, &arg, size); + } + + int fuse_reply_readlink(fuse_req_t req, const char *linkname) + { +- return send_reply_ok(req, linkname, strlen(linkname)); ++ return send_reply_ok(req, linkname, strlen(linkname)); + } + + int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f) + { +- struct fuse_open_out arg; ++ struct fuse_open_out arg; + +- memset(&arg, 0, sizeof(arg)); +- fill_open(&arg, f); +- return send_reply_ok(req, &arg, sizeof(arg)); ++ memset(&arg, 0, sizeof(arg)); ++ fill_open(&arg, f); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_write(fuse_req_t req, size_t count) + { +- struct fuse_write_out arg; ++ struct fuse_write_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.size = count; ++ memset(&arg, 0, sizeof(arg)); ++ arg.size = count; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size) + { +- return send_reply_ok(req, buf, size); ++ return send_reply_ok(req, buf, size); + } + + static int fuse_send_data_iov_fallback(struct fuse_session *se, +- struct fuse_chan *ch, +- struct iovec *iov, int iov_count, +- struct fuse_bufvec *buf, +- size_t len) ++ struct fuse_chan *ch, struct iovec *iov, ++ int iov_count, struct fuse_bufvec *buf, ++ size_t len) + { +- /* Optimize common case */ +- if (buf->count == 1 && buf->idx == 0 && 
buf->off == 0 && +- !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { +- /* FIXME: also avoid memory copy if there are multiple buffers +- but none of them contain an fd */ ++ /* Optimize common case */ ++ if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && ++ !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { ++ /* ++ * FIXME: also avoid memory copy if there are multiple buffers ++ * but none of them contain an fd ++ */ + +- iov[iov_count].iov_base = buf->buf[0].mem; +- iov[iov_count].iov_len = len; +- iov_count++; +- return fuse_send_msg(se, ch, iov, iov_count); +- } ++ iov[iov_count].iov_base = buf->buf[0].mem; ++ iov[iov_count].iov_len = len; ++ iov_count++; ++ return fuse_send_msg(se, ch, iov, iov_count); ++ } + +- abort(); /* Will have taken vhost path */ +- return 0; ++ abort(); /* Will have taken vhost path */ ++ return 0; + } + + static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, +- struct iovec *iov, int iov_count, +- struct fuse_bufvec *buf, unsigned int flags) ++ struct iovec *iov, int iov_count, ++ struct fuse_bufvec *buf, unsigned int flags) + { +- size_t len = fuse_buf_size(buf); +- (void) flags; ++ size_t len = fuse_buf_size(buf); ++ (void)flags; + +- return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); ++ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); + } + + int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags) ++ enum fuse_buf_copy_flags flags) + { +- struct iovec iov[2]; +- struct fuse_out_header out; +- int res; ++ struct iovec iov[2]; ++ struct fuse_out_header out; ++ int res; + +- iov[0].iov_base = &out; +- iov[0].iov_len = sizeof(struct fuse_out_header); ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); + +- out.unique = req->unique; +- out.error = 0; ++ out.unique = req->unique; ++ out.error = 0; + +- res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); +- if (res <= 0) { +- fuse_free_req(req); +- return 
res; +- } else { +- return fuse_reply_err(req, res); +- } ++ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); ++ if (res <= 0) { ++ fuse_free_req(req); ++ return res; ++ } else { ++ return fuse_reply_err(req, res); ++ } + } + + int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) + { +- struct fuse_statfs_out arg; +- size_t size = req->se->conn.proto_minor < 4 ? +- FUSE_COMPAT_STATFS_SIZE : sizeof(arg); ++ struct fuse_statfs_out arg; ++ size_t size = ++ req->se->conn.proto_minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(arg); + +- memset(&arg, 0, sizeof(arg)); +- convert_statfs(stbuf, &arg.st); ++ memset(&arg, 0, sizeof(arg)); ++ convert_statfs(stbuf, &arg.st); + +- return send_reply_ok(req, &arg, size); ++ return send_reply_ok(req, &arg, size); + } + + int fuse_reply_xattr(fuse_req_t req, size_t count) + { +- struct fuse_getxattr_out arg; ++ struct fuse_getxattr_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.size = count; ++ memset(&arg, 0, sizeof(arg)); ++ arg.size = count; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_lock(fuse_req_t req, const struct flock *lock) + { +- struct fuse_lk_out arg; ++ struct fuse_lk_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.lk.type = lock->l_type; +- if (lock->l_type != F_UNLCK) { +- arg.lk.start = lock->l_start; +- if (lock->l_len == 0) +- arg.lk.end = OFFSET_MAX; +- else +- arg.lk.end = lock->l_start + lock->l_len - 1; +- } +- arg.lk.pid = lock->l_pid; +- return send_reply_ok(req, &arg, sizeof(arg)); ++ memset(&arg, 0, sizeof(arg)); ++ arg.lk.type = lock->l_type; ++ if (lock->l_type != F_UNLCK) { ++ arg.lk.start = lock->l_start; ++ if (lock->l_len == 0) { ++ arg.lk.end = OFFSET_MAX; ++ } else { ++ arg.lk.end = lock->l_start + lock->l_len - 1; ++ } ++ } ++ arg.lk.pid = lock->l_pid; ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_bmap(fuse_req_t req, uint64_t idx) + { +- struct fuse_bmap_out 
arg; ++ struct fuse_bmap_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.block = idx; ++ memset(&arg, 0, sizeof(arg)); ++ arg.block = idx; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov, +- size_t count) +-{ +- struct fuse_ioctl_iovec *fiov; +- size_t i; +- +- fiov = malloc(sizeof(fiov[0]) * count); +- if (!fiov) +- return NULL; +- +- for (i = 0; i < count; i++) { +- fiov[i].base = (uintptr_t) iov[i].iov_base; +- fiov[i].len = iov[i].iov_len; +- } +- +- return fiov; +-} +- +-int fuse_reply_ioctl_retry(fuse_req_t req, +- const struct iovec *in_iov, size_t in_count, +- const struct iovec *out_iov, size_t out_count) +-{ +- struct fuse_ioctl_out arg; +- struct fuse_ioctl_iovec *in_fiov = NULL; +- struct fuse_ioctl_iovec *out_fiov = NULL; +- struct iovec iov[4]; +- size_t count = 1; +- int res; +- +- memset(&arg, 0, sizeof(arg)); +- arg.flags |= FUSE_IOCTL_RETRY; +- arg.in_iovs = in_count; +- arg.out_iovs = out_count; +- iov[count].iov_base = &arg; +- iov[count].iov_len = sizeof(arg); +- count++; +- +- if (req->se->conn.proto_minor < 16) { +- if (in_count) { +- iov[count].iov_base = (void *)in_iov; +- iov[count].iov_len = sizeof(in_iov[0]) * in_count; +- count++; +- } +- +- if (out_count) { +- iov[count].iov_base = (void *)out_iov; +- iov[count].iov_len = sizeof(out_iov[0]) * out_count; +- count++; +- } +- } else { +- /* Can't handle non-compat 64bit ioctls on 32bit */ +- if (sizeof(void *) == 4 && req->ioctl_64bit) { +- res = fuse_reply_err(req, EINVAL); +- goto out; +- } +- +- if (in_count) { +- in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); +- if (!in_fiov) +- goto enomem; +- +- iov[count].iov_base = (void *)in_fiov; +- iov[count].iov_len = sizeof(in_fiov[0]) * in_count; +- count++; +- } +- if (out_count) { +- out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); +- if (!out_fiov) +- goto enomem; +- +- iov[count].iov_base = 
(void *)out_fiov; +- iov[count].iov_len = sizeof(out_fiov[0]) * out_count; +- count++; +- } +- } +- +- res = send_reply_iov(req, 0, iov, count); ++ size_t count) ++{ ++ struct fuse_ioctl_iovec *fiov; ++ size_t i; ++ ++ fiov = malloc(sizeof(fiov[0]) * count); ++ if (!fiov) { ++ return NULL; ++ } ++ ++ for (i = 0; i < count; i++) { ++ fiov[i].base = (uintptr_t)iov[i].iov_base; ++ fiov[i].len = iov[i].iov_len; ++ } ++ ++ return fiov; ++} ++ ++int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, ++ size_t in_count, const struct iovec *out_iov, ++ size_t out_count) ++{ ++ struct fuse_ioctl_out arg; ++ struct fuse_ioctl_iovec *in_fiov = NULL; ++ struct fuse_ioctl_iovec *out_fiov = NULL; ++ struct iovec iov[4]; ++ size_t count = 1; ++ int res; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.flags |= FUSE_IOCTL_RETRY; ++ arg.in_iovs = in_count; ++ arg.out_iovs = out_count; ++ iov[count].iov_base = &arg; ++ iov[count].iov_len = sizeof(arg); ++ count++; ++ ++ if (req->se->conn.proto_minor < 16) { ++ if (in_count) { ++ iov[count].iov_base = (void *)in_iov; ++ iov[count].iov_len = sizeof(in_iov[0]) * in_count; ++ count++; ++ } ++ ++ if (out_count) { ++ iov[count].iov_base = (void *)out_iov; ++ iov[count].iov_len = sizeof(out_iov[0]) * out_count; ++ count++; ++ } ++ } else { ++ /* Can't handle non-compat 64bit ioctls on 32bit */ ++ if (sizeof(void *) == 4 && req->ioctl_64bit) { ++ res = fuse_reply_err(req, EINVAL); ++ goto out; ++ } ++ ++ if (in_count) { ++ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); ++ if (!in_fiov) { ++ goto enomem; ++ } ++ ++ iov[count].iov_base = (void *)in_fiov; ++ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; ++ count++; ++ } ++ if (out_count) { ++ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); ++ if (!out_fiov) { ++ goto enomem; ++ } ++ ++ iov[count].iov_base = (void *)out_fiov; ++ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; ++ count++; ++ } ++ } ++ ++ res = send_reply_iov(req, 0, iov, count); + out: +- 
free(in_fiov); +- free(out_fiov); ++ free(in_fiov); ++ free(out_fiov); + +- return res; ++ return res; + + enomem: +- res = fuse_reply_err(req, ENOMEM); +- goto out; ++ res = fuse_reply_err(req, ENOMEM); ++ goto out; + } + + int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size) + { +- struct fuse_ioctl_out arg; +- struct iovec iov[3]; +- size_t count = 1; ++ struct fuse_ioctl_out arg; ++ struct iovec iov[3]; ++ size_t count = 1; + +- memset(&arg, 0, sizeof(arg)); +- arg.result = result; +- iov[count].iov_base = &arg; +- iov[count].iov_len = sizeof(arg); +- count++; ++ memset(&arg, 0, sizeof(arg)); ++ arg.result = result; ++ iov[count].iov_base = &arg; ++ iov[count].iov_len = sizeof(arg); ++ count++; + +- if (size) { +- iov[count].iov_base = (char *) buf; +- iov[count].iov_len = size; +- count++; +- } ++ if (size) { ++ iov[count].iov_base = (char *)buf; ++ iov[count].iov_len = size; ++ count++; ++ } + +- return send_reply_iov(req, 0, iov, count); ++ return send_reply_iov(req, 0, iov, count); + } + + int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, +- int count) ++ int count) + { +- struct iovec *padded_iov; +- struct fuse_ioctl_out arg; +- int res; ++ struct iovec *padded_iov; ++ struct fuse_ioctl_out arg; ++ int res; + +- padded_iov = malloc((count + 2) * sizeof(struct iovec)); +- if (padded_iov == NULL) +- return fuse_reply_err(req, ENOMEM); ++ padded_iov = malloc((count + 2) * sizeof(struct iovec)); ++ if (padded_iov == NULL) { ++ return fuse_reply_err(req, ENOMEM); ++ } + +- memset(&arg, 0, sizeof(arg)); +- arg.result = result; +- padded_iov[1].iov_base = &arg; +- padded_iov[1].iov_len = sizeof(arg); ++ memset(&arg, 0, sizeof(arg)); ++ arg.result = result; ++ padded_iov[1].iov_base = &arg; ++ padded_iov[1].iov_len = sizeof(arg); + +- memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); ++ memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); + +- res = send_reply_iov(req, 0, padded_iov, count + 2); 
+- free(padded_iov); ++ res = send_reply_iov(req, 0, padded_iov, count + 2); ++ free(padded_iov); + +- return res; ++ return res; + } + + int fuse_reply_poll(fuse_req_t req, unsigned revents) + { +- struct fuse_poll_out arg; ++ struct fuse_poll_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.revents = revents; ++ memset(&arg, 0, sizeof(arg)); ++ arg.revents = revents; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_lseek(fuse_req_t req, off_t off) + { +- struct fuse_lseek_out arg; ++ struct fuse_lseek_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.offset = off; ++ memset(&arg, 0, sizeof(arg)); ++ arg.offset = off; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; ++ char *name = (char *)inarg; + +- if (req->se->op.lookup) +- req->se->op.lookup(req, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.lookup) { ++ req->se->op.lookup(req, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_forget_in *arg = (struct fuse_forget_in *) inarg; ++ struct fuse_forget_in *arg = (struct fuse_forget_in *)inarg; + +- if (req->se->op.forget) +- req->se->op.forget(req, nodeid, arg->nlookup); +- else +- fuse_reply_none(req); ++ if (req->se->op.forget) { ++ req->se->op.forget(req, nodeid, arg->nlookup); ++ } else { ++ fuse_reply_none(req); ++ } + } + + static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg) ++ const void *inarg) + { +- struct fuse_batch_forget_in *arg = (void *) inarg; +- struct fuse_forget_one *param = (void *) PARAM(arg); +- unsigned int i; ++ struct fuse_batch_forget_in *arg = (void *)inarg; ++ struct fuse_forget_one *param = (void *)PARAM(arg); ++ 
unsigned int i; + +- (void) nodeid; ++ (void)nodeid; + +- if (req->se->op.forget_multi) { +- req->se->op.forget_multi(req, arg->count, +- (struct fuse_forget_data *) param); +- } else if (req->se->op.forget) { +- for (i = 0; i < arg->count; i++) { +- struct fuse_forget_one *forget = &param[i]; +- struct fuse_req *dummy_req; ++ if (req->se->op.forget_multi) { ++ req->se->op.forget_multi(req, arg->count, ++ (struct fuse_forget_data *)param); ++ } else if (req->se->op.forget) { ++ for (i = 0; i < arg->count; i++) { ++ struct fuse_forget_one *forget = &param[i]; ++ struct fuse_req *dummy_req; + +- dummy_req = fuse_ll_alloc_req(req->se); +- if (dummy_req == NULL) +- break; ++ dummy_req = fuse_ll_alloc_req(req->se); ++ if (dummy_req == NULL) { ++ break; ++ } + +- dummy_req->unique = req->unique; +- dummy_req->ctx = req->ctx; +- dummy_req->ch = NULL; ++ dummy_req->unique = req->unique; ++ dummy_req->ctx = req->ctx; ++ dummy_req->ch = NULL; + +- req->se->op.forget(dummy_req, forget->nodeid, +- forget->nlookup); +- } +- fuse_reply_none(req); +- } else { +- fuse_reply_none(req); +- } ++ req->se->op.forget(dummy_req, forget->nodeid, forget->nlookup); ++ } ++ fuse_reply_none(req); ++ } else { ++ fuse_reply_none(req); ++ } + } + + static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_file_info *fip = NULL; +- struct fuse_file_info fi; ++ struct fuse_file_info *fip = NULL; ++ struct fuse_file_info fi; + +- if (req->se->conn.proto_minor >= 9) { +- struct fuse_getattr_in *arg = (struct fuse_getattr_in *) inarg; ++ if (req->se->conn.proto_minor >= 9) { ++ struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; + +- if (arg->getattr_flags & FUSE_GETATTR_FH) { +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fip = &fi; +- } +- } ++ if (arg->getattr_flags & FUSE_GETATTR_FH) { ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fip = &fi; ++ } ++ } + +- if (req->se->op.getattr) +- req->se->op.getattr(req, nodeid, fip); +- else +- 
fuse_reply_err(req, ENOSYS); ++ if (req->se->op.getattr) { ++ req->se->op.getattr(req, nodeid, fip); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_setattr_in *arg = (struct fuse_setattr_in *) inarg; +- +- if (req->se->op.setattr) { +- struct fuse_file_info *fi = NULL; +- struct fuse_file_info fi_store; +- struct stat stbuf; +- memset(&stbuf, 0, sizeof(stbuf)); +- convert_attr(arg, &stbuf); +- if (arg->valid & FATTR_FH) { +- arg->valid &= ~FATTR_FH; +- memset(&fi_store, 0, sizeof(fi_store)); +- fi = &fi_store; +- fi->fh = arg->fh; +- } +- arg->valid &= +- FUSE_SET_ATTR_MODE | +- FUSE_SET_ATTR_UID | +- FUSE_SET_ATTR_GID | +- FUSE_SET_ATTR_SIZE | +- FUSE_SET_ATTR_ATIME | +- FUSE_SET_ATTR_MTIME | +- FUSE_SET_ATTR_ATIME_NOW | +- FUSE_SET_ATTR_MTIME_NOW | +- FUSE_SET_ATTR_CTIME; +- +- req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); +- } else +- fuse_reply_err(req, ENOSYS); ++ struct fuse_setattr_in *arg = (struct fuse_setattr_in *)inarg; ++ ++ if (req->se->op.setattr) { ++ struct fuse_file_info *fi = NULL; ++ struct fuse_file_info fi_store; ++ struct stat stbuf; ++ memset(&stbuf, 0, sizeof(stbuf)); ++ convert_attr(arg, &stbuf); ++ if (arg->valid & FATTR_FH) { ++ arg->valid &= ~FATTR_FH; ++ memset(&fi_store, 0, sizeof(fi_store)); ++ fi = &fi_store; ++ fi->fh = arg->fh; ++ } ++ arg->valid &= FUSE_SET_ATTR_MODE | FUSE_SET_ATTR_UID | ++ FUSE_SET_ATTR_GID | FUSE_SET_ATTR_SIZE | ++ FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME | ++ FUSE_SET_ATTR_ATIME_NOW | FUSE_SET_ATTR_MTIME_NOW | ++ FUSE_SET_ATTR_CTIME; ++ ++ req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_access_in *arg = (struct fuse_access_in *) inarg; ++ struct fuse_access_in *arg = (struct fuse_access_in *)inarg; + +- if (req->se->op.access) +- 
req->se->op.access(req, nodeid, arg->mask); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.access) { ++ req->se->op.access(req, nodeid, arg->mask); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- (void) inarg; ++ (void)inarg; + +- if (req->se->op.readlink) +- req->se->op.readlink(req, nodeid); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.readlink) { ++ req->se->op.readlink(req, nodeid); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inarg; +- char *name = PARAM(arg); ++ struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; ++ char *name = PARAM(arg); + +- if (req->se->conn.proto_minor >= 12) +- req->ctx.umask = arg->umask; +- else +- name = (char *) inarg + FUSE_COMPAT_MKNOD_IN_SIZE; ++ if (req->se->conn.proto_minor >= 12) { ++ req->ctx.umask = arg->umask; ++ } else { ++ name = (char *)inarg + FUSE_COMPAT_MKNOD_IN_SIZE; ++ } + +- if (req->se->op.mknod) +- req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.mknod) { ++ req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inarg; ++ struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; + +- if (req->se->conn.proto_minor >= 12) +- req->ctx.umask = arg->umask; ++ if (req->se->conn.proto_minor >= 12) { ++ req->ctx.umask = arg->umask; ++ } + +- if (req->se->op.mkdir) +- req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.mkdir) { ++ req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); ++ } else { ++ fuse_reply_err(req, ENOSYS); 
++ } + } + + static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; ++ char *name = (char *)inarg; + +- if (req->se->op.unlink) +- req->se->op.unlink(req, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.unlink) { ++ req->se->op.unlink(req, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; ++ char *name = (char *)inarg; + +- if (req->se->op.rmdir) +- req->se->op.rmdir(req, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.rmdir) { ++ req->se->op.rmdir(req, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; +- char *linkname = ((char *) inarg) + strlen((char *) inarg) + 1; ++ char *name = (char *)inarg; ++ char *linkname = ((char *)inarg) + strlen((char *)inarg) + 1; + +- if (req->se->op.symlink) +- req->se->op.symlink(req, linkname, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.symlink) { ++ req->se->op.symlink(req, linkname, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_rename_in *arg = (struct fuse_rename_in *) inarg; +- char *oldname = PARAM(arg); +- char *newname = oldname + strlen(oldname) + 1; ++ struct fuse_rename_in *arg = (struct fuse_rename_in *)inarg; ++ char *oldname = PARAM(arg); ++ char *newname = oldname + strlen(oldname) + 1; + +- if (req->se->op.rename) +- req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, +- 0); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.rename) { ++ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void 
do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_rename2_in *arg = (struct fuse_rename2_in *) inarg; +- char *oldname = PARAM(arg); +- char *newname = oldname + strlen(oldname) + 1; ++ struct fuse_rename2_in *arg = (struct fuse_rename2_in *)inarg; ++ char *oldname = PARAM(arg); ++ char *newname = oldname + strlen(oldname) + 1; + +- if (req->se->op.rename) +- req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, +- arg->flags); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.rename) { ++ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, ++ arg->flags); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_link_in *arg = (struct fuse_link_in *) inarg; ++ struct fuse_link_in *arg = (struct fuse_link_in *)inarg; + +- if (req->se->op.link) +- req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.link) { ++ req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_create_in *arg = (struct fuse_create_in *) inarg; ++ struct fuse_create_in *arg = (struct fuse_create_in *)inarg; + +- if (req->se->op.create) { +- struct fuse_file_info fi; +- char *name = PARAM(arg); ++ if (req->se->op.create) { ++ struct fuse_file_info fi; ++ char *name = PARAM(arg); + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; + +- if (req->se->conn.proto_minor >= 12) +- req->ctx.umask = arg->umask; +- else +- name = (char *) inarg + sizeof(struct fuse_open_in); ++ if (req->se->conn.proto_minor >= 12) { ++ req->ctx.umask = arg->umask; ++ } else { ++ name = (char *)inarg + sizeof(struct fuse_open_in); ++ } + +- req->se->op.create(req, nodeid, name, arg->mode, &fi); 
+- } else +- fuse_reply_err(req, ENOSYS); ++ req->se->op.create(req, nodeid, name, arg->mode, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_open_in *arg = (struct fuse_open_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_open_in *arg = (struct fuse_open_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; + +- if (req->se->op.open) +- req->se->op.open(req, nodeid, &fi); +- else +- fuse_reply_open(req, &fi); ++ if (req->se->op.open) { ++ req->se->op.open(req, nodeid, &fi); ++ } else { ++ fuse_reply_open(req, &fi); ++ } + } + + static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; ++ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; + +- if (req->se->op.read) { +- struct fuse_file_info fi; ++ if (req->se->op.read) { ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- if (req->se->conn.proto_minor >= 9) { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- } +- req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); +- } else +- fuse_reply_err(req, ENOSYS); ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ if (req->se->conn.proto_minor >= 9) { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ } ++ req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_write_in *arg = (struct fuse_write_in *) inarg; +- struct fuse_file_info fi; +- char *param; ++ struct fuse_write_in *arg = (struct fuse_write_in *)inarg; ++ struct fuse_file_info fi; ++ char *param; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.writepage = (arg->write_flags & 
FUSE_WRITE_CACHE) != 0; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; + +- if (req->se->conn.proto_minor < 9) { +- param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; +- } else { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- param = PARAM(arg); +- } ++ if (req->se->conn.proto_minor < 9) { ++ param = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; ++ } else { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ param = PARAM(arg); ++ } + +- if (req->se->op.write) +- req->se->op.write(req, nodeid, param, arg->size, +- arg->offset, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.write) { ++ req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, +- const struct fuse_buf *ibuf) +-{ +- struct fuse_session *se = req->se; +- struct fuse_bufvec bufv = { +- .buf[0] = *ibuf, +- .count = 1, +- }; +- struct fuse_write_in *arg = (struct fuse_write_in *) inarg; +- struct fuse_file_info fi; +- +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; +- +- if (se->conn.proto_minor < 9) { +- bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; +- bufv.buf[0].size -= sizeof(struct fuse_in_header) + +- FUSE_COMPAT_WRITE_IN_SIZE; +- assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); +- } else { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) +- bufv.buf[0].mem = PARAM(arg); +- +- bufv.buf[0].size -= sizeof(struct fuse_in_header) + +- sizeof(struct fuse_write_in); +- } +- if (bufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); +- fuse_reply_err(req, EIO); +- return; +- } +- bufv.buf[0].size = arg->size; +- +- se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); ++ 
const struct fuse_buf *ibuf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_bufvec bufv = { ++ .buf[0] = *ibuf, ++ .count = 1, ++ }; ++ struct fuse_write_in *arg = (struct fuse_write_in *)inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; ++ ++ if (se->conn.proto_minor < 9) { ++ bufv.buf[0].mem = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; ++ bufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + FUSE_COMPAT_WRITE_IN_SIZE; ++ assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); ++ } else { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { ++ bufv.buf[0].mem = PARAM(arg); ++ } ++ ++ bufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); ++ } ++ if (bufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ bufv.buf[0].size = arg->size; ++ ++ se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); + } + + static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_flush_in *arg = (struct fuse_flush_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.flush = 1; +- if (req->se->conn.proto_minor >= 7) +- fi.lock_owner = arg->lock_owner; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.flush = 1; ++ if (req->se->conn.proto_minor >= 7) { ++ fi.lock_owner = arg->lock_owner; ++ } + +- if (req->se->op.flush) +- req->se->op.flush(req, nodeid, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.flush) { ++ req->se->op.flush(req, nodeid, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_release_in *arg 
= (struct fuse_release_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_release_in *arg = (struct fuse_release_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; +- fi.fh = arg->fh; +- if (req->se->conn.proto_minor >= 8) { +- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; +- fi.lock_owner = arg->lock_owner; +- } +- if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { +- fi.flock_release = 1; +- fi.lock_owner = arg->lock_owner; +- } ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ fi.fh = arg->fh; ++ if (req->se->conn.proto_minor >= 8) { ++ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; ++ fi.lock_owner = arg->lock_owner; ++ } ++ if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { ++ fi.flock_release = 1; ++ fi.lock_owner = arg->lock_owner; ++ } + +- if (req->se->op.release) +- req->se->op.release(req, nodeid, &fi); +- else +- fuse_reply_err(req, 0); ++ if (req->se->op.release) { ++ req->se->op.release(req, nodeid, &fi); ++ } else { ++ fuse_reply_err(req, 0); ++ } + } + + static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; +- struct fuse_file_info fi; +- int datasync = arg->fsync_flags & 1; ++ struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; ++ struct fuse_file_info fi; ++ int datasync = arg->fsync_flags & 1; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.fsync) +- req->se->op.fsync(req, nodeid, datasync, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.fsync) { ++ req->se->op.fsync(req, nodeid, datasync, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_open_in *arg = (struct fuse_open_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_open_in *arg = (struct 
fuse_open_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; + +- if (req->se->op.opendir) +- req->se->op.opendir(req, nodeid, &fi); +- else +- fuse_reply_open(req, &fi); ++ if (req->se->op.opendir) { ++ req->se->op.opendir(req, nodeid, &fi); ++ } else { ++ fuse_reply_open(req, &fi); ++ } + } + + static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.readdir) +- req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.readdir) { ++ req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.readdirplus) +- req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.readdirplus) { ++ req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_release_in *arg = (struct fuse_release_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_release_in *arg = (struct fuse_release_in *)inarg; ++ struct 
fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ fi.fh = arg->fh; + +- if (req->se->op.releasedir) +- req->se->op.releasedir(req, nodeid, &fi); +- else +- fuse_reply_err(req, 0); ++ if (req->se->op.releasedir) { ++ req->se->op.releasedir(req, nodeid, &fi); ++ } else { ++ fuse_reply_err(req, 0); ++ } + } + + static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; +- struct fuse_file_info fi; +- int datasync = arg->fsync_flags & 1; ++ struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; ++ struct fuse_file_info fi; ++ int datasync = arg->fsync_flags & 1; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.fsyncdir) +- req->se->op.fsyncdir(req, nodeid, datasync, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.fsyncdir) { ++ req->se->op.fsyncdir(req, nodeid, datasync, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- (void) nodeid; +- (void) inarg; ++ (void)nodeid; ++ (void)inarg; + +- if (req->se->op.statfs) +- req->se->op.statfs(req, nodeid); +- else { +- struct statvfs buf = { +- .f_namemax = 255, +- .f_bsize = 512, +- }; +- fuse_reply_statfs(req, &buf); +- } ++ if (req->se->op.statfs) { ++ req->se->op.statfs(req, nodeid); ++ } else { ++ struct statvfs buf = { ++ .f_namemax = 255, ++ .f_bsize = 512, ++ }; ++ fuse_reply_statfs(req, &buf); ++ } + } + + static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg; +- char *name = PARAM(arg); +- char *value = name + strlen(name) + 1; ++ struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *)inarg; ++ char *name = PARAM(arg); ++ char *value = name 
+ strlen(name) + 1; + +- if (req->se->op.setxattr) +- req->se->op.setxattr(req, nodeid, name, value, arg->size, +- arg->flags); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.setxattr) { ++ req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; ++ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; + +- if (req->se->op.getxattr) +- req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.getxattr) { ++ req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; ++ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; + +- if (req->se->op.listxattr) +- req->se->op.listxattr(req, nodeid, arg->size); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.listxattr) { ++ req->se->op.listxattr(req, nodeid, arg->size); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; ++ char *name = (char *)inarg; + +- if (req->se->op.removexattr) +- req->se->op.removexattr(req, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.removexattr) { ++ req->se->op.removexattr(req, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void convert_fuse_file_lock(struct fuse_file_lock *fl, +- struct flock *flock) ++ struct flock *flock) + { +- memset(flock, 0, sizeof(struct flock)); +- flock->l_type = fl->type; +- flock->l_whence = SEEK_SET; +- flock->l_start = fl->start; +- if (fl->end == OFFSET_MAX) +- 
flock->l_len = 0; +- else +- flock->l_len = fl->end - fl->start + 1; +- flock->l_pid = fl->pid; ++ memset(flock, 0, sizeof(struct flock)); ++ flock->l_type = fl->type; ++ flock->l_whence = SEEK_SET; ++ flock->l_start = fl->start; ++ if (fl->end == OFFSET_MAX) { ++ flock->l_len = 0; ++ } else { ++ flock->l_len = fl->end - fl->start + 1; ++ } ++ flock->l_pid = fl->pid; + } + + static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; +- struct fuse_file_info fi; +- struct flock flock; ++ struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; ++ struct fuse_file_info fi; ++ struct flock flock; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.lock_owner = arg->owner; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.lock_owner = arg->owner; + +- convert_fuse_file_lock(&arg->lk, &flock); +- if (req->se->op.getlk) +- req->se->op.getlk(req, nodeid, &fi, &flock); +- else +- fuse_reply_err(req, ENOSYS); ++ convert_fuse_file_lock(&arg->lk, &flock); ++ if (req->se->op.getlk) { ++ req->se->op.getlk(req, nodeid, &fi, &flock); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg, int sleep) +-{ +- struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; +- struct fuse_file_info fi; +- struct flock flock; +- +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.lock_owner = arg->owner; +- +- if (arg->lk_flags & FUSE_LK_FLOCK) { +- int op = 0; +- +- switch (arg->lk.type) { +- case F_RDLCK: +- op = LOCK_SH; +- break; +- case F_WRLCK: +- op = LOCK_EX; +- break; +- case F_UNLCK: +- op = LOCK_UN; +- break; +- } +- if (!sleep) +- op |= LOCK_NB; +- +- if (req->se->op.flock) +- req->se->op.flock(req, nodeid, &fi, op); +- else +- fuse_reply_err(req, ENOSYS); +- } else { +- convert_fuse_file_lock(&arg->lk, &flock); +- if (req->se->op.setlk) +- req->se->op.setlk(req, nodeid, &fi, &flock, sleep); +- else +- 
fuse_reply_err(req, ENOSYS); +- } ++ const void *inarg, int sleep) ++{ ++ struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; ++ struct fuse_file_info fi; ++ struct flock flock; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.lock_owner = arg->owner; ++ ++ if (arg->lk_flags & FUSE_LK_FLOCK) { ++ int op = 0; ++ ++ switch (arg->lk.type) { ++ case F_RDLCK: ++ op = LOCK_SH; ++ break; ++ case F_WRLCK: ++ op = LOCK_EX; ++ break; ++ case F_UNLCK: ++ op = LOCK_UN; ++ break; ++ } ++ if (!sleep) { ++ op |= LOCK_NB; ++ } ++ ++ if (req->se->op.flock) { ++ req->se->op.flock(req, nodeid, &fi, op); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } ++ } else { ++ convert_fuse_file_lock(&arg->lk, &flock); ++ if (req->se->op.setlk) { ++ req->se->op.setlk(req, nodeid, &fi, &flock, sleep); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } ++ } + } + + static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- do_setlk_common(req, nodeid, inarg, 0); ++ do_setlk_common(req, nodeid, inarg, 0); + } + + static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- do_setlk_common(req, nodeid, inarg, 1); ++ do_setlk_common(req, nodeid, inarg, 1); + } + + static int find_interrupted(struct fuse_session *se, struct fuse_req *req) + { +- struct fuse_req *curr; +- +- for (curr = se->list.next; curr != &se->list; curr = curr->next) { +- if (curr->unique == req->u.i.unique) { +- fuse_interrupt_func_t func; +- void *data; +- +- curr->ctr++; +- pthread_mutex_unlock(&se->lock); +- +- /* Ugh, ugly locking */ +- pthread_mutex_lock(&curr->lock); +- pthread_mutex_lock(&se->lock); +- curr->interrupted = 1; +- func = curr->u.ni.func; +- data = curr->u.ni.data; +- pthread_mutex_unlock(&se->lock); +- if (func) +- func(curr, data); +- pthread_mutex_unlock(&curr->lock); +- +- pthread_mutex_lock(&se->lock); +- curr->ctr--; +- if (!curr->ctr) +- destroy_req(curr); +- +- return 1; +- } +- } +- for (curr = se->interrupts.next; curr != &se->interrupts; +- curr 
= curr->next) { +- if (curr->u.i.unique == req->u.i.unique) +- return 1; +- } +- return 0; ++ struct fuse_req *curr; ++ ++ for (curr = se->list.next; curr != &se->list; curr = curr->next) { ++ if (curr->unique == req->u.i.unique) { ++ fuse_interrupt_func_t func; ++ void *data; ++ ++ curr->ctr++; ++ pthread_mutex_unlock(&se->lock); ++ ++ /* Ugh, ugly locking */ ++ pthread_mutex_lock(&curr->lock); ++ pthread_mutex_lock(&se->lock); ++ curr->interrupted = 1; ++ func = curr->u.ni.func; ++ data = curr->u.ni.data; ++ pthread_mutex_unlock(&se->lock); ++ if (func) { ++ func(curr, data); ++ } ++ pthread_mutex_unlock(&curr->lock); ++ ++ pthread_mutex_lock(&se->lock); ++ curr->ctr--; ++ if (!curr->ctr) { ++ destroy_req(curr); ++ } ++ ++ return 1; ++ } ++ } ++ for (curr = se->interrupts.next; curr != &se->interrupts; ++ curr = curr->next) { ++ if (curr->u.i.unique == req->u.i.unique) { ++ return 1; ++ } ++ } ++ return 0; + } + + static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *) inarg; +- struct fuse_session *se = req->se; ++ struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *)inarg; ++ struct fuse_session *se = req->se; + +- (void) nodeid; +- if (se->debug) +- fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", +- (unsigned long long) arg->unique); ++ (void)nodeid; ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", ++ (unsigned long long)arg->unique); ++ } + +- req->u.i.unique = arg->unique; ++ req->u.i.unique = arg->unique; + +- pthread_mutex_lock(&se->lock); +- if (find_interrupted(se, req)) +- destroy_req(req); +- else +- list_add_req(req, &se->interrupts); +- pthread_mutex_unlock(&se->lock); ++ pthread_mutex_lock(&se->lock); ++ if (find_interrupted(se, req)) { ++ destroy_req(req); ++ } else { ++ list_add_req(req, &se->interrupts); ++ } ++ pthread_mutex_unlock(&se->lock); + } + + static struct fuse_req *check_interrupt(struct fuse_session *se, +- struct 
fuse_req *req) +-{ +- struct fuse_req *curr; +- +- for (curr = se->interrupts.next; curr != &se->interrupts; +- curr = curr->next) { +- if (curr->u.i.unique == req->unique) { +- req->interrupted = 1; +- list_del_req(curr); +- free(curr); +- return NULL; +- } +- } +- curr = se->interrupts.next; +- if (curr != &se->interrupts) { +- list_del_req(curr); +- list_init_req(curr); +- return curr; +- } else +- return NULL; ++ struct fuse_req *req) ++{ ++ struct fuse_req *curr; ++ ++ for (curr = se->interrupts.next; curr != &se->interrupts; ++ curr = curr->next) { ++ if (curr->u.i.unique == req->unique) { ++ req->interrupted = 1; ++ list_del_req(curr); ++ free(curr); ++ return NULL; ++ } ++ } ++ curr = se->interrupts.next; ++ if (curr != &se->interrupts) { ++ list_del_req(curr); ++ list_init_req(curr); ++ return curr; ++ } else { ++ return NULL; ++ } + } + + static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg; ++ struct fuse_bmap_in *arg = (struct fuse_bmap_in *)inarg; + +- if (req->se->op.bmap) +- req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.bmap) { ++ req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *) inarg; +- unsigned int flags = arg->flags; +- void *in_buf = arg->in_size ? PARAM(arg) : NULL; +- struct fuse_file_info fi; ++ struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *)inarg; ++ unsigned int flags = arg->flags; ++ void *in_buf = arg->in_size ? 
PARAM(arg) : NULL; ++ struct fuse_file_info fi; + +- if (flags & FUSE_IOCTL_DIR && +- !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { +- fuse_reply_err(req, ENOTTY); +- return; +- } ++ if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { ++ fuse_reply_err(req, ENOTTY); ++ return; ++ } + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && +- !(flags & FUSE_IOCTL_32BIT)) { +- req->ioctl_64bit = 1; +- } ++ if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && ++ !(flags & FUSE_IOCTL_32BIT)) { ++ req->ioctl_64bit = 1; ++ } + +- if (req->se->op.ioctl) +- req->se->op.ioctl(req, nodeid, arg->cmd, +- (void *)(uintptr_t)arg->arg, &fi, flags, +- in_buf, arg->in_size, arg->out_size); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.ioctl) { ++ req->se->op.ioctl(req, nodeid, arg->cmd, (void *)(uintptr_t)arg->arg, ++ &fi, flags, in_buf, arg->in_size, arg->out_size); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) + { +- free(ph); ++ free(ph); + } + + static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_poll_in *arg = (struct fuse_poll_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_poll_in *arg = (struct fuse_poll_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.poll_events = arg->events; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.poll_events = arg->events; + +- if (req->se->op.poll) { +- struct fuse_pollhandle *ph = NULL; ++ if (req->se->op.poll) { ++ struct fuse_pollhandle *ph = NULL; + +- if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { +- ph = malloc(sizeof(struct fuse_pollhandle)); +- if (ph == NULL) { +- fuse_reply_err(req, ENOMEM); +- return; +- } +- ph->kh = arg->kh; +- ph->se = req->se; +- } ++ if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { ++ 
ph = malloc(sizeof(struct fuse_pollhandle)); ++ if (ph == NULL) { ++ fuse_reply_err(req, ENOMEM); ++ return; ++ } ++ ph->kh = arg->kh; ++ ph->se = req->se; ++ } + +- req->se->op.poll(req, nodeid, &fi, ph); +- } else { +- fuse_reply_err(req, ENOSYS); +- } ++ req->se->op.poll(req, nodeid, &fi, ph); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.fallocate) +- req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.fallocate) { ++ req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, ++ &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + +-static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, const void *inarg) ++static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, ++ const void *inarg) + { +- struct fuse_copy_file_range_in *arg = (struct fuse_copy_file_range_in *) inarg; +- struct fuse_file_info fi_in, fi_out; ++ struct fuse_copy_file_range_in *arg = ++ (struct fuse_copy_file_range_in *)inarg; ++ struct fuse_file_info fi_in, fi_out; + +- memset(&fi_in, 0, sizeof(fi_in)); +- fi_in.fh = arg->fh_in; ++ memset(&fi_in, 0, sizeof(fi_in)); ++ fi_in.fh = arg->fh_in; + +- memset(&fi_out, 0, sizeof(fi_out)); +- fi_out.fh = arg->fh_out; ++ memset(&fi_out, 0, sizeof(fi_out)); ++ fi_out.fh = arg->fh_out; + + +- if (req->se->op.copy_file_range) +- req->se->op.copy_file_range(req, nodeid_in, arg->off_in, +- &fi_in, arg->nodeid_out, +- arg->off_out, &fi_out, arg->len, +- arg->flags); +- else +- fuse_reply_err(req, ENOSYS); ++ if 
(req->se->op.copy_file_range) { ++ req->se->op.copy_file_range(req, nodeid_in, arg->off_in, &fi_in, ++ arg->nodeid_out, arg->off_out, &fi_out, ++ arg->len, arg->flags); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_lseek_in *arg = (struct fuse_lseek_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_lseek_in *arg = (struct fuse_lseek_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.lseek) +- req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.lseek) { ++ req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_init_in *arg = (struct fuse_init_in *) inarg; +- struct fuse_init_out outarg; +- struct fuse_session *se = req->se; +- size_t bufsize = se->bufsize; +- size_t outargsize = sizeof(outarg); +- +- (void) nodeid; +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); +- if (arg->major == 7 && arg->minor >= 6) { +- fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); +- fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", +- arg->max_readahead); +- } +- } +- se->conn.proto_major = arg->major; +- se->conn.proto_minor = arg->minor; +- se->conn.capable = 0; +- se->conn.want = 0; +- +- memset(&outarg, 0, sizeof(outarg)); +- outarg.major = FUSE_KERNEL_VERSION; +- outarg.minor = FUSE_KERNEL_MINOR_VERSION; +- +- if (arg->major < 7) { +- fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", +- arg->major, arg->minor); +- fuse_reply_err(req, EPROTO); +- return; +- } +- +- if (arg->major > 7) { +- /* Wait for a second INIT request with a 7.X version */ +- send_reply_ok(req, 
&outarg, sizeof(outarg)); +- return; +- } +- +- if (arg->minor >= 6) { +- if (arg->max_readahead < se->conn.max_readahead) +- se->conn.max_readahead = arg->max_readahead; +- if (arg->flags & FUSE_ASYNC_READ) +- se->conn.capable |= FUSE_CAP_ASYNC_READ; +- if (arg->flags & FUSE_POSIX_LOCKS) +- se->conn.capable |= FUSE_CAP_POSIX_LOCKS; +- if (arg->flags & FUSE_ATOMIC_O_TRUNC) +- se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; +- if (arg->flags & FUSE_EXPORT_SUPPORT) +- se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; +- if (arg->flags & FUSE_DONT_MASK) +- se->conn.capable |= FUSE_CAP_DONT_MASK; +- if (arg->flags & FUSE_FLOCK_LOCKS) +- se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; +- if (arg->flags & FUSE_AUTO_INVAL_DATA) +- se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; +- if (arg->flags & FUSE_DO_READDIRPLUS) +- se->conn.capable |= FUSE_CAP_READDIRPLUS; +- if (arg->flags & FUSE_READDIRPLUS_AUTO) +- se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; +- if (arg->flags & FUSE_ASYNC_DIO) +- se->conn.capable |= FUSE_CAP_ASYNC_DIO; +- if (arg->flags & FUSE_WRITEBACK_CACHE) +- se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; +- if (arg->flags & FUSE_NO_OPEN_SUPPORT) +- se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; +- if (arg->flags & FUSE_PARALLEL_DIROPS) +- se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; +- if (arg->flags & FUSE_POSIX_ACL) +- se->conn.capable |= FUSE_CAP_POSIX_ACL; +- if (arg->flags & FUSE_HANDLE_KILLPRIV) +- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; +- if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) +- se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; +- if (!(arg->flags & FUSE_MAX_PAGES)) { +- size_t max_bufsize = +- FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() +- + FUSE_BUFFER_HEADER_SIZE; +- if (bufsize > max_bufsize) { +- bufsize = max_bufsize; +- } +- } +- } else { +- se->conn.max_readahead = 0; +- } +- +- if (se->conn.proto_minor >= 14) { ++ struct fuse_init_in *arg = (struct fuse_init_in *)inarg; ++ struct fuse_init_out outarg; ++ struct fuse_session *se = req->se; ++ 
size_t bufsize = se->bufsize; ++ size_t outargsize = sizeof(outarg); ++ ++ (void)nodeid; ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); ++ if (arg->major == 7 && arg->minor >= 6) { ++ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); ++ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", ++ arg->max_readahead); ++ } ++ } ++ se->conn.proto_major = arg->major; ++ se->conn.proto_minor = arg->minor; ++ se->conn.capable = 0; ++ se->conn.want = 0; ++ ++ memset(&outarg, 0, sizeof(outarg)); ++ outarg.major = FUSE_KERNEL_VERSION; ++ outarg.minor = FUSE_KERNEL_MINOR_VERSION; ++ ++ if (arg->major < 7) { ++ fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", ++ arg->major, arg->minor); ++ fuse_reply_err(req, EPROTO); ++ return; ++ } ++ ++ if (arg->major > 7) { ++ /* Wait for a second INIT request with a 7.X version */ ++ send_reply_ok(req, &outarg, sizeof(outarg)); ++ return; ++ } ++ ++ if (arg->minor >= 6) { ++ if (arg->max_readahead < se->conn.max_readahead) { ++ se->conn.max_readahead = arg->max_readahead; ++ } ++ if (arg->flags & FUSE_ASYNC_READ) { ++ se->conn.capable |= FUSE_CAP_ASYNC_READ; ++ } ++ if (arg->flags & FUSE_POSIX_LOCKS) { ++ se->conn.capable |= FUSE_CAP_POSIX_LOCKS; ++ } ++ if (arg->flags & FUSE_ATOMIC_O_TRUNC) { ++ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; ++ } ++ if (arg->flags & FUSE_EXPORT_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; ++ } ++ if (arg->flags & FUSE_DONT_MASK) { ++ se->conn.capable |= FUSE_CAP_DONT_MASK; ++ } ++ if (arg->flags & FUSE_FLOCK_LOCKS) { ++ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; ++ } ++ if (arg->flags & FUSE_AUTO_INVAL_DATA) { ++ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; ++ } ++ if (arg->flags & FUSE_DO_READDIRPLUS) { ++ se->conn.capable |= FUSE_CAP_READDIRPLUS; ++ } ++ if (arg->flags & FUSE_READDIRPLUS_AUTO) { ++ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; ++ } ++ if (arg->flags & FUSE_ASYNC_DIO) { ++ se->conn.capable |= 
FUSE_CAP_ASYNC_DIO; ++ } ++ if (arg->flags & FUSE_WRITEBACK_CACHE) { ++ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; ++ } ++ if (arg->flags & FUSE_NO_OPEN_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; ++ } ++ if (arg->flags & FUSE_PARALLEL_DIROPS) { ++ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; ++ } ++ if (arg->flags & FUSE_POSIX_ACL) { ++ se->conn.capable |= FUSE_CAP_POSIX_ACL; ++ } ++ if (arg->flags & FUSE_HANDLE_KILLPRIV) { ++ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; ++ } ++ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; ++ } ++ if (!(arg->flags & FUSE_MAX_PAGES)) { ++ size_t max_bufsize = ++ FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + ++ FUSE_BUFFER_HEADER_SIZE; ++ if (bufsize > max_bufsize) { ++ bufsize = max_bufsize; ++ } ++ } ++ } else { ++ se->conn.max_readahead = 0; ++ } ++ ++ if (se->conn.proto_minor >= 14) { + #ifdef HAVE_SPLICE + #ifdef HAVE_VMSPLICE +- se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; ++ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; + #endif +- se->conn.capable |= FUSE_CAP_SPLICE_READ; ++ se->conn.capable |= FUSE_CAP_SPLICE_READ; + #endif +- } +- if (se->conn.proto_minor >= 18) +- se->conn.capable |= FUSE_CAP_IOCTL_DIR; +- +- /* Default settings for modern filesystems. +- * +- * Most of these capabilities were disabled by default in +- * libfuse2 for backwards compatibility reasons. In libfuse3, +- * we can finally enable them by default (as long as they're +- * supported by the kernel). 
+- */ +-#define LL_SET_DEFAULT(cond, cap) \ +- if ((cond) && (se->conn.capable & (cap))) \ +- se->conn.want |= (cap) +- LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); +- LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); +- LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); +- LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); +- LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); +- LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); +- LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); +- LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); +- LL_SET_DEFAULT(se->op.getlk && se->op.setlk, +- FUSE_CAP_POSIX_LOCKS); +- LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); +- LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); +- LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, +- FUSE_CAP_READDIRPLUS_AUTO); +- se->conn.time_gran = 1; +- +- if (bufsize < FUSE_MIN_READ_BUFFER) { +- fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", +- bufsize); +- bufsize = FUSE_MIN_READ_BUFFER; +- } +- se->bufsize = bufsize; +- +- if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) +- se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; +- +- se->got_init = 1; +- if (se->op.init) +- se->op.init(se->userdata, &se->conn); +- +- if (se->conn.want & (~se->conn.capable)) { +- fuse_log(FUSE_LOG_ERR, "fuse: error: filesystem requested capabilities " +- "0x%x that are not supported by kernel, aborting.\n", +- se->conn.want & (~se->conn.capable)); +- fuse_reply_err(req, EPROTO); +- se->error = -EPROTO; +- fuse_session_exit(se); +- return; +- } +- +- if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { +- se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; +- } +- if (arg->flags & FUSE_MAX_PAGES) { +- outarg.flags |= FUSE_MAX_PAGES; +- outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; +- } +- +- /* Always enable big writes, this is superseded +- by the max_write option */ +- outarg.flags |= FUSE_BIG_WRITES; +- +- if (se->conn.want & FUSE_CAP_ASYNC_READ) +- outarg.flags |= 
FUSE_ASYNC_READ; +- if (se->conn.want & FUSE_CAP_POSIX_LOCKS) +- outarg.flags |= FUSE_POSIX_LOCKS; +- if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) +- outarg.flags |= FUSE_ATOMIC_O_TRUNC; +- if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) +- outarg.flags |= FUSE_EXPORT_SUPPORT; +- if (se->conn.want & FUSE_CAP_DONT_MASK) +- outarg.flags |= FUSE_DONT_MASK; +- if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) +- outarg.flags |= FUSE_FLOCK_LOCKS; +- if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) +- outarg.flags |= FUSE_AUTO_INVAL_DATA; +- if (se->conn.want & FUSE_CAP_READDIRPLUS) +- outarg.flags |= FUSE_DO_READDIRPLUS; +- if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) +- outarg.flags |= FUSE_READDIRPLUS_AUTO; +- if (se->conn.want & FUSE_CAP_ASYNC_DIO) +- outarg.flags |= FUSE_ASYNC_DIO; +- if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) +- outarg.flags |= FUSE_WRITEBACK_CACHE; +- if (se->conn.want & FUSE_CAP_POSIX_ACL) +- outarg.flags |= FUSE_POSIX_ACL; +- outarg.max_readahead = se->conn.max_readahead; +- outarg.max_write = se->conn.max_write; +- if (se->conn.proto_minor >= 13) { +- if (se->conn.max_background >= (1 << 16)) +- se->conn.max_background = (1 << 16) - 1; +- if (se->conn.congestion_threshold > se->conn.max_background) +- se->conn.congestion_threshold = se->conn.max_background; +- if (!se->conn.congestion_threshold) { +- se->conn.congestion_threshold = +- se->conn.max_background * 3 / 4; +- } +- +- outarg.max_background = se->conn.max_background; +- outarg.congestion_threshold = se->conn.congestion_threshold; +- } +- if (se->conn.proto_minor >= 23) +- outarg.time_gran = se->conn.time_gran; +- +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); +- fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); +- fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", +- outarg.max_readahead); +- fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); +- fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", +- outarg.max_background); 
+- fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", +- outarg.congestion_threshold); +- fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", +- outarg.time_gran); +- } +- if (arg->minor < 5) +- outargsize = FUSE_COMPAT_INIT_OUT_SIZE; +- else if (arg->minor < 23) +- outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; +- +- send_reply_ok(req, &outarg, outargsize); ++ } ++ if (se->conn.proto_minor >= 18) { ++ se->conn.capable |= FUSE_CAP_IOCTL_DIR; ++ } ++ ++ /* ++ * Default settings for modern filesystems. ++ * ++ * Most of these capabilities were disabled by default in ++ * libfuse2 for backwards compatibility reasons. In libfuse3, ++ * we can finally enable them by default (as long as they're ++ * supported by the kernel). ++ */ ++#define LL_SET_DEFAULT(cond, cap) \ ++ if ((cond) && (se->conn.capable & (cap))) \ ++ se->conn.want |= (cap) ++ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); ++ LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); ++ LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); ++ LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); ++ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); ++ LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); ++ LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); ++ LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); ++ LL_SET_DEFAULT(se->op.getlk && se->op.setlk, FUSE_CAP_POSIX_LOCKS); ++ LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); ++ LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); ++ LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, ++ FUSE_CAP_READDIRPLUS_AUTO); ++ se->conn.time_gran = 1; ++ ++ if (bufsize < FUSE_MIN_READ_BUFFER) { ++ fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", ++ bufsize); ++ bufsize = FUSE_MIN_READ_BUFFER; ++ } ++ se->bufsize = bufsize; ++ ++ if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) { ++ se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; ++ } ++ ++ se->got_init = 1; ++ if (se->op.init) { ++ se->op.init(se->userdata, &se->conn); ++ } ++ ++ if (se->conn.want & (~se->conn.capable)) { ++ 
fuse_log(FUSE_LOG_ERR, ++ "fuse: error: filesystem requested capabilities " ++ "0x%x that are not supported by kernel, aborting.\n", ++ se->conn.want & (~se->conn.capable)); ++ fuse_reply_err(req, EPROTO); ++ se->error = -EPROTO; ++ fuse_session_exit(se); ++ return; ++ } ++ ++ if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { ++ se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; ++ } ++ if (arg->flags & FUSE_MAX_PAGES) { ++ outarg.flags |= FUSE_MAX_PAGES; ++ outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; ++ } ++ ++ /* ++ * Always enable big writes, this is superseded ++ * by the max_write option ++ */ ++ outarg.flags |= FUSE_BIG_WRITES; ++ ++ if (se->conn.want & FUSE_CAP_ASYNC_READ) { ++ outarg.flags |= FUSE_ASYNC_READ; ++ } ++ if (se->conn.want & FUSE_CAP_POSIX_LOCKS) { ++ outarg.flags |= FUSE_POSIX_LOCKS; ++ } ++ if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) { ++ outarg.flags |= FUSE_ATOMIC_O_TRUNC; ++ } ++ if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) { ++ outarg.flags |= FUSE_EXPORT_SUPPORT; ++ } ++ if (se->conn.want & FUSE_CAP_DONT_MASK) { ++ outarg.flags |= FUSE_DONT_MASK; ++ } ++ if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) { ++ outarg.flags |= FUSE_FLOCK_LOCKS; ++ } ++ if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) { ++ outarg.flags |= FUSE_AUTO_INVAL_DATA; ++ } ++ if (se->conn.want & FUSE_CAP_READDIRPLUS) { ++ outarg.flags |= FUSE_DO_READDIRPLUS; ++ } ++ if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) { ++ outarg.flags |= FUSE_READDIRPLUS_AUTO; ++ } ++ if (se->conn.want & FUSE_CAP_ASYNC_DIO) { ++ outarg.flags |= FUSE_ASYNC_DIO; ++ } ++ if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) { ++ outarg.flags |= FUSE_WRITEBACK_CACHE; ++ } ++ if (se->conn.want & FUSE_CAP_POSIX_ACL) { ++ outarg.flags |= FUSE_POSIX_ACL; ++ } ++ outarg.max_readahead = se->conn.max_readahead; ++ outarg.max_write = se->conn.max_write; ++ if (se->conn.proto_minor >= 13) { ++ if (se->conn.max_background >= (1 << 16)) { ++ se->conn.max_background = (1 << 16) - 
1; ++ } ++ if (se->conn.congestion_threshold > se->conn.max_background) { ++ se->conn.congestion_threshold = se->conn.max_background; ++ } ++ if (!se->conn.congestion_threshold) { ++ se->conn.congestion_threshold = se->conn.max_background * 3 / 4; ++ } ++ ++ outarg.max_background = se->conn.max_background; ++ outarg.congestion_threshold = se->conn.congestion_threshold; ++ } ++ if (se->conn.proto_minor >= 23) { ++ outarg.time_gran = se->conn.time_gran; ++ } ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, ++ outarg.minor); ++ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); ++ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", ++ outarg.max_readahead); ++ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); ++ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", ++ outarg.max_background); ++ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", ++ outarg.congestion_threshold); ++ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); ++ } ++ if (arg->minor < 5) { ++ outargsize = FUSE_COMPAT_INIT_OUT_SIZE; ++ } else if (arg->minor < 23) { ++ outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; ++ } ++ ++ send_reply_ok(req, &outarg, outargsize); + } + + static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_session *se = req->se; ++ struct fuse_session *se = req->se; + +- (void) nodeid; +- (void) inarg; ++ (void)nodeid; ++ (void)inarg; + +- se->got_destroy = 1; +- if (se->op.destroy) +- se->op.destroy(se->userdata); ++ se->got_destroy = 1; ++ if (se->op.destroy) { ++ se->op.destroy(se->userdata); ++ } + +- send_reply_ok(req, NULL, 0); ++ send_reply_ok(req, NULL, 0); + } + + static void list_del_nreq(struct fuse_notify_req *nreq) + { +- struct fuse_notify_req *prev = nreq->prev; +- struct fuse_notify_req *next = nreq->next; +- prev->next = next; +- next->prev = prev; ++ struct fuse_notify_req *prev = nreq->prev; ++ struct fuse_notify_req *next = nreq->next; ++ prev->next = 
next; ++ next->prev = prev; + } + + static void list_add_nreq(struct fuse_notify_req *nreq, +- struct fuse_notify_req *next) ++ struct fuse_notify_req *next) + { +- struct fuse_notify_req *prev = next->prev; +- nreq->next = next; +- nreq->prev = prev; +- prev->next = nreq; +- next->prev = nreq; ++ struct fuse_notify_req *prev = next->prev; ++ nreq->next = next; ++ nreq->prev = prev; ++ prev->next = nreq; ++ next->prev = nreq; + } + + static void list_init_nreq(struct fuse_notify_req *nreq) + { +- nreq->next = nreq; +- nreq->prev = nreq; ++ nreq->next = nreq; ++ nreq->prev = nreq; + } + + static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg, const struct fuse_buf *buf) ++ const void *inarg, const struct fuse_buf *buf) + { +- struct fuse_session *se = req->se; +- struct fuse_notify_req *nreq; +- struct fuse_notify_req *head; ++ struct fuse_session *se = req->se; ++ struct fuse_notify_req *nreq; ++ struct fuse_notify_req *head; + +- pthread_mutex_lock(&se->lock); +- head = &se->notify_list; +- for (nreq = head->next; nreq != head; nreq = nreq->next) { +- if (nreq->unique == req->unique) { +- list_del_nreq(nreq); +- break; +- } +- } +- pthread_mutex_unlock(&se->lock); ++ pthread_mutex_lock(&se->lock); ++ head = &se->notify_list; ++ for (nreq = head->next; nreq != head; nreq = nreq->next) { ++ if (nreq->unique == req->unique) { ++ list_del_nreq(nreq); ++ break; ++ } ++ } ++ pthread_mutex_unlock(&se->lock); + +- if (nreq != head) +- nreq->reply(nreq, req, nodeid, inarg, buf); ++ if (nreq != head) { ++ nreq->reply(nreq, req, nodeid, inarg, buf); ++ } + } + + static int send_notify_iov(struct fuse_session *se, int notify_code, +- struct iovec *iov, int count) ++ struct iovec *iov, int count) + { +- struct fuse_out_header out; ++ struct fuse_out_header out; + +- if (!se->got_init) +- return -ENOTCONN; ++ if (!se->got_init) { ++ return -ENOTCONN; ++ } + +- out.unique = 0; +- out.error = notify_code; +- iov[0].iov_base = &out; +- iov[0].iov_len 
= sizeof(struct fuse_out_header); ++ out.unique = 0; ++ out.error = notify_code; ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); + +- return fuse_send_msg(se, NULL, iov, count); ++ return fuse_send_msg(se, NULL, iov, count); + } + + int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) + { +- if (ph != NULL) { +- struct fuse_notify_poll_wakeup_out outarg; +- struct iovec iov[2]; ++ if (ph != NULL) { ++ struct fuse_notify_poll_wakeup_out outarg; ++ struct iovec iov[2]; + +- outarg.kh = ph->kh; ++ outarg.kh = ph->kh; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); + +- return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); +- } else { +- return 0; +- } ++ return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); ++ } else { ++ return 0; ++ } + } + + int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, +- off_t off, off_t len) ++ off_t off, off_t len) + { +- struct fuse_notify_inval_inode_out outarg; +- struct iovec iov[2]; ++ struct fuse_notify_inval_inode_out outarg; ++ struct iovec iov[2]; ++ ++ if (!se) { ++ return -EINVAL; ++ } + +- if (!se) +- return -EINVAL; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { ++ return -ENOSYS; ++ } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) +- return -ENOSYS; +- +- outarg.ino = ino; +- outarg.off = off; +- outarg.len = len; ++ outarg.ino = ino; ++ outarg.off = off; ++ outarg.len = len; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); + +- return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); ++ return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); + } + + int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, +- const char *name, size_t namelen) ++ const char *name, size_t namelen) + { +- struct fuse_notify_inval_entry_out 
outarg; +- struct iovec iov[3]; ++ struct fuse_notify_inval_entry_out outarg; ++ struct iovec iov[3]; ++ ++ if (!se) { ++ return -EINVAL; ++ } + +- if (!se) +- return -EINVAL; +- +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) +- return -ENOSYS; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { ++ return -ENOSYS; ++ } + +- outarg.parent = parent; +- outarg.namelen = namelen; +- outarg.padding = 0; ++ outarg.parent = parent; ++ outarg.namelen = namelen; ++ outarg.padding = 0; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); +- iov[2].iov_base = (void *)name; +- iov[2].iov_len = namelen + 1; ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ iov[2].iov_base = (void *)name; ++ iov[2].iov_len = namelen + 1; + +- return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); ++ return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); + } + +-int fuse_lowlevel_notify_delete(struct fuse_session *se, +- fuse_ino_t parent, fuse_ino_t child, +- const char *name, size_t namelen) ++int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, ++ fuse_ino_t child, const char *name, ++ size_t namelen) + { +- struct fuse_notify_delete_out outarg; +- struct iovec iov[3]; ++ struct fuse_notify_delete_out outarg; ++ struct iovec iov[3]; + +- if (!se) +- return -EINVAL; ++ if (!se) { ++ return -EINVAL; ++ } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) +- return -ENOSYS; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) { ++ return -ENOSYS; ++ } + +- outarg.parent = parent; +- outarg.child = child; +- outarg.namelen = namelen; +- outarg.padding = 0; ++ outarg.parent = parent; ++ outarg.child = child; ++ outarg.namelen = namelen; ++ outarg.padding = 0; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); +- iov[2].iov_base = (void *)name; +- iov[2].iov_len = namelen + 1; ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ iov[2].iov_base = 
(void *)name; ++ iov[2].iov_len = namelen + 1; + +- return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); ++ return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); + } + + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags) ++ off_t offset, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags) + { +- struct fuse_out_header out; +- struct fuse_notify_store_out outarg; +- struct iovec iov[3]; +- size_t size = fuse_buf_size(bufv); +- int res; ++ struct fuse_out_header out; ++ struct fuse_notify_store_out outarg; ++ struct iovec iov[3]; ++ size_t size = fuse_buf_size(bufv); ++ int res; + +- if (!se) +- return -EINVAL; ++ if (!se) { ++ return -EINVAL; ++ } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) +- return -ENOSYS; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { ++ return -ENOSYS; ++ } + +- out.unique = 0; +- out.error = FUSE_NOTIFY_STORE; ++ out.unique = 0; ++ out.error = FUSE_NOTIFY_STORE; + +- outarg.nodeid = ino; +- outarg.offset = offset; +- outarg.size = size; +- outarg.padding = 0; ++ outarg.nodeid = ino; ++ outarg.offset = offset; ++ outarg.size = size; ++ outarg.padding = 0; + +- iov[0].iov_base = &out; +- iov[0].iov_len = sizeof(out); +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(out); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); + +- res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); +- if (res > 0) +- res = -res; ++ res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); ++ if (res > 0) { ++ res = -res; ++ } + +- return res; ++ return res; + } + + struct fuse_retrieve_req { +- struct fuse_notify_req nreq; +- void *cookie; ++ struct fuse_notify_req nreq; ++ void *cookie; + }; + +-static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, +- fuse_req_t req, fuse_ino_t ino, +- const void *inarg, +- const struct 
fuse_buf *ibuf) +-{ +- struct fuse_session *se = req->se; +- struct fuse_retrieve_req *rreq = +- container_of(nreq, struct fuse_retrieve_req, nreq); +- const struct fuse_notify_retrieve_in *arg = inarg; +- struct fuse_bufvec bufv = { +- .buf[0] = *ibuf, +- .count = 1, +- }; +- +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) +- bufv.buf[0].mem = PARAM(arg); +- +- bufv.buf[0].size -= sizeof(struct fuse_in_header) + +- sizeof(struct fuse_notify_retrieve_in); +- +- if (bufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); +- fuse_reply_none(req); +- goto out; +- } +- bufv.buf[0].size = arg->size; +- +- if (se->op.retrieve_reply) { +- se->op.retrieve_reply(req, rreq->cookie, ino, +- arg->offset, &bufv); +- } else { +- fuse_reply_none(req); +- } ++static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, fuse_req_t req, ++ fuse_ino_t ino, const void *inarg, ++ const struct fuse_buf *ibuf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_retrieve_req *rreq = ++ container_of(nreq, struct fuse_retrieve_req, nreq); ++ const struct fuse_notify_retrieve_in *arg = inarg; ++ struct fuse_bufvec bufv = { ++ .buf[0] = *ibuf, ++ .count = 1, ++ }; ++ ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { ++ bufv.buf[0].mem = PARAM(arg); ++ } ++ ++ bufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + sizeof(struct fuse_notify_retrieve_in); ++ ++ if (bufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); ++ fuse_reply_none(req); ++ goto out; ++ } ++ bufv.buf[0].size = arg->size; ++ ++ if (se->op.retrieve_reply) { ++ se->op.retrieve_reply(req, rreq->cookie, ino, arg->offset, &bufv); ++ } else { ++ fuse_reply_none(req); ++ } + out: +- free(rreq); ++ free(rreq); + } + + int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +- size_t size, off_t offset, void *cookie) ++ size_t size, off_t offset, void *cookie) + { +- struct fuse_notify_retrieve_out outarg; 
+- struct iovec iov[2]; +- struct fuse_retrieve_req *rreq; +- int err; ++ struct fuse_notify_retrieve_out outarg; ++ struct iovec iov[2]; ++ struct fuse_retrieve_req *rreq; ++ int err; + +- if (!se) +- return -EINVAL; ++ if (!se) { ++ return -EINVAL; ++ } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) +- return -ENOSYS; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { ++ return -ENOSYS; ++ } + +- rreq = malloc(sizeof(*rreq)); +- if (rreq == NULL) +- return -ENOMEM; ++ rreq = malloc(sizeof(*rreq)); ++ if (rreq == NULL) { ++ return -ENOMEM; ++ } + +- pthread_mutex_lock(&se->lock); +- rreq->cookie = cookie; +- rreq->nreq.unique = se->notify_ctr++; +- rreq->nreq.reply = fuse_ll_retrieve_reply; +- list_add_nreq(&rreq->nreq, &se->notify_list); +- pthread_mutex_unlock(&se->lock); ++ pthread_mutex_lock(&se->lock); ++ rreq->cookie = cookie; ++ rreq->nreq.unique = se->notify_ctr++; ++ rreq->nreq.reply = fuse_ll_retrieve_reply; ++ list_add_nreq(&rreq->nreq, &se->notify_list); ++ pthread_mutex_unlock(&se->lock); + +- outarg.notify_unique = rreq->nreq.unique; +- outarg.nodeid = ino; +- outarg.offset = offset; +- outarg.size = size; +- outarg.padding = 0; ++ outarg.notify_unique = rreq->nreq.unique; ++ outarg.nodeid = ino; ++ outarg.offset = offset; ++ outarg.size = size; ++ outarg.padding = 0; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); + +- err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); +- if (err) { +- pthread_mutex_lock(&se->lock); +- list_del_nreq(&rreq->nreq); +- pthread_mutex_unlock(&se->lock); +- free(rreq); +- } ++ err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); ++ if (err) { ++ pthread_mutex_lock(&se->lock); ++ list_del_nreq(&rreq->nreq); ++ pthread_mutex_unlock(&se->lock); ++ free(rreq); ++ } + +- return err; ++ return err; + } + + void *fuse_req_userdata(fuse_req_t req) + { +- return req->se->userdata; ++ return 
req->se->userdata; + } + + const struct fuse_ctx *fuse_req_ctx(fuse_req_t req) + { +- return &req->ctx; ++ return &req->ctx; + } + + void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, +- void *data) ++ void *data) + { +- pthread_mutex_lock(&req->lock); +- pthread_mutex_lock(&req->se->lock); +- req->u.ni.func = func; +- req->u.ni.data = data; +- pthread_mutex_unlock(&req->se->lock); +- if (req->interrupted && func) +- func(req, data); +- pthread_mutex_unlock(&req->lock); ++ pthread_mutex_lock(&req->lock); ++ pthread_mutex_lock(&req->se->lock); ++ req->u.ni.func = func; ++ req->u.ni.data = data; ++ pthread_mutex_unlock(&req->se->lock); ++ if (req->interrupted && func) { ++ func(req, data); ++ } ++ pthread_mutex_unlock(&req->lock); + } + + int fuse_req_interrupted(fuse_req_t req) + { +- int interrupted; ++ int interrupted; + +- pthread_mutex_lock(&req->se->lock); +- interrupted = req->interrupted; +- pthread_mutex_unlock(&req->se->lock); ++ pthread_mutex_lock(&req->se->lock); ++ interrupted = req->interrupted; ++ pthread_mutex_unlock(&req->se->lock); + +- return interrupted; ++ return interrupted; + } + + static struct { +- void (*func)(fuse_req_t, fuse_ino_t, const void *); +- const char *name; ++ void (*func)(fuse_req_t, fuse_ino_t, const void *); ++ const char *name; + } fuse_ll_ops[] = { +- [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, +- [FUSE_FORGET] = { do_forget, "FORGET" }, +- [FUSE_GETATTR] = { do_getattr, "GETATTR" }, +- [FUSE_SETATTR] = { do_setattr, "SETATTR" }, +- [FUSE_READLINK] = { do_readlink, "READLINK" }, +- [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, +- [FUSE_MKNOD] = { do_mknod, "MKNOD" }, +- [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, +- [FUSE_UNLINK] = { do_unlink, "UNLINK" }, +- [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, +- [FUSE_RENAME] = { do_rename, "RENAME" }, +- [FUSE_LINK] = { do_link, "LINK" }, +- [FUSE_OPEN] = { do_open, "OPEN" }, +- [FUSE_READ] = { do_read, "READ" }, +- [FUSE_WRITE] = { do_write, "WRITE" }, +- [FUSE_STATFS] = { 
do_statfs, "STATFS" }, +- [FUSE_RELEASE] = { do_release, "RELEASE" }, +- [FUSE_FSYNC] = { do_fsync, "FSYNC" }, +- [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, +- [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, +- [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, +- [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, +- [FUSE_FLUSH] = { do_flush, "FLUSH" }, +- [FUSE_INIT] = { do_init, "INIT" }, +- [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, +- [FUSE_READDIR] = { do_readdir, "READDIR" }, +- [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, +- [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, +- [FUSE_GETLK] = { do_getlk, "GETLK" }, +- [FUSE_SETLK] = { do_setlk, "SETLK" }, +- [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, +- [FUSE_ACCESS] = { do_access, "ACCESS" }, +- [FUSE_CREATE] = { do_create, "CREATE" }, +- [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, +- [FUSE_BMAP] = { do_bmap, "BMAP" }, +- [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, +- [FUSE_POLL] = { do_poll, "POLL" }, +- [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, +- [FUSE_DESTROY] = { do_destroy, "DESTROY" }, +- [FUSE_NOTIFY_REPLY] = { (void *) 1, "NOTIFY_REPLY" }, +- [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, +- [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS"}, +- [FUSE_RENAME2] = { do_rename2, "RENAME2" }, +- [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, +- [FUSE_LSEEK] = { do_lseek, "LSEEK" }, ++ [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, ++ [FUSE_FORGET] = { do_forget, "FORGET" }, ++ [FUSE_GETATTR] = { do_getattr, "GETATTR" }, ++ [FUSE_SETATTR] = { do_setattr, "SETATTR" }, ++ [FUSE_READLINK] = { do_readlink, "READLINK" }, ++ [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, ++ [FUSE_MKNOD] = { do_mknod, "MKNOD" }, ++ [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, ++ [FUSE_UNLINK] = { do_unlink, "UNLINK" }, ++ [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, ++ [FUSE_RENAME] = { do_rename, "RENAME" }, ++ [FUSE_LINK] = { do_link, "LINK" }, ++ [FUSE_OPEN] = { do_open, "OPEN" }, ++ 
[FUSE_READ] = { do_read, "READ" }, ++ [FUSE_WRITE] = { do_write, "WRITE" }, ++ [FUSE_STATFS] = { do_statfs, "STATFS" }, ++ [FUSE_RELEASE] = { do_release, "RELEASE" }, ++ [FUSE_FSYNC] = { do_fsync, "FSYNC" }, ++ [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, ++ [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, ++ [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, ++ [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, ++ [FUSE_FLUSH] = { do_flush, "FLUSH" }, ++ [FUSE_INIT] = { do_init, "INIT" }, ++ [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, ++ [FUSE_READDIR] = { do_readdir, "READDIR" }, ++ [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, ++ [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, ++ [FUSE_GETLK] = { do_getlk, "GETLK" }, ++ [FUSE_SETLK] = { do_setlk, "SETLK" }, ++ [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, ++ [FUSE_ACCESS] = { do_access, "ACCESS" }, ++ [FUSE_CREATE] = { do_create, "CREATE" }, ++ [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, ++ [FUSE_BMAP] = { do_bmap, "BMAP" }, ++ [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, ++ [FUSE_POLL] = { do_poll, "POLL" }, ++ [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, ++ [FUSE_DESTROY] = { do_destroy, "DESTROY" }, ++ [FUSE_NOTIFY_REPLY] = { (void *)1, "NOTIFY_REPLY" }, ++ [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, ++ [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" }, ++ [FUSE_RENAME2] = { do_rename2, "RENAME2" }, ++ [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, ++ [FUSE_LSEEK] = { do_lseek, "LSEEK" }, + }; + + #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) + + static const char *opname(enum fuse_opcode opcode) + { +- if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) +- return "???"; +- else +- return fuse_ll_ops[opcode].name; ++ if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) { ++ return "???"; ++ } else { ++ return fuse_ll_ops[opcode].name; ++ } + } + + void fuse_session_process_buf(struct fuse_session *se, +- const struct fuse_buf *buf) ++ 
const struct fuse_buf *buf) + { +- fuse_session_process_buf_int(se, buf, NULL); ++ fuse_session_process_buf_int(se, buf, NULL); + } + + void fuse_session_process_buf_int(struct fuse_session *se, +- const struct fuse_buf *buf, struct fuse_chan *ch) +-{ +- struct fuse_in_header *in; +- const void *inarg; +- struct fuse_req *req; +- int err; +- +- in = buf->mem; +- +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, +- "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", +- (unsigned long long) in->unique, +- opname((enum fuse_opcode) in->opcode), in->opcode, +- (unsigned long long) in->nodeid, buf->size, in->pid); +- } +- +- req = fuse_ll_alloc_req(se); +- if (req == NULL) { +- struct fuse_out_header out = { +- .unique = in->unique, +- .error = -ENOMEM, +- }; +- struct iovec iov = { +- .iov_base = &out, +- .iov_len = sizeof(struct fuse_out_header), +- }; +- +- fuse_send_msg(se, ch, &iov, 1); +- return; +- } +- +- req->unique = in->unique; +- req->ctx.uid = in->uid; +- req->ctx.gid = in->gid; +- req->ctx.pid = in->pid; +- req->ch = ch; +- +- err = EIO; +- if (!se->got_init) { +- enum fuse_opcode expected; +- +- expected = se->cuse_data ? 
CUSE_INIT : FUSE_INIT; +- if (in->opcode != expected) +- goto reply_err; +- } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) +- goto reply_err; +- +- err = EACCES; +- /* Implement -o allow_root */ +- if (se->deny_others && in->uid != se->owner && in->uid != 0 && +- in->opcode != FUSE_INIT && in->opcode != FUSE_READ && +- in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && +- in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && +- in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && +- in->opcode != FUSE_NOTIFY_REPLY && +- in->opcode != FUSE_READDIRPLUS) +- goto reply_err; +- +- err = ENOSYS; +- if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) +- goto reply_err; +- if (in->opcode != FUSE_INTERRUPT) { +- struct fuse_req *intr; +- pthread_mutex_lock(&se->lock); +- intr = check_interrupt(se, req); +- list_add_req(req, &se->list); +- pthread_mutex_unlock(&se->lock); +- if (intr) +- fuse_reply_err(intr, EAGAIN); +- } +- +- inarg = (void *) &in[1]; +- if (in->opcode == FUSE_WRITE && se->op.write_buf) +- do_write_buf(req, in->nodeid, inarg, buf); +- else if (in->opcode == FUSE_NOTIFY_REPLY) +- do_notify_reply(req, in->nodeid, inarg, buf); +- else +- fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); +- +- return; ++ const struct fuse_buf *buf, ++ struct fuse_chan *ch) ++{ ++ struct fuse_in_header *in; ++ const void *inarg; ++ struct fuse_req *req; ++ int err; ++ ++ in = buf->mem; ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, " ++ "pid: %u\n", ++ (unsigned long long)in->unique, ++ opname((enum fuse_opcode)in->opcode), in->opcode, ++ (unsigned long long)in->nodeid, buf->size, in->pid); ++ } ++ ++ req = fuse_ll_alloc_req(se); ++ if (req == NULL) { ++ struct fuse_out_header out = { ++ .unique = in->unique, ++ .error = -ENOMEM, ++ }; ++ struct iovec iov = { ++ .iov_base = &out, ++ .iov_len = sizeof(struct fuse_out_header), ++ }; ++ ++ fuse_send_msg(se, ch, 
&iov, 1); ++ return; ++ } ++ ++ req->unique = in->unique; ++ req->ctx.uid = in->uid; ++ req->ctx.gid = in->gid; ++ req->ctx.pid = in->pid; ++ req->ch = ch; ++ ++ err = EIO; ++ if (!se->got_init) { ++ enum fuse_opcode expected; ++ ++ expected = se->cuse_data ? CUSE_INIT : FUSE_INIT; ++ if (in->opcode != expected) { ++ goto reply_err; ++ } ++ } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) { ++ goto reply_err; ++ } ++ ++ err = EACCES; ++ /* Implement -o allow_root */ ++ if (se->deny_others && in->uid != se->owner && in->uid != 0 && ++ in->opcode != FUSE_INIT && in->opcode != FUSE_READ && ++ in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && ++ in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && ++ in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && ++ in->opcode != FUSE_NOTIFY_REPLY && in->opcode != FUSE_READDIRPLUS) { ++ goto reply_err; ++ } ++ ++ err = ENOSYS; ++ if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) { ++ goto reply_err; ++ } ++ if (in->opcode != FUSE_INTERRUPT) { ++ struct fuse_req *intr; ++ pthread_mutex_lock(&se->lock); ++ intr = check_interrupt(se, req); ++ list_add_req(req, &se->list); ++ pthread_mutex_unlock(&se->lock); ++ if (intr) { ++ fuse_reply_err(intr, EAGAIN); ++ } ++ } ++ ++ inarg = (void *)&in[1]; ++ if (in->opcode == FUSE_WRITE && se->op.write_buf) { ++ do_write_buf(req, in->nodeid, inarg, buf); ++ } else if (in->opcode == FUSE_NOTIFY_REPLY) { ++ do_notify_reply(req, in->nodeid, inarg, buf); ++ } else { ++ fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); ++ } ++ ++ return; + + reply_err: +- fuse_reply_err(req, err); ++ fuse_reply_err(req, err); + } + +-#define LL_OPTION(n,o,v) \ +- { n, offsetof(struct fuse_session, o), v } ++#define LL_OPTION(n, o, v) \ ++ { \ ++ n, offsetof(struct fuse_session, o), v \ ++ } + + static const struct fuse_opt fuse_ll_opts[] = { +- LL_OPTION("debug", debug, 1), +- LL_OPTION("-d", debug, 1), +- LL_OPTION("--debug", debug, 1), +- 
LL_OPTION("allow_root", deny_others, 1), +- FUSE_OPT_END ++ LL_OPTION("debug", debug, 1), LL_OPTION("-d", debug, 1), ++ LL_OPTION("--debug", debug, 1), LL_OPTION("allow_root", deny_others, 1), ++ FUSE_OPT_END + }; + + void fuse_lowlevel_version(void) + { +- printf("using FUSE kernel interface version %i.%i\n", +- FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); ++ printf("using FUSE kernel interface version %i.%i\n", FUSE_KERNEL_VERSION, ++ FUSE_KERNEL_MINOR_VERSION); + } + + void fuse_lowlevel_help(void) + { +- /* These are not all options, but the ones that are +- potentially of interest to an end-user */ +- printf( +-" -o allow_root allow access by root\n" +-); ++ /* ++ * These are not all options, but the ones that are ++ * potentially of interest to an end-user ++ */ ++ printf(" -o allow_root allow access by root\n"); + } + + void fuse_session_destroy(struct fuse_session *se) + { +- if (se->got_init && !se->got_destroy) { +- if (se->op.destroy) +- se->op.destroy(se->userdata); +- } +- pthread_mutex_destroy(&se->lock); +- free(se->cuse_data); +- if (se->fd != -1) +- close(se->fd); +- free(se); ++ if (se->got_init && !se->got_destroy) { ++ if (se->op.destroy) { ++ se->op.destroy(se->userdata); ++ } ++ } ++ pthread_mutex_destroy(&se->lock); ++ free(se->cuse_data); ++ if (se->fd != -1) { ++ close(se->fd); ++ } ++ free(se); + } + + + struct fuse_session *fuse_session_new(struct fuse_args *args, +- const struct fuse_lowlevel_ops *op, +- size_t op_size, void *userdata) +-{ +- struct fuse_session *se; +- +- if (sizeof(struct fuse_lowlevel_ops) < op_size) { +- fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); +- op_size = sizeof(struct fuse_lowlevel_ops); +- } +- +- if (args->argc == 0) { +- fuse_log(FUSE_LOG_ERR, "fuse: empty argv passed to fuse_session_new().\n"); +- return NULL; +- } +- +- se = (struct fuse_session *) calloc(1, sizeof(struct fuse_session)); +- if (se == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: failed to 
allocate fuse object\n"); +- goto out1; +- } +- se->fd = -1; +- se->conn.max_write = UINT_MAX; +- se->conn.max_readahead = UINT_MAX; +- +- /* Parse options */ +- if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) +- goto out2; +- if(args->argc == 1 && +- args->argv[0][0] == '-') { +- fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " +- "will be ignored\n"); +- } else if (args->argc != 1) { +- int i; +- fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); +- for(i = 1; i < args->argc-1; i++) +- fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); +- fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); +- goto out4; +- } +- +- se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + +- FUSE_BUFFER_HEADER_SIZE; +- +- list_init_req(&se->list); +- list_init_req(&se->interrupts); +- list_init_nreq(&se->notify_list); +- se->notify_ctr = 1; +- fuse_mutex_init(&se->lock); +- +- memcpy(&se->op, op, op_size); +- se->owner = getuid(); +- se->userdata = userdata; +- +- return se; ++ const struct fuse_lowlevel_ops *op, ++ size_t op_size, void *userdata) ++{ ++ struct fuse_session *se; ++ ++ if (sizeof(struct fuse_lowlevel_ops) < op_size) { ++ fuse_log( ++ FUSE_LOG_ERR, ++ "fuse: warning: library too old, some operations may not work\n"); ++ op_size = sizeof(struct fuse_lowlevel_ops); ++ } ++ ++ if (args->argc == 0) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: empty argv passed to fuse_session_new().\n"); ++ return NULL; ++ } ++ ++ se = (struct fuse_session *)calloc(1, sizeof(struct fuse_session)); ++ if (se == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n"); ++ goto out1; ++ } ++ se->fd = -1; ++ se->conn.max_write = UINT_MAX; ++ se->conn.max_readahead = UINT_MAX; ++ ++ /* Parse options */ ++ if (fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) { ++ goto out2; ++ } ++ if (args->argc == 1 && args->argv[0][0] == '-') { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: warning: argv[0] looks like an option, but " ++ "will be ignored\n"); ++ } else if 
(args->argc != 1) { ++ int i; ++ fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); ++ for (i = 1; i < args->argc - 1; i++) { ++ fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); ++ } ++ fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); ++ goto out4; ++ } ++ ++ se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE; ++ ++ list_init_req(&se->list); ++ list_init_req(&se->interrupts); ++ list_init_nreq(&se->notify_list); ++ se->notify_ctr = 1; ++ fuse_mutex_init(&se->lock); ++ ++ memcpy(&se->op, op, op_size); ++ se->owner = getuid(); ++ se->userdata = userdata; ++ ++ return se; + + out4: +- fuse_opt_free_args(args); ++ fuse_opt_free_args(args); + out2: +- free(se); ++ free(se); + out1: +- return NULL; ++ return NULL; + } + + int fuse_session_mount(struct fuse_session *se, const char *mountpoint) + { +- int fd; +- +- /* +- * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos +- * would ensue. +- */ +- do { +- fd = open("/dev/null", O_RDWR); +- if (fd > 2) +- close(fd); +- } while (fd >= 0 && fd <= 2); +- +- /* +- * To allow FUSE daemons to run without privileges, the caller may open +- * /dev/fuse before launching the file system and pass on the file +- * descriptor by specifying /dev/fd/N as the mount point. Note that the +- * parent process takes care of performing the mount in this case. +- */ +- fd = fuse_mnt_parse_fuse_fd(mountpoint); +- if (fd != -1) { +- if (fcntl(fd, F_GETFD) == -1) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: Invalid file descriptor /dev/fd/%u\n", +- fd); +- return -1; +- } +- se->fd = fd; +- return 0; +- } +- +- /* Open channel */ +- fd = fuse_kern_mount(mountpoint, se->mo); +- if (fd == -1) +- return -1; +- se->fd = fd; +- +- /* Save mountpoint */ +- se->mountpoint = strdup(mountpoint); +- if (se->mountpoint == NULL) +- goto error_out; +- +- return 0; ++ int fd; ++ ++ /* ++ * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos ++ * would ensue. 
++ */ ++ do { ++ fd = open("/dev/null", O_RDWR); ++ if (fd > 2) { ++ close(fd); ++ } ++ } while (fd >= 0 && fd <= 2); ++ ++ /* ++ * To allow FUSE daemons to run without privileges, the caller may open ++ * /dev/fuse before launching the file system and pass on the file ++ * descriptor by specifying /dev/fd/N as the mount point. Note that the ++ * parent process takes care of performing the mount in this case. ++ */ ++ fd = fuse_mnt_parse_fuse_fd(mountpoint); ++ if (fd != -1) { ++ if (fcntl(fd, F_GETFD) == -1) { ++ fuse_log(FUSE_LOG_ERR, "fuse: Invalid file descriptor /dev/fd/%u\n", ++ fd); ++ return -1; ++ } ++ se->fd = fd; ++ return 0; ++ } ++ ++ /* Open channel */ ++ fd = fuse_kern_mount(mountpoint, se->mo); ++ if (fd == -1) { ++ return -1; ++ } ++ se->fd = fd; ++ ++ /* Save mountpoint */ ++ se->mountpoint = strdup(mountpoint); ++ if (se->mountpoint == NULL) { ++ goto error_out; ++ } ++ ++ return 0; + + error_out: +- fuse_kern_unmount(mountpoint, fd); +- return -1; ++ fuse_kern_unmount(mountpoint, fd); ++ return -1; + } + + int fuse_session_fd(struct fuse_session *se) + { +- return se->fd; ++ return se->fd; + } + + void fuse_session_unmount(struct fuse_session *se) +@@ -2384,61 +2519,66 @@ void fuse_session_unmount(struct fuse_session *se) + #ifdef linux + int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) + { +- char *buf; +- size_t bufsize = 1024; +- char path[128]; +- int ret; +- int fd; +- unsigned long pid = req->ctx.pid; +- char *s; ++ char *buf; ++ size_t bufsize = 1024; ++ char path[128]; ++ int ret; ++ int fd; ++ unsigned long pid = req->ctx.pid; ++ char *s; + +- sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); ++ sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); + + retry: +- buf = malloc(bufsize); +- if (buf == NULL) +- return -ENOMEM; +- +- ret = -EIO; +- fd = open(path, O_RDONLY); +- if (fd == -1) +- goto out_free; +- +- ret = read(fd, buf, bufsize); +- close(fd); +- if (ret < 0) { +- ret = -EIO; +- goto out_free; +- } +- +- if 
((size_t)ret == bufsize) { +- free(buf); +- bufsize *= 4; +- goto retry; +- } +- +- ret = -EIO; +- s = strstr(buf, "\nGroups:"); +- if (s == NULL) +- goto out_free; +- +- s += 8; +- ret = 0; +- while (1) { +- char *end; +- unsigned long val = strtoul(s, &end, 0); +- if (end == s) +- break; +- +- s = end; +- if (ret < size) +- list[ret] = val; +- ret++; +- } ++ buf = malloc(bufsize); ++ if (buf == NULL) { ++ return -ENOMEM; ++ } ++ ++ ret = -EIO; ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ goto out_free; ++ } ++ ++ ret = read(fd, buf, bufsize); ++ close(fd); ++ if (ret < 0) { ++ ret = -EIO; ++ goto out_free; ++ } ++ ++ if ((size_t)ret == bufsize) { ++ free(buf); ++ bufsize *= 4; ++ goto retry; ++ } ++ ++ ret = -EIO; ++ s = strstr(buf, "\nGroups:"); ++ if (s == NULL) { ++ goto out_free; ++ } ++ ++ s += 8; ++ ret = 0; ++ while (1) { ++ char *end; ++ unsigned long val = strtoul(s, &end, 0); ++ if (end == s) { ++ break; ++ } ++ ++ s = end; ++ if (ret < size) { ++ list[ret] = val; ++ } ++ ret++; ++ } + + out_free: +- free(buf); +- return ret; ++ free(buf); ++ return ret; + } + #else /* linux */ + /* +@@ -2446,23 +2586,25 @@ out_free: + */ + int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) + { +- (void) req; (void) size; (void) list; +- return -ENOSYS; ++ (void)req; ++ (void)size; ++ (void)list; ++ return -ENOSYS; + } + #endif + + void fuse_session_exit(struct fuse_session *se) + { +- se->exited = 1; ++ se->exited = 1; + } + + void fuse_session_reset(struct fuse_session *se) + { +- se->exited = 0; +- se->error = 0; ++ se->exited = 0; ++ se->error = 0; + } + + int fuse_session_exited(struct fuse_session *se) + { +- return se->exited; ++ return se->exited; + } +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 6b1adfc..adb9054 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1,15 +1,16 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- 
This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. ++ */ + + #ifndef FUSE_LOWLEVEL_H_ + #define FUSE_LOWLEVEL_H_ + +-/** @file ++/** ++ * @file + * + * Low level API + * +@@ -24,16 +25,16 @@ + + #include "fuse_common.h" + +-#include + #include +-#include + #include + #include ++#include + #include ++#include + +-/* ----------------------------------------------------------- * +- * Miscellaneous definitions * +- * ----------------------------------------------------------- */ ++/* ++ * Miscellaneous definitions ++ */ + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -53,47 +54,54 @@ struct fuse_session; + + /** Directory entry parameters supplied to fuse_reply_entry() */ + struct fuse_entry_param { +- /** Unique inode number +- * +- * In lookup, zero means negative entry (from version 2.5) +- * Returning ENOENT also means negative entry, but by setting zero +- * ino the kernel may cache negative entries for entry_timeout +- * seconds. +- */ +- fuse_ino_t ino; +- +- /** Generation number for this entry. +- * +- * If the file system will be exported over NFS, the +- * ino/generation pairs need to be unique over the file +- * system's lifetime (rather than just the mount time). So if +- * the file system reuses an inode after it has been deleted, +- * it must assign a new, previously unused generation number +- * to the inode at the same time. +- * +- */ +- uint64_t generation; +- +- /** Inode attributes. +- * +- * Even if attr_timeout == 0, attr must be correct. For example, +- * for open(), FUSE uses attr.st_size from lookup() to determine +- * how many bytes to request. If this value is not correct, +- * incorrect data will be returned. 
+- */ +- struct stat attr; +- +- /** Validity timeout (in seconds) for inode attributes. If +- attributes only change as a result of requests that come +- through the kernel, this should be set to a very large +- value. */ +- double attr_timeout; +- +- /** Validity timeout (in seconds) for the name. If directory +- entries are changed/deleted only as a result of requests +- that come through the kernel, this should be set to a very +- large value. */ +- double entry_timeout; ++ /** ++ * Unique inode number ++ * ++ * In lookup, zero means negative entry (from version 2.5) ++ * Returning ENOENT also means negative entry, but by setting zero ++ * ino the kernel may cache negative entries for entry_timeout ++ * seconds. ++ */ ++ fuse_ino_t ino; ++ ++ /** ++ * Generation number for this entry. ++ * ++ * If the file system will be exported over NFS, the ++ * ino/generation pairs need to be unique over the file ++ * system's lifetime (rather than just the mount time). So if ++ * the file system reuses an inode after it has been deleted, ++ * it must assign a new, previously unused generation number ++ * to the inode at the same time. ++ * ++ */ ++ uint64_t generation; ++ ++ /** ++ * Inode attributes. ++ * ++ * Even if attr_timeout == 0, attr must be correct. For example, ++ * for open(), FUSE uses attr.st_size from lookup() to determine ++ * how many bytes to request. If this value is not correct, ++ * incorrect data will be returned. ++ */ ++ struct stat attr; ++ ++ /** ++ * Validity timeout (in seconds) for inode attributes. If ++ * attributes only change as a result of requests that come ++ * through the kernel, this should be set to a very large ++ * value. ++ */ ++ double attr_timeout; ++ ++ /** ++ * Validity timeout (in seconds) for the name. If directory ++ * entries are changed/deleted only as a result of requests ++ * that come through the kernel, this should be set to a very ++ * large value. 
++ */ ++ double entry_timeout; + }; + + /** +@@ -105,38 +113,38 @@ struct fuse_entry_param { + * there is no valid uid/pid/gid that could be reported. + */ + struct fuse_ctx { +- /** User ID of the calling process */ +- uid_t uid; ++ /** User ID of the calling process */ ++ uid_t uid; + +- /** Group ID of the calling process */ +- gid_t gid; ++ /** Group ID of the calling process */ ++ gid_t gid; + +- /** Thread ID of the calling process */ +- pid_t pid; ++ /** Thread ID of the calling process */ ++ pid_t pid; + +- /** Umask of the calling process */ +- mode_t umask; ++ /** Umask of the calling process */ ++ mode_t umask; + }; + + struct fuse_forget_data { +- fuse_ino_t ino; +- uint64_t nlookup; ++ fuse_ino_t ino; ++ uint64_t nlookup; + }; + + /* 'to_set' flags in setattr */ +-#define FUSE_SET_ATTR_MODE (1 << 0) +-#define FUSE_SET_ATTR_UID (1 << 1) +-#define FUSE_SET_ATTR_GID (1 << 2) +-#define FUSE_SET_ATTR_SIZE (1 << 3) +-#define FUSE_SET_ATTR_ATIME (1 << 4) +-#define FUSE_SET_ATTR_MTIME (1 << 5) +-#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) +-#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) +-#define FUSE_SET_ATTR_CTIME (1 << 10) +- +-/* ----------------------------------------------------------- * +- * Request methods and replies * +- * ----------------------------------------------------------- */ ++#define FUSE_SET_ATTR_MODE (1 << 0) ++#define FUSE_SET_ATTR_UID (1 << 1) ++#define FUSE_SET_ATTR_GID (1 << 2) ++#define FUSE_SET_ATTR_SIZE (1 << 3) ++#define FUSE_SET_ATTR_ATIME (1 << 4) ++#define FUSE_SET_ATTR_MTIME (1 << 5) ++#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) ++#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) ++#define FUSE_SET_ATTR_CTIME (1 << 10) ++ ++/* ++ * Request methods and replies ++ */ + + /** + * Low level filesystem operations +@@ -166,1075 +174,1069 @@ struct fuse_forget_data { + * this file will not be called. 
+ */ + struct fuse_lowlevel_ops { +- /** +- * Initialize filesystem +- * +- * This function is called when libfuse establishes +- * communication with the FUSE kernel module. The file system +- * should use this module to inspect and/or modify the +- * connection parameters provided in the `conn` structure. +- * +- * Note that some parameters may be overwritten by options +- * passed to fuse_session_new() which take precedence over the +- * values set in this handler. +- * +- * There's no reply to this function +- * +- * @param userdata the user data passed to fuse_session_new() +- */ +- void (*init) (void *userdata, struct fuse_conn_info *conn); +- +- /** +- * Clean up filesystem. +- * +- * Called on filesystem exit. When this method is called, the +- * connection to the kernel may be gone already, so that eg. calls +- * to fuse_lowlevel_notify_* will fail. +- * +- * There's no reply to this function +- * +- * @param userdata the user data passed to fuse_session_new() +- */ +- void (*destroy) (void *userdata); +- +- /** +- * Look up a directory entry by name and get its attributes. +- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name the name to look up +- */ +- void (*lookup) (fuse_req_t req, fuse_ino_t parent, const char *name); +- +- /** +- * Forget about an inode +- * +- * This function is called when the kernel removes an inode +- * from its internal caches. +- * +- * The inode's lookup count increases by one for every call to +- * fuse_reply_entry and fuse_reply_create. The nlookup parameter +- * indicates by how much the lookup count should be decreased. +- * +- * Inodes with a non-zero lookup count may receive request from +- * the kernel even after calls to unlink, rmdir or (when +- * overwriting an existing file) rename. 
Filesystems must handle +- * such requests properly and it is recommended to defer removal +- * of the inode until the lookup count reaches zero. Calls to +- * unlink, rmdir or rename will be followed closely by forget +- * unless the file or directory is open, in which case the +- * kernel issues forget only after the release or releasedir +- * calls. +- * +- * Note that if a file system will be exported over NFS the +- * inodes lifetime must extend even beyond forget. See the +- * generation field in struct fuse_entry_param above. +- * +- * On unmount the lookup count for all inodes implicitly drops +- * to zero. It is not guaranteed that the file system will +- * receive corresponding forget messages for the affected +- * inodes. +- * +- * Valid replies: +- * fuse_reply_none +- * +- * @param req request handle +- * @param ino the inode number +- * @param nlookup the number of lookups to forget +- */ +- void (*forget) (fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); +- +- /** +- * Get file attributes. +- * +- * If writeback caching is enabled, the kernel may have a +- * better idea of a file's length than the FUSE file system +- * (eg if there has been a write that extended the file size, +- * but that has not yet been passed to the filesystem.n +- * +- * In this case, the st_size value provided by the file system +- * will be ignored. +- * +- * Valid replies: +- * fuse_reply_attr +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi for future use, currently always NULL +- */ +- void (*getattr) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Set file attributes +- * +- * In the 'attr' argument only members indicated by the 'to_set' +- * bitmask contain valid values. Other members contain undefined +- * values. +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits if the file +- * size or owner is being changed. 
+- * +- * If the setattr was invoked from the ftruncate() system call +- * under Linux kernel versions 2.6.15 or later, the fi->fh will +- * contain the value set by the open method or will be undefined +- * if the open method didn't set any value. Otherwise (not +- * ftruncate call, or kernel version earlier than 2.6.15) the fi +- * parameter will be NULL. +- * +- * Valid replies: +- * fuse_reply_attr +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param attr the attributes +- * @param to_set bit mask of attributes which should be set +- * @param fi file information, or NULL +- */ +- void (*setattr) (fuse_req_t req, fuse_ino_t ino, struct stat *attr, +- int to_set, struct fuse_file_info *fi); +- +- /** +- * Read symbolic link +- * +- * Valid replies: +- * fuse_reply_readlink +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- */ +- void (*readlink) (fuse_req_t req, fuse_ino_t ino); +- +- /** +- * Create file node +- * +- * Create a regular file, character device, block device, fifo or +- * socket node. 
+- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to create +- * @param mode file type and mode with which to create the new file +- * @param rdev the device number (only valid if created file is a device) +- */ +- void (*mknod) (fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode, dev_t rdev); +- +- /** +- * Create a directory +- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to create +- * @param mode with which to create the new file +- */ +- void (*mkdir) (fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode); +- +- /** +- * Remove a file +- * +- * If the file's inode's lookup count is non-zero, the file +- * system is expected to postpone any removal of the inode +- * until the lookup count reaches zero (see description of the +- * forget function). +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to remove +- */ +- void (*unlink) (fuse_req_t req, fuse_ino_t parent, const char *name); +- +- /** +- * Remove a directory +- * +- * If the directory's inode's lookup count is non-zero, the +- * file system is expected to postpone any removal of the +- * inode until the lookup count reaches zero (see description +- * of the forget function). 
+- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to remove +- */ +- void (*rmdir) (fuse_req_t req, fuse_ino_t parent, const char *name); +- +- /** +- * Create a symbolic link +- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param link the contents of the symbolic link +- * @param parent inode number of the parent directory +- * @param name to create +- */ +- void (*symlink) (fuse_req_t req, const char *link, fuse_ino_t parent, +- const char *name); +- +- /** Rename a file +- * +- * If the target exists it should be atomically replaced. If +- * the target's inode's lookup count is non-zero, the file +- * system is expected to postpone any removal of the inode +- * until the lookup count reaches zero (see description of the +- * forget function). +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EINVAL, i.e. all +- * future bmap requests will fail with EINVAL without being +- * send to the filesystem process. +- * +- * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If +- * RENAME_NOREPLACE is specified, the filesystem must not +- * overwrite *newname* if it exists and return an error +- * instead. If `RENAME_EXCHANGE` is specified, the filesystem +- * must atomically exchange the two files, i.e. both must +- * exist and neither may be deleted. 
+- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the old parent directory +- * @param name old name +- * @param newparent inode number of the new parent directory +- * @param newname new name +- */ +- void (*rename) (fuse_req_t req, fuse_ino_t parent, const char *name, +- fuse_ino_t newparent, const char *newname, +- unsigned int flags); +- +- /** +- * Create a hard link +- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the old inode number +- * @param newparent inode number of the new parent directory +- * @param newname new name to create +- */ +- void (*link) (fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, +- const char *newname); +- +- /** +- * Open a file +- * +- * Open flags are available in fi->flags. The following rules +- * apply. +- * +- * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be +- * filtered out / handled by the kernel. +- * +- * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used +- * by the filesystem to check if the operation is +- * permitted. If the ``-o default_permissions`` mount +- * option is given, this check is already done by the +- * kernel before calling open() and may thus be omitted by +- * the filesystem. +- * +- * - When writeback caching is enabled, the kernel may send +- * read requests even for files opened with O_WRONLY. The +- * filesystem should be prepared to handle this. +- * +- * - When writeback caching is disabled, the filesystem is +- * expected to properly handle the O_APPEND flag and ensure +- * that each write is appending to the end of the file. +- * +- * - When writeback caching is enabled, the kernel will +- * handle O_APPEND. However, unless all changes to the file +- * come through the kernel this will not work reliably. 
The +- * filesystem should thus either ignore the O_APPEND flag +- * (and let the kernel handle it), or return an error +- * (indicating that reliably O_APPEND is not available). +- * +- * Filesystem may store an arbitrary file handle (pointer, +- * index, etc) in fi->fh, and use this in other all other file +- * operations (read, write, flush, release, fsync). +- * +- * Filesystem may also implement stateless file I/O and not store +- * anything in fi->fh. +- * +- * There are also some flags (direct_io, keep_cache) which the +- * filesystem may set in fi, to change the way the file is opened. +- * See fuse_file_info structure in for more details. +- * +- * If this request is answered with an error code of ENOSYS +- * and FUSE_CAP_NO_OPEN_SUPPORT is set in +- * `fuse_conn_info.capable`, this is treated as success and +- * future calls to open and release will also succeed without being +- * sent to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_open +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- */ +- void (*open) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Read data +- * +- * Read should send exactly the number of bytes requested except +- * on EOF or error, otherwise the rest of the data will be +- * substituted with zeroes. An exception to this is when the file +- * has been opened in 'direct_io' mode, in which case the return +- * value of the read system call will reflect the return value of +- * this operation. +- * +- * fi->fh will contain the value set by the open method, or will +- * be undefined if the open method didn't set any value. 
+- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_iov +- * fuse_reply_data +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param size number of bytes to read +- * @param off offset to read from +- * @param fi file information +- */ +- void (*read) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, +- struct fuse_file_info *fi); +- +- /** +- * Write data +- * +- * Write should return exactly the number of bytes requested +- * except on error. An exception to this is when the file has +- * been opened in 'direct_io' mode, in which case the return value +- * of the write system call will reflect the return value of this +- * operation. +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- * +- * fi->fh will contain the value set by the open method, or will +- * be undefined if the open method didn't set any value. +- * +- * Valid replies: +- * fuse_reply_write +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param buf data to write +- * @param size number of bytes to write +- * @param off offset to write to +- * @param fi file information +- */ +- void (*write) (fuse_req_t req, fuse_ino_t ino, const char *buf, +- size_t size, off_t off, struct fuse_file_info *fi); +- +- /** +- * Flush method +- * +- * This is called on each close() of the opened file. +- * +- * Since file descriptors can be duplicated (dup, dup2, fork), for +- * one open call there may be many flush calls. +- * +- * Filesystems shouldn't assume that flush will always be called +- * after some writes, or that if will be called at all. +- * +- * fi->fh will contain the value set by the open method, or will +- * be undefined if the open method didn't set any value. +- * +- * NOTE: the name of the method is misleading, since (unlike +- * fsync) the filesystem is not forced to flush pending writes. 
+- * One reason to flush data is if the filesystem wants to return +- * write errors during close. However, such use is non-portable +- * because POSIX does not require [close] to wait for delayed I/O to +- * complete. +- * +- * If the filesystem supports file locking operations (setlk, +- * getlk) it should remove all locks belonging to 'fi->owner'. +- * +- * If this request is answered with an error code of ENOSYS, +- * this is treated as success and future calls to flush() will +- * succeed automatically without being send to the filesystem +- * process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * +- * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html +- */ +- void (*flush) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Release an open file +- * +- * Release is called when there are no more references to an open +- * file: all file descriptors are closed and all memory mappings +- * are unmapped. +- * +- * For every open call there will be exactly one release call (unless +- * the filesystem is force-unmounted). +- * +- * The filesystem may reply with an error, but error values are +- * not returned to close() or munmap() which triggered the +- * release. +- * +- * fi->fh will contain the value set by the open method, or will +- * be undefined if the open method didn't set any value. +- * fi->flags will contain the same flags as for open. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- */ +- void (*release) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Synchronize file contents +- * +- * If the datasync parameter is non-zero, then only the user data +- * should be flushed, not the meta data. 
+- * +- * If this request is answered with an error code of ENOSYS, +- * this is treated as success and future calls to fsync() will +- * succeed automatically without being send to the filesystem +- * process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param datasync flag indicating if only data should be flushed +- * @param fi file information +- */ +- void (*fsync) (fuse_req_t req, fuse_ino_t ino, int datasync, +- struct fuse_file_info *fi); +- +- /** +- * Open a directory +- * +- * Filesystem may store an arbitrary file handle (pointer, index, +- * etc) in fi->fh, and use this in other all other directory +- * stream operations (readdir, releasedir, fsyncdir). +- * +- * If this request is answered with an error code of ENOSYS and +- * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, +- * this is treated as success and future calls to opendir and +- * releasedir will also succeed without being sent to the filesystem +- * process. In addition, the kernel will cache readdir results +- * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. +- * +- * Valid replies: +- * fuse_reply_open +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- */ +- void (*opendir) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Read directory +- * +- * Send a buffer filled using fuse_add_direntry(), with size not +- * exceeding the requested size. Send an empty buffer on end of +- * stream. +- * +- * fi->fh will contain the value set by the opendir method, or +- * will be undefined if the opendir method didn't set any value. +- * +- * Returning a directory entry from readdir() does not affect +- * its lookup count. +- * +- * If off_t is non-zero, then it will correspond to one of the off_t +- * values that was previously returned by readdir() for the same +- * directory handle. 
In this case, readdir() should skip over entries +- * coming before the position defined by the off_t value. If entries +- * are added or removed while the directory handle is open, they filesystem +- * may still include the entries that have been removed, and may not +- * report the entries that have been created. However, addition or +- * removal of entries must never cause readdir() to skip over unrelated +- * entries or to report them more than once. This means +- * that off_t can not be a simple index that enumerates the entries +- * that have been returned but must contain sufficient information to +- * uniquely determine the next directory entry to return even when the +- * set of entries is changing. +- * +- * The function does not have to report the '.' and '..' +- * entries, but is allowed to do so. Note that, if readdir does +- * not return '.' or '..', they will not be implicitly returned, +- * and this behavior is observable by the caller. +- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_data +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param size maximum number of bytes to send +- * @param off offset to continue reading the directory stream +- * @param fi file information +- */ +- void (*readdir) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, +- struct fuse_file_info *fi); +- +- /** +- * Release an open directory +- * +- * For every opendir call there will be exactly one releasedir +- * call (unless the filesystem is force-unmounted). +- * +- * fi->fh will contain the value set by the opendir method, or +- * will be undefined if the opendir method didn't set any value. 
+- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- */ +- void (*releasedir) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Synchronize directory contents +- * +- * If the datasync parameter is non-zero, then only the directory +- * contents should be flushed, not the meta data. +- * +- * fi->fh will contain the value set by the opendir method, or +- * will be undefined if the opendir method didn't set any value. +- * +- * If this request is answered with an error code of ENOSYS, +- * this is treated as success and future calls to fsyncdir() will +- * succeed automatically without being send to the filesystem +- * process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param datasync flag indicating if only data should be flushed +- * @param fi file information +- */ +- void (*fsyncdir) (fuse_req_t req, fuse_ino_t ino, int datasync, +- struct fuse_file_info *fi); +- +- /** +- * Get file system statistics +- * +- * Valid replies: +- * fuse_reply_statfs +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number, zero means "undefined" +- */ +- void (*statfs) (fuse_req_t req, fuse_ino_t ino); +- +- /** +- * Set an extended attribute +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future setxattr() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_err +- */ +- void (*setxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, +- const char *value, size_t size, int flags); +- +- /** +- * Get an extended attribute +- * +- * If size is zero, the size of the value should be sent with +- * fuse_reply_xattr. 
+- * +- * If the size is non-zero, and the value fits in the buffer, the +- * value should be sent with fuse_reply_buf. +- * +- * If the size is too small for the value, the ERANGE error should +- * be sent. +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future getxattr() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_data +- * fuse_reply_xattr +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param name of the extended attribute +- * @param size maximum size of the value to send +- */ +- void (*getxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, +- size_t size); +- +- /** +- * List extended attribute names +- * +- * If size is zero, the total size of the attribute list should be +- * sent with fuse_reply_xattr. +- * +- * If the size is non-zero, and the null character separated +- * attribute list fits in the buffer, the list should be sent with +- * fuse_reply_buf. +- * +- * If the size is too small for the list, the ERANGE error should +- * be sent. +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future listxattr() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_data +- * fuse_reply_xattr +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param size maximum size of the list to send +- */ +- void (*listxattr) (fuse_req_t req, fuse_ino_t ino, size_t size); +- +- /** +- * Remove an extended attribute +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all +- * future removexattr() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param name of the extended attribute +- */ +- void (*removexattr) (fuse_req_t req, fuse_ino_t ino, const char *name); +- +- /** +- * Check file access permissions +- * +- * This will be called for the access() and chdir() system +- * calls. If the 'default_permissions' mount option is given, +- * this method is not called. +- * +- * This method is not called under Linux kernel versions 2.4.x +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent success, i.e. this and all future access() +- * requests will succeed without being send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param mask requested access mode +- */ +- void (*access) (fuse_req_t req, fuse_ino_t ino, int mask); +- +- /** +- * Create and open a file +- * +- * If the file does not exist, first create it with the specified +- * mode, and then open it. +- * +- * See the description of the open handler for more +- * information. +- * +- * If this method is not implemented or under Linux kernel +- * versions earlier than 2.6.15, the mknod() and open() methods +- * will be called instead. +- * +- * If this request is answered with an error code of ENOSYS, the handler +- * is treated as not implemented (i.e., for this and future requests the +- * mknod() and open() handlers will be called instead). 
+- * +- * Valid replies: +- * fuse_reply_create +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to create +- * @param mode file type and mode with which to create the new file +- * @param fi file information +- */ +- void (*create) (fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode, struct fuse_file_info *fi); +- +- /** +- * Test for a POSIX file lock +- * +- * Valid replies: +- * fuse_reply_lock +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * @param lock the region/type to test +- */ +- void (*getlk) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi, struct flock *lock); +- +- /** +- * Acquire, modify or release a POSIX file lock +- * +- * For POSIX threads (NPTL) there's a 1-1 relation between pid and +- * owner, but otherwise this is not always the case. For checking +- * lock ownership, 'fi->owner' must be used. The l_pid field in +- * 'struct flock' should only be used to fill in this field in +- * getlk(). +- * +- * Note: if the locking methods are not implemented, the kernel +- * will still allow file locking to work locally. Hence these are +- * only interesting for network filesystems and similar. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * @param lock the region/type to set +- * @param sleep locking operation may sleep +- */ +- void (*setlk) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi, +- struct flock *lock, int sleep); +- +- /** +- * Map block index within file to block index within device +- * +- * Note: This makes sense only for block device backed filesystems +- * mounted with the 'blkdev' option +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure, i.e. 
all future bmap() requests will +- * fail with the same error code without being send to the filesystem +- * process. +- * +- * Valid replies: +- * fuse_reply_bmap +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param blocksize unit of block index +- * @param idx block index within file +- */ +- void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize, +- uint64_t idx); +- +- /** +- * Ioctl +- * +- * Note: For unrestricted ioctls (not allowed for FUSE +- * servers), data in and out areas can be discovered by giving +- * iovs and setting FUSE_IOCTL_RETRY in *flags*. For +- * restricted ioctls, kernel prepares in/out data area +- * according to the information encoded in cmd. +- * +- * Valid replies: +- * fuse_reply_ioctl_retry +- * fuse_reply_ioctl +- * fuse_reply_ioctl_iov +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param cmd ioctl command +- * @param arg ioctl argument +- * @param fi file information +- * @param flags for FUSE_IOCTL_* flags +- * @param in_buf data fetched from the caller +- * @param in_bufsz number of fetched bytes +- * @param out_bufsz maximum size of output data +- * +- * Note : the unsigned long request submitted by the application +- * is truncated to 32 bits. +- */ +- void (*ioctl) (fuse_req_t req, fuse_ino_t ino, unsigned int cmd, +- void *arg, struct fuse_file_info *fi, unsigned flags, +- const void *in_buf, size_t in_bufsz, size_t out_bufsz); +- +- /** +- * Poll for IO readiness +- * +- * Note: If ph is non-NULL, the client should notify +- * when IO readiness events occur by calling +- * fuse_lowlevel_notify_poll() with the specified ph. +- * +- * Regardless of the number of times poll with a non-NULL ph +- * is received, single notification is enough to clear all. +- * Notifying more times incurs overhead but doesn't harm +- * correctness. 
+- * +- * The callee is responsible for destroying ph with +- * fuse_pollhandle_destroy() when no longer in use. +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as success (with a kernel-defined default poll-mask) and +- * future calls to pull() will succeed the same way without being send +- * to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_poll +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * @param ph poll handle to be used for notification +- */ +- void (*poll) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, +- struct fuse_pollhandle *ph); +- +- /** +- * Write data made available in a buffer +- * +- * This is a more generic version of the ->write() method. If +- * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the +- * kernel supports splicing from the fuse device, then the +- * data will be made available in pipe for supporting zero +- * copy data transfer. +- * +- * buf->count is guaranteed to be one (and thus buf->idx is +- * always zero). The write_buf handler must ensure that +- * bufv->off is correctly updated (reflecting the number of +- * bytes read from bufv->buf[0]). +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. 
+- * +- * Valid replies: +- * fuse_reply_write +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param bufv buffer containing the data +- * @param off offset to write to +- * @param fi file information +- */ +- void (*write_buf) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_bufvec *bufv, off_t off, +- struct fuse_file_info *fi); +- +- /** +- * Callback function for the retrieve request +- * +- * Valid replies: +- * fuse_reply_none +- * +- * @param req request handle +- * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() +- * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() +- * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() +- * @param bufv the buffer containing the returned data +- */ +- void (*retrieve_reply) (fuse_req_t req, void *cookie, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv); +- +- /** +- * Forget about multiple inodes +- * +- * See description of the forget function for more +- * information. +- * +- * Valid replies: +- * fuse_reply_none +- * +- * @param req request handle +- */ +- void (*forget_multi) (fuse_req_t req, size_t count, +- struct fuse_forget_data *forgets); +- +- /** +- * Acquire, modify or release a BSD file lock +- * +- * Note: if the locking methods are not implemented, the kernel +- * will still allow file locking to work locally. Hence these are +- * only interesting for network filesystems and similar. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * @param op the locking operation, see flock(2) +- */ +- void (*flock) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi, int op); +- +- /** +- * Allocate requested space. If this function returns success then +- * subsequent writes to the specified range shall not fail due to the lack +- * of free space on the file system storage media. 
+- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future fallocate() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param offset starting point for allocated region +- * @param length size of allocated region +- * @param mode determines the operation to be performed on the given range, +- * see fallocate(2) +- */ +- void (*fallocate) (fuse_req_t req, fuse_ino_t ino, int mode, +- off_t offset, off_t length, struct fuse_file_info *fi); +- +- /** +- * Read directory with attributes +- * +- * Send a buffer filled using fuse_add_direntry_plus(), with size not +- * exceeding the requested size. Send an empty buffer on end of +- * stream. +- * +- * fi->fh will contain the value set by the opendir method, or +- * will be undefined if the opendir method didn't set any value. +- * +- * In contrast to readdir() (which does not affect the lookup counts), +- * the lookup count of every entry returned by readdirplus(), except "." +- * and "..", is incremented by one. +- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_data +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param size maximum number of bytes to send +- * @param off offset to continue reading the directory stream +- * @param fi file information +- */ +- void (*readdirplus) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, +- struct fuse_file_info *fi); +- +- /** +- * Copy a range of data from one file to another +- * +- * Performs an optimized copy between two file descriptors without the +- * additional cost of transferring data through the FUSE kernel module +- * to user space (glibc) and then back into the FUSE filesystem again. 
+- * +- * In case this method is not implemented, glibc falls back to reading +- * data from the source and writing to the destination. Effectively +- * doing an inefficient copy of the data. +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future copy_file_range() requests will fail with EOPNOTSUPP without +- * being send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_write +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino_in the inode number or the source file +- * @param off_in starting point from were the data should be read +- * @param fi_in file information of the source file +- * @param ino_out the inode number or the destination file +- * @param off_out starting point where the data should be written +- * @param fi_out file information of the destination file +- * @param len maximum size of the data to copy +- * @param flags passed along with the copy_file_range() syscall +- */ +- void (*copy_file_range) (fuse_req_t req, fuse_ino_t ino_in, +- off_t off_in, struct fuse_file_info *fi_in, +- fuse_ino_t ino_out, off_t off_out, +- struct fuse_file_info *fi_out, size_t len, +- int flags); +- +- /** +- * Find next data or hole after the specified offset +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure, i.e. all future lseek() requests will +- * fail with the same error code without being send to the filesystem +- * process. 
+- * +- * Valid replies: +- * fuse_reply_lseek +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param off offset to start search from +- * @param whence either SEEK_DATA or SEEK_HOLE +- * @param fi file information +- */ +- void (*lseek) (fuse_req_t req, fuse_ino_t ino, off_t off, int whence, +- struct fuse_file_info *fi); ++ /** ++ * Initialize filesystem ++ * ++ * This function is called when libfuse establishes ++ * communication with the FUSE kernel module. The file system ++ * should use this module to inspect and/or modify the ++ * connection parameters provided in the `conn` structure. ++ * ++ * Note that some parameters may be overwritten by options ++ * passed to fuse_session_new() which take precedence over the ++ * values set in this handler. ++ * ++ * There's no reply to this function ++ * ++ * @param userdata the user data passed to fuse_session_new() ++ */ ++ void (*init)(void *userdata, struct fuse_conn_info *conn); ++ ++ /** ++ * Clean up filesystem. ++ * ++ * Called on filesystem exit. When this method is called, the ++ * connection to the kernel may be gone already, so that eg. calls ++ * to fuse_lowlevel_notify_* will fail. ++ * ++ * There's no reply to this function ++ * ++ * @param userdata the user data passed to fuse_session_new() ++ */ ++ void (*destroy)(void *userdata); ++ ++ /** ++ * Look up a directory entry by name and get its attributes. ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name the name to look up ++ */ ++ void (*lookup)(fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Forget about an inode ++ * ++ * This function is called when the kernel removes an inode ++ * from its internal caches. ++ * ++ * The inode's lookup count increases by one for every call to ++ * fuse_reply_entry and fuse_reply_create. 
The nlookup parameter ++ * indicates by how much the lookup count should be decreased. ++ * ++ * Inodes with a non-zero lookup count may receive request from ++ * the kernel even after calls to unlink, rmdir or (when ++ * overwriting an existing file) rename. Filesystems must handle ++ * such requests properly and it is recommended to defer removal ++ * of the inode until the lookup count reaches zero. Calls to ++ * unlink, rmdir or rename will be followed closely by forget ++ * unless the file or directory is open, in which case the ++ * kernel issues forget only after the release or releasedir ++ * calls. ++ * ++ * Note that if a file system will be exported over NFS the ++ * inodes lifetime must extend even beyond forget. See the ++ * generation field in struct fuse_entry_param above. ++ * ++ * On unmount the lookup count for all inodes implicitly drops ++ * to zero. It is not guaranteed that the file system will ++ * receive corresponding forget messages for the affected ++ * inodes. ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param nlookup the number of lookups to forget ++ */ ++ void (*forget)(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); ++ ++ /** ++ * Get file attributes. ++ * ++ * If writeback caching is enabled, the kernel may have a ++ * better idea of a file's length than the FUSE file system ++ * (eg if there has been a write that extended the file size, ++ * but that has not yet been passed to the filesystem.n ++ * ++ * In this case, the st_size value provided by the file system ++ * will be ignored. 
++ * ++ * Valid replies: ++ * fuse_reply_attr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi for future use, currently always NULL ++ */ ++ void (*getattr)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Set file attributes ++ * ++ * In the 'attr' argument only members indicated by the 'to_set' ++ * bitmask contain valid values. Other members contain undefined ++ * values. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits if the file ++ * size or owner is being changed. ++ * ++ * If the setattr was invoked from the ftruncate() system call ++ * under Linux kernel versions 2.6.15 or later, the fi->fh will ++ * contain the value set by the open method or will be undefined ++ * if the open method didn't set any value. Otherwise (not ++ * ftruncate call, or kernel version earlier than 2.6.15) the fi ++ * parameter will be NULL. ++ * ++ * Valid replies: ++ * fuse_reply_attr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param attr the attributes ++ * @param to_set bit mask of attributes which should be set ++ * @param fi file information, or NULL ++ */ ++ void (*setattr)(fuse_req_t req, fuse_ino_t ino, struct stat *attr, ++ int to_set, struct fuse_file_info *fi); ++ ++ /** ++ * Read symbolic link ++ * ++ * Valid replies: ++ * fuse_reply_readlink ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ */ ++ void (*readlink)(fuse_req_t req, fuse_ino_t ino); ++ ++ /** ++ * Create file node ++ * ++ * Create a regular file, character device, block device, fifo or ++ * socket node. 
++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode file type and mode with which to create the new file ++ * @param rdev the device number (only valid if created file is a device) ++ */ ++ void (*mknod)(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, dev_t rdev); ++ ++ /** ++ * Create a directory ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode with which to create the new file ++ */ ++ void (*mkdir)(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode); ++ ++ /** ++ * Remove a file ++ * ++ * If the file's inode's lookup count is non-zero, the file ++ * system is expected to postpone any removal of the inode ++ * until the lookup count reaches zero (see description of the ++ * forget function). ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to remove ++ */ ++ void (*unlink)(fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Remove a directory ++ * ++ * If the directory's inode's lookup count is non-zero, the ++ * file system is expected to postpone any removal of the ++ * inode until the lookup count reaches zero (see description ++ * of the forget function). 
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to remove ++ */ ++ void (*rmdir)(fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Create a symbolic link ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param link the contents of the symbolic link ++ * @param parent inode number of the parent directory ++ * @param name to create ++ */ ++ void (*symlink)(fuse_req_t req, const char *link, fuse_ino_t parent, ++ const char *name); ++ ++ /** ++ * Rename a file ++ * ++ * If the target exists it should be atomically replaced. If ++ * the target's inode's lookup count is non-zero, the file ++ * system is expected to postpone any removal of the inode ++ * until the lookup count reaches zero (see description of the ++ * forget function). ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EINVAL, i.e. all ++ * future bmap requests will fail with EINVAL without being ++ * send to the filesystem process. ++ * ++ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If ++ * RENAME_NOREPLACE is specified, the filesystem must not ++ * overwrite *newname* if it exists and return an error ++ * instead. If `RENAME_EXCHANGE` is specified, the filesystem ++ * must atomically exchange the two files, i.e. both must ++ * exist and neither may be deleted. 
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the old parent directory ++ * @param name old name ++ * @param newparent inode number of the new parent directory ++ * @param newname new name ++ */ ++ void (*rename)(fuse_req_t req, fuse_ino_t parent, const char *name, ++ fuse_ino_t newparent, const char *newname, ++ unsigned int flags); ++ ++ /** ++ * Create a hard link ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the old inode number ++ * @param newparent inode number of the new parent directory ++ * @param newname new name to create ++ */ ++ void (*link)(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, ++ const char *newname); ++ ++ /** ++ * Open a file ++ * ++ * Open flags are available in fi->flags. The following rules ++ * apply. ++ * ++ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be ++ * filtered out / handled by the kernel. ++ * ++ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used ++ * by the filesystem to check if the operation is ++ * permitted. If the ``-o default_permissions`` mount ++ * option is given, this check is already done by the ++ * kernel before calling open() and may thus be omitted by ++ * the filesystem. ++ * ++ * - When writeback caching is enabled, the kernel may send ++ * read requests even for files opened with O_WRONLY. The ++ * filesystem should be prepared to handle this. ++ * ++ * - When writeback caching is disabled, the filesystem is ++ * expected to properly handle the O_APPEND flag and ensure ++ * that each write is appending to the end of the file. ++ * ++ * - When writeback caching is enabled, the kernel will ++ * handle O_APPEND. However, unless all changes to the file ++ * come through the kernel this will not work reliably. 
The ++ * filesystem should thus either ignore the O_APPEND flag ++ * (and let the kernel handle it), or return an error ++ * (indicating that reliable O_APPEND is not available). ++ * ++ * Filesystem may store an arbitrary file handle (pointer, ++ * index, etc) in fi->fh, and use this in all other file ++ * operations (read, write, flush, release, fsync). ++ * ++ * Filesystem may also implement stateless file I/O and not store ++ * anything in fi->fh. ++ * ++ * There are also some flags (direct_io, keep_cache) which the ++ * filesystem may set in fi, to change the way the file is opened. ++ * See fuse_file_info structure in fuse_common.h for more details. ++ * ++ * If this request is answered with an error code of ENOSYS ++ * and FUSE_CAP_NO_OPEN_SUPPORT is set in ++ * `fuse_conn_info.capable`, this is treated as success and ++ * future calls to open and release will also succeed without being ++ * sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_open ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*open)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Read data ++ * ++ * Read should send exactly the number of bytes requested except ++ * on EOF or error, otherwise the rest of the data will be ++ * substituted with zeroes. An exception to this is when the file ++ * has been opened in 'direct_io' mode, in which case the return ++ * value of the read system call will reflect the return value of ++ * this operation. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value.
++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_iov ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size number of bytes to read ++ * @param off offset to read from ++ * @param fi file information ++ */ ++ void (*read)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Write data ++ * ++ * Write should return exactly the number of bytes requested ++ * except on error. An exception to this is when the file has ++ * been opened in 'direct_io' mode, in which case the return value ++ * of the write system call will reflect the return value of this ++ * operation. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param buf data to write ++ * @param size number of bytes to write ++ * @param off offset to write to ++ * @param fi file information ++ */ ++ void (*write)(fuse_req_t req, fuse_ino_t ino, const char *buf, size_t size, ++ off_t off, struct fuse_file_info *fi); ++ ++ /** ++ * Flush method ++ * ++ * This is called on each close() of the opened file. ++ * ++ * Since file descriptors can be duplicated (dup, dup2, fork), for ++ * one open call there may be many flush calls. ++ * ++ * Filesystems shouldn't assume that flush will always be called ++ * after some writes, or that it will be called at all. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * ++ * NOTE: the name of the method is misleading, since (unlike ++ * fsync) the filesystem is not forced to flush pending writes.
++ * One reason to flush data is if the filesystem wants to return ++ * write errors during close. However, such use is non-portable ++ * because POSIX does not require [close] to wait for delayed I/O to ++ * complete. ++ * ++ * If the filesystem supports file locking operations (setlk, ++ * getlk) it should remove all locks belonging to 'fi->owner'. ++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to flush() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * ++ * [close]: ++ * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html ++ */ ++ void (*flush)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Release an open file ++ * ++ * Release is called when there are no more references to an open ++ * file: all file descriptors are closed and all memory mappings ++ * are unmapped. ++ * ++ * For every open call there will be exactly one release call (unless ++ * the filesystem is force-unmounted). ++ * ++ * The filesystem may reply with an error, but error values are ++ * not returned to close() or munmap() which triggered the ++ * release. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * fi->flags will contain the same flags as for open. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*release)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Synchronize file contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data. 
++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to fsync() will ++ * succeed automatically without being sent to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param datasync flag indicating if only data should be flushed ++ * @param fi file information ++ */ ++ void (*fsync)(fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Open a directory ++ * ++ * Filesystem may store an arbitrary file handle (pointer, index, ++ * etc) in fi->fh, and use this in all other directory ++ * stream operations (readdir, releasedir, fsyncdir). ++ * ++ * If this request is answered with an error code of ENOSYS and ++ * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, ++ * this is treated as success and future calls to opendir and ++ * releasedir will also succeed without being sent to the filesystem ++ * process. In addition, the kernel will cache readdir results ++ * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. ++ * ++ * Valid replies: ++ * fuse_reply_open ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*opendir)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Read directory ++ * ++ * Send a buffer filled using fuse_add_direntry(), with size not ++ * exceeding the requested size. Send an empty buffer on end of ++ * stream. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * Returning a directory entry from readdir() does not affect ++ * its lookup count. ++ * ++ * If off_t is non-zero, then it will correspond to one of the off_t ++ * values that was previously returned by readdir() for the same ++ * directory handle.
In this case, readdir() should skip over entries ++ * coming before the position defined by the off_t value. If entries ++ * are added or removed while the directory handle is open, the filesystem ++ * may still include the entries that have been removed, and may not ++ * report the entries that have been created. However, addition or ++ * removal of entries must never cause readdir() to skip over unrelated ++ * entries or to report them more than once. This means ++ * that off_t can not be a simple index that enumerates the entries ++ * that have been returned but must contain sufficient information to ++ * uniquely determine the next directory entry to return even when the ++ * set of entries is changing. ++ * ++ * The function does not have to report the '.' and '..' ++ * entries, but is allowed to do so. Note that, if readdir does ++ * not return '.' or '..', they will not be implicitly returned, ++ * and this behavior is observable by the caller. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum number of bytes to send ++ * @param off offset to continue reading the directory stream ++ * @param fi file information ++ */ ++ void (*readdir)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Release an open directory ++ * ++ * For every opendir call there will be exactly one releasedir ++ * call (unless the filesystem is force-unmounted). ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value.
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*releasedir)(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Synchronize directory contents ++ * ++ * If the datasync parameter is non-zero, then only the directory ++ * contents should be flushed, not the meta data. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to fsyncdir() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param datasync flag indicating if only data should be flushed ++ * @param fi file information ++ */ ++ void (*fsyncdir)(fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Get file system statistics ++ * ++ * Valid replies: ++ * fuse_reply_statfs ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number, zero means "undefined" ++ */ ++ void (*statfs)(fuse_req_t req, fuse_ino_t ino); ++ ++ /** ++ * Set an extended attribute ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future setxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ */ ++ void (*setxattr)(fuse_req_t req, fuse_ino_t ino, const char *name, ++ const char *value, size_t size, int flags); ++ ++ /** ++ * Get an extended attribute ++ * ++ * If size is zero, the size of the value should be sent with ++ * fuse_reply_xattr. 
++ * ++ * If the size is non-zero, and the value fits in the buffer, the ++ * value should be sent with fuse_reply_buf. ++ * ++ * If the size is too small for the value, the ERANGE error should ++ * be sent. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future getxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_xattr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param name of the extended attribute ++ * @param size maximum size of the value to send ++ */ ++ void (*getxattr)(fuse_req_t req, fuse_ino_t ino, const char *name, ++ size_t size); ++ ++ /** ++ * List extended attribute names ++ * ++ * If size is zero, the total size of the attribute list should be ++ * sent with fuse_reply_xattr. ++ * ++ * If the size is non-zero, and the null character separated ++ * attribute list fits in the buffer, the list should be sent with ++ * fuse_reply_buf. ++ * ++ * If the size is too small for the list, the ERANGE error should ++ * be sent. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future listxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_xattr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum size of the list to send ++ */ ++ void (*listxattr)(fuse_req_t req, fuse_ino_t ino, size_t size); ++ ++ /** ++ * Remove an extended attribute ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all ++ * future removexattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param name of the extended attribute ++ */ ++ void (*removexattr)(fuse_req_t req, fuse_ino_t ino, const char *name); ++ ++ /** ++ * Check file access permissions ++ * ++ * This will be called for the access() and chdir() system ++ * calls. If the 'default_permissions' mount option is given, ++ * this method is not called. ++ * ++ * This method is not called under Linux kernel versions 2.4.x ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent success, i.e. this and all future access() ++ * requests will succeed without being send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param mask requested access mode ++ */ ++ void (*access)(fuse_req_t req, fuse_ino_t ino, int mask); ++ ++ /** ++ * Create and open a file ++ * ++ * If the file does not exist, first create it with the specified ++ * mode, and then open it. ++ * ++ * See the description of the open handler for more ++ * information. ++ * ++ * If this method is not implemented or under Linux kernel ++ * versions earlier than 2.6.15, the mknod() and open() methods ++ * will be called instead. ++ * ++ * If this request is answered with an error code of ENOSYS, the handler ++ * is treated as not implemented (i.e., for this and future requests the ++ * mknod() and open() handlers will be called instead). 
++ * ++ * Valid replies: ++ * fuse_reply_create ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode file type and mode with which to create the new file ++ * @param fi file information ++ */ ++ void (*create)(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, struct fuse_file_info *fi); ++ ++ /** ++ * Test for a POSIX file lock ++ * ++ * Valid replies: ++ * fuse_reply_lock ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param lock the region/type to test ++ */ ++ void (*getlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct flock *lock); ++ ++ /** ++ * Acquire, modify or release a POSIX file lock ++ * ++ * For POSIX threads (NPTL) there's a 1-1 relation between pid and ++ * owner, but otherwise this is not always the case. For checking ++ * lock ownership, 'fi->owner' must be used. The l_pid field in ++ * 'struct flock' should only be used to fill in this field in ++ * getlk(). ++ * ++ * Note: if the locking methods are not implemented, the kernel ++ * will still allow file locking to work locally. Hence these are ++ * only interesting for network filesystems and similar. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param lock the region/type to set ++ * @param sleep locking operation may sleep ++ */ ++ void (*setlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct flock *lock, int sleep); ++ ++ /** ++ * Map block index within file to block index within device ++ * ++ * Note: This makes sense only for block device backed filesystems ++ * mounted with the 'blkdev' option ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure, i.e. 
all future bmap() requests will ++ * fail with the same error code without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_bmap ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param blocksize unit of block index ++ * @param idx block index within file ++ */ ++ void (*bmap)(fuse_req_t req, fuse_ino_t ino, size_t blocksize, ++ uint64_t idx); ++ ++ /** ++ * Ioctl ++ * ++ * Note: For unrestricted ioctls (not allowed for FUSE ++ * servers), data in and out areas can be discovered by giving ++ * iovs and setting FUSE_IOCTL_RETRY in *flags*. For ++ * restricted ioctls, kernel prepares in/out data area ++ * according to the information encoded in cmd. ++ * ++ * Valid replies: ++ * fuse_reply_ioctl_retry ++ * fuse_reply_ioctl ++ * fuse_reply_ioctl_iov ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param cmd ioctl command ++ * @param arg ioctl argument ++ * @param fi file information ++ * @param flags for FUSE_IOCTL_* flags ++ * @param in_buf data fetched from the caller ++ * @param in_bufsz number of fetched bytes ++ * @param out_bufsz maximum size of output data ++ * ++ * Note : the unsigned long request submitted by the application ++ * is truncated to 32 bits. ++ */ ++ void (*ioctl)(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg, ++ struct fuse_file_info *fi, unsigned flags, const void *in_buf, ++ size_t in_bufsz, size_t out_bufsz); ++ ++ /** ++ * Poll for IO readiness ++ * ++ * Note: If ph is non-NULL, the client should notify ++ * when IO readiness events occur by calling ++ * fuse_lowlevel_notify_poll() with the specified ph. ++ * ++ * Regardless of the number of times poll with a non-NULL ph ++ * is received, single notification is enough to clear all. ++ * Notifying more times incurs overhead but doesn't harm ++ * correctness. 
++ * ++ * The callee is responsible for destroying ph with ++ * fuse_pollhandle_destroy() when no longer in use. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as success (with a kernel-defined default poll-mask) and ++ * future calls to poll() will succeed the same way without being sent ++ * to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_poll ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param ph poll handle to be used for notification ++ */ ++ void (*poll)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct fuse_pollhandle *ph); ++ ++ /** ++ * Write data made available in a buffer ++ * ++ * This is a more generic version of the ->write() method. If ++ * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the ++ * kernel supports splicing from the fuse device, then the ++ * data will be made available in pipe for supporting zero ++ * copy data transfer. ++ * ++ * buf->count is guaranteed to be one (and thus buf->idx is ++ * always zero). The write_buf handler must ensure that ++ * bufv->off is correctly updated (reflecting the number of ++ * bytes read from bufv->buf[0]). ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits.
++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param bufv buffer containing the data ++ * @param off offset to write to ++ * @param fi file information ++ */ ++ void (*write_buf)(fuse_req_t req, fuse_ino_t ino, struct fuse_bufvec *bufv, ++ off_t off, struct fuse_file_info *fi); ++ ++ /** ++ * Callback function for the retrieve request ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() ++ * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() ++ * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() ++ * @param bufv the buffer containing the returned data ++ */ ++ void (*retrieve_reply)(fuse_req_t req, void *cookie, fuse_ino_t ino, ++ off_t offset, struct fuse_bufvec *bufv); ++ ++ /** ++ * Forget about multiple inodes ++ * ++ * See description of the forget function for more ++ * information. ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ */ ++ void (*forget_multi)(fuse_req_t req, size_t count, ++ struct fuse_forget_data *forgets); ++ ++ /** ++ * Acquire, modify or release a BSD file lock ++ * ++ * Note: if the locking methods are not implemented, the kernel ++ * will still allow file locking to work locally. Hence these are ++ * only interesting for network filesystems and similar. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param op the locking operation, see flock(2) ++ */ ++ void (*flock)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ int op); ++ ++ /** ++ * Allocate requested space. If this function returns success then ++ * subsequent writes to the specified range shall not fail due to the lack ++ * of free space on the file system storage media. 
++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future fallocate() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param offset starting point for allocated region ++ * @param length size of allocated region ++ * @param mode determines the operation to be performed on the given range, ++ * see fallocate(2) ++ */ ++ void (*fallocate)(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, ++ off_t length, struct fuse_file_info *fi); ++ ++ /** ++ * Read directory with attributes ++ * ++ * Send a buffer filled using fuse_add_direntry_plus(), with size not ++ * exceeding the requested size. Send an empty buffer on end of ++ * stream. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * In contrast to readdir() (which does not affect the lookup counts), ++ * the lookup count of every entry returned by readdirplus(), except "." ++ * and "..", is incremented by one. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum number of bytes to send ++ * @param off offset to continue reading the directory stream ++ * @param fi file information ++ */ ++ void (*readdirplus)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Copy a range of data from one file to another ++ * ++ * Performs an optimized copy between two file descriptors without the ++ * additional cost of transferring data through the FUSE kernel module ++ * to user space (glibc) and then back into the FUSE filesystem again. 
++ * ++ * In case this method is not implemented, glibc falls back to reading ++ * data from the source and writing to the destination. Effectively ++ * doing an inefficient copy of the data. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future copy_file_range() requests will fail with EOPNOTSUPP without ++ * being sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino_in the inode number of the source file ++ * @param off_in starting point from where the data should be read ++ * @param fi_in file information of the source file ++ * @param ino_out the inode number of the destination file ++ * @param off_out starting point where the data should be written ++ * @param fi_out file information of the destination file ++ * @param len maximum size of the data to copy ++ * @param flags passed along with the copy_file_range() syscall ++ */ ++ void (*copy_file_range)(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, ++ struct fuse_file_info *fi_in, fuse_ino_t ino_out, ++ off_t off_out, struct fuse_file_info *fi_out, ++ size_t len, int flags); ++ ++ /** ++ * Find next data or hole after the specified offset ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure, i.e. all future lseek() requests will ++ * fail with the same error code without being sent to the filesystem ++ * process.
++ * ++ * Valid replies: ++ * fuse_reply_lseek ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param off offset to start search from ++ * @param whence either SEEK_DATA or SEEK_HOLE ++ * @param fi file information ++ */ ++ void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, ++ struct fuse_file_info *fi); + }; + + /** +@@ -1305,7 +1307,7 @@ int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); + * @return zero for success, -errno for failure to send reply + */ + int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, +- const struct fuse_file_info *fi); ++ const struct fuse_file_info *fi); + + /** + * Reply with attributes +@@ -1315,11 +1317,11 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, + * + * @param req request handle + * @param attr the attributes +- * @param attr_timeout validity timeout (in seconds) for the attributes ++ * @param attr_timeout validity timeout (in seconds) for the attributes + * @return zero for success, -errno for failure to send reply + */ + int fuse_reply_attr(fuse_req_t req, const struct stat *attr, +- double attr_timeout); ++ double attr_timeout); + + /** + * Reply with the contents of a symbolic link +@@ -1417,7 +1419,7 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); + * @return zero for success, -errno for failure to send reply + */ + int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags); ++ enum fuse_buf_copy_flags flags); + + /** + * Reply with data vector +@@ -1480,9 +1482,9 @@ int fuse_reply_lock(fuse_req_t req, const struct flock *lock); + */ + int fuse_reply_bmap(fuse_req_t req, uint64_t idx); + +-/* ----------------------------------------------------------- * +- * Filling a buffer in readdir * +- * ----------------------------------------------------------- */ ++/* ++ * Filling a buffer in readdir ++ */ + + /** + * Add a directory entry to 
the buffer +@@ -1512,8 +1514,7 @@ int fuse_reply_bmap(fuse_req_t req, uint64_t idx); + * @return the space needed for the entry + */ + size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, +- const char *name, const struct stat *stbuf, +- off_t off); ++ const char *name, const struct stat *stbuf, off_t off); + + /** + * Add a directory entry to the buffer with the attributes +@@ -1529,8 +1530,8 @@ size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, + * @return the space needed for the entry + */ + size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, +- const char *name, +- const struct fuse_entry_param *e, off_t off); ++ const char *name, ++ const struct fuse_entry_param *e, off_t off); + + /** + * Reply to ask for data fetch and output buffer preparation. ioctl +@@ -1547,9 +1548,9 @@ size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, + * @param out_count number of entries in out_iov + * @return zero for success, -errno for failure to send reply + */ +-int fuse_reply_ioctl_retry(fuse_req_t req, +- const struct iovec *in_iov, size_t in_count, +- const struct iovec *out_iov, size_t out_count); ++int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, ++ size_t in_count, const struct iovec *out_iov, ++ size_t out_count); + + /** + * Reply to finish ioctl +@@ -1576,7 +1577,7 @@ int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size); + * @param count the size of vector + */ + int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, +- int count); ++ int count); + + /** + * Reply with poll result event mask +@@ -1598,9 +1599,9 @@ int fuse_reply_poll(fuse_req_t req, unsigned revents); + */ + int fuse_reply_lseek(fuse_req_t req, off_t off); + +-/* ----------------------------------------------------------- * +- * Notification * +- * ----------------------------------------------------------- */ ++/* ++ * Notification ++ */ + + /** + * Notify 
IO readiness event +@@ -1635,7 +1636,7 @@ int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph); + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, +- off_t off, off_t len); ++ off_t off, off_t len); + + /** + * Notify to invalidate parent attributes and the dentry matching +@@ -1663,7 +1664,7 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, +- const char *name, size_t namelen); ++ const char *name, size_t namelen); + + /** + * This function behaves like fuse_lowlevel_notify_inval_entry() with +@@ -1693,9 +1694,9 @@ int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, + * @param namelen strlen() of file name + * @return zero for success, -errno for failure + */ +-int fuse_lowlevel_notify_delete(struct fuse_session *se, +- fuse_ino_t parent, fuse_ino_t child, +- const char *name, size_t namelen); ++int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, ++ fuse_ino_t child, const char *name, ++ size_t namelen); + + /** + * Store data to the kernel buffers +@@ -1723,8 +1724,8 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags); ++ off_t offset, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags); + /** + * Retrieve data from the kernel buffers + * +@@ -1755,12 +1756,12 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +- size_t size, off_t offset, void *cookie); ++ size_t size, off_t offset, void 
*cookie); + + +-/* ----------------------------------------------------------- * +- * Utility functions * +- * ----------------------------------------------------------- */ ++/* ++ * Utility functions ++ */ + + /** + * Get the userdata from the request +@@ -1822,7 +1823,7 @@ typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data); + * @param data user data passed to the callback function + */ + void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, +- void *data); ++ void *data); + + /** + * Check if a request has already been interrupted +@@ -1833,9 +1834,9 @@ void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, + int fuse_req_interrupted(fuse_req_t req); + + +-/* ----------------------------------------------------------- * +- * Inquiry functions * +- * ----------------------------------------------------------- */ ++/* ++ * Inquiry functions ++ */ + + /** + * Print low-level version information to stdout. +@@ -1854,18 +1855,18 @@ void fuse_lowlevel_help(void); + */ + void fuse_cmdline_help(void); + +-/* ----------------------------------------------------------- * +- * Filesystem setup & teardown * +- * ----------------------------------------------------------- */ ++/* ++ * Filesystem setup & teardown ++ */ + + struct fuse_cmdline_opts { +- int foreground; +- int debug; +- int nodefault_subtype; +- char *mountpoint; +- int show_version; +- int show_help; +- unsigned int max_idle_threads; ++ int foreground; ++ int debug; ++ int nodefault_subtype; ++ char *mountpoint; ++ int show_version; ++ int show_help; ++ unsigned int max_idle_threads; + }; + + /** +@@ -1886,8 +1887,7 @@ struct fuse_cmdline_opts { + * @param opts output argument for parsed options + * @return 0 on success, -1 on failure + */ +-int fuse_parse_cmdline(struct fuse_args *args, +- struct fuse_cmdline_opts *opts); ++int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts); + + /** + * Create a low level session. 
+@@ -1918,8 +1918,8 @@ int fuse_parse_cmdline(struct fuse_args *args, + * @return the fuse session on success, NULL on failure + **/ + struct fuse_session *fuse_session_new(struct fuse_args *args, +- const struct fuse_lowlevel_ops *op, +- size_t op_size, void *userdata); ++ const struct fuse_lowlevel_ops *op, ++ size_t op_size, void *userdata); + + /** + * Mount a FUSE file system. +@@ -2014,9 +2014,9 @@ void fuse_session_unmount(struct fuse_session *se); + */ + void fuse_session_destroy(struct fuse_session *se); + +-/* ----------------------------------------------------------- * +- * Custom event loop support * +- * ----------------------------------------------------------- */ ++/* ++ * Custom event loop support ++ */ + + /** + * Return file descriptor for communication with kernel. +@@ -2043,7 +2043,7 @@ int fuse_session_fd(struct fuse_session *se); + * @param buf the fuse_buf containing the request + */ + void fuse_session_process_buf(struct fuse_session *se, +- const struct fuse_buf *buf); ++ const struct fuse_buf *buf); + + /** + * Read a raw request from the kernel into the supplied buffer. +diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h +index 2f6663e..f252baa 100644 +--- a/tools/virtiofsd/fuse_misc.h ++++ b/tools/virtiofsd/fuse_misc.h +@@ -1,18 +1,18 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + + #include + + /* +- Versioned symbols cannot be used in some cases because it +- - confuse the dynamic linker in uClibc +- - not supported on MacOSX (in MachO binary format) +-*/ ++ * Versioned symbols cannot be used in some cases because it ++ * - confuse the dynamic linker in uClibc ++ * - not supported on MacOSX (in MachO binary format) ++ */ + #if (!defined(__UCLIBC__) && !defined(__APPLE__)) + #define FUSE_SYMVER(x) __asm__(x) + #else +@@ -25,11 +25,11 @@ + /* Is this hack still needed? */ + static inline void fuse_mutex_init(pthread_mutex_t *mut) + { +- pthread_mutexattr_t attr; +- pthread_mutexattr_init(&attr); +- pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); +- pthread_mutex_init(mut, &attr); +- pthread_mutexattr_destroy(&attr); ++ pthread_mutexattr_t attr; ++ pthread_mutexattr_init(&attr); ++ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); ++ pthread_mutex_init(mut, &attr); ++ pthread_mutexattr_destroy(&attr); + } + #endif + +diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c +index 93066b9..edd36f4 100644 +--- a/tools/virtiofsd/fuse_opt.c ++++ b/tools/virtiofsd/fuse_opt.c +@@ -1,423 +1,450 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- Implementation of option parsing routines (dealing with `struct +- fuse_args`). +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * Implementation of option parsing routines (dealing with `struct ++ * fuse_args`). ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + ++#include "fuse_opt.h" + #include "config.h" + #include "fuse_i.h" +-#include "fuse_opt.h" + #include "fuse_misc.h" + ++#include + #include + #include + #include +-#include + + struct fuse_opt_context { +- void *data; +- const struct fuse_opt *opt; +- fuse_opt_proc_t proc; +- int argctr; +- int argc; +- char **argv; +- struct fuse_args outargs; +- char *opts; +- int nonopt; ++ void *data; ++ const struct fuse_opt *opt; ++ fuse_opt_proc_t proc; ++ int argctr; ++ int argc; ++ char **argv; ++ struct fuse_args outargs; ++ char *opts; ++ int nonopt; + }; + + void fuse_opt_free_args(struct fuse_args *args) + { +- if (args) { +- if (args->argv && args->allocated) { +- int i; +- for (i = 0; i < args->argc; i++) +- free(args->argv[i]); +- free(args->argv); +- } +- args->argc = 0; +- args->argv = NULL; +- args->allocated = 0; +- } ++ if (args) { ++ if (args->argv && args->allocated) { ++ int i; ++ for (i = 0; i < args->argc; i++) { ++ free(args->argv[i]); ++ } ++ free(args->argv); ++ } ++ args->argc = 0; ++ args->argv = NULL; ++ args->allocated = 0; ++ } + } + + static int alloc_failed(void) + { +- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); +- return -1; ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; + } + + int fuse_opt_add_arg(struct fuse_args *args, const char *arg) + { +- char **newargv; +- char *newarg; +- +- assert(!args->argv || args->allocated); +- +- newarg = strdup(arg); +- if (!newarg) +- return alloc_failed(); +- +- newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *)); +- if (!newargv) { +- free(newarg); +- return alloc_failed(); +- } +- +- args->argv = newargv; +- args->allocated = 1; +- args->argv[args->argc++] = newarg; +- args->argv[args->argc] = NULL; +- return 0; ++ char **newargv; ++ char *newarg; ++ ++ assert(!args->argv || args->allocated); ++ ++ newarg = strdup(arg); ++ if (!newarg) { ++ return alloc_failed(); ++ } ++ ++ newargv = realloc(args->argv, 
(args->argc + 2) * sizeof(char *)); ++ if (!newargv) { ++ free(newarg); ++ return alloc_failed(); ++ } ++ ++ args->argv = newargv; ++ args->allocated = 1; ++ args->argv[args->argc++] = newarg; ++ args->argv[args->argc] = NULL; ++ return 0; + } + + static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos, +- const char *arg) ++ const char *arg) + { +- assert(pos <= args->argc); +- if (fuse_opt_add_arg(args, arg) == -1) +- return -1; +- +- if (pos != args->argc - 1) { +- char *newarg = args->argv[args->argc - 1]; +- memmove(&args->argv[pos + 1], &args->argv[pos], +- sizeof(char *) * (args->argc - pos - 1)); +- args->argv[pos] = newarg; +- } +- return 0; ++ assert(pos <= args->argc); ++ if (fuse_opt_add_arg(args, arg) == -1) { ++ return -1; ++ } ++ ++ if (pos != args->argc - 1) { ++ char *newarg = args->argv[args->argc - 1]; ++ memmove(&args->argv[pos + 1], &args->argv[pos], ++ sizeof(char *) * (args->argc - pos - 1)); ++ args->argv[pos] = newarg; ++ } ++ return 0; + } + + int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg) + { +- return fuse_opt_insert_arg_common(args, pos, arg); ++ return fuse_opt_insert_arg_common(args, pos, arg); + } + + static int next_arg(struct fuse_opt_context *ctx, const char *opt) + { +- if (ctx->argctr + 1 >= ctx->argc) { +- fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt); +- return -1; +- } +- ctx->argctr++; +- return 0; ++ if (ctx->argctr + 1 >= ctx->argc) { ++ fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt); ++ return -1; ++ } ++ ctx->argctr++; ++ return 0; + } + + static int add_arg(struct fuse_opt_context *ctx, const char *arg) + { +- return fuse_opt_add_arg(&ctx->outargs, arg); ++ return fuse_opt_add_arg(&ctx->outargs, arg); + } + + static int add_opt_common(char **opts, const char *opt, int esc) + { +- unsigned oldlen = *opts ? 
strlen(*opts) : 0; +- char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); +- +- if (!d) +- return alloc_failed(); +- +- *opts = d; +- if (oldlen) { +- d += oldlen; +- *d++ = ','; +- } +- +- for (; *opt; opt++) { +- if (esc && (*opt == ',' || *opt == '\\')) +- *d++ = '\\'; +- *d++ = *opt; +- } +- *d = '\0'; +- +- return 0; ++ unsigned oldlen = *opts ? strlen(*opts) : 0; ++ char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); ++ ++ if (!d) { ++ return alloc_failed(); ++ } ++ ++ *opts = d; ++ if (oldlen) { ++ d += oldlen; ++ *d++ = ','; ++ } ++ ++ for (; *opt; opt++) { ++ if (esc && (*opt == ',' || *opt == '\\')) { ++ *d++ = '\\'; ++ } ++ *d++ = *opt; ++ } ++ *d = '\0'; ++ ++ return 0; + } + + int fuse_opt_add_opt(char **opts, const char *opt) + { +- return add_opt_common(opts, opt, 0); ++ return add_opt_common(opts, opt, 0); + } + + int fuse_opt_add_opt_escaped(char **opts, const char *opt) + { +- return add_opt_common(opts, opt, 1); ++ return add_opt_common(opts, opt, 1); + } + + static int add_opt(struct fuse_opt_context *ctx, const char *opt) + { +- return add_opt_common(&ctx->opts, opt, 1); ++ return add_opt_common(&ctx->opts, opt, 1); + } + + static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key, +- int iso) ++ int iso) + { +- if (key == FUSE_OPT_KEY_DISCARD) +- return 0; +- +- if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { +- int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); +- if (res == -1 || !res) +- return res; +- } +- if (iso) +- return add_opt(ctx, arg); +- else +- return add_arg(ctx, arg); ++ if (key == FUSE_OPT_KEY_DISCARD) { ++ return 0; ++ } ++ ++ if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { ++ int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); ++ if (res == -1 || !res) { ++ return res; ++ } ++ } ++ if (iso) { ++ return add_opt(ctx, arg); ++ } else { ++ return add_arg(ctx, arg); ++ } + } + + static int match_template(const char *t, const char *arg, unsigned *sepp) + { +- int arglen = strlen(arg); +- 
const char *sep = strchr(t, '='); +- sep = sep ? sep : strchr(t, ' '); +- if (sep && (!sep[1] || sep[1] == '%')) { +- int tlen = sep - t; +- if (sep[0] == '=') +- tlen ++; +- if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { +- *sepp = sep - t; +- return 1; +- } +- } +- if (strcmp(t, arg) == 0) { +- *sepp = 0; +- return 1; +- } +- return 0; ++ int arglen = strlen(arg); ++ const char *sep = strchr(t, '='); ++ sep = sep ? sep : strchr(t, ' '); ++ if (sep && (!sep[1] || sep[1] == '%')) { ++ int tlen = sep - t; ++ if (sep[0] == '=') { ++ tlen++; ++ } ++ if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { ++ *sepp = sep - t; ++ return 1; ++ } ++ } ++ if (strcmp(t, arg) == 0) { ++ *sepp = 0; ++ return 1; ++ } ++ return 0; + } + + static const struct fuse_opt *find_opt(const struct fuse_opt *opt, +- const char *arg, unsigned *sepp) ++ const char *arg, unsigned *sepp) + { +- for (; opt && opt->templ; opt++) +- if (match_template(opt->templ, arg, sepp)) +- return opt; +- return NULL; ++ for (; opt && opt->templ; opt++) { ++ if (match_template(opt->templ, arg, sepp)) { ++ return opt; ++ } ++ } ++ return NULL; + } + + int fuse_opt_match(const struct fuse_opt *opts, const char *opt) + { +- unsigned dummy; +- return find_opt(opts, opt, &dummy) ? 1 : 0; ++ unsigned dummy; ++ return find_opt(opts, opt, &dummy) ? 
1 : 0; + } + + static int process_opt_param(void *var, const char *format, const char *param, +- const char *arg) ++ const char *arg) + { +- assert(format[0] == '%'); +- if (format[1] == 's') { +- char **s = var; +- char *copy = strdup(param); +- if (!copy) +- return alloc_failed(); +- +- free(*s); +- *s = copy; +- } else { +- if (sscanf(param, format, var) != 1) { +- fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", arg); +- return -1; +- } +- } +- return 0; ++ assert(format[0] == '%'); ++ if (format[1] == 's') { ++ char **s = var; ++ char *copy = strdup(param); ++ if (!copy) { ++ return alloc_failed(); ++ } ++ ++ free(*s); ++ *s = copy; ++ } else { ++ if (sscanf(param, format, var) != 1) { ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", ++ arg); ++ return -1; ++ } ++ } ++ return 0; + } + +-static int process_opt(struct fuse_opt_context *ctx, +- const struct fuse_opt *opt, unsigned sep, +- const char *arg, int iso) ++static int process_opt(struct fuse_opt_context *ctx, const struct fuse_opt *opt, ++ unsigned sep, const char *arg, int iso) + { +- if (opt->offset == -1U) { +- if (call_proc(ctx, arg, opt->value, iso) == -1) +- return -1; +- } else { +- void *var = (char *)ctx->data + opt->offset; +- if (sep && opt->templ[sep + 1]) { +- const char *param = arg + sep; +- if (opt->templ[sep] == '=') +- param ++; +- if (process_opt_param(var, opt->templ + sep + 1, +- param, arg) == -1) +- return -1; +- } else +- *(int *)var = opt->value; +- } +- return 0; ++ if (opt->offset == -1U) { ++ if (call_proc(ctx, arg, opt->value, iso) == -1) { ++ return -1; ++ } ++ } else { ++ void *var = (char *)ctx->data + opt->offset; ++ if (sep && opt->templ[sep + 1]) { ++ const char *param = arg + sep; ++ if (opt->templ[sep] == '=') { ++ param++; ++ } ++ if (process_opt_param(var, opt->templ + sep + 1, param, arg) == ++ -1) { ++ return -1; ++ } ++ } else { ++ *(int *)var = opt->value; ++ } ++ } ++ return 0; + } + + static int process_opt_sep_arg(struct 
fuse_opt_context *ctx, +- const struct fuse_opt *opt, unsigned sep, +- const char *arg, int iso) ++ const struct fuse_opt *opt, unsigned sep, ++ const char *arg, int iso) + { +- int res; +- char *newarg; +- char *param; +- +- if (next_arg(ctx, arg) == -1) +- return -1; +- +- param = ctx->argv[ctx->argctr]; +- newarg = malloc(sep + strlen(param) + 1); +- if (!newarg) +- return alloc_failed(); +- +- memcpy(newarg, arg, sep); +- strcpy(newarg + sep, param); +- res = process_opt(ctx, opt, sep, newarg, iso); +- free(newarg); +- +- return res; ++ int res; ++ char *newarg; ++ char *param; ++ ++ if (next_arg(ctx, arg) == -1) { ++ return -1; ++ } ++ ++ param = ctx->argv[ctx->argctr]; ++ newarg = malloc(sep + strlen(param) + 1); ++ if (!newarg) { ++ return alloc_failed(); ++ } ++ ++ memcpy(newarg, arg, sep); ++ strcpy(newarg + sep, param); ++ res = process_opt(ctx, opt, sep, newarg, iso); ++ free(newarg); ++ ++ return res; + } + + static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso) + { +- unsigned sep; +- const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); +- if (opt) { +- for (; opt; opt = find_opt(opt + 1, arg, &sep)) { +- int res; +- if (sep && opt->templ[sep] == ' ' && !arg[sep]) +- res = process_opt_sep_arg(ctx, opt, sep, arg, +- iso); +- else +- res = process_opt(ctx, opt, sep, arg, iso); +- if (res == -1) +- return -1; +- } +- return 0; +- } else +- return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); ++ unsigned sep; ++ const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); ++ if (opt) { ++ for (; opt; opt = find_opt(opt + 1, arg, &sep)) { ++ int res; ++ if (sep && opt->templ[sep] == ' ' && !arg[sep]) { ++ res = process_opt_sep_arg(ctx, opt, sep, arg, iso); ++ } else { ++ res = process_opt(ctx, opt, sep, arg, iso); ++ } ++ if (res == -1) { ++ return -1; ++ } ++ } ++ return 0; ++ } else { ++ return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); ++ } + } + + static int process_real_option_group(struct fuse_opt_context *ctx, char 
*opts) + { +- char *s = opts; +- char *d = s; +- int end = 0; +- +- while (!end) { +- if (*s == '\0') +- end = 1; +- if (*s == ',' || end) { +- int res; +- +- *d = '\0'; +- res = process_gopt(ctx, opts, 1); +- if (res == -1) +- return -1; +- d = opts; +- } else { +- if (s[0] == '\\' && s[1] != '\0') { +- s++; +- if (s[0] >= '0' && s[0] <= '3' && +- s[1] >= '0' && s[1] <= '7' && +- s[2] >= '0' && s[2] <= '7') { +- *d++ = (s[0] - '0') * 0100 + +- (s[1] - '0') * 0010 + +- (s[2] - '0'); +- s += 2; +- } else { +- *d++ = *s; +- } +- } else { +- *d++ = *s; +- } +- } +- s++; +- } +- +- return 0; ++ char *s = opts; ++ char *d = s; ++ int end = 0; ++ ++ while (!end) { ++ if (*s == '\0') { ++ end = 1; ++ } ++ if (*s == ',' || end) { ++ int res; ++ ++ *d = '\0'; ++ res = process_gopt(ctx, opts, 1); ++ if (res == -1) { ++ return -1; ++ } ++ d = opts; ++ } else { ++ if (s[0] == '\\' && s[1] != '\0') { ++ s++; ++ if (s[0] >= '0' && s[0] <= '3' && s[1] >= '0' && s[1] <= '7' && ++ s[2] >= '0' && s[2] <= '7') { ++ *d++ = (s[0] - '0') * 0100 + (s[1] - '0') * 0010 + ++ (s[2] - '0'); ++ s += 2; ++ } else { ++ *d++ = *s; ++ } ++ } else { ++ *d++ = *s; ++ } ++ } ++ s++; ++ } ++ ++ return 0; + } + + static int process_option_group(struct fuse_opt_context *ctx, const char *opts) + { +- int res; +- char *copy = strdup(opts); +- +- if (!copy) { +- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); +- return -1; +- } +- res = process_real_option_group(ctx, copy); +- free(copy); +- return res; ++ int res; ++ char *copy = strdup(opts); ++ ++ if (!copy) { ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; ++ } ++ res = process_real_option_group(ctx, copy); ++ free(copy); ++ return res; + } + + static int process_one(struct fuse_opt_context *ctx, const char *arg) + { +- if (ctx->nonopt || arg[0] != '-') +- return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); +- else if (arg[1] == 'o') { +- if (arg[2]) +- return process_option_group(ctx, arg + 2); +- else { +- if 
(next_arg(ctx, arg) == -1) +- return -1; +- +- return process_option_group(ctx, +- ctx->argv[ctx->argctr]); +- } +- } else if (arg[1] == '-' && !arg[2]) { +- if (add_arg(ctx, arg) == -1) +- return -1; +- ctx->nonopt = ctx->outargs.argc; +- return 0; +- } else +- return process_gopt(ctx, arg, 0); ++ if (ctx->nonopt || arg[0] != '-') { ++ return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); ++ } else if (arg[1] == 'o') { ++ if (arg[2]) { ++ return process_option_group(ctx, arg + 2); ++ } else { ++ if (next_arg(ctx, arg) == -1) { ++ return -1; ++ } ++ ++ return process_option_group(ctx, ctx->argv[ctx->argctr]); ++ } ++ } else if (arg[1] == '-' && !arg[2]) { ++ if (add_arg(ctx, arg) == -1) { ++ return -1; ++ } ++ ctx->nonopt = ctx->outargs.argc; ++ return 0; ++ } else { ++ return process_gopt(ctx, arg, 0); ++ } + } + + static int opt_parse(struct fuse_opt_context *ctx) + { +- if (ctx->argc) { +- if (add_arg(ctx, ctx->argv[0]) == -1) +- return -1; +- } +- +- for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) +- if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) +- return -1; +- +- if (ctx->opts) { +- if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || +- fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) +- return -1; +- } +- +- /* If option separator ("--") is the last argument, remove it */ +- if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && +- strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { +- free(ctx->outargs.argv[ctx->outargs.argc - 1]); +- ctx->outargs.argv[--ctx->outargs.argc] = NULL; +- } +- +- return 0; ++ if (ctx->argc) { ++ if (add_arg(ctx, ctx->argv[0]) == -1) { ++ return -1; ++ } ++ } ++ ++ for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) { ++ if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) { ++ return -1; ++ } ++ } ++ ++ if (ctx->opts) { ++ if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || ++ fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) { ++ return -1; ++ } ++ } ++ ++ /* If 
option separator ("--") is the last argument, remove it */ ++ if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && ++ strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { ++ free(ctx->outargs.argv[ctx->outargs.argc - 1]); ++ ctx->outargs.argv[--ctx->outargs.argc] = NULL; ++ } ++ ++ return 0; + } + + int fuse_opt_parse(struct fuse_args *args, void *data, +- const struct fuse_opt opts[], fuse_opt_proc_t proc) ++ const struct fuse_opt opts[], fuse_opt_proc_t proc) + { +- int res; +- struct fuse_opt_context ctx = { +- .data = data, +- .opt = opts, +- .proc = proc, +- }; +- +- if (!args || !args->argv || !args->argc) +- return 0; +- +- ctx.argc = args->argc; +- ctx.argv = args->argv; +- +- res = opt_parse(&ctx); +- if (res != -1) { +- struct fuse_args tmp = *args; +- *args = ctx.outargs; +- ctx.outargs = tmp; +- } +- free(ctx.opts); +- fuse_opt_free_args(&ctx.outargs); +- return res; ++ int res; ++ struct fuse_opt_context ctx = { ++ .data = data, ++ .opt = opts, ++ .proc = proc, ++ }; ++ ++ if (!args || !args->argv || !args->argc) { ++ return 0; ++ } ++ ++ ctx.argc = args->argc; ++ ctx.argv = args->argv; ++ ++ res = opt_parse(&ctx); ++ if (res != -1) { ++ struct fuse_args tmp = *args; ++ *args = ctx.outargs; ++ ctx.outargs = tmp; ++ } ++ free(ctx.opts); ++ fuse_opt_free_args(&ctx.outargs); ++ return res; + } +diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h +index 6910255..8f59b4d 100644 +--- a/tools/virtiofsd/fuse_opt.h ++++ b/tools/virtiofsd/fuse_opt.h +@@ -1,10 +1,10 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. 
++ */ + + #ifndef FUSE_OPT_H_ + #define FUSE_OPT_H_ +@@ -37,7 +37,7 @@ + * + * - 'offsetof(struct foo, member)' actions i) and iii) + * +- * - -1 action ii) ++ * - -1 action ii) + * + * The 'offsetof()' macro is defined in the header. + * +@@ -48,7 +48,7 @@ + * + * The types of templates are: + * +- * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only ++ * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only + * themselves. Invalid values are "--" and anything beginning + * with "-o" + * +@@ -71,58 +71,67 @@ + * freed. + */ + struct fuse_opt { +- /** Matching template and optional parameter formatting */ +- const char *templ; ++ /** Matching template and optional parameter formatting */ ++ const char *templ; + +- /** +- * Offset of variable within 'data' parameter of fuse_opt_parse() +- * or -1 +- */ +- unsigned long offset; ++ /** ++ * Offset of variable within 'data' parameter of fuse_opt_parse() ++ * or -1 ++ */ ++ unsigned long offset; + +- /** +- * Value to set the variable to, or to be passed as 'key' to the +- * processing function. Ignored if template has a format +- */ +- int value; ++ /** ++ * Value to set the variable to, or to be passed as 'key' to the ++ * processing function. Ignored if template has a format ++ */ ++ int value; + }; + + /** +- * Key option. In case of a match, the processing function will be ++ * Key option. In case of a match, the processing function will be + * called with the specified key. + */ +-#define FUSE_OPT_KEY(templ, key) { templ, -1U, key } ++#define FUSE_OPT_KEY(templ, key) \ ++ { \ ++ templ, -1U, key \ ++ } + + /** +- * Last option. An array of 'struct fuse_opt' must end with a NULL ++ * Last option. 
An array of 'struct fuse_opt' must end with a NULL + * template value + */ +-#define FUSE_OPT_END { NULL, 0, 0 } ++#define FUSE_OPT_END \ ++ { \ ++ NULL, 0, 0 \ ++ } + + /** + * Argument list + */ + struct fuse_args { +- /** Argument count */ +- int argc; ++ /** Argument count */ ++ int argc; + +- /** Argument vector. NULL terminated */ +- char **argv; ++ /** Argument vector. NULL terminated */ ++ char **argv; + +- /** Is 'argv' allocated? */ +- int allocated; ++ /** Is 'argv' allocated? */ ++ int allocated; + }; + + /** + * Initializer for 'struct fuse_args' + */ +-#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 } ++#define FUSE_ARGS_INIT(argc, argv) \ ++ { \ ++ argc, argv, 0 \ ++ } + + /** + * Key value passed to the processing function if an option did not + * match any template + */ +-#define FUSE_OPT_KEY_OPT -1 ++#define FUSE_OPT_KEY_OPT -1 + + /** + * Key value passed to the processing function for all non-options +@@ -130,7 +139,7 @@ struct fuse_args { + * Non-options are the arguments beginning with a character other than + * '-' or all arguments after the special '--' option + */ +-#define FUSE_OPT_KEY_NONOPT -2 ++#define FUSE_OPT_KEY_NONOPT -2 + + /** + * Special key value for options to keep +@@ -174,7 +183,7 @@ struct fuse_args { + * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept + */ + typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, +- struct fuse_args *outargs); ++ struct fuse_args *outargs); + + /** + * Option parsing function +@@ -197,7 +206,7 @@ typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, + * @return -1 on error, 0 on success + */ + int fuse_opt_parse(struct fuse_args *args, void *data, +- const struct fuse_opt opts[], fuse_opt_proc_t proc); ++ const struct fuse_opt opts[], fuse_opt_proc_t proc); + + /** + * Add an option to a comma separated option list +diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c +index 4271947..19d6791 100644 +--- 
a/tools/virtiofsd/fuse_signals.c ++++ b/tools/virtiofsd/fuse_signals.c +@@ -1,91 +1,95 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- Utility functions for setting signal handlers. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * Utility functions for setting signal handlers. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB ++ */ + + #include "config.h" +-#include "fuse_lowlevel.h" + #include "fuse_i.h" ++#include "fuse_lowlevel.h" + +-#include +-#include + #include ++#include + #include ++#include + + static struct fuse_session *fuse_instance; + + static void exit_handler(int sig) + { +- if (fuse_instance) { +- fuse_session_exit(fuse_instance); +- if(sig <= 0) { +- fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); +- abort(); +- } +- fuse_instance->error = sig; +- } ++ if (fuse_instance) { ++ fuse_session_exit(fuse_instance); ++ if (sig <= 0) { ++ fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); ++ abort(); ++ } ++ fuse_instance->error = sig; ++ } + } + + static void do_nothing(int sig) + { +- (void) sig; ++ (void)sig; + } + + static int set_one_signal_handler(int sig, void (*handler)(int), int remove) + { +- struct sigaction sa; +- struct sigaction old_sa; ++ struct sigaction sa; ++ struct sigaction old_sa; + +- memset(&sa, 0, sizeof(struct sigaction)); +- sa.sa_handler = remove ? SIG_DFL : handler; +- sigemptyset(&(sa.sa_mask)); +- sa.sa_flags = 0; ++ memset(&sa, 0, sizeof(struct sigaction)); ++ sa.sa_handler = remove ? 
SIG_DFL : handler; ++ sigemptyset(&(sa.sa_mask)); ++ sa.sa_flags = 0; + +- if (sigaction(sig, NULL, &old_sa) == -1) { +- perror("fuse: cannot get old signal handler"); +- return -1; +- } ++ if (sigaction(sig, NULL, &old_sa) == -1) { ++ perror("fuse: cannot get old signal handler"); ++ return -1; ++ } + +- if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && +- sigaction(sig, &sa, NULL) == -1) { +- perror("fuse: cannot set signal handler"); +- return -1; +- } +- return 0; ++ if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && ++ sigaction(sig, &sa, NULL) == -1) { ++ perror("fuse: cannot set signal handler"); ++ return -1; ++ } ++ return 0; + } + + int fuse_set_signal_handlers(struct fuse_session *se) + { +- /* If we used SIG_IGN instead of the do_nothing function, +- then we would be unable to tell if we set SIG_IGN (and +- thus should reset to SIG_DFL in fuse_remove_signal_handlers) +- or if it was already set to SIG_IGN (and should be left +- untouched. */ +- if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || +- set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || +- set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || +- set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) +- return -1; ++ /* ++ * If we used SIG_IGN instead of the do_nothing function, ++ * then we would be unable to tell if we set SIG_IGN (and ++ * thus should reset to SIG_DFL in fuse_remove_signal_handlers) ++ * or if it was already set to SIG_IGN (and should be left ++ * untouched. 
++ */ ++ if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) { ++ return -1; ++ } + +- fuse_instance = se; +- return 0; ++ fuse_instance = se; ++ return 0; + } + + void fuse_remove_signal_handlers(struct fuse_session *se) + { +- if (fuse_instance != se) +- fuse_log(FUSE_LOG_ERR, +- "fuse: fuse_remove_signal_handlers: unknown session\n"); +- else +- fuse_instance = NULL; ++ if (fuse_instance != se) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: fuse_remove_signal_handlers: unknown session\n"); ++ } else { ++ fuse_instance = NULL; ++ } + +- set_one_signal_handler(SIGHUP, exit_handler, 1); +- set_one_signal_handler(SIGINT, exit_handler, 1); +- set_one_signal_handler(SIGTERM, exit_handler, 1); +- set_one_signal_handler(SIGPIPE, do_nothing, 1); ++ set_one_signal_handler(SIGHUP, exit_handler, 1); ++ set_one_signal_handler(SIGINT, exit_handler, 1); ++ set_one_signal_handler(SIGTERM, exit_handler, 1); ++ set_one_signal_handler(SIGPIPE, do_nothing, 1); + } +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5a2e64c..5711dd2 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -1,297 +1,309 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * Helper functions to create (simple) standalone programs. With the ++ * aid of these functions it should be possible to create full FUSE ++ * file system by implementing nothing but the request handlers. + +- Helper functions to create (simple) standalone programs. With the +- aid of these functions it should be possible to create full FUSE +- file system by implementing nothing but the request handlers. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. 
+-*/ ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. ++ */ + + #include "config.h" + #include "fuse_i.h" ++#include "fuse_lowlevel.h" + #include "fuse_misc.h" + #include "fuse_opt.h" +-#include "fuse_lowlevel.h" + #include "mount_util.h" + ++#include ++#include ++#include + #include + #include +-#include +-#include + #include +-#include +-#include + #include ++#include + +-#define FUSE_HELPER_OPT(t, p) \ +- { t, offsetof(struct fuse_cmdline_opts, p), 1 } ++#define FUSE_HELPER_OPT(t, p) \ ++ { \ ++ t, offsetof(struct fuse_cmdline_opts, p), 1 \ ++ } + + static const struct fuse_opt fuse_helper_opts[] = { +- FUSE_HELPER_OPT("-h", show_help), +- FUSE_HELPER_OPT("--help", show_help), +- FUSE_HELPER_OPT("-V", show_version), +- FUSE_HELPER_OPT("--version", show_version), +- FUSE_HELPER_OPT("-d", debug), +- FUSE_HELPER_OPT("debug", debug), +- FUSE_HELPER_OPT("-d", foreground), +- FUSE_HELPER_OPT("debug", foreground), +- FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), +- FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), +- FUSE_HELPER_OPT("-f", foreground), +- FUSE_HELPER_OPT("fsname=", nodefault_subtype), +- FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), +- FUSE_HELPER_OPT("subtype=", nodefault_subtype), +- FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), +- FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), +- FUSE_OPT_END ++ FUSE_HELPER_OPT("-h", show_help), ++ FUSE_HELPER_OPT("--help", show_help), ++ FUSE_HELPER_OPT("-V", show_version), ++ FUSE_HELPER_OPT("--version", show_version), ++ FUSE_HELPER_OPT("-d", debug), ++ FUSE_HELPER_OPT("debug", debug), ++ FUSE_HELPER_OPT("-d", foreground), ++ FUSE_HELPER_OPT("debug", foreground), ++ FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), ++ FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), ++ FUSE_HELPER_OPT("-f", foreground), ++ FUSE_HELPER_OPT("fsname=", nodefault_subtype), ++ FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), ++ FUSE_HELPER_OPT("subtype=", nodefault_subtype), ++ FUSE_OPT_KEY("subtype=", 
FUSE_OPT_KEY_KEEP), ++ FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), ++ FUSE_OPT_END + }; + + struct fuse_conn_info_opts { +- int atomic_o_trunc; +- int no_remote_posix_lock; +- int no_remote_flock; +- int splice_write; +- int splice_move; +- int splice_read; +- int no_splice_write; +- int no_splice_move; +- int no_splice_read; +- int auto_inval_data; +- int no_auto_inval_data; +- int no_readdirplus; +- int no_readdirplus_auto; +- int async_dio; +- int no_async_dio; +- int writeback_cache; +- int no_writeback_cache; +- int async_read; +- int sync_read; +- unsigned max_write; +- unsigned max_readahead; +- unsigned max_background; +- unsigned congestion_threshold; +- unsigned time_gran; +- int set_max_write; +- int set_max_readahead; +- int set_max_background; +- int set_congestion_threshold; +- int set_time_gran; ++ int atomic_o_trunc; ++ int no_remote_posix_lock; ++ int no_remote_flock; ++ int splice_write; ++ int splice_move; ++ int splice_read; ++ int no_splice_write; ++ int no_splice_move; ++ int no_splice_read; ++ int auto_inval_data; ++ int no_auto_inval_data; ++ int no_readdirplus; ++ int no_readdirplus_auto; ++ int async_dio; ++ int no_async_dio; ++ int writeback_cache; ++ int no_writeback_cache; ++ int async_read; ++ int sync_read; ++ unsigned max_write; ++ unsigned max_readahead; ++ unsigned max_background; ++ unsigned congestion_threshold; ++ unsigned time_gran; ++ int set_max_write; ++ int set_max_readahead; ++ int set_max_background; ++ int set_congestion_threshold; ++ int set_time_gran; + }; + +-#define CONN_OPTION(t, p, v) \ +- { t, offsetof(struct fuse_conn_info_opts, p), v } ++#define CONN_OPTION(t, p, v) \ ++ { \ ++ t, offsetof(struct fuse_conn_info_opts, p), v \ ++ } + static const struct fuse_opt conn_info_opt_spec[] = { +- CONN_OPTION("max_write=%u", max_write, 0), +- CONN_OPTION("max_write=", set_max_write, 1), +- CONN_OPTION("max_readahead=%u", max_readahead, 0), +- CONN_OPTION("max_readahead=", set_max_readahead, 1), +- 
CONN_OPTION("max_background=%u", max_background, 0), +- CONN_OPTION("max_background=", set_max_background, 1), +- CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), +- CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), +- CONN_OPTION("sync_read", sync_read, 1), +- CONN_OPTION("async_read", async_read, 1), +- CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), +- CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), +- CONN_OPTION("no_remote_lock", no_remote_flock, 1), +- CONN_OPTION("no_remote_flock", no_remote_flock, 1), +- CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), +- CONN_OPTION("splice_write", splice_write, 1), +- CONN_OPTION("no_splice_write", no_splice_write, 1), +- CONN_OPTION("splice_move", splice_move, 1), +- CONN_OPTION("no_splice_move", no_splice_move, 1), +- CONN_OPTION("splice_read", splice_read, 1), +- CONN_OPTION("no_splice_read", no_splice_read, 1), +- CONN_OPTION("auto_inval_data", auto_inval_data, 1), +- CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), +- CONN_OPTION("readdirplus=no", no_readdirplus, 1), +- CONN_OPTION("readdirplus=yes", no_readdirplus, 0), +- CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), +- CONN_OPTION("readdirplus=auto", no_readdirplus, 0), +- CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), +- CONN_OPTION("async_dio", async_dio, 1), +- CONN_OPTION("no_async_dio", no_async_dio, 1), +- CONN_OPTION("writeback_cache", writeback_cache, 1), +- CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), +- CONN_OPTION("time_gran=%u", time_gran, 0), +- CONN_OPTION("time_gran=", set_time_gran, 1), +- FUSE_OPT_END ++ CONN_OPTION("max_write=%u", max_write, 0), ++ CONN_OPTION("max_write=", set_max_write, 1), ++ CONN_OPTION("max_readahead=%u", max_readahead, 0), ++ CONN_OPTION("max_readahead=", set_max_readahead, 1), ++ CONN_OPTION("max_background=%u", max_background, 0), ++ CONN_OPTION("max_background=", set_max_background, 1), ++ 
CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), ++ CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), ++ CONN_OPTION("sync_read", sync_read, 1), ++ CONN_OPTION("async_read", async_read, 1), ++ CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), ++ CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), ++ CONN_OPTION("no_remote_lock", no_remote_flock, 1), ++ CONN_OPTION("no_remote_flock", no_remote_flock, 1), ++ CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), ++ CONN_OPTION("splice_write", splice_write, 1), ++ CONN_OPTION("no_splice_write", no_splice_write, 1), ++ CONN_OPTION("splice_move", splice_move, 1), ++ CONN_OPTION("no_splice_move", no_splice_move, 1), ++ CONN_OPTION("splice_read", splice_read, 1), ++ CONN_OPTION("no_splice_read", no_splice_read, 1), ++ CONN_OPTION("auto_inval_data", auto_inval_data, 1), ++ CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), ++ CONN_OPTION("readdirplus=no", no_readdirplus, 1), ++ CONN_OPTION("readdirplus=yes", no_readdirplus, 0), ++ CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), ++ CONN_OPTION("readdirplus=auto", no_readdirplus, 0), ++ CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), ++ CONN_OPTION("async_dio", async_dio, 1), ++ CONN_OPTION("no_async_dio", no_async_dio, 1), ++ CONN_OPTION("writeback_cache", writeback_cache, 1), ++ CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), ++ CONN_OPTION("time_gran=%u", time_gran, 0), ++ CONN_OPTION("time_gran=", set_time_gran, 1), ++ FUSE_OPT_END + }; + + + void fuse_cmdline_help(void) + { +- printf(" -h --help print help\n" +- " -V --version print version\n" +- " -d -o debug enable debug output (implies -f)\n" +- " -f foreground operation\n" +- " -o max_idle_threads the maximum number of idle worker threads\n" +- " allowed (default: 10)\n"); ++ printf( ++ " -h --help print help\n" ++ " -V --version print version\n" ++ " -d -o debug enable debug output (implies -f)\n" ++ " -f foreground operation\n" ++ " -o 
max_idle_threads the maximum number of idle worker threads\n" ++ " allowed (default: 10)\n"); + } + + static int fuse_helper_opt_proc(void *data, const char *arg, int key, +- struct fuse_args *outargs) ++ struct fuse_args *outargs) + { +- (void) outargs; +- struct fuse_cmdline_opts *opts = data; +- +- switch (key) { +- case FUSE_OPT_KEY_NONOPT: +- if (!opts->mountpoint) { +- if (fuse_mnt_parse_fuse_fd(arg) != -1) { +- return fuse_opt_add_opt(&opts->mountpoint, arg); +- } +- +- char mountpoint[PATH_MAX] = ""; +- if (realpath(arg, mountpoint) == NULL) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: bad mount point `%s': %s\n", +- arg, strerror(errno)); +- return -1; +- } +- return fuse_opt_add_opt(&opts->mountpoint, mountpoint); +- } else { +- fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); +- return -1; +- } +- +- default: +- /* Pass through unknown options */ +- return 1; +- } ++ (void)outargs; ++ struct fuse_cmdline_opts *opts = data; ++ ++ switch (key) { ++ case FUSE_OPT_KEY_NONOPT: ++ if (!opts->mountpoint) { ++ if (fuse_mnt_parse_fuse_fd(arg) != -1) { ++ return fuse_opt_add_opt(&opts->mountpoint, arg); ++ } ++ ++ char mountpoint[PATH_MAX] = ""; ++ if (realpath(arg, mountpoint) == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: bad mount point `%s': %s\n", arg, ++ strerror(errno)); ++ return -1; ++ } ++ return fuse_opt_add_opt(&opts->mountpoint, mountpoint); ++ } else { ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); ++ return -1; ++ } ++ ++ default: ++ /* Pass through unknown options */ ++ return 1; ++ } + } + +-int fuse_parse_cmdline(struct fuse_args *args, +- struct fuse_cmdline_opts *opts) ++int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) + { +- memset(opts, 0, sizeof(struct fuse_cmdline_opts)); ++ memset(opts, 0, sizeof(struct fuse_cmdline_opts)); + +- opts->max_idle_threads = 10; ++ opts->max_idle_threads = 10; + +- if (fuse_opt_parse(args, opts, fuse_helper_opts, +- fuse_helper_opt_proc) == -1) +- return -1; ++ 
if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == ++ -1) { ++ return -1; ++ } + +- return 0; ++ return 0; + } + + + int fuse_daemonize(int foreground) + { +- if (!foreground) { +- int nullfd; +- int waiter[2]; +- char completed; +- +- if (pipe(waiter)) { +- perror("fuse_daemonize: pipe"); +- return -1; +- } +- +- /* +- * demonize current process by forking it and killing the +- * parent. This makes current process as a child of 'init'. +- */ +- switch(fork()) { +- case -1: +- perror("fuse_daemonize: fork"); +- return -1; +- case 0: +- break; +- default: +- (void) read(waiter[0], &completed, sizeof(completed)); +- _exit(0); +- } +- +- if (setsid() == -1) { +- perror("fuse_daemonize: setsid"); +- return -1; +- } +- +- (void) chdir("/"); +- +- nullfd = open("/dev/null", O_RDWR, 0); +- if (nullfd != -1) { +- (void) dup2(nullfd, 0); +- (void) dup2(nullfd, 1); +- (void) dup2(nullfd, 2); +- if (nullfd > 2) +- close(nullfd); +- } +- +- /* Propagate completion of daemon initialization */ +- completed = 1; +- (void) write(waiter[1], &completed, sizeof(completed)); +- close(waiter[0]); +- close(waiter[1]); +- } else { +- (void) chdir("/"); +- } +- return 0; ++ if (!foreground) { ++ int nullfd; ++ int waiter[2]; ++ char completed; ++ ++ if (pipe(waiter)) { ++ perror("fuse_daemonize: pipe"); ++ return -1; ++ } ++ ++ /* ++ * demonize current process by forking it and killing the ++ * parent. This makes current process as a child of 'init'. 
++ */ ++ switch (fork()) { ++ case -1: ++ perror("fuse_daemonize: fork"); ++ return -1; ++ case 0: ++ break; ++ default: ++ (void)read(waiter[0], &completed, sizeof(completed)); ++ _exit(0); ++ } ++ ++ if (setsid() == -1) { ++ perror("fuse_daemonize: setsid"); ++ return -1; ++ } ++ ++ (void)chdir("/"); ++ ++ nullfd = open("/dev/null", O_RDWR, 0); ++ if (nullfd != -1) { ++ (void)dup2(nullfd, 0); ++ (void)dup2(nullfd, 1); ++ (void)dup2(nullfd, 2); ++ if (nullfd > 2) { ++ close(nullfd); ++ } ++ } ++ ++ /* Propagate completion of daemon initialization */ ++ completed = 1; ++ (void)write(waiter[1], &completed, sizeof(completed)); ++ close(waiter[0]); ++ close(waiter[1]); ++ } else { ++ (void)chdir("/"); ++ } ++ return 0; + } + + void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, +- struct fuse_conn_info *conn) ++ struct fuse_conn_info *conn) + { +- if(opts->set_max_write) +- conn->max_write = opts->max_write; +- if(opts->set_max_background) +- conn->max_background = opts->max_background; +- if(opts->set_congestion_threshold) +- conn->congestion_threshold = opts->congestion_threshold; +- if(opts->set_time_gran) +- conn->time_gran = opts->time_gran; +- if(opts->set_max_readahead) +- conn->max_readahead = opts->max_readahead; +- +-#define LL_ENABLE(cond,cap) \ +- if (cond) conn->want |= (cap) +-#define LL_DISABLE(cond,cap) \ +- if (cond) conn->want &= ~(cap) +- +- LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); +- LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); +- +- LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); +- LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); +- +- LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); +- LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); +- +- LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); +- LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); +- +- LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); +- LL_DISABLE(opts->no_readdirplus_auto, 
FUSE_CAP_READDIRPLUS_AUTO); +- +- LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); +- LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); +- +- LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); +- LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); +- +- LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); +- LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); +- +- LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); +- LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); ++ if (opts->set_max_write) { ++ conn->max_write = opts->max_write; ++ } ++ if (opts->set_max_background) { ++ conn->max_background = opts->max_background; ++ } ++ if (opts->set_congestion_threshold) { ++ conn->congestion_threshold = opts->congestion_threshold; ++ } ++ if (opts->set_time_gran) { ++ conn->time_gran = opts->time_gran; ++ } ++ if (opts->set_max_readahead) { ++ conn->max_readahead = opts->max_readahead; ++ } ++ ++#define LL_ENABLE(cond, cap) \ ++ if (cond) \ ++ conn->want |= (cap) ++#define LL_DISABLE(cond, cap) \ ++ if (cond) \ ++ conn->want &= ~(cap) ++ ++ LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); ++ LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); ++ ++ LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); ++ LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); ++ ++ LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); ++ LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); ++ ++ LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); ++ LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); ++ ++ LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); ++ LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO); ++ ++ LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); ++ LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); ++ ++ LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); ++ LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); ++ ++ LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); ++ 
LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); ++ ++ LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); ++ LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); + } + +-struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) ++struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args) + { +- struct fuse_conn_info_opts *opts; +- +- opts = calloc(1, sizeof(struct fuse_conn_info_opts)); +- if(opts == NULL) { +- fuse_log(FUSE_LOG_ERR, "calloc failed\n"); +- return NULL; +- } +- if(fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { +- free(opts); +- return NULL; +- } +- return opts; ++ struct fuse_conn_info_opts *opts; ++ ++ opts = calloc(1, sizeof(struct fuse_conn_info_opts)); ++ if (opts == NULL) { ++ fuse_log(FUSE_LOG_ERR, "calloc failed\n"); ++ return NULL; ++ } ++ if (fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { ++ free(opts); ++ return NULL; ++ } ++ return opts; + } +diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h +index 7c5f561..0b98275 100644 +--- a/tools/virtiofsd/passthrough_helpers.h ++++ b/tools/virtiofsd/passthrough_helpers.h +@@ -28,23 +28,24 @@ + * operation + */ + static int mknod_wrapper(int dirfd, const char *path, const char *link, +- int mode, dev_t rdev) ++ int mode, dev_t rdev) + { +- int res; ++ int res; + +- if (S_ISREG(mode)) { +- res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); +- if (res >= 0) +- res = close(res); +- } else if (S_ISDIR(mode)) { +- res = mkdirat(dirfd, path, mode); +- } else if (S_ISLNK(mode) && link != NULL) { +- res = symlinkat(link, dirfd, path); +- } else if (S_ISFIFO(mode)) { +- res = mkfifoat(dirfd, path, mode); +- } else { +- res = mknodat(dirfd, path, mode, rdev); +- } ++ if (S_ISREG(mode)) { ++ res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); ++ if (res >= 0) { ++ res = close(res); ++ } ++ } else if (S_ISDIR(mode)) { ++ res = mkdirat(dirfd, path, mode); ++ } else if 
(S_ISLNK(mode) && link != NULL) { ++ res = symlinkat(link, dirfd, path); ++ } else if (S_ISFIFO(mode)) { ++ res = mkfifoat(dirfd, path, mode); ++ } else { ++ res = mknodat(dirfd, path, mode, rdev); ++ } + +- return res; ++ return res; + } +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e5f7115..c5850ef 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1,12 +1,12 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU GPLv2. +- See the file COPYING. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU GPLv2. ++ * See the file COPYING. ++ */ + +-/** @file ++/* + * + * This file system mirrors the existing file system hierarchy of the + * system, starting at the root file system. This is implemented by +@@ -28,7 +28,8 @@ + * + * Compile with: + * +- * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o passthrough_ll ++ * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o ++ * passthrough_ll + * + * ## Source code ## + * \include passthrough_ll.c +@@ -39,1299 +40,1365 @@ + + #include "config.h" + +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include + #include ++#include + #include ++#include + #include ++#include + #include ++#include ++#include ++#include ++#include ++#include + #include + #include ++#include + + #include "passthrough_helpers.h" + +-/* We are re-using pointers to our `struct lo_inode` and `struct +- lo_dirp` elements as inodes. This means that we must be able to +- store uintptr_t values in a fuse_ino_t variable. The following +- incantation checks this condition at compile time. 
*/ +-#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus ++/* ++ * We are re-using pointers to our `struct lo_inode` and `struct ++ * lo_dirp` elements as inodes. This means that we must be able to ++ * store uintptr_t values in a fuse_ino_t variable. The following ++ * incantation checks this condition at compile time. ++ */ ++#if defined(__GNUC__) && \ ++ (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ ++ !defined __cplusplus + _Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), +- "fuse_ino_t too small to hold uintptr_t values!"); ++ "fuse_ino_t too small to hold uintptr_t values!"); + #else +-struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct \ +- { unsigned _uintptr_to_must_hold_fuse_ino_t: +- ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 1 : -1); }; ++struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { ++ unsigned _uintptr_to_must_hold_fuse_ino_t ++ : ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 1 : -1); ++}; + #endif + + struct lo_inode { +- struct lo_inode *next; /* protected by lo->mutex */ +- struct lo_inode *prev; /* protected by lo->mutex */ +- int fd; +- bool is_symlink; +- ino_t ino; +- dev_t dev; +- uint64_t refcount; /* protected by lo->mutex */ ++ struct lo_inode *next; /* protected by lo->mutex */ ++ struct lo_inode *prev; /* protected by lo->mutex */ ++ int fd; ++ bool is_symlink; ++ ino_t ino; ++ dev_t dev; ++ uint64_t refcount; /* protected by lo->mutex */ + }; + + enum { +- CACHE_NEVER, +- CACHE_NORMAL, +- CACHE_ALWAYS, ++ CACHE_NEVER, ++ CACHE_NORMAL, ++ CACHE_ALWAYS, + }; + + struct lo_data { +- pthread_mutex_t mutex; +- int debug; +- int writeback; +- int flock; +- int xattr; +- const char *source; +- double timeout; +- int cache; +- int timeout_set; +- struct lo_inode root; /* protected by lo->mutex */ ++ pthread_mutex_t mutex; ++ int debug; ++ int writeback; ++ int flock; ++ int xattr; ++ const char *source; ++ double timeout; ++ int cache; ++ int timeout_set; ++ 
struct lo_inode root; /* protected by lo->mutex */ + }; + + static const struct fuse_opt lo_opts[] = { +- { "writeback", +- offsetof(struct lo_data, writeback), 1 }, +- { "no_writeback", +- offsetof(struct lo_data, writeback), 0 }, +- { "source=%s", +- offsetof(struct lo_data, source), 0 }, +- { "flock", +- offsetof(struct lo_data, flock), 1 }, +- { "no_flock", +- offsetof(struct lo_data, flock), 0 }, +- { "xattr", +- offsetof(struct lo_data, xattr), 1 }, +- { "no_xattr", +- offsetof(struct lo_data, xattr), 0 }, +- { "timeout=%lf", +- offsetof(struct lo_data, timeout), 0 }, +- { "timeout=", +- offsetof(struct lo_data, timeout_set), 1 }, +- { "cache=never", +- offsetof(struct lo_data, cache), CACHE_NEVER }, +- { "cache=auto", +- offsetof(struct lo_data, cache), CACHE_NORMAL }, +- { "cache=always", +- offsetof(struct lo_data, cache), CACHE_ALWAYS }, +- +- FUSE_OPT_END ++ { "writeback", offsetof(struct lo_data, writeback), 1 }, ++ { "no_writeback", offsetof(struct lo_data, writeback), 0 }, ++ { "source=%s", offsetof(struct lo_data, source), 0 }, ++ { "flock", offsetof(struct lo_data, flock), 1 }, ++ { "no_flock", offsetof(struct lo_data, flock), 0 }, ++ { "xattr", offsetof(struct lo_data, xattr), 1 }, ++ { "no_xattr", offsetof(struct lo_data, xattr), 0 }, ++ { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, ++ { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, ++ { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, ++ { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, ++ { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, ++ ++ FUSE_OPT_END + }; + + static struct lo_data *lo_data(fuse_req_t req) + { +- return (struct lo_data *) fuse_req_userdata(req); ++ return (struct lo_data *)fuse_req_userdata(req); + } + + static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + { +- if (ino == FUSE_ROOT_ID) +- return &lo_data(req)->root; +- else +- return (struct lo_inode *) (uintptr_t) ino; ++ if (ino == 
FUSE_ROOT_ID) { ++ return &lo_data(req)->root; ++ } else { ++ return (struct lo_inode *)(uintptr_t)ino; ++ } + } + + static int lo_fd(fuse_req_t req, fuse_ino_t ino) + { +- return lo_inode(req, ino)->fd; ++ return lo_inode(req, ino)->fd; + } + + static bool lo_debug(fuse_req_t req) + { +- return lo_data(req)->debug != 0; ++ return lo_data(req)->debug != 0; + } + +-static void lo_init(void *userdata, +- struct fuse_conn_info *conn) ++static void lo_init(void *userdata, struct fuse_conn_info *conn) + { +- struct lo_data *lo = (struct lo_data*) userdata; +- +- if(conn->capable & FUSE_CAP_EXPORT_SUPPORT) +- conn->want |= FUSE_CAP_EXPORT_SUPPORT; +- +- if (lo->writeback && +- conn->capable & FUSE_CAP_WRITEBACK_CACHE) { +- if (lo->debug) +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); +- conn->want |= FUSE_CAP_WRITEBACK_CACHE; +- } +- if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { +- if (lo->debug) +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); +- conn->want |= FUSE_CAP_FLOCK_LOCKS; +- } ++ struct lo_data *lo = (struct lo_data *)userdata; ++ ++ if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) { ++ conn->want |= FUSE_CAP_EXPORT_SUPPORT; ++ } ++ ++ if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { ++ if (lo->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); ++ } ++ conn->want |= FUSE_CAP_WRITEBACK_CACHE; ++ } ++ if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { ++ if (lo->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); ++ } ++ conn->want |= FUSE_CAP_FLOCK_LOCKS; ++ } + } + + static void lo_getattr(fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi) ++ struct fuse_file_info *fi) + { +- int res; +- struct stat buf; +- struct lo_data *lo = lo_data(req); ++ int res; ++ struct stat buf; ++ struct lo_data *lo = lo_data(req); + +- (void) fi; ++ (void)fi; + +- res = fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1) +- 
return (void) fuse_reply_err(req, errno); ++ res = ++ fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } + +- fuse_reply_attr(req, &buf, lo->timeout); ++ fuse_reply_attr(req, &buf, lo->timeout); + } + + static int utimensat_empty_nofollow(struct lo_inode *inode, +- const struct timespec *tv) ++ const struct timespec *tv) + { +- int res; +- char procname[64]; +- +- if (inode->is_symlink) { +- res = utimensat(inode->fd, "", tv, +- AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1 && errno == EINVAL) { +- /* Sorry, no race free way to set times on symlink. */ +- errno = EPERM; +- } +- return res; +- } +- sprintf(procname, "/proc/self/fd/%i", inode->fd); +- +- return utimensat(AT_FDCWD, procname, tv, 0); ++ int res; ++ char procname[64]; ++ ++ if (inode->is_symlink) { ++ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1 && errno == EINVAL) { ++ /* Sorry, no race free way to set times on symlink. */ ++ errno = EPERM; ++ } ++ return res; ++ } ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ return utimensat(AT_FDCWD, procname, tv, 0); + } + + static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, +- int valid, struct fuse_file_info *fi) ++ int valid, struct fuse_file_info *fi) + { +- int saverr; +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- int ifd = inode->fd; +- int res; +- +- if (valid & FUSE_SET_ATTR_MODE) { +- if (fi) { +- res = fchmod(fi->fh, attr->st_mode); +- } else { +- sprintf(procname, "/proc/self/fd/%i", ifd); +- res = chmod(procname, attr->st_mode); +- } +- if (res == -1) +- goto out_err; +- } +- if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { +- uid_t uid = (valid & FUSE_SET_ATTR_UID) ? +- attr->st_uid : (uid_t) -1; +- gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
+- attr->st_gid : (gid_t) -1; +- +- res = fchownat(ifd, "", uid, gid, +- AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1) +- goto out_err; +- } +- if (valid & FUSE_SET_ATTR_SIZE) { +- if (fi) { +- res = ftruncate(fi->fh, attr->st_size); +- } else { +- sprintf(procname, "/proc/self/fd/%i", ifd); +- res = truncate(procname, attr->st_size); +- } +- if (res == -1) +- goto out_err; +- } +- if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { +- struct timespec tv[2]; +- +- tv[0].tv_sec = 0; +- tv[1].tv_sec = 0; +- tv[0].tv_nsec = UTIME_OMIT; +- tv[1].tv_nsec = UTIME_OMIT; +- +- if (valid & FUSE_SET_ATTR_ATIME_NOW) +- tv[0].tv_nsec = UTIME_NOW; +- else if (valid & FUSE_SET_ATTR_ATIME) +- tv[0] = attr->st_atim; +- +- if (valid & FUSE_SET_ATTR_MTIME_NOW) +- tv[1].tv_nsec = UTIME_NOW; +- else if (valid & FUSE_SET_ATTR_MTIME) +- tv[1] = attr->st_mtim; +- +- if (fi) +- res = futimens(fi->fh, tv); +- else +- res = utimensat_empty_nofollow(inode, tv); +- if (res == -1) +- goto out_err; +- } +- +- return lo_getattr(req, ino, fi); ++ int saverr; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ int ifd = inode->fd; ++ int res; ++ ++ if (valid & FUSE_SET_ATTR_MODE) { ++ if (fi) { ++ res = fchmod(fi->fh, attr->st_mode); ++ } else { ++ sprintf(procname, "/proc/self/fd/%i", ifd); ++ res = chmod(procname, attr->st_mode); ++ } ++ if (res == -1) { ++ goto out_err; ++ } ++ } ++ if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { ++ uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1; ++ gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
attr->st_gid : (gid_t)-1; ++ ++ res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ goto out_err; ++ } ++ } ++ if (valid & FUSE_SET_ATTR_SIZE) { ++ if (fi) { ++ res = ftruncate(fi->fh, attr->st_size); ++ } else { ++ sprintf(procname, "/proc/self/fd/%i", ifd); ++ res = truncate(procname, attr->st_size); ++ } ++ if (res == -1) { ++ goto out_err; ++ } ++ } ++ if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { ++ struct timespec tv[2]; ++ ++ tv[0].tv_sec = 0; ++ tv[1].tv_sec = 0; ++ tv[0].tv_nsec = UTIME_OMIT; ++ tv[1].tv_nsec = UTIME_OMIT; ++ ++ if (valid & FUSE_SET_ATTR_ATIME_NOW) { ++ tv[0].tv_nsec = UTIME_NOW; ++ } else if (valid & FUSE_SET_ATTR_ATIME) { ++ tv[0] = attr->st_atim; ++ } ++ ++ if (valid & FUSE_SET_ATTR_MTIME_NOW) { ++ tv[1].tv_nsec = UTIME_NOW; ++ } else if (valid & FUSE_SET_ATTR_MTIME) { ++ tv[1] = attr->st_mtim; ++ } ++ ++ if (fi) { ++ res = futimens(fi->fh, tv); ++ } else { ++ res = utimensat_empty_nofollow(inode, tv); ++ } ++ if (res == -1) { ++ goto out_err; ++ } ++ } ++ ++ return lo_getattr(req, ino, fi); + + out_err: +- saverr = errno; +- fuse_reply_err(req, saverr); ++ saverr = errno; ++ fuse_reply_err(req, saverr); + } + + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + { +- struct lo_inode *p; +- struct lo_inode *ret = NULL; +- +- pthread_mutex_lock(&lo->mutex); +- for (p = lo->root.next; p != &lo->root; p = p->next) { +- if (p->ino == st->st_ino && p->dev == st->st_dev) { +- assert(p->refcount > 0); +- ret = p; +- ret->refcount++; +- break; +- } +- } +- pthread_mutex_unlock(&lo->mutex); +- return ret; ++ struct lo_inode *p; ++ struct lo_inode *ret = NULL; ++ ++ pthread_mutex_lock(&lo->mutex); ++ for (p = lo->root.next; p != &lo->root; p = p->next) { ++ if (p->ino == st->st_ino && p->dev == st->st_dev) { ++ assert(p->refcount > 0); ++ ret = p; ++ ret->refcount++; ++ break; ++ } ++ } ++ pthread_mutex_unlock(&lo->mutex); ++ return ret; + } + + static int 
lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, +- struct fuse_entry_param *e) ++ struct fuse_entry_param *e) + { +- int newfd; +- int res; +- int saverr; +- struct lo_data *lo = lo_data(req); +- struct lo_inode *inode; +- +- memset(e, 0, sizeof(*e)); +- e->attr_timeout = lo->timeout; +- e->entry_timeout = lo->timeout; +- +- newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); +- if (newfd == -1) +- goto out_err; +- +- res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1) +- goto out_err; +- +- inode = lo_find(lo_data(req), &e->attr); +- if (inode) { +- close(newfd); +- newfd = -1; +- } else { +- struct lo_inode *prev, *next; +- +- saverr = ENOMEM; +- inode = calloc(1, sizeof(struct lo_inode)); +- if (!inode) +- goto out_err; +- +- inode->is_symlink = S_ISLNK(e->attr.st_mode); +- inode->refcount = 1; +- inode->fd = newfd; +- inode->ino = e->attr.st_ino; +- inode->dev = e->attr.st_dev; +- +- pthread_mutex_lock(&lo->mutex); +- prev = &lo->root; +- next = prev->next; +- next->prev = inode; +- inode->next = next; +- inode->prev = prev; +- prev->next = inode; +- pthread_mutex_unlock(&lo->mutex); +- } +- e->ino = (uintptr_t) inode; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long) parent, name, (unsigned long long) e->ino); +- +- return 0; ++ int newfd; ++ int res; ++ int saverr; ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode; ++ ++ memset(e, 0, sizeof(*e)); ++ e->attr_timeout = lo->timeout; ++ e->entry_timeout = lo->timeout; ++ ++ newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); ++ if (newfd == -1) { ++ goto out_err; ++ } ++ ++ res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ goto out_err; ++ } ++ ++ inode = lo_find(lo_data(req), &e->attr); ++ if (inode) { ++ close(newfd); ++ newfd = -1; ++ } else { ++ struct lo_inode *prev, *next; ++ ++ saverr = ENOMEM; ++ inode = calloc(1, 
sizeof(struct lo_inode)); ++ if (!inode) { ++ goto out_err; ++ } ++ ++ inode->is_symlink = S_ISLNK(e->attr.st_mode); ++ inode->refcount = 1; ++ inode->fd = newfd; ++ inode->ino = e->attr.st_ino; ++ inode->dev = e->attr.st_dev; ++ ++ pthread_mutex_lock(&lo->mutex); ++ prev = &lo->root; ++ next = prev->next; ++ next->prev = inode; ++ inode->next = next; ++ inode->prev = prev; ++ prev->next = inode; ++ pthread_mutex_unlock(&lo->mutex); ++ } ++ e->ino = (uintptr_t)inode; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long)parent, name, (unsigned long long)e->ino); ++ } ++ ++ return 0; + + out_err: +- saverr = errno; +- if (newfd != -1) +- close(newfd); +- return saverr; ++ saverr = errno; ++ if (newfd != -1) { ++ close(newfd); ++ } ++ return saverr; + } + + static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + { +- struct fuse_entry_param e; +- int err; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", +- parent, name); +- +- err = lo_do_lookup(req, parent, name, &e); +- if (err) +- fuse_reply_err(req, err); +- else +- fuse_reply_entry(req, &e); ++ struct fuse_entry_param e; ++ int err; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", ++ parent, name); ++ } ++ ++ err = lo_do_lookup(req, parent, name, &e); ++ if (err) { ++ fuse_reply_err(req, err); ++ } else { ++ fuse_reply_entry(req, &e); ++ } + } + + static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, +- const char *name, mode_t mode, dev_t rdev, +- const char *link) ++ const char *name, mode_t mode, dev_t rdev, ++ const char *link) + { +- int res; +- int saverr; +- struct lo_inode *dir = lo_inode(req, parent); +- struct fuse_entry_param e; ++ int res; ++ int saverr; ++ struct lo_inode *dir = lo_inode(req, parent); ++ struct fuse_entry_param e; + +- saverr = ENOMEM; ++ saverr = ENOMEM; + +- res = mknod_wrapper(dir->fd, name, link, mode, 
rdev); ++ res = mknod_wrapper(dir->fd, name, link, mode, rdev); + +- saverr = errno; +- if (res == -1) +- goto out; ++ saverr = errno; ++ if (res == -1) { ++ goto out; ++ } + +- saverr = lo_do_lookup(req, parent, name, &e); +- if (saverr) +- goto out; ++ saverr = lo_do_lookup(req, parent, name, &e); ++ if (saverr) { ++ goto out; ++ } + +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long) parent, name, (unsigned long long) e.ino); ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long)parent, name, (unsigned long long)e.ino); ++ } + +- fuse_reply_entry(req, &e); +- return; ++ fuse_reply_entry(req, &e); ++ return; + + out: +- fuse_reply_err(req, saverr); ++ fuse_reply_err(req, saverr); + } + +-static void lo_mknod(fuse_req_t req, fuse_ino_t parent, +- const char *name, mode_t mode, dev_t rdev) ++static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, dev_t rdev) + { +- lo_mknod_symlink(req, parent, name, mode, rdev, NULL); ++ lo_mknod_symlink(req, parent, name, mode, rdev, NULL); + } + + static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode) ++ mode_t mode) + { +- lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); ++ lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); + } + +-static void lo_symlink(fuse_req_t req, const char *link, +- fuse_ino_t parent, const char *name) ++static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, ++ const char *name) + { +- lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); ++ lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); + } + + static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, +- const char *name) ++ const char *name) + { +- int res; +- char procname[64]; ++ int res; ++ char procname[64]; + +- if (inode->is_symlink) { +- res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); +- if (res == -1 && (errno == ENOENT || 
errno == EINVAL)) { +- /* Sorry, no race free way to hard-link a symlink. */ +- errno = EPERM; +- } +- return res; +- } ++ if (inode->is_symlink) { ++ res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); ++ if (res == -1 && (errno == ENOENT || errno == EINVAL)) { ++ /* Sorry, no race free way to hard-link a symlink. */ ++ errno = EPERM; ++ } ++ return res; ++ } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); + +- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); ++ return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); + } + + static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, +- const char *name) ++ const char *name) + { +- int res; +- struct lo_data *lo = lo_data(req); +- struct lo_inode *inode = lo_inode(req, ino); +- struct fuse_entry_param e; +- int saverr; +- +- memset(&e, 0, sizeof(struct fuse_entry_param)); +- e.attr_timeout = lo->timeout; +- e.entry_timeout = lo->timeout; +- +- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); +- if (res == -1) +- goto out_err; +- +- res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1) +- goto out_err; +- +- pthread_mutex_lock(&lo->mutex); +- inode->refcount++; +- pthread_mutex_unlock(&lo->mutex); +- e.ino = (uintptr_t) inode; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long) parent, name, +- (unsigned long long) e.ino); +- +- fuse_reply_entry(req, &e); +- return; ++ int res; ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); ++ struct fuse_entry_param e; ++ int saverr; ++ ++ memset(&e, 0, sizeof(struct fuse_entry_param)); ++ e.attr_timeout = lo->timeout; ++ e.entry_timeout = lo->timeout; ++ ++ res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); ++ if (res == -1) { ++ goto out_err; ++ } ++ ++ res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if 
(res == -1) { ++ goto out_err; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ inode->refcount++; ++ pthread_mutex_unlock(&lo->mutex); ++ e.ino = (uintptr_t)inode; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long)parent, name, (unsigned long long)e.ino); ++ } ++ ++ fuse_reply_entry(req, &e); ++ return; + + out_err: +- saverr = errno; +- fuse_reply_err(req, saverr); ++ saverr = errno; ++ fuse_reply_err(req, saverr); + } + + static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) + { +- int res; ++ int res; + +- res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); ++ res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); + +- fuse_reply_err(req, res == -1 ? errno : 0); ++ fuse_reply_err(req, res == -1 ? errno : 0); + } + + static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, +- fuse_ino_t newparent, const char *newname, +- unsigned int flags) ++ fuse_ino_t newparent, const char *newname, ++ unsigned int flags) + { +- int res; ++ int res; + +- if (flags) { +- fuse_reply_err(req, EINVAL); +- return; +- } ++ if (flags) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + +- res = renameat(lo_fd(req, parent), name, +- lo_fd(req, newparent), newname); ++ res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); + +- fuse_reply_err(req, res == -1 ? errno : 0); ++ fuse_reply_err(req, res == -1 ? errno : 0); + } + + static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + { +- int res; ++ int res; + +- res = unlinkat(lo_fd(req, parent), name, 0); ++ res = unlinkat(lo_fd(req, parent), name, 0); + +- fuse_reply_err(req, res == -1 ? errno : 0); ++ fuse_reply_err(req, res == -1 ? 
errno : 0); + } + + static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) + { +- if (!inode) +- return; +- +- pthread_mutex_lock(&lo->mutex); +- assert(inode->refcount >= n); +- inode->refcount -= n; +- if (!inode->refcount) { +- struct lo_inode *prev, *next; +- +- prev = inode->prev; +- next = inode->next; +- next->prev = prev; +- prev->next = next; +- +- pthread_mutex_unlock(&lo->mutex); +- close(inode->fd); +- free(inode); +- +- } else { +- pthread_mutex_unlock(&lo->mutex); +- } ++ if (!inode) { ++ return; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ assert(inode->refcount >= n); ++ inode->refcount -= n; ++ if (!inode->refcount) { ++ struct lo_inode *prev, *next; ++ ++ prev = inode->prev; ++ next = inode->next; ++ next->prev = prev; ++ prev->next = next; ++ ++ pthread_mutex_unlock(&lo->mutex); ++ close(inode->fd); ++ free(inode); ++ ++ } else { ++ pthread_mutex_unlock(&lo->mutex); ++ } + } + + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { +- struct lo_data *lo = lo_data(req); +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", +- (unsigned long long) ino, +- (unsigned long long) inode->refcount, +- (unsigned long long) nlookup); +- } ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", ++ (unsigned long long)ino, (unsigned long long)inode->refcount, ++ (unsigned long long)nlookup); ++ } + +- unref_inode(lo, inode, nlookup); ++ unref_inode(lo, inode, nlookup); + } + + static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { +- lo_forget_one(req, ino, nlookup); +- fuse_reply_none(req); ++ lo_forget_one(req, ino, nlookup); ++ fuse_reply_none(req); + } + + static void lo_forget_multi(fuse_req_t req, size_t count, +- struct fuse_forget_data *forgets) ++ struct fuse_forget_data *forgets) + { +- int 
i; ++ int i; + +- for (i = 0; i < count; i++) +- lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); +- fuse_reply_none(req); ++ for (i = 0; i < count; i++) { ++ lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); ++ } ++ fuse_reply_none(req); + } + + static void lo_readlink(fuse_req_t req, fuse_ino_t ino) + { +- char buf[PATH_MAX + 1]; +- int res; ++ char buf[PATH_MAX + 1]; ++ int res; + +- res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); +- if (res == -1) +- return (void) fuse_reply_err(req, errno); ++ res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); ++ if (res == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } + +- if (res == sizeof(buf)) +- return (void) fuse_reply_err(req, ENAMETOOLONG); ++ if (res == sizeof(buf)) { ++ return (void)fuse_reply_err(req, ENAMETOOLONG); ++ } + +- buf[res] = '\0'; ++ buf[res] = '\0'; + +- fuse_reply_readlink(req, buf); ++ fuse_reply_readlink(req, buf); + } + + struct lo_dirp { +- DIR *dp; +- struct dirent *entry; +- off_t offset; ++ DIR *dp; ++ struct dirent *entry; ++ off_t offset; + }; + + static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) + { +- return (struct lo_dirp *) (uintptr_t) fi->fh; ++ return (struct lo_dirp *)(uintptr_t)fi->fh; + } + +-static void lo_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++static void lo_opendir(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi) + { +- int error = ENOMEM; +- struct lo_data *lo = lo_data(req); +- struct lo_dirp *d; +- int fd; +- +- d = calloc(1, sizeof(struct lo_dirp)); +- if (d == NULL) +- goto out_err; +- +- fd = openat(lo_fd(req, ino), ".", O_RDONLY); +- if (fd == -1) +- goto out_errno; +- +- d->dp = fdopendir(fd); +- if (d->dp == NULL) +- goto out_errno; +- +- d->offset = 0; +- d->entry = NULL; +- +- fi->fh = (uintptr_t) d; +- if (lo->cache == CACHE_ALWAYS) +- fi->keep_cache = 1; +- fuse_reply_open(req, fi); +- return; ++ int error = ENOMEM; ++ struct lo_data *lo = lo_data(req); ++ struct lo_dirp *d; ++ int 
fd; ++ ++ d = calloc(1, sizeof(struct lo_dirp)); ++ if (d == NULL) { ++ goto out_err; ++ } ++ ++ fd = openat(lo_fd(req, ino), ".", O_RDONLY); ++ if (fd == -1) { ++ goto out_errno; ++ } ++ ++ d->dp = fdopendir(fd); ++ if (d->dp == NULL) { ++ goto out_errno; ++ } ++ ++ d->offset = 0; ++ d->entry = NULL; ++ ++ fi->fh = (uintptr_t)d; ++ if (lo->cache == CACHE_ALWAYS) { ++ fi->keep_cache = 1; ++ } ++ fuse_reply_open(req, fi); ++ return; + + out_errno: +- error = errno; ++ error = errno; + out_err: +- if (d) { +- if (fd != -1) +- close(fd); +- free(d); +- } +- fuse_reply_err(req, error); ++ if (d) { ++ if (fd != -1) { ++ close(fd); ++ } ++ free(d); ++ } ++ fuse_reply_err(req, error); + } + + static int is_dot_or_dotdot(const char *name) + { +- return name[0] == '.' && (name[1] == '\0' || +- (name[1] == '.' && name[2] == '\0')); ++ return name[0] == '.' && ++ (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); + } + + static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, +- off_t offset, struct fuse_file_info *fi, int plus) ++ off_t offset, struct fuse_file_info *fi, int plus) + { +- struct lo_dirp *d = lo_dirp(fi); +- char *buf; +- char *p; +- size_t rem = size; +- int err; +- +- (void) ino; +- +- buf = calloc(1, size); +- if (!buf) { +- err = ENOMEM; +- goto error; +- } +- p = buf; +- +- if (offset != d->offset) { +- seekdir(d->dp, offset); +- d->entry = NULL; +- d->offset = offset; +- } +- while (1) { +- size_t entsize; +- off_t nextoff; +- const char *name; +- +- if (!d->entry) { +- errno = 0; +- d->entry = readdir(d->dp); +- if (!d->entry) { +- if (errno) { // Error +- err = errno; +- goto error; +- } else { // End of stream +- break; +- } +- } +- } +- nextoff = d->entry->d_off; +- name = d->entry->d_name; +- fuse_ino_t entry_ino = 0; +- if (plus) { +- struct fuse_entry_param e; +- if (is_dot_or_dotdot(name)) { +- e = (struct fuse_entry_param) { +- .attr.st_ino = d->entry->d_ino, +- .attr.st_mode = d->entry->d_type << 12, +- }; +- } else { +- 
err = lo_do_lookup(req, ino, name, &e); +- if (err) +- goto error; +- entry_ino = e.ino; +- } +- +- entsize = fuse_add_direntry_plus(req, p, rem, name, +- &e, nextoff); +- } else { +- struct stat st = { +- .st_ino = d->entry->d_ino, +- .st_mode = d->entry->d_type << 12, +- }; +- entsize = fuse_add_direntry(req, p, rem, name, +- &st, nextoff); +- } +- if (entsize > rem) { +- if (entry_ino != 0) +- lo_forget_one(req, entry_ino, 1); +- break; +- } +- +- p += entsize; +- rem -= entsize; +- +- d->entry = NULL; +- d->offset = nextoff; +- } ++ struct lo_dirp *d = lo_dirp(fi); ++ char *buf; ++ char *p; ++ size_t rem = size; ++ int err; ++ ++ (void)ino; ++ ++ buf = calloc(1, size); ++ if (!buf) { ++ err = ENOMEM; ++ goto error; ++ } ++ p = buf; ++ ++ if (offset != d->offset) { ++ seekdir(d->dp, offset); ++ d->entry = NULL; ++ d->offset = offset; ++ } ++ while (1) { ++ size_t entsize; ++ off_t nextoff; ++ const char *name; ++ ++ if (!d->entry) { ++ errno = 0; ++ d->entry = readdir(d->dp); ++ if (!d->entry) { ++ if (errno) { /* Error */ ++ err = errno; ++ goto error; ++ } else { /* End of stream */ ++ break; ++ } ++ } ++ } ++ nextoff = d->entry->d_off; ++ name = d->entry->d_name; ++ fuse_ino_t entry_ino = 0; ++ if (plus) { ++ struct fuse_entry_param e; ++ if (is_dot_or_dotdot(name)) { ++ e = (struct fuse_entry_param){ ++ .attr.st_ino = d->entry->d_ino, ++ .attr.st_mode = d->entry->d_type << 12, ++ }; ++ } else { ++ err = lo_do_lookup(req, ino, name, &e); ++ if (err) { ++ goto error; ++ } ++ entry_ino = e.ino; ++ } ++ ++ entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); ++ } else { ++ struct stat st = { ++ .st_ino = d->entry->d_ino, ++ .st_mode = d->entry->d_type << 12, ++ }; ++ entsize = fuse_add_direntry(req, p, rem, name, &st, nextoff); ++ } ++ if (entsize > rem) { ++ if (entry_ino != 0) { ++ lo_forget_one(req, entry_ino, 1); ++ } ++ break; ++ } ++ ++ p += entsize; ++ rem -= entsize; ++ ++ d->entry = NULL; ++ d->offset = nextoff; ++ } + + err = 0; + error: 
+- // If there's an error, we can only signal it if we haven't stored +- // any entries yet - otherwise we'd end up with wrong lookup +- // counts for the entries that are already in the buffer. So we +- // return what we've collected until that point. +- if (err && rem == size) +- fuse_reply_err(req, err); +- else +- fuse_reply_buf(req, buf, size - rem); ++ /* ++ * If there's an error, we can only signal it if we haven't stored ++ * any entries yet - otherwise we'd end up with wrong lookup ++ * counts for the entries that are already in the buffer. So we ++ * return what we've collected until that point. ++ */ ++ if (err && rem == size) { ++ fuse_reply_err(req, err); ++ } else { ++ fuse_reply_buf(req, buf, size - rem); ++ } + free(buf); + } + + static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, +- off_t offset, struct fuse_file_info *fi) ++ off_t offset, struct fuse_file_info *fi) + { +- lo_do_readdir(req, ino, size, offset, fi, 0); ++ lo_do_readdir(req, ino, size, offset, fi, 0); + } + + static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, +- off_t offset, struct fuse_file_info *fi) ++ off_t offset, struct fuse_file_info *fi) + { +- lo_do_readdir(req, ino, size, offset, fi, 1); ++ lo_do_readdir(req, ino, size, offset, fi, 1); + } + +-static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi) + { +- struct lo_dirp *d = lo_dirp(fi); +- (void) ino; +- closedir(d->dp); +- free(d); +- fuse_reply_err(req, 0); ++ struct lo_dirp *d = lo_dirp(fi); ++ (void)ino; ++ closedir(d->dp); ++ free(d); ++ fuse_reply_err(req, 0); + } + + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode, struct fuse_file_info *fi) ++ mode_t mode, struct fuse_file_info *fi) + { +- int fd; +- struct lo_data *lo = lo_data(req); +- struct fuse_entry_param e; +- int err; +- +- if (lo_debug(req)) +- 
fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", +- parent, name); +- +- fd = openat(lo_fd(req, parent), name, +- (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); +- if (fd == -1) +- return (void) fuse_reply_err(req, errno); +- +- fi->fh = fd; +- if (lo->cache == CACHE_NEVER) +- fi->direct_io = 1; +- else if (lo->cache == CACHE_ALWAYS) +- fi->keep_cache = 1; +- +- err = lo_do_lookup(req, parent, name, &e); +- if (err) +- fuse_reply_err(req, err); +- else +- fuse_reply_create(req, &e, fi); ++ int fd; ++ struct lo_data *lo = lo_data(req); ++ struct fuse_entry_param e; ++ int err; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", ++ parent, name); ++ } ++ ++ fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, ++ mode); ++ if (fd == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } ++ ++ fi->fh = fd; ++ if (lo->cache == CACHE_NEVER) { ++ fi->direct_io = 1; ++ } else if (lo->cache == CACHE_ALWAYS) { ++ fi->keep_cache = 1; ++ } ++ ++ err = lo_do_lookup(req, parent, name, &e); ++ if (err) { ++ fuse_reply_err(req, err); ++ } else { ++ fuse_reply_create(req, &e, fi); ++ } + } + + static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, +- struct fuse_file_info *fi) ++ struct fuse_file_info *fi) + { +- int res; +- int fd = dirfd(lo_dirp(fi)->dp); +- (void) ino; +- if (datasync) +- res = fdatasync(fd); +- else +- res = fsync(fd); +- fuse_reply_err(req, res == -1 ? errno : 0); ++ int res; ++ int fd = dirfd(lo_dirp(fi)->dp); ++ (void)ino; ++ if (datasync) { ++ res = fdatasync(fd); ++ } else { ++ res = fsync(fd); ++ } ++ fuse_reply_err(req, res == -1 ? 
errno : 0); + } + + static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { +- int fd; +- char buf[64]; +- struct lo_data *lo = lo_data(req); +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", +- ino, fi->flags); +- +- /* With writeback cache, kernel may send read requests even +- when userspace opened write-only */ +- if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { +- fi->flags &= ~O_ACCMODE; +- fi->flags |= O_RDWR; +- } +- +- /* With writeback cache, O_APPEND is handled by the kernel. +- This breaks atomicity (since the file may change in the +- underlying filesystem, so that the kernel's idea of the +- end of the file isn't accurate anymore). In this example, +- we just accept that. A more rigorous filesystem may want +- to return an error here */ +- if (lo->writeback && (fi->flags & O_APPEND)) +- fi->flags &= ~O_APPEND; +- +- sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); +- fd = open(buf, fi->flags & ~O_NOFOLLOW); +- if (fd == -1) +- return (void) fuse_reply_err(req, errno); +- +- fi->fh = fd; +- if (lo->cache == CACHE_NEVER) +- fi->direct_io = 1; +- else if (lo->cache == CACHE_ALWAYS) +- fi->keep_cache = 1; +- fuse_reply_open(req, fi); ++ int fd; ++ char buf[64]; ++ struct lo_data *lo = lo_data(req); ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, ++ fi->flags); ++ } ++ ++ /* ++ * With writeback cache, kernel may send read requests even ++ * when userspace opened write-only ++ */ ++ if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { ++ fi->flags &= ~O_ACCMODE; ++ fi->flags |= O_RDWR; ++ } ++ ++ /* ++ * With writeback cache, O_APPEND is handled by the kernel. ++ * This breaks atomicity (since the file may change in the ++ * underlying filesystem, so that the kernel's idea of the ++ * end of the file isn't accurate anymore). In this example, ++ * we just accept that. 
A more rigorous filesystem may want ++ * to return an error here ++ */ ++ if (lo->writeback && (fi->flags & O_APPEND)) { ++ fi->flags &= ~O_APPEND; ++ } ++ ++ sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); ++ fd = open(buf, fi->flags & ~O_NOFOLLOW); ++ if (fd == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } ++ ++ fi->fh = fd; ++ if (lo->cache == CACHE_NEVER) { ++ fi->direct_io = 1; ++ } else if (lo->cache == CACHE_ALWAYS) { ++ fi->keep_cache = 1; ++ } ++ fuse_reply_open(req, fi); + } + +-static void lo_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++static void lo_release(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi) + { +- (void) ino; ++ (void)ino; + +- close(fi->fh); +- fuse_reply_err(req, 0); ++ close(fi->fh); ++ fuse_reply_err(req, 0); + } + + static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { +- int res; +- (void) ino; +- res = close(dup(fi->fh)); +- fuse_reply_err(req, res == -1 ? errno : 0); ++ int res; ++ (void)ino; ++ res = close(dup(fi->fh)); ++ fuse_reply_err(req, res == -1 ? errno : 0); + } + + static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, +- struct fuse_file_info *fi) ++ struct fuse_file_info *fi) + { +- int res; +- (void) ino; +- if (datasync) +- res = fdatasync(fi->fh); +- else +- res = fsync(fi->fh); +- fuse_reply_err(req, res == -1 ? errno : 0); ++ int res; ++ (void)ino; ++ if (datasync) { ++ res = fdatasync(fi->fh); ++ } else { ++ res = fsync(fi->fh); ++ } ++ fuse_reply_err(req, res == -1 ? 
errno : 0); + } + +-static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, +- off_t offset, struct fuse_file_info *fi) ++static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, ++ struct fuse_file_info *fi) + { +- struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); ++ struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); + +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_read(ino=%" PRIu64 ", size=%zd, " +- "off=%lu)\n", ino, size, (unsigned long) offset); ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_read(ino=%" PRIu64 ", size=%zd, " ++ "off=%lu)\n", ++ ino, size, (unsigned long)offset); ++ } + +- buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; +- buf.buf[0].fd = fi->fh; +- buf.buf[0].pos = offset; ++ buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; ++ buf.buf[0].fd = fi->fh; ++ buf.buf[0].pos = offset; + +- fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); ++ fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); + } + + static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, +- struct fuse_bufvec *in_buf, off_t off, +- struct fuse_file_info *fi) ++ struct fuse_bufvec *in_buf, off_t off, ++ struct fuse_file_info *fi) + { +- (void) ino; +- ssize_t res; +- struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); +- +- out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; +- out_buf.buf[0].fd = fi->fh; +- out_buf.buf[0].pos = off; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", +- ino, out_buf.buf[0].size, (unsigned long) off); +- +- res = fuse_buf_copy(&out_buf, in_buf, 0); +- if(res < 0) +- fuse_reply_err(req, -res); +- else +- fuse_reply_write(req, (size_t) res); ++ (void)ino; ++ ssize_t res; ++ struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); ++ ++ out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; ++ out_buf.buf[0].fd = fi->fh; ++ out_buf.buf[0].pos = off; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, ++ 
"lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, ++ out_buf.buf[0].size, (unsigned long)off); ++ } ++ ++ res = fuse_buf_copy(&out_buf, in_buf, 0); ++ if (res < 0) { ++ fuse_reply_err(req, -res); ++ } else { ++ fuse_reply_write(req, (size_t)res); ++ } + } + + static void lo_statfs(fuse_req_t req, fuse_ino_t ino) + { +- int res; +- struct statvfs stbuf; +- +- res = fstatvfs(lo_fd(req, ino), &stbuf); +- if (res == -1) +- fuse_reply_err(req, errno); +- else +- fuse_reply_statfs(req, &stbuf); ++ int res; ++ struct statvfs stbuf; ++ ++ res = fstatvfs(lo_fd(req, ino), &stbuf); ++ if (res == -1) { ++ fuse_reply_err(req, errno); ++ } else { ++ fuse_reply_statfs(req, &stbuf); ++ } + } + +-static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, +- off_t offset, off_t length, struct fuse_file_info *fi) ++static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, ++ off_t length, struct fuse_file_info *fi) + { +- int err = EOPNOTSUPP; +- (void) ino; ++ int err = EOPNOTSUPP; ++ (void)ino; + + #ifdef HAVE_FALLOCATE +- err = fallocate(fi->fh, mode, offset, length); +- if (err < 0) +- err = errno; ++ err = fallocate(fi->fh, mode, offset, length); ++ if (err < 0) { ++ err = errno; ++ } + + #elif defined(HAVE_POSIX_FALLOCATE) +- if (mode) { +- fuse_reply_err(req, EOPNOTSUPP); +- return; +- } ++ if (mode) { ++ fuse_reply_err(req, EOPNOTSUPP); ++ return; ++ } + +- err = posix_fallocate(fi->fh, offset, length); ++ err = posix_fallocate(fi->fh, offset, length); + #endif + +- fuse_reply_err(req, err); ++ fuse_reply_err(req, err); + } + + static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, +- int op) ++ int op) + { +- int res; +- (void) ino; ++ int res; ++ (void)ino; + +- res = flock(fi->fh, op); ++ res = flock(fi->fh, op); + +- fuse_reply_err(req, res == -1 ? errno : 0); ++ fuse_reply_err(req, res == -1 ? 
errno : 0); + } + + static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, +- size_t size) ++ size_t size) + { +- char *value = NULL; +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- ssize_t ret; +- int saverr; +- +- saverr = ENOSYS; +- if (!lo_data(req)->xattr) +- goto out; +- +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", +- ino, name, size); +- } +- +- if (inode->is_symlink) { +- /* Sorry, no race free way to getxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } +- +- sprintf(procname, "/proc/self/fd/%i", inode->fd); +- +- if (size) { +- value = malloc(size); +- if (!value) +- goto out_err; +- +- ret = getxattr(procname, name, value, size); +- if (ret == -1) +- goto out_err; +- saverr = 0; +- if (ret == 0) +- goto out; +- +- fuse_reply_buf(req, value, ret); +- } else { +- ret = getxattr(procname, name, NULL, 0); +- if (ret == -1) +- goto out_err; +- +- fuse_reply_xattr(req, ret); +- } ++ char *value = NULL; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) { ++ goto out; ++ } ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name, ++ size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to getxattr on symlink. 
*/ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ if (size) { ++ value = malloc(size); ++ if (!value) { ++ goto out_err; ++ } ++ ++ ret = getxattr(procname, name, value, size); ++ if (ret == -1) { ++ goto out_err; ++ } ++ saverr = 0; ++ if (ret == 0) { ++ goto out; ++ } ++ ++ fuse_reply_buf(req, value, ret); ++ } else { ++ ret = getxattr(procname, name, NULL, 0); ++ if (ret == -1) { ++ goto out_err; ++ } ++ ++ fuse_reply_xattr(req, ret); ++ } + out_free: +- free(value); +- return; ++ free(value); ++ return; + + out_err: +- saverr = errno; ++ saverr = errno; + out: +- fuse_reply_err(req, saverr); +- goto out_free; ++ fuse_reply_err(req, saverr); ++ goto out_free; + } + + static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + { +- char *value = NULL; +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- ssize_t ret; +- int saverr; +- +- saverr = ENOSYS; +- if (!lo_data(req)->xattr) +- goto out; +- +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", +- ino, size); +- } +- +- if (inode->is_symlink) { +- /* Sorry, no race free way to listxattr on symlink. 
*/ +- saverr = EPERM; +- goto out; +- } +- +- sprintf(procname, "/proc/self/fd/%i", inode->fd); +- +- if (size) { +- value = malloc(size); +- if (!value) +- goto out_err; +- +- ret = listxattr(procname, value, size); +- if (ret == -1) +- goto out_err; +- saverr = 0; +- if (ret == 0) +- goto out; +- +- fuse_reply_buf(req, value, ret); +- } else { +- ret = listxattr(procname, NULL, 0); +- if (ret == -1) +- goto out_err; +- +- fuse_reply_xattr(req, ret); +- } ++ char *value = NULL; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) { ++ goto out; ++ } ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ++ ino, size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to listxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ if (size) { ++ value = malloc(size); ++ if (!value) { ++ goto out_err; ++ } ++ ++ ret = listxattr(procname, value, size); ++ if (ret == -1) { ++ goto out_err; ++ } ++ saverr = 0; ++ if (ret == 0) { ++ goto out; ++ } ++ ++ fuse_reply_buf(req, value, ret); ++ } else { ++ ret = listxattr(procname, NULL, 0); ++ if (ret == -1) { ++ goto out_err; ++ } ++ ++ fuse_reply_xattr(req, ret); ++ } + out_free: +- free(value); +- return; ++ free(value); ++ return; + + out_err: +- saverr = errno; ++ saverr = errno; + out: +- fuse_reply_err(req, saverr); +- goto out_free; ++ fuse_reply_err(req, saverr); ++ goto out_free; + } + + static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, +- const char *value, size_t size, int flags) ++ const char *value, size_t size, int flags) + { +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- ssize_t ret; +- int saverr; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; + +- saverr = ENOSYS; +- if 
(!lo_data(req)->xattr) +- goto out; ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) { ++ goto out; ++ } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", +- ino, name, value, size); +- } ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", ++ ino, name, value, size); ++ } + +- if (inode->is_symlink) { +- /* Sorry, no race free way to setxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to setxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); + +- ret = setxattr(procname, name, value, size, flags); +- saverr = ret == -1 ? errno : 0; ++ ret = setxattr(procname, name, value, size, flags); ++ saverr = ret == -1 ? errno : 0; + + out: +- fuse_reply_err(req, saverr); ++ fuse_reply_err(req, saverr); + } + + static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + { +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- ssize_t ret; +- int saverr; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; + +- saverr = ENOSYS; +- if (!lo_data(req)->xattr) +- goto out; ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) { ++ goto out; ++ } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", +- ino, name); +- } ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ++ ino, name); ++ } + +- if (inode->is_symlink) { +- /* Sorry, no race free way to setxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to setxattr on symlink. 
*/ ++ saverr = EPERM; ++ goto out; ++ } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); + +- ret = removexattr(procname, name); +- saverr = ret == -1 ? errno : 0; ++ ret = removexattr(procname, name); ++ saverr = ret == -1 ? errno : 0; + + out: +- fuse_reply_err(req, saverr); ++ fuse_reply_err(req, saverr); + } + + #ifdef HAVE_COPY_FILE_RANGE + static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, +- struct fuse_file_info *fi_in, +- fuse_ino_t ino_out, off_t off_out, +- struct fuse_file_info *fi_out, size_t len, +- int flags) ++ struct fuse_file_info *fi_in, fuse_ino_t ino_out, ++ off_t off_out, struct fuse_file_info *fi_out, ++ size_t len, int flags) + { +- ssize_t res; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " +- "off=%lu, ino=%" PRIu64 "/fd=%lu, " +- "off=%lu, size=%zd, flags=0x%x)\n", +- ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, +- len, flags); +- +- res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, +- flags); +- if (res < 0) +- fuse_reply_err(req, -errno); +- else +- fuse_reply_write(req, res); ++ ssize_t res; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " ++ "off=%lu, ino=%" PRIu64 "/fd=%lu, " ++ "off=%lu, size=%zd, flags=0x%x)\n", ++ ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, len, ++ flags); ++ ++ res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, flags); ++ if (res < 0) { ++ fuse_reply_err(req, -errno); ++ } else { ++ fuse_reply_write(req, res); ++ } + } + #endif + + static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, +- struct fuse_file_info *fi) ++ struct fuse_file_info *fi) + { +- off_t res; +- +- (void)ino; +- res = lseek(fi->fh, off, whence); +- if (res != -1) +- fuse_reply_lseek(req, res); +- else +- fuse_reply_err(req, errno); ++ off_t res; ++ ++ (void)ino; ++ res = 
lseek(fi->fh, off, whence); ++ if (res != -1) { ++ fuse_reply_lseek(req, res); ++ } else { ++ fuse_reply_err(req, errno); ++ } + } + + static struct fuse_lowlevel_ops lo_oper = { +- .init = lo_init, +- .lookup = lo_lookup, +- .mkdir = lo_mkdir, +- .mknod = lo_mknod, +- .symlink = lo_symlink, +- .link = lo_link, +- .unlink = lo_unlink, +- .rmdir = lo_rmdir, +- .rename = lo_rename, +- .forget = lo_forget, +- .forget_multi = lo_forget_multi, +- .getattr = lo_getattr, +- .setattr = lo_setattr, +- .readlink = lo_readlink, +- .opendir = lo_opendir, +- .readdir = lo_readdir, +- .readdirplus = lo_readdirplus, +- .releasedir = lo_releasedir, +- .fsyncdir = lo_fsyncdir, +- .create = lo_create, +- .open = lo_open, +- .release = lo_release, +- .flush = lo_flush, +- .fsync = lo_fsync, +- .read = lo_read, +- .write_buf = lo_write_buf, +- .statfs = lo_statfs, +- .fallocate = lo_fallocate, +- .flock = lo_flock, +- .getxattr = lo_getxattr, +- .listxattr = lo_listxattr, +- .setxattr = lo_setxattr, +- .removexattr = lo_removexattr, ++ .init = lo_init, ++ .lookup = lo_lookup, ++ .mkdir = lo_mkdir, ++ .mknod = lo_mknod, ++ .symlink = lo_symlink, ++ .link = lo_link, ++ .unlink = lo_unlink, ++ .rmdir = lo_rmdir, ++ .rename = lo_rename, ++ .forget = lo_forget, ++ .forget_multi = lo_forget_multi, ++ .getattr = lo_getattr, ++ .setattr = lo_setattr, ++ .readlink = lo_readlink, ++ .opendir = lo_opendir, ++ .readdir = lo_readdir, ++ .readdirplus = lo_readdirplus, ++ .releasedir = lo_releasedir, ++ .fsyncdir = lo_fsyncdir, ++ .create = lo_create, ++ .open = lo_open, ++ .release = lo_release, ++ .flush = lo_flush, ++ .fsync = lo_fsync, ++ .read = lo_read, ++ .write_buf = lo_write_buf, ++ .statfs = lo_statfs, ++ .fallocate = lo_fallocate, ++ .flock = lo_flock, ++ .getxattr = lo_getxattr, ++ .listxattr = lo_listxattr, ++ .setxattr = lo_setxattr, ++ .removexattr = lo_removexattr, + #ifdef HAVE_COPY_FILE_RANGE +- .copy_file_range = lo_copy_file_range, ++ .copy_file_range = lo_copy_file_range, + 
#endif +- .lseek = lo_lseek, ++ .lseek = lo_lseek, + }; + + int main(int argc, char *argv[]) + { +- struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +- struct fuse_session *se; +- struct fuse_cmdline_opts opts; +- struct lo_data lo = { .debug = 0, +- .writeback = 0 }; +- int ret = -1; +- +- /* Don't mask creation mode, kernel already did that */ +- umask(0); +- +- pthread_mutex_init(&lo.mutex, NULL); +- lo.root.next = lo.root.prev = &lo.root; +- lo.root.fd = -1; +- lo.cache = CACHE_NORMAL; +- +- if (fuse_parse_cmdline(&args, &opts) != 0) +- return 1; +- if (opts.show_help) { +- printf("usage: %s [options] \n\n", argv[0]); +- fuse_cmdline_help(); +- fuse_lowlevel_help(); +- ret = 0; +- goto err_out1; +- } else if (opts.show_version) { +- fuse_lowlevel_version(); +- ret = 0; +- goto err_out1; +- } +- +- if(opts.mountpoint == NULL) { +- printf("usage: %s [options] \n", argv[0]); +- printf(" %s --help\n", argv[0]); +- ret = 1; +- goto err_out1; +- } +- +- if (fuse_opt_parse(&args, &lo, lo_opts, NULL)== -1) +- return 1; +- +- lo.debug = opts.debug; +- lo.root.refcount = 2; +- if (lo.source) { +- struct stat stat; +- int res; +- +- res = lstat(lo.source, &stat); +- if (res == -1) { +- fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", +- lo.source); +- exit(1); +- } +- if (!S_ISDIR(stat.st_mode)) { +- fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); +- exit(1); +- } +- +- } else { +- lo.source = "/"; +- } +- lo.root.is_symlink = false; +- if (!lo.timeout_set) { +- switch (lo.cache) { +- case CACHE_NEVER: +- lo.timeout = 0.0; +- break; +- +- case CACHE_NORMAL: +- lo.timeout = 1.0; +- break; +- +- case CACHE_ALWAYS: +- lo.timeout = 86400.0; +- break; +- } +- } else if (lo.timeout < 0) { +- fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", +- lo.timeout); +- exit(1); +- } +- +- lo.root.fd = open(lo.source, O_PATH); +- if (lo.root.fd == -1) { +- fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", +- lo.source); +- exit(1); +- } +- +- se = 
fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); +- if (se == NULL) +- goto err_out1; +- +- if (fuse_set_signal_handlers(se) != 0) +- goto err_out2; +- +- if (fuse_session_mount(se, opts.mountpoint) != 0) +- goto err_out3; +- +- fuse_daemonize(opts.foreground); +- +- /* Block until ctrl+c or fusermount -u */ +- if (opts.singlethread) +- ret = fuse_session_loop(se); +- else +- ret = fuse_session_loop_mt(se, opts.clone_fd); +- +- fuse_session_unmount(se); ++ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); ++ struct fuse_session *se; ++ struct fuse_cmdline_opts opts; ++ struct lo_data lo = { .debug = 0, .writeback = 0 }; ++ int ret = -1; ++ ++ /* Don't mask creation mode, kernel already did that */ ++ umask(0); ++ ++ pthread_mutex_init(&lo.mutex, NULL); ++ lo.root.next = lo.root.prev = &lo.root; ++ lo.root.fd = -1; ++ lo.cache = CACHE_NORMAL; ++ ++ if (fuse_parse_cmdline(&args, &opts) != 0) { ++ return 1; ++ } ++ if (opts.show_help) { ++ printf("usage: %s [options] \n\n", argv[0]); ++ fuse_cmdline_help(); ++ fuse_lowlevel_help(); ++ ret = 0; ++ goto err_out1; ++ } else if (opts.show_version) { ++ fuse_lowlevel_version(); ++ ret = 0; ++ goto err_out1; ++ } ++ ++ if (opts.mountpoint == NULL) { ++ printf("usage: %s [options] \n", argv[0]); ++ printf(" %s --help\n", argv[0]); ++ ret = 1; ++ goto err_out1; ++ } ++ ++ if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { ++ return 1; ++ } ++ ++ lo.debug = opts.debug; ++ lo.root.refcount = 2; ++ if (lo.source) { ++ struct stat stat; ++ int res; ++ ++ res = lstat(lo.source, &stat); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", ++ lo.source); ++ exit(1); ++ } ++ if (!S_ISDIR(stat.st_mode)) { ++ fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); ++ exit(1); ++ } ++ ++ } else { ++ lo.source = "/"; ++ } ++ lo.root.is_symlink = false; ++ if (!lo.timeout_set) { ++ switch (lo.cache) { ++ case CACHE_NEVER: ++ lo.timeout = 0.0; ++ break; ++ ++ case CACHE_NORMAL: ++ lo.timeout = 
1.0; ++ break; ++ ++ case CACHE_ALWAYS: ++ lo.timeout = 86400.0; ++ break; ++ } ++ } else if (lo.timeout < 0) { ++ fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout); ++ exit(1); ++ } ++ ++ lo.root.fd = open(lo.source, O_PATH); ++ if (lo.root.fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); ++ exit(1); ++ } ++ ++ se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); ++ if (se == NULL) { ++ goto err_out1; ++ } ++ ++ if (fuse_set_signal_handlers(se) != 0) { ++ goto err_out2; ++ } ++ ++ if (fuse_session_mount(se, opts.mountpoint) != 0) { ++ goto err_out3; ++ } ++ ++ fuse_daemonize(opts.foreground); ++ ++ /* Block until ctrl+c or fusermount -u */ ++ if (opts.singlethread) { ++ ret = fuse_session_loop(se); ++ } else { ++ ret = fuse_session_loop_mt(se, opts.clone_fd); ++ } ++ ++ fuse_session_unmount(se); + err_out3: +- fuse_remove_signal_handlers(se); ++ fuse_remove_signal_handlers(se); + err_out2: +- fuse_session_destroy(se); ++ fuse_session_destroy(se); + err_out1: +- free(opts.mountpoint); +- fuse_opt_free_args(&args); ++ free(opts.mountpoint); ++ fuse_opt_free_args(&args); + +- if (lo.root.fd >= 0) +- close(lo.root.fd); ++ if (lo.root.fd >= 0) { ++ close(lo.root.fd); ++ } + +- return ret ? 1 : 0; ++ return ret ? 1 : 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Handle-hard-reboot.patch b/SOURCES/kvm-virtiofsd-Handle-hard-reboot.patch new file mode 100644 index 0000000..8888030 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Handle-hard-reboot.patch @@ -0,0 +1,65 @@ +From 616407b06517361ce444dcc0960aeaf55b52da33 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:41 +0100 +Subject: [PATCH 070/116] virtiofsd: Handle hard reboot +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-67-dgilbert@redhat.com> +Patchwork-id: 93521 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 066/112] virtiofsd: Handle hard reboot +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Handle a + mount + hard reboot (without unmount) + mount + +we get another 'init' which FUSE doesn't normally expect. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e8556f49098b5d95634e592d79a97f761b76c96e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 7d742b5..65f91da 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2433,7 +2433,21 @@ void fuse_session_process_buf_int(struct fuse_session *se, + goto reply_err; + } + } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) { +- goto reply_err; ++ if (fuse_lowlevel_is_virtio(se)) { ++ /* ++ * TODO: This is after a hard reboot typically, we need to do ++ * a destroy, but we can't reply to this request yet so ++ * we can't use do_destroy ++ */ ++ fuse_log(FUSE_LOG_DEBUG, "%s: reinit\n", __func__); ++ se->got_destroy = 1; ++ se->got_init = 0; ++ if (se->op.destroy) { ++ se->op.destroy(se->userdata); ++ } ++ } else { ++ goto reply_err; ++ } + } + + err = EACCES; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Handle-reinit.patch b/SOURCES/kvm-virtiofsd-Handle-reinit.patch new file mode 100644 index 0000000..3f9577b --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Handle-reinit.patch @@ -0,0 +1,53 @@ +From 485adfa1aa1b3e2d1449edf5c42d6ec396cbfb5d Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:40 +0100 +Subject: [PATCH 069/116] virtiofsd: Handle reinit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-66-dgilbert@redhat.com> +Patchwork-id: 93520 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 065/112] virtiofsd: Handle reinit +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Allow init->destroy->init for mount->umount->mount + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit c806d6435fe95fd54b379920aca2f4e3ea1f3258) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index a7a1968..7d742b5 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2028,6 +2028,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + } + + se->got_init = 1; ++ se->got_destroy = 0; + if (se->op.init) { + se->op.init(se->userdata, &se->conn); + } +@@ -2130,6 +2131,7 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, + (void)iter; + + se->got_destroy = 1; ++ se->got_init = 0; + if (se->op.destroy) { + se->op.destroy(se->userdata); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Keep-track-of-replies.patch b/SOURCES/kvm-virtiofsd-Keep-track-of-replies.patch new file mode 100644 index 0000000..18be3e0 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Keep-track-of-replies.patch @@ -0,0 +1,116 @@ +From c818a1cb603cad07aa5c49ce808aa09435667c7c Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:04 +0100 +Subject: [PATCH 033/116] virtiofsd: Keep track of replies +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-30-dgilbert@redhat.com> +Patchwork-id: 93481 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 029/112] virtiofsd: Keep track of replies +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Keep track of whether we sent a reply to a request; this is a bit +paranoid but it means: + a) We should always recycle an element even if there was an error + in the request + b) Never try and send two replies on one queue element + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 2f65e69a7f22da8d20c747f34f339ebb40a0634f) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 23 ++++++++++++++++++++--- + 1 file changed, 20 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 05d0e29..f1adeb6 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -44,6 +44,7 @@ struct fv_QueueInfo { + + /* The element for the command currently being processed */ + VuVirtqElement *qe; ++ bool reply_sent; + }; + + /* +@@ -178,6 +179,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + { + VuVirtqElement *elem; + VuVirtq *q; ++ int ret = 0; + + assert(count >= 1); + assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); +@@ -191,6 +193,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + assert(out->unique); + /* For virtio we always have ch */ + assert(ch); ++ assert(!ch->qi->reply_sent); + elem = ch->qi->qe; + q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; + +@@ -208,19 +211,23 @@ int 
virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + if (in_len < sizeof(struct fuse_out_header)) { + fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", + __func__, elem->index); +- return -E2BIG; ++ ret = -E2BIG; ++ goto err; + } + if (in_len < tosend_len) { + fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", + __func__, elem->index, tosend_len); +- return -E2BIG; ++ ret = -E2BIG; ++ goto err; + } + + copy_iov(iov, count, in_sg, in_num, tosend_len); + vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); + vu_queue_notify(&se->virtio_dev->dev, q); ++ ch->qi->reply_sent = true; + +- return 0; ++err: ++ return ret; + } + + /* Thread function for individual queues, created when a queue is 'started' */ +@@ -296,6 +303,9 @@ static void *fv_queue_thread(void *opaque) + break; + } + ++ qi->qe = elem; ++ qi->reply_sent = false; ++ + if (!fbuf.mem) { + fbuf.mem = malloc(se->bufsize); + assert(fbuf.mem); +@@ -331,6 +341,13 @@ static void *fv_queue_thread(void *opaque) + /* TODO: Add checks for fuse_session_exited */ + fuse_session_process_buf_int(se, &fbuf, &ch); + ++ if (!qi->reply_sent) { ++ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", ++ __func__, elem->index); ++ /* I think we've still got to recycle the element */ ++ vu_queue_push(dev, q, elem, 0); ++ vu_queue_notify(dev, q); ++ } + qi->qe = NULL; + free(elem); + elem = NULL; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch b/SOURCES/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch new file mode 100644 index 0000000..5e054f3 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch @@ -0,0 +1,143 @@ +From b37344c38b866c7e7fb773b4a3172a39306bac7e Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:42 +0100 +Subject: [PATCH 071/116] virtiofsd: Kill threads when queues are stopped +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-68-dgilbert@redhat.com> +Patchwork-id: 93522 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 067/112] virtiofsd: Kill threads when queues are stopped +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Kill the threads we've started when the queues get stopped. + +Signed-off-by: Dr. David Alan Gilbert +With improvements by: +Signed-off-by: Eryu Guan +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 10477ac47fc57d00a84802ff97c15450cd8021c1) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 51 +++++++++++++++++++++++++++++++++++++------ + 1 file changed, 44 insertions(+), 7 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 872968f..7a8774a 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -41,6 +41,7 @@ struct fv_QueueInfo { + /* Our queue index, corresponds to array position */ + int qidx; + int kick_fd; ++ int kill_fd; /* For killing the thread */ + + /* The element for the command currently being processed */ + VuVirtqElement *qe; +@@ -412,14 +413,17 @@ static void *fv_queue_thread(void *opaque) + fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, + qi->qidx, qi->kick_fd); + while (1) { +- struct pollfd pf[1]; ++ struct pollfd pf[2]; + pf[0].fd = qi->kick_fd; + pf[0].events = POLLIN; + pf[0].revents = 0; ++ pf[1].fd = qi->kill_fd; ++ pf[1].events = POLLIN; ++ pf[1].revents = 0; + + fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__, + qi->qidx); +- int poll_res = ppoll(pf, 1, 
NULL, NULL); ++ int poll_res = ppoll(pf, 2, NULL, NULL); + + if (poll_res == -1) { + if (errno == EINTR) { +@@ -430,12 +434,23 @@ static void *fv_queue_thread(void *opaque) + fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n"); + break; + } +- assert(poll_res == 1); ++ assert(poll_res >= 1); + if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { + fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n", + __func__, pf[0].revents, qi->qidx); + break; + } ++ if (pf[1].revents & (POLLERR | POLLHUP | POLLNVAL)) { ++ fuse_log(FUSE_LOG_ERR, ++ "%s: Unexpected poll revents %x Queue %d killfd\n", ++ __func__, pf[1].revents, qi->qidx); ++ break; ++ } ++ if (pf[1].revents) { ++ fuse_log(FUSE_LOG_INFO, "%s: kill event on queue %d - quitting\n", ++ __func__, qi->qidx); ++ break; ++ } + assert(pf[0].revents & POLLIN); + fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__, + qi->qidx); +@@ -589,6 +604,28 @@ out: + return NULL; + } + ++static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) ++{ ++ int ret; ++ struct fv_QueueInfo *ourqi; ++ ++ assert(qidx < vud->nqueues); ++ ourqi = vud->qi[qidx]; ++ ++ /* Kill the thread */ ++ if (eventfd_write(ourqi->kill_fd, 1)) { ++ fuse_log(FUSE_LOG_ERR, "Eventfd_write for queue %d: %s\n", ++ qidx, strerror(errno)); ++ } ++ ret = pthread_join(ourqi->thread, NULL); ++ if (ret) { ++ fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n", ++ __func__, qidx, ret); ++ } ++ close(ourqi->kill_fd); ++ ourqi->kick_fd = -1; ++} ++ + /* Callback from libvhost-user on start or stop of a queue */ + static void fv_queue_set_started(VuDev *dev, int qidx, bool started) + { +@@ -633,16 +670,16 @@ static void fv_queue_set_started(VuDev *dev, int qidx, bool started) + } + ourqi = vud->qi[qidx]; + ourqi->kick_fd = dev->vq[qidx].kick_fd; ++ ++ ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE); ++ assert(ourqi->kill_fd != -1); + if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { + 
fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", + __func__, qidx); + assert(0); + } + } else { +- /* TODO: Kill the thread */ +- assert(qidx < vud->nqueues); +- ourqi = vud->qi[qidx]; +- ourqi->kick_fd = -1; ++ fv_queue_cleanup_thread(vud, qidx); + } + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch b/SOURCES/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch new file mode 100644 index 0000000..98211cb --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch @@ -0,0 +1,96 @@ +From f09f13f9a001a50ee3465c165f4bbaf870fcadb9 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:53 +0100 +Subject: [PATCH 022/116] virtiofsd: Make fsync work even if only inode is + passed in +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-19-dgilbert@redhat.com> +Patchwork-id: 93472 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 018/112] virtiofsd: Make fsync work even if only inode is passed in +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +If caller has not sent file handle in request, then using inode, retrieve +the fd opened using O_PATH and use that to open file again and issue +fsync. This will be needed when dax_flush() calls fsync. At that time +we only have inode information (and not file). + +Signed-off-by: Vivek Goyal +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 1b209805f8159c3f4d89ddb9390a5f64887cebff) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 6 +++++- + tools/virtiofsd/passthrough_ll.c | 28 ++++++++++++++++++++++++++-- + 2 files changed, 31 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 514d79c..8552cfb 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1075,7 +1075,11 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + fi.fh = arg->fh; + + if (req->se->op.fsync) { +- req->se->op.fsync(req, nodeid, datasync, &fi); ++ if (fi.fh == (uint64_t)-1) { ++ req->se->op.fsync(req, nodeid, datasync, NULL); ++ } else { ++ req->se->op.fsync(req, nodeid, datasync, &fi); ++ } + } else { + fuse_reply_err(req, ENOSYS); + } +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 6c4da18..26ac870 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -903,10 +903,34 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + { + int res; + (void)ino; ++ int fd; ++ char *buf; ++ ++ fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, ++ (void *)fi); ++ ++ if (!fi) { ++ res = asprintf(&buf, "/proc/self/fd/%i", lo_fd(req, ino)); ++ if (res == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } ++ ++ fd = open(buf, O_RDWR); ++ free(buf); ++ if (fd == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } ++ } else { ++ fd = fi->fh; ++ } ++ + if (datasync) { +- res = fdatasync(fi->fh); ++ res = fdatasync(fd); + } else { +- res = fsync(fi->fh); ++ res = fsync(fd); ++ } ++ if (!fi) { ++ close(fd); + } + fuse_reply_err(req, res == -1 ? 
errno : 0); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch b/SOURCES/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch new file mode 100644 index 0000000..2c9874d --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch @@ -0,0 +1,257 @@ +From a96042f05eaf494fbe26a9cbd940f5f815f782f9 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:56 +0100 +Subject: [PATCH 025/116] virtiofsd: Open vhost connection instead of mounting +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-22-dgilbert@redhat.com> +Patchwork-id: 93476 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 021/112] virtiofsd: Open vhost connection instead of mounting +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +When run with vhost-user options we connect to the QEMU instead +via a socket. Start this off by creating the socket. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit d14bf584dd965821e80d14c16d9292a464b1ab85) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 7 ++-- + tools/virtiofsd/fuse_lowlevel.c | 55 ++++------------------------ + tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_virtio.h | 23 ++++++++++++ + 4 files changed, 114 insertions(+), 50 deletions(-) + create mode 100644 tools/virtiofsd/fuse_virtio.c + create mode 100644 tools/virtiofsd/fuse_virtio.h + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 26b1a7d..82d6ac7 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -6,9 +6,10 @@ + * See the file COPYING.LIB + */ + +-#define FUSE_USE_VERSION 31 +- ++#ifndef FUSE_I_H ++#define FUSE_I_H + ++#define FUSE_USE_VERSION 31 + #include "fuse.h" + #include "fuse_lowlevel.h" + +@@ -101,3 +102,5 @@ void fuse_session_process_buf_int(struct fuse_session *se, + + /* room needed in buffer to accommodate header */ + #define FUSE_BUFFER_HEADER_SIZE 0x1000 ++ ++#endif +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 17e8718..5df124e 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -14,6 +14,7 @@ + #include "standard-headers/linux/fuse.h" + #include "fuse_misc.h" + #include "fuse_opt.h" ++#include "fuse_virtio.h" + + #include + #include +@@ -2202,6 +2203,11 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + goto out4; + } + ++ if (!se->vu_socket_path) { ++ fprintf(stderr, "fuse: missing -o vhost_user_socket option\n"); ++ goto out4; ++ } ++ + se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE; + + list_init_req(&se->list); +@@ -2224,54 +2230,7 @@ out1: + + int fuse_session_mount(struct fuse_session *se) + { +- int fd; +- +- /* +- * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos +- * would ensue. 
+- */ +- do { +- fd = open("/dev/null", O_RDWR); +- if (fd > 2) { +- close(fd); +- } +- } while (fd >= 0 && fd <= 2); +- +- /* +- * To allow FUSE daemons to run without privileges, the caller may open +- * /dev/fuse before launching the file system and pass on the file +- * descriptor by specifying /dev/fd/N as the mount point. Note that the +- * parent process takes care of performing the mount in this case. +- */ +- fd = fuse_mnt_parse_fuse_fd(mountpoint); +- if (fd != -1) { +- if (fcntl(fd, F_GETFD) == -1) { +- fuse_log(FUSE_LOG_ERR, "fuse: Invalid file descriptor /dev/fd/%u\n", +- fd); +- return -1; +- } +- se->fd = fd; +- return 0; +- } +- +- /* Open channel */ +- fd = fuse_kern_mount(mountpoint, se->mo); +- if (fd == -1) { +- return -1; +- } +- se->fd = fd; +- +- /* Save mountpoint */ +- se->mountpoint = strdup(mountpoint); +- if (se->mountpoint == NULL) { +- goto error_out; +- } +- +- return 0; +- +-error_out: +- fuse_kern_unmount(mountpoint, fd); +- return -1; ++ return virtio_session_mount(se); + } + + int fuse_session_fd(struct fuse_session *se) +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +new file mode 100644 +index 0000000..cbef6ff +--- /dev/null ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -0,0 +1,79 @@ ++/* ++ * virtio-fs glue for FUSE ++ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates ++ * ++ * Authors: ++ * Dave Gilbert ++ * ++ * Implements the glue between libfuse and libvhost-user ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ ++ ++#include "fuse_i.h" ++#include "standard-headers/linux/fuse.h" ++#include "fuse_misc.h" ++#include "fuse_opt.h" ++#include "fuse_virtio.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* From spec */ ++struct virtio_fs_config { ++ char tag[36]; ++ uint32_t num_queues; ++}; ++ ++int virtio_session_mount(struct fuse_session *se) ++{ ++ struct sockaddr_un un; ++ mode_t old_umask; ++ ++ if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) { ++ fuse_log(FUSE_LOG_ERR, "Socket path too long\n"); ++ return -1; ++ } ++ ++ se->fd = -1; ++ ++ /* ++ * Create the Unix socket to communicate with qemu ++ * based on QEMU's vhost-user-bridge ++ */ ++ unlink(se->vu_socket_path); ++ strcpy(un.sun_path, se->vu_socket_path); ++ size_t addr_len = sizeof(un); ++ ++ int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0); ++ if (listen_sock == -1) { ++ fuse_log(FUSE_LOG_ERR, "vhost socket creation: %m\n"); ++ return -1; ++ } ++ un.sun_family = AF_UNIX; ++ ++ /* ++ * Unfortunately bind doesn't let you set the mask on the socket, ++ * so set umask to 077 and restore it later. ++ */ ++ old_umask = umask(0077); ++ if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) { ++ fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n"); ++ umask(old_umask); ++ return -1; ++ } ++ umask(old_umask); ++ ++ if (listen(listen_sock, 1) == -1) { ++ fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n"); ++ return -1; ++ } ++ ++ return -1; ++} +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +new file mode 100644 +index 0000000..8f2edb6 +--- /dev/null ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -0,0 +1,23 @@ ++/* ++ * virtio-fs glue for FUSE ++ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates ++ * ++ * Authors: ++ * Dave Gilbert ++ * ++ * Implements the glue between libfuse and libvhost-user ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ ++ ++#ifndef FUSE_VIRTIO_H ++#define FUSE_VIRTIO_H ++ ++#include "fuse_i.h" ++ ++struct fuse_session; ++ ++int virtio_session_mount(struct fuse_session *se); ++ ++#endif +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch b/SOURCES/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch new file mode 100644 index 0000000..8d8de78 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch @@ -0,0 +1,76 @@ +From ade3dcad8a907d281549b341a8908851e36ba458 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:31 +0100 +Subject: [PATCH 060/116] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-57-dgilbert@redhat.com> +Patchwork-id: 93505 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 056/112] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +Caller can set FUSE_WRITE_KILL_PRIV in write_flags. Parse it and pass it +to the filesystem. + +Signed-off-by: Vivek Goyal +Reviewed-by: Misono Tomohiro +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit f779bc5265e7e7abb13a03d4bfbc74151afc15c2) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_common.h | 6 +++++- + tools/virtiofsd/fuse_lowlevel.c | 4 +++- + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index f8f6433..686c42c 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -93,8 +93,12 @@ struct fuse_file_info { + */ + unsigned int cache_readdir:1; + ++ /* Indicates that suid/sgid bits should be removed upon write */ ++ unsigned int kill_priv:1; ++ ++ + /** Padding. Reserved for future use*/ +- unsigned int padding:25; ++ unsigned int padding:24; + unsigned int padding2:32; + + /* +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 02e1d83..2d6dc5a 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1142,6 +1142,7 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; ++ fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV); + + fi.lock_owner = arg->lock_owner; + fi.flags = arg->flags; +@@ -1177,7 +1178,8 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, + fi.lock_owner = arg->lock_owner; + fi.flags = arg->flags; + fi.fh = arg->fh; +- fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; ++ fi.writepage = !!(arg->write_flags & FUSE_WRITE_CACHE); ++ fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV); + + if (ibufv->count == 1) { + assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch b/SOURCES/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch new file mode 100644 index 0000000..7d095c9 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch @@ -0,0 +1,140 @@ 
+From d5986c804f05070a07dfe702f7c66357daaa1ab6 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:20 +0100 +Subject: [PATCH 049/116] virtiofsd: Pass write iov's all the way through +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-46-dgilbert@redhat.com> +Patchwork-id: 93497 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 045/112] virtiofsd: Pass write iov's all the way through +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Pass the write iov pointing to guest RAM all the way through rather +than copying the data. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Xiao Yang +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e17f7a580e2c599330ad3a6946be615ca2fe97d9) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 73 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index fd588a4..872968f 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -454,6 +454,10 @@ static void *fv_queue_thread(void *opaque) + __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); + + while (1) { ++ bool allocated_bufv = false; ++ struct fuse_bufvec bufv; ++ struct fuse_bufvec *pbufv; ++ + /* + * An element contains one request and the space to send our + * response They're spread over multiple descriptors in a +@@ -495,14 +499,76 @@ static void *fv_queue_thread(void *opaque) + __func__, elem->index); + assert(0); /* TODO */ + } +- copy_from_iov(&fbuf, out_num, out_sg); +- fbuf.size = out_len; ++ /* Copy just the first element and look at it */ ++ copy_from_iov(&fbuf, 1, out_sg); ++ ++ if (out_num > 2 && ++ out_sg[0].iov_len == sizeof(struct 
fuse_in_header) && ++ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && ++ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { ++ /* ++ * For a write we don't actually need to copy the ++ * data, we can just do it straight out of guest memory ++ * but we must still copy the headers in case the guest ++ * was nasty and changed them while we were using them. ++ */ ++ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); ++ ++ /* copy the fuse_write_in header after the fuse_in_header */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; ++ ++ /* Allocate the bufv, with space for the rest of the iov */ ++ allocated_bufv = true; ++ pbufv = malloc(sizeof(struct fuse_bufvec) + ++ sizeof(struct fuse_buf) * (out_num - 2)); ++ if (!pbufv) { ++ vu_queue_unpop(dev, q, elem, 0); ++ free(elem); ++ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", ++ __func__); ++ goto out; ++ } ++ ++ pbufv->count = 1; ++ pbufv->buf[0] = fbuf; ++ ++ size_t iovindex, pbufvindex; ++ iovindex = 2; /* 2 headers, separate iovs */ ++ pbufvindex = 1; /* 2 headers, 1 fusebuf */ ++ ++ for (; iovindex < out_num; iovindex++, pbufvindex++) { ++ pbufv->count++; ++ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ ++ pbufv->buf[pbufvindex].flags = 0; ++ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; ++ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; ++ } ++ } else { ++ /* Normal (non fast write) path */ ++ ++ /* Copy the rest of the buffer */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, out_num - 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_len; + +- /* TODO! Endianness of header */ ++ /* TODO! 
Endianness of header */ + +- /* TODO: Add checks for fuse_session_exited */ +- struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 }; +- fuse_session_process_buf_int(se, &bufv, &ch); ++ /* TODO: Add checks for fuse_session_exited */ ++ bufv.buf[0] = fbuf; ++ bufv.count = 1; ++ pbufv = &bufv; ++ } ++ pbufv->idx = 0; ++ pbufv->off = 0; ++ fuse_session_process_buf_int(se, pbufv, &ch); ++ ++ if (allocated_bufv) { ++ free(pbufv); ++ } + + if (!qi->reply_sent) { + fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", +@@ -516,6 +582,7 @@ static void *fv_queue_thread(void *opaque) + elem = NULL; + } + } ++out: + pthread_mutex_destroy(&ch.lock); + free(fbuf.mem); + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch b/SOURCES/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch new file mode 100644 index 0000000..834ced1 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch @@ -0,0 +1,168 @@ +From 9e4320eec5204da851ac95fb7a7e6520c9ccee7d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:19 +0100 +Subject: [PATCH 048/116] virtiofsd: Plumb fuse_bufvec through to do_write_buf +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-45-dgilbert@redhat.com> +Patchwork-id: 93499 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 044/112] virtiofsd: Plumb fuse_bufvec through to do_write_buf +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Let fuse_session_process_buf_int take a fuse_bufvec * instead of a +fuse_buf; and then through to do_write_buf - where in the best +case it can pass that straight through to op.write_buf without copying +(other than skipping a header). + +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 469f9d2fc405b0508e6cf1b4b5bbcadfc82064e5) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 2 +- + tools/virtiofsd/fuse_lowlevel.c | 61 +++++++++++++++++++++++++++-------------- + tools/virtiofsd/fuse_virtio.c | 3 +- + 3 files changed, 44 insertions(+), 22 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 45995f3..a20854f 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -100,7 +100,7 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, + void fuse_free_req(fuse_req_t req); + + void fuse_session_process_buf_int(struct fuse_session *se, +- const struct fuse_buf *buf, ++ struct fuse_bufvec *bufv, + struct fuse_chan *ch); + + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 95f4db8..7e10995 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1004,11 +1004,12 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + + static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, +- const struct fuse_buf *ibuf) ++ struct fuse_bufvec *ibufv) + { + struct fuse_session *se = req->se; +- struct fuse_bufvec bufv = { +- .buf[0] = *ibuf, ++ struct fuse_bufvec *pbufv = ibufv; ++ struct fuse_bufvec tmpbufv = { ++ .buf[0] = ibufv->buf[0], + .count = 1, + }; + struct fuse_write_in *arg = (struct fuse_write_in *)inarg; +@@ -1018,22 +1019,31 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, + fi.fh = arg->fh; + fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; + +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { +- bufv.buf[0].mem = PARAM(arg); +- } +- +- bufv.buf[0].size -= +- sizeof(struct fuse_in_header) + sizeof(struct 
fuse_write_in); +- if (bufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); +- fuse_reply_err(req, EIO); +- return; ++ if (ibufv->count == 1) { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ if (!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)) { ++ tmpbufv.buf[0].mem = PARAM(arg); ++ } ++ tmpbufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); ++ if (tmpbufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: do_write_buf: buffer size too small\n"); ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ tmpbufv.buf[0].size = arg->size; ++ pbufv = &tmpbufv; ++ } else { ++ /* ++ * Input bufv contains the headers in the first element ++ * and the data in the rest, we need to skip that first element ++ */ ++ ibufv->buf[0].size = 0; + } +- bufv.buf[0].size = arg->size; + +- se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); ++ se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); + } + + static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +@@ -2024,13 +2034,24 @@ static const char *opname(enum fuse_opcode opcode) + void fuse_session_process_buf(struct fuse_session *se, + const struct fuse_buf *buf) + { +- fuse_session_process_buf_int(se, buf, NULL); ++ struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 }; ++ fuse_session_process_buf_int(se, &bufv, NULL); + } + ++/* ++ * Restriction: ++ * bufv is normally a single entry buffer, except for a write ++ * where (if it's in memory) then the bufv may be multiple entries, ++ * where the first entry contains all headers and subsequent entries ++ * contain data ++ * bufv shall not use any offsets etc to make the data anything ++ * other than contiguous starting from 0. 
++ */ + void fuse_session_process_buf_int(struct fuse_session *se, +- const struct fuse_buf *buf, ++ struct fuse_bufvec *bufv, + struct fuse_chan *ch) + { ++ const struct fuse_buf *buf = bufv->buf; + struct fuse_in_header *in; + const void *inarg; + struct fuse_req *req; +@@ -2108,7 +2129,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, + + inarg = (void *)&in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) { +- do_write_buf(req, in->nodeid, inarg, buf); ++ do_write_buf(req, in->nodeid, inarg, bufv); + } else { + fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); + } +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 635f877..fd588a4 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -501,7 +501,8 @@ static void *fv_queue_thread(void *opaque) + /* TODO! Endianness of header */ + + /* TODO: Add checks for fuse_session_exited */ +- fuse_session_process_buf_int(se, &fbuf, &ch); ++ struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 }; ++ fuse_session_process_buf_int(se, &bufv, &ch); + + if (!qi->reply_sent) { + fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Poll-kick_fd-for-queue.patch b/SOURCES/kvm-virtiofsd-Poll-kick_fd-for-queue.patch new file mode 100644 index 0000000..d7c6c0a --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Poll-kick_fd-for-queue.patch @@ -0,0 +1,97 @@ +From 083b944fac29bc3115a19eb38e176f6b23f04938 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:01 +0100 +Subject: [PATCH 030/116] virtiofsd: Poll kick_fd for queue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-27-dgilbert@redhat.com> +Patchwork-id: 93483 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 026/112] virtiofsd: Poll kick_fd for queue +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +In the queue thread poll the kick_fd we're passed. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 5dcd1f56141378226d33dc3df68ec57913e0aa04) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 40 +++++++++++++++++++++++++++++++++++++++- + 1 file changed, 39 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 2a94bb3..05e7258 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -100,13 +101,50 @@ static void fv_panic(VuDev *dev, const char *err) + exit(EXIT_FAILURE); + } + ++/* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { + struct fv_QueueInfo *qi = opaque; + fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, + qi->qidx, qi->kick_fd); + while (1) { +- /* TODO */ ++ struct pollfd pf[1]; ++ pf[0].fd = qi->kick_fd; ++ pf[0].events = POLLIN; ++ pf[0].revents = 0; ++ ++ fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__, ++ qi->qidx); ++ int poll_res = ppoll(pf, 1, NULL, NULL); ++ ++ if (poll_res == -1) { ++ if (errno == EINTR) { ++ fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n", ++ __func__); ++ continue; ++ } ++ fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n"); ++ break; ++ } ++ assert(poll_res == 1); ++ if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { ++ fuse_log(FUSE_LOG_ERR, "%s: 
Unexpected poll revents %x Queue %d\n", ++ __func__, pf[0].revents, qi->qidx); ++ break; ++ } ++ assert(pf[0].revents & POLLIN); ++ fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__, ++ qi->qidx); ++ ++ eventfd_t evalue; ++ if (eventfd_read(qi->kick_fd, &evalue)) { ++ fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); ++ break; ++ } ++ if (qi->virtio_dev->se->debug) { ++ fprintf(stderr, "%s: Queue %d gave evalue: %zx\n", __func__, ++ qi->qidx, (size_t)evalue); ++ } + } + + return NULL; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch b/SOURCES/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch new file mode 100644 index 0000000..d4e1ea1 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch @@ -0,0 +1,144 @@ +From ab336e3aea97d76c1b2ac725d19b4518f47dd8f0 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:59 +0100 +Subject: [PATCH 088/116] virtiofsd: Prevent multiply running with same + vhost_user_socket +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-85-dgilbert@redhat.com> +Patchwork-id: 93541 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 084/112] virtiofsd: Prevent multiply running with same vhost_user_socket +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Masayoshi Mizuma + +virtiofsd can run multiply even if the vhost_user_socket is same path. + + ]# ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu -o source=/tmp/share & + [1] 244965 + virtio_session_mount: Waiting for vhost-user socket connection... + ]# ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu -o source=/tmp/share & + [2] 244966 + virtio_session_mount: Waiting for vhost-user socket connection... 
+ ]# + +The user will get confused about the situation and maybe the cause of the +unexpected problem. So it's better to prevent the multiple running. + +Create a regular file under localstatedir directory to exclude the +vhost_user_socket. To create and lock the file, use qemu_write_pidfile() +because the API has some sanity checks and file lock. + +Signed-off-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert + Applied fixes from Stefan's review and moved osdep include +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 96814800d2b49d18737c36e021c387697ec40c62) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 1 + + tools/virtiofsd/fuse_virtio.c | 49 ++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 49 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 440508a..aac282f 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -18,6 +18,7 @@ + + #include + #include ++#include + #include + #include + #include +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index e7bd772..b7948de 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -13,11 +13,12 @@ + + #include "qemu/osdep.h" + #include "qemu/iov.h" +-#include "fuse_virtio.h" ++#include "qapi/error.h" + #include "fuse_i.h" + #include "standard-headers/linux/fuse.h" + #include "fuse_misc.h" + #include "fuse_opt.h" ++#include "fuse_virtio.h" + + #include + #include +@@ -743,6 +744,42 @@ int virtio_loop(struct fuse_session *se) + return 0; + } + ++static void strreplace(char *s, char old, char new) ++{ ++ for (; *s; ++s) { ++ if (*s == old) { ++ *s = new; ++ } ++ } ++} ++ ++static bool fv_socket_lock(struct fuse_session *se) ++{ ++ g_autofree gchar *sk_name = NULL; ++ g_autofree gchar *pidfile = NULL; ++ g_autofree gchar *dir = NULL; ++ 
Error *local_err = NULL; ++ ++ dir = qemu_get_local_state_pathname("run/virtiofsd"); ++ ++ if (g_mkdir_with_parents(dir, S_IRWXU) < 0) { ++ fuse_log(FUSE_LOG_ERR, "%s: Failed to create directory %s: %s", ++ __func__, dir, strerror(errno)); ++ return false; ++ } ++ ++ sk_name = g_strdup(se->vu_socket_path); ++ strreplace(sk_name, '/', '.'); ++ pidfile = g_strdup_printf("%s/%s.pid", dir, sk_name); ++ ++ if (!qemu_write_pidfile(pidfile, &local_err)) { ++ error_report_err(local_err); ++ return false; ++ } ++ ++ return true; ++} ++ + static int fv_create_listen_socket(struct fuse_session *se) + { + struct sockaddr_un un; +@@ -758,6 +795,16 @@ static int fv_create_listen_socket(struct fuse_session *se) + return -1; + } + ++ if (!strlen(se->vu_socket_path)) { ++ fuse_log(FUSE_LOG_ERR, "Socket path is empty\n"); ++ return -1; ++ } ++ ++ /* Check the vu_socket_path is already used */ ++ if (!fv_socket_lock(se)) { ++ return -1; ++ } ++ + /* + * Create the Unix socket to communicate with qemu + * based on QEMU's vhost-user-bridge +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch b/SOURCES/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch new file mode 100644 index 0000000..f30f23a --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch @@ -0,0 +1,945 @@ +From e7c1ad608117b21f80c762f5505a66b21c56e9d3 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:40 +0100 +Subject: [PATCH 009/116] virtiofsd: Pull in kernel's fuse.h +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-6-dgilbert@redhat.com> +Patchwork-id: 93460 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 005/112] virtiofsd: Pull in kernel's fuse.h +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. 
David Alan Gilbert" + +Update scripts/update-linux-headers.sh to add fuse.h and +use it to pull in fuse.h from the kernel; from v5.5-rc1 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit a62a9e192bc5f0aa0bc076b51db5a069add87c78) +Signed-off-by: Miroslav Rezanina +--- + include/standard-headers/linux/fuse.h | 891 ++++++++++++++++++++++++++++++++++ + scripts/update-linux-headers.sh | 1 + + 2 files changed, 892 insertions(+) + create mode 100644 include/standard-headers/linux/fuse.h + +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +new file mode 100644 +index 0000000..f4df0a4 +--- /dev/null ++++ b/include/standard-headers/linux/fuse.h +@@ -0,0 +1,891 @@ ++/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ ++/* ++ This file defines the kernel interface of FUSE ++ Copyright (C) 2001-2008 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU GPL. ++ See the file COPYING. ++ ++ This -- and only this -- header file may also be distributed under ++ the terms of the BSD Licence as follows: ++ ++ Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ 1. Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ 2. Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. 
++ ++ THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ++ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE ++ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ SUCH DAMAGE. ++*/ ++ ++/* ++ * This file defines the kernel interface of FUSE ++ * ++ * Protocol changelog: ++ * ++ * 7.1: ++ * - add the following messages: ++ * FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK, ++ * FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE, ++ * FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR, ++ * FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR, ++ * FUSE_RELEASEDIR ++ * - add padding to messages to accommodate 32-bit servers on 64-bit kernels ++ * ++ * 7.2: ++ * - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags ++ * - add FUSE_FSYNCDIR message ++ * ++ * 7.3: ++ * - add FUSE_ACCESS message ++ * - add FUSE_CREATE message ++ * - add filehandle to fuse_setattr_in ++ * ++ * 7.4: ++ * - add frsize to fuse_kstatfs ++ * - clean up request size limit checking ++ * ++ * 7.5: ++ * - add flags and max_write to fuse_init_out ++ * ++ * 7.6: ++ * - add max_readahead to fuse_init_in and fuse_init_out ++ * ++ * 7.7: ++ * - add FUSE_INTERRUPT message ++ * - add POSIX file lock support ++ * ++ * 7.8: ++ * - add lock_owner and flags fields to fuse_release_in ++ * - add FUSE_BMAP message ++ * - add FUSE_DESTROY message ++ * ++ * 7.9: ++ * - new 
fuse_getattr_in input argument of GETATTR ++ * - add lk_flags in fuse_lk_in ++ * - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in ++ * - add blksize field to fuse_attr ++ * - add file flags field to fuse_read_in and fuse_write_in ++ * - Add ATIME_NOW and MTIME_NOW flags to fuse_setattr_in ++ * ++ * 7.10 ++ * - add nonseekable open flag ++ * ++ * 7.11 ++ * - add IOCTL message ++ * - add unsolicited notification support ++ * - add POLL message and NOTIFY_POLL notification ++ * ++ * 7.12 ++ * - add umask flag to input argument of create, mknod and mkdir ++ * - add notification messages for invalidation of inodes and ++ * directory entries ++ * ++ * 7.13 ++ * - make max number of background requests and congestion threshold ++ * tunables ++ * ++ * 7.14 ++ * - add splice support to fuse device ++ * ++ * 7.15 ++ * - add store notify ++ * - add retrieve notify ++ * ++ * 7.16 ++ * - add BATCH_FORGET request ++ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct ++ * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' ++ * - add FUSE_IOCTL_32BIT flag ++ * ++ * 7.17 ++ * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK ++ * ++ * 7.18 ++ * - add FUSE_IOCTL_DIR flag ++ * - add FUSE_NOTIFY_DELETE ++ * ++ * 7.19 ++ * - add FUSE_FALLOCATE ++ * ++ * 7.20 ++ * - add FUSE_AUTO_INVAL_DATA ++ * ++ * 7.21 ++ * - add FUSE_READDIRPLUS ++ * - send the requested events in POLL request ++ * ++ * 7.22 ++ * - add FUSE_ASYNC_DIO ++ * ++ * 7.23 ++ * - add FUSE_WRITEBACK_CACHE ++ * - add time_gran to fuse_init_out ++ * - add reserved space to fuse_init_out ++ * - add FATTR_CTIME ++ * - add ctime and ctimensec to fuse_setattr_in ++ * - add FUSE_RENAME2 request ++ * - add FUSE_NO_OPEN_SUPPORT flag ++ * ++ * 7.24 ++ * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support ++ * ++ * 7.25 ++ * - add FUSE_PARALLEL_DIROPS ++ * ++ * 7.26 ++ * - add FUSE_HANDLE_KILLPRIV ++ * - add FUSE_POSIX_ACL ++ * ++ * 7.27 ++ * - add FUSE_ABORT_ERROR ++ * ++ * 7.28 ++ * - 
add FUSE_COPY_FILE_RANGE ++ * - add FOPEN_CACHE_DIR ++ * - add FUSE_MAX_PAGES, add max_pages to init_out ++ * - add FUSE_CACHE_SYMLINKS ++ * ++ * 7.29 ++ * - add FUSE_NO_OPENDIR_SUPPORT flag ++ * ++ * 7.30 ++ * - add FUSE_EXPLICIT_INVAL_DATA ++ * - add FUSE_IOCTL_COMPAT_X32 ++ * ++ * 7.31 ++ * - add FUSE_WRITE_KILL_PRIV flag ++ * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING ++ * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag ++ */ ++ ++#ifndef _LINUX_FUSE_H ++#define _LINUX_FUSE_H ++ ++#include ++ ++/* ++ * Version negotiation: ++ * ++ * Both the kernel and userspace send the version they support in the ++ * INIT request and reply respectively. ++ * ++ * If the major versions match then both shall use the smallest ++ * of the two minor versions for communication. ++ * ++ * If the kernel supports a larger major version, then userspace shall ++ * reply with the major version it supports, ignore the rest of the ++ * INIT message and expect a new INIT message from the kernel with a ++ * matching major version. ++ * ++ * If the library supports a larger major version, then it shall fall ++ * back to the major protocol version sent by the kernel for ++ * communication and reply with that major version (and an arbitrary ++ * supported minor version). 
++ */ ++ ++/** Version number of this interface */ ++#define FUSE_KERNEL_VERSION 7 ++ ++/** Minor version number of this interface */ ++#define FUSE_KERNEL_MINOR_VERSION 31 ++ ++/** The node ID of the root inode */ ++#define FUSE_ROOT_ID 1 ++ ++/* Make sure all structures are padded to 64bit boundary, so 32bit ++ userspace works under 64bit kernels */ ++ ++struct fuse_attr { ++ uint64_t ino; ++ uint64_t size; ++ uint64_t blocks; ++ uint64_t atime; ++ uint64_t mtime; ++ uint64_t ctime; ++ uint32_t atimensec; ++ uint32_t mtimensec; ++ uint32_t ctimensec; ++ uint32_t mode; ++ uint32_t nlink; ++ uint32_t uid; ++ uint32_t gid; ++ uint32_t rdev; ++ uint32_t blksize; ++ uint32_t padding; ++}; ++ ++struct fuse_kstatfs { ++ uint64_t blocks; ++ uint64_t bfree; ++ uint64_t bavail; ++ uint64_t files; ++ uint64_t ffree; ++ uint32_t bsize; ++ uint32_t namelen; ++ uint32_t frsize; ++ uint32_t padding; ++ uint32_t spare[6]; ++}; ++ ++struct fuse_file_lock { ++ uint64_t start; ++ uint64_t end; ++ uint32_t type; ++ uint32_t pid; /* tgid */ ++}; ++ ++/** ++ * Bitmasks for fuse_setattr_in.valid ++ */ ++#define FATTR_MODE (1 << 0) ++#define FATTR_UID (1 << 1) ++#define FATTR_GID (1 << 2) ++#define FATTR_SIZE (1 << 3) ++#define FATTR_ATIME (1 << 4) ++#define FATTR_MTIME (1 << 5) ++#define FATTR_FH (1 << 6) ++#define FATTR_ATIME_NOW (1 << 7) ++#define FATTR_MTIME_NOW (1 << 8) ++#define FATTR_LOCKOWNER (1 << 9) ++#define FATTR_CTIME (1 << 10) ++ ++/** ++ * Flags returned by the OPEN request ++ * ++ * FOPEN_DIRECT_IO: bypass page cache for this open file ++ * FOPEN_KEEP_CACHE: don't invalidate the data cache on open ++ * FOPEN_NONSEEKABLE: the file is not seekable ++ * FOPEN_CACHE_DIR: allow caching this directory ++ * FOPEN_STREAM: the file is stream-like (no file position at all) ++ */ ++#define FOPEN_DIRECT_IO (1 << 0) ++#define FOPEN_KEEP_CACHE (1 << 1) ++#define FOPEN_NONSEEKABLE (1 << 2) ++#define FOPEN_CACHE_DIR (1 << 3) ++#define FOPEN_STREAM (1 << 4) ++ ++/** ++ * INIT 
request/reply flags ++ * ++ * FUSE_ASYNC_READ: asynchronous read requests ++ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks ++ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported) ++ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem ++ * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." ++ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB ++ * FUSE_DONT_MASK: don't apply umask to file mode on create operations ++ * FUSE_SPLICE_WRITE: kernel supports splice write on the device ++ * FUSE_SPLICE_MOVE: kernel supports splice move on the device ++ * FUSE_SPLICE_READ: kernel supports splice read on the device ++ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks ++ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories ++ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages ++ * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) ++ * FUSE_READDIRPLUS_AUTO: adaptive readdirplus ++ * FUSE_ASYNC_DIO: asynchronous direct I/O submission ++ * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes ++ * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens ++ * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir ++ * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc ++ * FUSE_POSIX_ACL: filesystem supports posix acls ++ * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED ++ * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages ++ * FUSE_CACHE_SYMLINKS: cache READLINK responses ++ * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir ++ * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request ++ * FUSE_MAP_ALIGNMENT: map_alignment field is valid ++ */ ++#define FUSE_ASYNC_READ (1 << 0) ++#define FUSE_POSIX_LOCKS (1 << 1) ++#define FUSE_FILE_OPS (1 << 2) ++#define FUSE_ATOMIC_O_TRUNC (1 << 3) ++#define FUSE_EXPORT_SUPPORT (1 << 4) ++#define 
FUSE_BIG_WRITES (1 << 5) ++#define FUSE_DONT_MASK (1 << 6) ++#define FUSE_SPLICE_WRITE (1 << 7) ++#define FUSE_SPLICE_MOVE (1 << 8) ++#define FUSE_SPLICE_READ (1 << 9) ++#define FUSE_FLOCK_LOCKS (1 << 10) ++#define FUSE_HAS_IOCTL_DIR (1 << 11) ++#define FUSE_AUTO_INVAL_DATA (1 << 12) ++#define FUSE_DO_READDIRPLUS (1 << 13) ++#define FUSE_READDIRPLUS_AUTO (1 << 14) ++#define FUSE_ASYNC_DIO (1 << 15) ++#define FUSE_WRITEBACK_CACHE (1 << 16) ++#define FUSE_NO_OPEN_SUPPORT (1 << 17) ++#define FUSE_PARALLEL_DIROPS (1 << 18) ++#define FUSE_HANDLE_KILLPRIV (1 << 19) ++#define FUSE_POSIX_ACL (1 << 20) ++#define FUSE_ABORT_ERROR (1 << 21) ++#define FUSE_MAX_PAGES (1 << 22) ++#define FUSE_CACHE_SYMLINKS (1 << 23) ++#define FUSE_NO_OPENDIR_SUPPORT (1 << 24) ++#define FUSE_EXPLICIT_INVAL_DATA (1 << 25) ++#define FUSE_MAP_ALIGNMENT (1 << 26) ++ ++/** ++ * CUSE INIT request/reply flags ++ * ++ * CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl ++ */ ++#define CUSE_UNRESTRICTED_IOCTL (1 << 0) ++ ++/** ++ * Release flags ++ */ ++#define FUSE_RELEASE_FLUSH (1 << 0) ++#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) ++ ++/** ++ * Getattr flags ++ */ ++#define FUSE_GETATTR_FH (1 << 0) ++ ++/** ++ * Lock flags ++ */ ++#define FUSE_LK_FLOCK (1 << 0) ++ ++/** ++ * WRITE flags ++ * ++ * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed ++ * FUSE_WRITE_LOCKOWNER: lock_owner field is valid ++ * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits ++ */ ++#define FUSE_WRITE_CACHE (1 << 0) ++#define FUSE_WRITE_LOCKOWNER (1 << 1) ++#define FUSE_WRITE_KILL_PRIV (1 << 2) ++ ++/** ++ * Read flags ++ */ ++#define FUSE_READ_LOCKOWNER (1 << 1) ++ ++/** ++ * Ioctl flags ++ * ++ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine ++ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed ++ * FUSE_IOCTL_RETRY: retry with new iovecs ++ * FUSE_IOCTL_32BIT: 32bit ioctl ++ * FUSE_IOCTL_DIR: is a directory ++ * FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine 
(64bit time_t) ++ * ++ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs ++ */ ++#define FUSE_IOCTL_COMPAT (1 << 0) ++#define FUSE_IOCTL_UNRESTRICTED (1 << 1) ++#define FUSE_IOCTL_RETRY (1 << 2) ++#define FUSE_IOCTL_32BIT (1 << 3) ++#define FUSE_IOCTL_DIR (1 << 4) ++#define FUSE_IOCTL_COMPAT_X32 (1 << 5) ++ ++#define FUSE_IOCTL_MAX_IOV 256 ++ ++/** ++ * Poll flags ++ * ++ * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify ++ */ ++#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) ++ ++/** ++ * Fsync flags ++ * ++ * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata ++ */ ++#define FUSE_FSYNC_FDATASYNC (1 << 0) ++ ++enum fuse_opcode { ++ FUSE_LOOKUP = 1, ++ FUSE_FORGET = 2, /* no reply */ ++ FUSE_GETATTR = 3, ++ FUSE_SETATTR = 4, ++ FUSE_READLINK = 5, ++ FUSE_SYMLINK = 6, ++ FUSE_MKNOD = 8, ++ FUSE_MKDIR = 9, ++ FUSE_UNLINK = 10, ++ FUSE_RMDIR = 11, ++ FUSE_RENAME = 12, ++ FUSE_LINK = 13, ++ FUSE_OPEN = 14, ++ FUSE_READ = 15, ++ FUSE_WRITE = 16, ++ FUSE_STATFS = 17, ++ FUSE_RELEASE = 18, ++ FUSE_FSYNC = 20, ++ FUSE_SETXATTR = 21, ++ FUSE_GETXATTR = 22, ++ FUSE_LISTXATTR = 23, ++ FUSE_REMOVEXATTR = 24, ++ FUSE_FLUSH = 25, ++ FUSE_INIT = 26, ++ FUSE_OPENDIR = 27, ++ FUSE_READDIR = 28, ++ FUSE_RELEASEDIR = 29, ++ FUSE_FSYNCDIR = 30, ++ FUSE_GETLK = 31, ++ FUSE_SETLK = 32, ++ FUSE_SETLKW = 33, ++ FUSE_ACCESS = 34, ++ FUSE_CREATE = 35, ++ FUSE_INTERRUPT = 36, ++ FUSE_BMAP = 37, ++ FUSE_DESTROY = 38, ++ FUSE_IOCTL = 39, ++ FUSE_POLL = 40, ++ FUSE_NOTIFY_REPLY = 41, ++ FUSE_BATCH_FORGET = 42, ++ FUSE_FALLOCATE = 43, ++ FUSE_READDIRPLUS = 44, ++ FUSE_RENAME2 = 45, ++ FUSE_LSEEK = 46, ++ FUSE_COPY_FILE_RANGE = 47, ++ FUSE_SETUPMAPPING = 48, ++ FUSE_REMOVEMAPPING = 49, ++ ++ /* CUSE specific operations */ ++ CUSE_INIT = 4096, ++ ++ /* Reserved opcodes: helpful to detect structure endian-ness */ ++ CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */ ++ FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */ ++}; ++ ++enum fuse_notify_code { ++ FUSE_NOTIFY_POLL = 1, ++ 
FUSE_NOTIFY_INVAL_INODE = 2, ++ FUSE_NOTIFY_INVAL_ENTRY = 3, ++ FUSE_NOTIFY_STORE = 4, ++ FUSE_NOTIFY_RETRIEVE = 5, ++ FUSE_NOTIFY_DELETE = 6, ++ FUSE_NOTIFY_CODE_MAX, ++}; ++ ++/* The read buffer is required to be at least 8k, but may be much larger */ ++#define FUSE_MIN_READ_BUFFER 8192 ++ ++#define FUSE_COMPAT_ENTRY_OUT_SIZE 120 ++ ++struct fuse_entry_out { ++ uint64_t nodeid; /* Inode ID */ ++ uint64_t generation; /* Inode generation: nodeid:gen must ++ be unique for the fs's lifetime */ ++ uint64_t entry_valid; /* Cache timeout for the name */ ++ uint64_t attr_valid; /* Cache timeout for the attributes */ ++ uint32_t entry_valid_nsec; ++ uint32_t attr_valid_nsec; ++ struct fuse_attr attr; ++}; ++ ++struct fuse_forget_in { ++ uint64_t nlookup; ++}; ++ ++struct fuse_forget_one { ++ uint64_t nodeid; ++ uint64_t nlookup; ++}; ++ ++struct fuse_batch_forget_in { ++ uint32_t count; ++ uint32_t dummy; ++}; ++ ++struct fuse_getattr_in { ++ uint32_t getattr_flags; ++ uint32_t dummy; ++ uint64_t fh; ++}; ++ ++#define FUSE_COMPAT_ATTR_OUT_SIZE 96 ++ ++struct fuse_attr_out { ++ uint64_t attr_valid; /* Cache timeout for the attributes */ ++ uint32_t attr_valid_nsec; ++ uint32_t dummy; ++ struct fuse_attr attr; ++}; ++ ++#define FUSE_COMPAT_MKNOD_IN_SIZE 8 ++ ++struct fuse_mknod_in { ++ uint32_t mode; ++ uint32_t rdev; ++ uint32_t umask; ++ uint32_t padding; ++}; ++ ++struct fuse_mkdir_in { ++ uint32_t mode; ++ uint32_t umask; ++}; ++ ++struct fuse_rename_in { ++ uint64_t newdir; ++}; ++ ++struct fuse_rename2_in { ++ uint64_t newdir; ++ uint32_t flags; ++ uint32_t padding; ++}; ++ ++struct fuse_link_in { ++ uint64_t oldnodeid; ++}; ++ ++struct fuse_setattr_in { ++ uint32_t valid; ++ uint32_t padding; ++ uint64_t fh; ++ uint64_t size; ++ uint64_t lock_owner; ++ uint64_t atime; ++ uint64_t mtime; ++ uint64_t ctime; ++ uint32_t atimensec; ++ uint32_t mtimensec; ++ uint32_t ctimensec; ++ uint32_t mode; ++ uint32_t unused4; ++ uint32_t uid; ++ uint32_t gid; ++ uint32_t unused5; 
++}; ++ ++struct fuse_open_in { ++ uint32_t flags; ++ uint32_t unused; ++}; ++ ++struct fuse_create_in { ++ uint32_t flags; ++ uint32_t mode; ++ uint32_t umask; ++ uint32_t padding; ++}; ++ ++struct fuse_open_out { ++ uint64_t fh; ++ uint32_t open_flags; ++ uint32_t padding; ++}; ++ ++struct fuse_release_in { ++ uint64_t fh; ++ uint32_t flags; ++ uint32_t release_flags; ++ uint64_t lock_owner; ++}; ++ ++struct fuse_flush_in { ++ uint64_t fh; ++ uint32_t unused; ++ uint32_t padding; ++ uint64_t lock_owner; ++}; ++ ++struct fuse_read_in { ++ uint64_t fh; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t read_flags; ++ uint64_t lock_owner; ++ uint32_t flags; ++ uint32_t padding; ++}; ++ ++#define FUSE_COMPAT_WRITE_IN_SIZE 24 ++ ++struct fuse_write_in { ++ uint64_t fh; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t write_flags; ++ uint64_t lock_owner; ++ uint32_t flags; ++ uint32_t padding; ++}; ++ ++struct fuse_write_out { ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++#define FUSE_COMPAT_STATFS_SIZE 48 ++ ++struct fuse_statfs_out { ++ struct fuse_kstatfs st; ++}; ++ ++struct fuse_fsync_in { ++ uint64_t fh; ++ uint32_t fsync_flags; ++ uint32_t padding; ++}; ++ ++struct fuse_setxattr_in { ++ uint32_t size; ++ uint32_t flags; ++}; ++ ++struct fuse_getxattr_in { ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++struct fuse_getxattr_out { ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++struct fuse_lk_in { ++ uint64_t fh; ++ uint64_t owner; ++ struct fuse_file_lock lk; ++ uint32_t lk_flags; ++ uint32_t padding; ++}; ++ ++struct fuse_lk_out { ++ struct fuse_file_lock lk; ++}; ++ ++struct fuse_access_in { ++ uint32_t mask; ++ uint32_t padding; ++}; ++ ++struct fuse_init_in { ++ uint32_t major; ++ uint32_t minor; ++ uint32_t max_readahead; ++ uint32_t flags; ++}; ++ ++#define FUSE_COMPAT_INIT_OUT_SIZE 8 ++#define FUSE_COMPAT_22_INIT_OUT_SIZE 24 ++ ++struct fuse_init_out { ++ uint32_t major; ++ uint32_t minor; ++ uint32_t max_readahead; ++ uint32_t flags; ++ uint16_t 
max_background; ++ uint16_t congestion_threshold; ++ uint32_t max_write; ++ uint32_t time_gran; ++ uint16_t max_pages; ++ uint16_t map_alignment; ++ uint32_t unused[8]; ++}; ++ ++#define CUSE_INIT_INFO_MAX 4096 ++ ++struct cuse_init_in { ++ uint32_t major; ++ uint32_t minor; ++ uint32_t unused; ++ uint32_t flags; ++}; ++ ++struct cuse_init_out { ++ uint32_t major; ++ uint32_t minor; ++ uint32_t unused; ++ uint32_t flags; ++ uint32_t max_read; ++ uint32_t max_write; ++ uint32_t dev_major; /* chardev major */ ++ uint32_t dev_minor; /* chardev minor */ ++ uint32_t spare[10]; ++}; ++ ++struct fuse_interrupt_in { ++ uint64_t unique; ++}; ++ ++struct fuse_bmap_in { ++ uint64_t block; ++ uint32_t blocksize; ++ uint32_t padding; ++}; ++ ++struct fuse_bmap_out { ++ uint64_t block; ++}; ++ ++struct fuse_ioctl_in { ++ uint64_t fh; ++ uint32_t flags; ++ uint32_t cmd; ++ uint64_t arg; ++ uint32_t in_size; ++ uint32_t out_size; ++}; ++ ++struct fuse_ioctl_iovec { ++ uint64_t base; ++ uint64_t len; ++}; ++ ++struct fuse_ioctl_out { ++ int32_t result; ++ uint32_t flags; ++ uint32_t in_iovs; ++ uint32_t out_iovs; ++}; ++ ++struct fuse_poll_in { ++ uint64_t fh; ++ uint64_t kh; ++ uint32_t flags; ++ uint32_t events; ++}; ++ ++struct fuse_poll_out { ++ uint32_t revents; ++ uint32_t padding; ++}; ++ ++struct fuse_notify_poll_wakeup_out { ++ uint64_t kh; ++}; ++ ++struct fuse_fallocate_in { ++ uint64_t fh; ++ uint64_t offset; ++ uint64_t length; ++ uint32_t mode; ++ uint32_t padding; ++}; ++ ++struct fuse_in_header { ++ uint32_t len; ++ uint32_t opcode; ++ uint64_t unique; ++ uint64_t nodeid; ++ uint32_t uid; ++ uint32_t gid; ++ uint32_t pid; ++ uint32_t padding; ++}; ++ ++struct fuse_out_header { ++ uint32_t len; ++ int32_t error; ++ uint64_t unique; ++}; ++ ++struct fuse_dirent { ++ uint64_t ino; ++ uint64_t off; ++ uint32_t namelen; ++ uint32_t type; ++ char name[]; ++}; ++ ++#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) ++#define FUSE_DIRENT_ALIGN(x) \ ++ (((x) + 
sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) ++#define FUSE_DIRENT_SIZE(d) \ ++ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) ++ ++struct fuse_direntplus { ++ struct fuse_entry_out entry_out; ++ struct fuse_dirent dirent; ++}; ++ ++#define FUSE_NAME_OFFSET_DIRENTPLUS \ ++ offsetof(struct fuse_direntplus, dirent.name) ++#define FUSE_DIRENTPLUS_SIZE(d) \ ++ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) ++ ++struct fuse_notify_inval_inode_out { ++ uint64_t ino; ++ int64_t off; ++ int64_t len; ++}; ++ ++struct fuse_notify_inval_entry_out { ++ uint64_t parent; ++ uint32_t namelen; ++ uint32_t padding; ++}; ++ ++struct fuse_notify_delete_out { ++ uint64_t parent; ++ uint64_t child; ++ uint32_t namelen; ++ uint32_t padding; ++}; ++ ++struct fuse_notify_store_out { ++ uint64_t nodeid; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++struct fuse_notify_retrieve_out { ++ uint64_t notify_unique; ++ uint64_t nodeid; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++/* Matches the size of fuse_write_in */ ++struct fuse_notify_retrieve_in { ++ uint64_t dummy1; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t dummy2; ++ uint64_t dummy3; ++ uint64_t dummy4; ++}; ++ ++/* Device ioctls: */ ++#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) ++ ++struct fuse_lseek_in { ++ uint64_t fh; ++ uint64_t offset; ++ uint32_t whence; ++ uint32_t padding; ++}; ++ ++struct fuse_lseek_out { ++ uint64_t offset; ++}; ++ ++struct fuse_copy_file_range_in { ++ uint64_t fh_in; ++ uint64_t off_in; ++ uint64_t nodeid_out; ++ uint64_t fh_out; ++ uint64_t off_out; ++ uint64_t len; ++ uint64_t flags; ++}; ++ ++#endif /* _LINUX_FUSE_H */ +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index f76d773..29c27f4 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -186,6 +186,7 @@ rm -rf "$output/include/standard-headers/linux" + mkdir -p 
"$output/include/standard-headers/linux" + for i in "$tmpdir"/include/linux/*virtio*.h \ + "$tmpdir/include/linux/qemu_fw_cfg.h" \ ++ "$tmpdir/include/linux/fuse.h" \ + "$tmpdir/include/linux/input.h" \ + "$tmpdir/include/linux/input-event-codes.h" \ + "$tmpdir/include/linux/pci_regs.h" \ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Pull-in-upstream-headers.patch b/SOURCES/kvm-virtiofsd-Pull-in-upstream-headers.patch new file mode 100644 index 0000000..78784fb --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Pull-in-upstream-headers.patch @@ -0,0 +1,4911 @@ +From 434b51e5c2fce756906dec4803900397bc98ad72 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:39 +0100 +Subject: [PATCH 008/116] virtiofsd: Pull in upstream headers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-5-dgilbert@redhat.com> +Patchwork-id: 93457 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 004/112] virtiofsd: Pull in upstream headers +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Pull in headers fromlibfuse's upstream fuse-3.8.0 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit ee46c78901eb7fa78e328e04c0494ad6d207238b) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse.h | 1275 ++++++++++++++++++++ + tools/virtiofsd/fuse_common.h | 823 +++++++++++++ + tools/virtiofsd/fuse_i.h | 139 +++ + tools/virtiofsd/fuse_log.h | 82 ++ + tools/virtiofsd/fuse_lowlevel.h | 2089 +++++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_misc.h | 59 + + tools/virtiofsd/fuse_opt.h | 271 +++++ + tools/virtiofsd/passthrough_helpers.h | 76 ++ + 8 files changed, 4814 insertions(+) + create mode 100644 tools/virtiofsd/fuse.h + create mode 100644 tools/virtiofsd/fuse_common.h + create mode 100644 tools/virtiofsd/fuse_i.h + create mode 100644 tools/virtiofsd/fuse_log.h + create mode 100644 tools/virtiofsd/fuse_lowlevel.h + create mode 100644 tools/virtiofsd/fuse_misc.h + create mode 100644 tools/virtiofsd/fuse_opt.h + create mode 100644 tools/virtiofsd/passthrough_helpers.h + +diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h +new file mode 100644 +index 0000000..883f6e5 +--- /dev/null ++++ b/tools/virtiofsd/fuse.h +@@ -0,0 +1,1275 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. ++*/ ++ ++#ifndef FUSE_H_ ++#define FUSE_H_ ++ ++/** @file ++ * ++ * This file defines the library interface of FUSE ++ * ++ * IMPORTANT: you should define FUSE_USE_VERSION before including this header. ++ */ ++ ++#include "fuse_common.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ----------------------------------------------------------- * ++ * Basic FUSE API * ++ * ----------------------------------------------------------- */ ++ ++/** Handle for a FUSE filesystem */ ++struct fuse; ++ ++/** ++ * Readdir flags, passed to ->readdir() ++ */ ++enum fuse_readdir_flags { ++ /** ++ * "Plus" mode. 
++ * ++ * The kernel wants to prefill the inode cache during readdir. The ++ * filesystem may honour this by filling in the attributes and setting ++ * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also ++ * just ignore this flag completely. ++ */ ++ FUSE_READDIR_PLUS = (1 << 0), ++}; ++ ++enum fuse_fill_dir_flags { ++ /** ++ * "Plus" mode: all file attributes are valid ++ * ++ * The attributes are used by the kernel to prefill the inode cache ++ * during a readdir. ++ * ++ * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set ++ * and vice versa. ++ */ ++ FUSE_FILL_DIR_PLUS = (1 << 1), ++}; ++ ++/** Function to add an entry in a readdir() operation ++ * ++ * The *off* parameter can be any non-zero value that enables the ++ * filesystem to identify the current point in the directory ++ * stream. It does not need to be the actual physical position. A ++ * value of zero is reserved to indicate that seeking in directories ++ * is not supported. ++ * ++ * @param buf the buffer passed to the readdir() operation ++ * @param name the file name of the directory entry ++ * @param stat file attributes, can be NULL ++ * @param off offset of the next entry or zero ++ * @param flags fill flags ++ * @return 1 if buffer is full, zero otherwise ++ */ ++typedef int (*fuse_fill_dir_t) (void *buf, const char *name, ++ const struct stat *stbuf, off_t off, ++ enum fuse_fill_dir_flags flags); ++/** ++ * Configuration of the high-level API ++ * ++ * This structure is initialized from the arguments passed to ++ * fuse_new(), and then passed to the file system's init() handler ++ * which should ensure that the configuration is compatible with the ++ * file system implementation. ++ */ ++struct fuse_config { ++ /** ++ * If `set_gid` is non-zero, the st_gid attribute of each file ++ * is overwritten with the value of `gid`. 
++ */ ++ int set_gid; ++ unsigned int gid; ++ ++ /** ++ * If `set_uid` is non-zero, the st_uid attribute of each file ++ * is overwritten with the value of `uid`. ++ */ ++ int set_uid; ++ unsigned int uid; ++ ++ /** ++ * If `set_mode` is non-zero, any permission bits set in ++ * `umask` are unset in the st_mode attribute of each file. ++ */ ++ int set_mode; ++ unsigned int umask; ++ ++ /** ++ * The timeout in seconds for which name lookups will be ++ * cached. ++ */ ++ double entry_timeout; ++ ++ /** ++ * The timeout in seconds for which a negative lookup will be ++ * cached. This means, that if file did not exist (lookup ++ * returned ENOENT), the lookup will only be redone after the ++ * timeout, and the file/directory will be assumed to not ++ * exist until then. A value of zero means that negative ++ * lookups are not cached. ++ */ ++ double negative_timeout; ++ ++ /** ++ * The timeout in seconds for which file/directory attributes ++ * (as returned by e.g. the `getattr` handler) are cached. ++ */ ++ double attr_timeout; ++ ++ /** ++ * Allow requests to be interrupted ++ */ ++ int intr; ++ ++ /** ++ * Specify which signal number to send to the filesystem when ++ * a request is interrupted. The default is hardcoded to ++ * USR1. ++ */ ++ int intr_signal; ++ ++ /** ++ * Normally, FUSE assigns inodes to paths only for as long as ++ * the kernel is aware of them. With this option inodes are ++ * instead remembered for at least this many seconds. This ++ * will require more memory, but may be necessary when using ++ * applications that make use of inode numbers. ++ * ++ * A number of -1 means that inodes will be remembered for the ++ * entire life-time of the file-system process. ++ */ ++ int remember; ++ ++ /** ++ * The default behavior is that if an open file is deleted, ++ * the file is renamed to a hidden file (.fuse_hiddenXXX), and ++ * only removed when the file is finally released. 
This ++ * relieves the filesystem implementation of having to deal ++ * with this problem. This option disables the hiding ++ * behavior, and files are removed immediately in an unlink ++ * operation (or in a rename operation which overwrites an ++ * existing file). ++ * ++ * It is recommended that you not use the hard_remove ++ * option. When hard_remove is set, the following libc ++ * functions fail on unlinked files (returning errno of ++ * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), ++ * ftruncate(2), fstat(2), fchmod(2), fchown(2) ++ */ ++ int hard_remove; ++ ++ /** ++ * Honor the st_ino field in the functions getattr() and ++ * fill_dir(). This value is used to fill in the st_ino field ++ * in the stat(2), lstat(2), fstat(2) functions and the d_ino ++ * field in the readdir(2) function. The filesystem does not ++ * have to guarantee uniqueness, however some applications ++ * rely on this value being unique for the whole filesystem. ++ * ++ * Note that this does *not* affect the inode that libfuse ++ * and the kernel use internally (also called the "nodeid"). ++ */ ++ int use_ino; ++ ++ /** ++ * If use_ino option is not given, still try to fill in the ++ * d_ino field in readdir(2). If the name was previously ++ * looked up, and is still in the cache, the inode number ++ * found there will be used. Otherwise it will be set to -1. ++ * If use_ino option is given, this option is ignored. ++ */ ++ int readdir_ino; ++ ++ /** ++ * This option disables the use of page cache (file content cache) ++ * in the kernel for this filesystem. This has several effects: ++ * ++ * 1. Each read(2) or write(2) system call will initiate one ++ * or more read or write operations, data will not be ++ * cached in the kernel. ++ * ++ * 2. The return value of the read() and write() system calls ++ * will correspond to the return values of the read and ++ * write operations. This is useful for example if the ++ * file size is not known in advance (before reading it). 
++ * ++ * Internally, enabling this option causes fuse to set the ++ * `direct_io` field of `struct fuse_file_info` - overwriting ++ * any value that was put there by the file system. ++ */ ++ int direct_io; ++ ++ /** ++ * This option disables flushing the cache of the file ++ * contents on every open(2). This should only be enabled on ++ * filesystems where the file data is never changed ++ * externally (not through the mounted FUSE filesystem). Thus ++ * it is not suitable for network filesystems and other ++ * intermediate filesystems. ++ * ++ * NOTE: if this option is not specified (and neither ++ * direct_io) data is still cached after the open(2), so a ++ * read(2) system call will not always initiate a read ++ * operation. ++ * ++ * Internally, enabling this option causes fuse to set the ++ * `keep_cache` field of `struct fuse_file_info` - overwriting ++ * any value that was put there by the file system. ++ */ ++ int kernel_cache; ++ ++ /** ++ * This option is an alternative to `kernel_cache`. Instead of ++ * unconditionally keeping cached data, the cached data is ++ * invalidated on open(2) if the modification time or the ++ * size of the file has changed since it was last opened. ++ */ ++ int auto_cache; ++ ++ /** ++ * The timeout in seconds for which file attributes are cached ++ * for the purpose of checking if auto_cache should flush the ++ * file data on open. ++ */ ++ int ac_attr_timeout_set; ++ double ac_attr_timeout; ++ ++ /** ++ * If this option is given the file-system handlers for the ++ * following operations will not receive path information: ++ * read, write, flush, release, fsync, readdir, releasedir, ++ * fsyncdir, lock, ioctl and poll. ++ * ++ * For the truncate, getattr, chmod, chown and utimens ++ * operations the path will be provided only if the struct ++ * fuse_file_info argument is NULL. ++ */ ++ int nullpath_ok; ++ ++ /** ++ * The remaining options are used by libfuse internally and ++ * should not be touched. 
++ */ ++ int show_help; ++ char *modules; ++ int debug; ++}; ++ ++ ++/** ++ * The file system operations: ++ * ++ * Most of these should work very similarly to the well known UNIX ++ * file system operations. A major exception is that instead of ++ * returning an error in 'errno', the operation should return the ++ * negated error value (-errno) directly. ++ * ++ * All methods are optional, but some are essential for a useful ++ * filesystem (e.g. getattr). Open, flush, release, fsync, opendir, ++ * releasedir, fsyncdir, access, create, truncate, lock, init and ++ * destroy are special purpose methods, without which a full featured ++ * filesystem can still be implemented. ++ * ++ * In general, all methods are expected to perform any necessary ++ * permission checking. However, a filesystem may delegate this task ++ * to the kernel by passing the `default_permissions` mount option to ++ * `fuse_new()`. In this case, methods will only be called if ++ * the kernel's permission check has succeeded. ++ * ++ * Almost all operations take a path which can be of any length. ++ */ ++struct fuse_operations { ++ /** Get file attributes. ++ * ++ * Similar to stat(). The 'st_dev' and 'st_blksize' fields are ++ * ignored. The 'st_ino' field is ignored except if the 'use_ino' ++ * mount option is given. In that case it is passed to userspace, ++ * but libfuse and the kernel will still assign a different ++ * inode for internal use (called the "nodeid"). ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ */ ++ int (*getattr) (const char *, struct stat *, struct fuse_file_info *fi); ++ ++ /** Read the target of a symbolic link ++ * ++ * The buffer should be filled with a null terminated string. The ++ * buffer size argument includes the space for the terminating ++ * null character. If the linkname is too long to fit in the ++ * buffer, it should be truncated. The return value should be 0 ++ * for success. 
++ */ ++ int (*readlink) (const char *, char *, size_t); ++ ++ /** Create a file node ++ * ++ * This is called for creation of all non-directory, non-symlink ++ * nodes. If the filesystem defines a create() method, then for ++ * regular files that will be called instead. ++ */ ++ int (*mknod) (const char *, mode_t, dev_t); ++ ++ /** Create a directory ++ * ++ * Note that the mode argument may not have the type specification ++ * bits set, i.e. S_ISDIR(mode) can be false. To obtain the ++ * correct directory type bits use mode|S_IFDIR ++ * */ ++ int (*mkdir) (const char *, mode_t); ++ ++ /** Remove a file */ ++ int (*unlink) (const char *); ++ ++ /** Remove a directory */ ++ int (*rmdir) (const char *); ++ ++ /** Create a symbolic link */ ++ int (*symlink) (const char *, const char *); ++ ++ /** Rename a file ++ * ++ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If ++ * RENAME_NOREPLACE is specified, the filesystem must not ++ * overwrite *newname* if it exists and return an error ++ * instead. If `RENAME_EXCHANGE` is specified, the filesystem ++ * must atomically exchange the two files, i.e. both must ++ * exist and neither may be deleted. ++ */ ++ int (*rename) (const char *, const char *, unsigned int flags); ++ ++ /** Create a hard link to a file */ ++ int (*link) (const char *, const char *); ++ ++ /** Change the permission bits of a file ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ */ ++ int (*chmod) (const char *, mode_t, struct fuse_file_info *fi); ++ ++ /** Change the owner and group of a file ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. 
++ */ ++ int (*chown) (const char *, uid_t, gid_t, struct fuse_file_info *fi); ++ ++ /** Change the size of a file ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*truncate) (const char *, off_t, struct fuse_file_info *fi); ++ ++ /** Open a file ++ * ++ * Open flags are available in fi->flags. The following rules ++ * apply. ++ * ++ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be ++ * filtered out / handled by the kernel. ++ * ++ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) ++ * should be used by the filesystem to check if the operation is ++ * permitted. If the ``-o default_permissions`` mount option is ++ * given, this check is already done by the kernel before calling ++ * open() and may thus be omitted by the filesystem. ++ * ++ * - When writeback caching is enabled, the kernel may send ++ * read requests even for files opened with O_WRONLY. The ++ * filesystem should be prepared to handle this. ++ * ++ * - When writeback caching is disabled, the filesystem is ++ * expected to properly handle the O_APPEND flag and ensure ++ * that each write is appending to the end of the file. ++ * ++ * - When writeback caching is enabled, the kernel will ++ * handle O_APPEND. However, unless all changes to the file ++ * come through the kernel this will not work reliably. The ++ * filesystem should thus either ignore the O_APPEND flag ++ * (and let the kernel handle it), or return an error ++ * (indicating that reliable O_APPEND is not available). ++ * ++ * Filesystem may store an arbitrary file handle (pointer, ++ * index, etc) in fi->fh, and use this in all other file ++ * operations (read, write, flush, release, fsync). ++ * ++ * Filesystem may also implement stateless file I/O and not store ++ * anything in fi->fh. 
++ * ++ * There are also some flags (direct_io, keep_cache) which the ++ * filesystem may set in fi, to change the way the file is opened. ++ * See fuse_file_info structure in <fuse_common.h> for more details. ++ * ++ * If this request is answered with an error code of ENOSYS ++ * and FUSE_CAP_NO_OPEN_SUPPORT is set in ++ * `fuse_conn_info.capable`, this is treated as success and ++ * future calls to open will also succeed without being sent ++ * to the filesystem process. ++ * ++ */ ++ int (*open) (const char *, struct fuse_file_info *); ++ ++ /** Read data from an open file ++ * ++ * Read should return exactly the number of bytes requested except ++ * on EOF or error, otherwise the rest of the data will be ++ * substituted with zeroes. An exception to this is when the ++ * 'direct_io' mount option is specified, in which case the return ++ * value of the read system call will reflect the return value of ++ * this operation. ++ */ ++ int (*read) (const char *, char *, size_t, off_t, ++ struct fuse_file_info *); ++ ++ /** Write data to an open file ++ * ++ * Write should return exactly the number of bytes requested ++ * except on error. An exception to this is when the 'direct_io' ++ * mount option is specified (see read operation). ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*write) (const char *, const char *, size_t, off_t, ++ struct fuse_file_info *); ++ ++ /** Get file system statistics ++ * ++ * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored ++ */ ++ int (*statfs) (const char *, struct statvfs *); ++ ++ /** Possibly flush cached data ++ * ++ * BIG NOTE: This is not equivalent to fsync(). It's not a ++ * request to sync dirty data. ++ * ++ * Flush is called on each close() of a file descriptor, as opposed to ++ * release which is called on the close of the last file descriptor for ++ * a file. 
Under Linux, errors returned by flush() will be passed to ++ * userspace as errors from close(), so flush() is a good place to write ++ * back any cached dirty data. However, many applications ignore errors ++ * on close(), and on non-Linux systems, close() may succeed even if flush() ++ * returns an error. For these reasons, filesystems should not assume ++ * that errors returned by flush will ever be noticed or even ++ * delivered. ++ * ++ * NOTE: The flush() method may be called more than once for each ++ * open(). This happens if more than one file descriptor refers to an ++ * open file handle, e.g. due to dup(), dup2() or fork() calls. It is ++ * not possible to determine if a flush is final, so each flush should ++ * be treated equally. Multiple write-flush sequences are relatively ++ * rare, so this shouldn't be a problem. ++ * ++ * Filesystems shouldn't assume that flush will be called at any ++ * particular point. It may be called more times than expected, or not ++ * at all. ++ * ++ * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html ++ */ ++ int (*flush) (const char *, struct fuse_file_info *); ++ ++ /** Release an open file ++ * ++ * Release is called when there are no more references to an open ++ * file: all file descriptors are closed and all memory mappings ++ * are unmapped. ++ * ++ * For every open() call there will be exactly one release() call ++ * with the same flags and file handle. It is possible to ++ * have a file opened more than once, in which case only the last ++ * release will mean, that no more reads/writes will happen on the ++ * file. The return value of release is ignored. ++ */ ++ int (*release) (const char *, struct fuse_file_info *); ++ ++ /** Synchronize file contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data. 
++ */ ++ int (*fsync) (const char *, int, struct fuse_file_info *); ++ ++ /** Set extended attributes */ ++ int (*setxattr) (const char *, const char *, const char *, size_t, int); ++ ++ /** Get extended attributes */ ++ int (*getxattr) (const char *, const char *, char *, size_t); ++ ++ /** List extended attributes */ ++ int (*listxattr) (const char *, char *, size_t); ++ ++ /** Remove extended attributes */ ++ int (*removexattr) (const char *, const char *); ++ ++ /** Open directory ++ * ++ * Unless the 'default_permissions' mount option is given, ++ * this method should check if opendir is permitted for this ++ * directory. Optionally opendir may also return an arbitrary ++ * filehandle in the fuse_file_info structure, which will be ++ * passed to readdir, releasedir and fsyncdir. ++ */ ++ int (*opendir) (const char *, struct fuse_file_info *); ++ ++ /** Read directory ++ * ++ * The filesystem may choose between two modes of operation: ++ * ++ * 1) The readdir implementation ignores the offset parameter, and ++ * passes zero to the filler function's offset. The filler ++ * function will not return '1' (unless an error happens), so the ++ * whole directory is read in a single readdir operation. ++ * ++ * 2) The readdir implementation keeps track of the offsets of the ++ * directory entries. It uses the offset parameter and always ++ * passes non-zero offset to the filler function. When the buffer ++ * is full (or an error happens) the filler function will return ++ * '1'. 
++ */ ++ int (*readdir) (const char *, void *, fuse_fill_dir_t, off_t, ++ struct fuse_file_info *, enum fuse_readdir_flags); ++ ++ /** Release directory ++ */ ++ int (*releasedir) (const char *, struct fuse_file_info *); ++ ++ /** Synchronize directory contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data ++ */ ++ int (*fsyncdir) (const char *, int, struct fuse_file_info *); ++ ++ /** ++ * Initialize filesystem ++ * ++ * The return value will be passed in the `private_data` field of ++ * `struct fuse_context` to all file operations, and as a ++ * parameter to the destroy() method. It overrides the initial ++ * value provided to fuse_main() / fuse_new(). ++ */ ++ void *(*init) (struct fuse_conn_info *conn, ++ struct fuse_config *cfg); ++ ++ /** ++ * Clean up filesystem ++ * ++ * Called on filesystem exit. ++ */ ++ void (*destroy) (void *private_data); ++ ++ /** ++ * Check file access permissions ++ * ++ * This will be called for the access() system call. If the ++ * 'default_permissions' mount option is given, this method is not ++ * called. ++ * ++ * This method is not called under Linux kernel versions 2.4.x ++ */ ++ int (*access) (const char *, int); ++ ++ /** ++ * Create and open a file ++ * ++ * If the file does not exist, first create it with the specified ++ * mode, and then open it. ++ * ++ * If this method is not implemented or under Linux kernel ++ * versions earlier than 2.6.15, the mknod() and open() methods ++ * will be called instead. ++ */ ++ int (*create) (const char *, mode_t, struct fuse_file_info *); ++ ++ /** ++ * Perform POSIX file locking operation ++ * ++ * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. ++ * ++ * For the meaning of fields in 'struct flock' see the man page ++ * for fcntl(2). The l_whence field will always be set to ++ * SEEK_SET. ++ * ++ * For checking lock ownership, the 'fuse_file_info->owner' ++ * argument must be used. 
++ * ++ * For F_GETLK operation, the library will first check currently ++ * held locks, and if a conflicting lock is found it will return ++ * information without calling this method. This ensures, that ++ * for local locks the l_pid field is correctly filled in. The ++ * results may not be accurate in case of race conditions and in ++ * the presence of hard links, but it's unlikely that an ++ * application would rely on accurate GETLK results in these ++ * cases. If a conflicting lock is not found, this method will be ++ * called, and the filesystem may fill out l_pid by a meaningful ++ * value, or it may leave this field zero. ++ * ++ * For F_SETLK and F_SETLKW the l_pid field will be set to the pid ++ * of the process performing the locking operation. ++ * ++ * Note: if this method is not implemented, the kernel will still ++ * allow file locking to work locally. Hence it is only ++ * interesting for network filesystems and similar. ++ */ ++ int (*lock) (const char *, struct fuse_file_info *, int cmd, ++ struct flock *); ++ ++ /** ++ * Change the access and modification times of a file with ++ * nanosecond resolution ++ * ++ * This supersedes the old utime() interface. New applications ++ * should use this. ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ * ++ * See the utimensat(2) man page for details. ++ */ ++ int (*utimens) (const char *, const struct timespec tv[2], ++ struct fuse_file_info *fi); ++ ++ /** ++ * Map block index within file to block index within device ++ * ++ * Note: This makes sense only for block device backed filesystems ++ * mounted with the 'blkdev' option ++ */ ++ int (*bmap) (const char *, size_t blocksize, uint64_t *idx); ++ ++ /** ++ * Ioctl ++ * ++ * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in ++ * 64bit environment. The size and direction of data is ++ * determined by _IOC_*() decoding of cmd.
For _IOC_NONE, ++ * data will be NULL, for _IOC_WRITE data is out area, for ++ * _IOC_READ in area and if both are set in/out area. In all ++ * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. ++ * ++ * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a ++ * directory file handle. ++ * ++ * Note : the unsigned long request submitted by the application ++ * is truncated to 32 bits. ++ */ ++ int (*ioctl) (const char *, unsigned int cmd, void *arg, ++ struct fuse_file_info *, unsigned int flags, void *data); ++ ++ /** ++ * Poll for IO readiness events ++ * ++ * Note: If ph is non-NULL, the client should notify ++ * when IO readiness events occur by calling ++ * fuse_notify_poll() with the specified ph. ++ * ++ * Regardless of the number of times poll with a non-NULL ph ++ * is received, single notification is enough to clear all. ++ * Notifying more times incurs overhead but doesn't harm ++ * correctness. ++ * ++ * The callee is responsible for destroying ph with ++ * fuse_pollhandle_destroy() when no longer in use. ++ */ ++ int (*poll) (const char *, struct fuse_file_info *, ++ struct fuse_pollhandle *ph, unsigned *reventsp); ++ ++ /** Write contents of buffer to an open file ++ * ++ * Similar to the write() method, but data is supplied in a ++ * generic buffer. Use fuse_buf_copy() to transfer data to ++ * the destination. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*write_buf) (const char *, struct fuse_bufvec *buf, off_t off, ++ struct fuse_file_info *); ++ ++ /** Store data from an open file in a buffer ++ * ++ * Similar to the read() method, but data is stored and ++ * returned in a generic buffer. ++ * ++ * No actual copying of data has to take place, the source ++ * file descriptor may simply be stored in the buffer for ++ * later data transfer. ++ * ++ * The buffer must be allocated dynamically and stored at the ++ * location pointed to by bufp. 
If the buffer contains memory ++ * regions, they too must be allocated using malloc(). The ++ * allocated memory will be freed by the caller. ++ */ ++ int (*read_buf) (const char *, struct fuse_bufvec **bufp, ++ size_t size, off_t off, struct fuse_file_info *); ++ /** ++ * Perform BSD file locking operation ++ * ++ * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN ++ * ++ * Nonblocking requests will be indicated by ORing LOCK_NB to ++ * the above operations ++ * ++ * For more information see the flock(2) manual page. ++ * ++ * Additionally fi->owner will be set to a value unique to ++ * this open file. This same value will be supplied to ++ * ->release() when the file is released. ++ * ++ * Note: if this method is not implemented, the kernel will still ++ * allow file locking to work locally. Hence it is only ++ * interesting for network filesystems and similar. ++ */ ++ int (*flock) (const char *, struct fuse_file_info *, int op); ++ ++ /** ++ * Allocates space for an open file ++ * ++ * This function ensures that required space is allocated for specified ++ * file. If this function returns success then any subsequent write ++ * request to specified range is guaranteed not to fail because of lack ++ * of space on the file system media. ++ */ ++ int (*fallocate) (const char *, int, off_t, off_t, ++ struct fuse_file_info *); ++ ++ /** ++ * Copy a range of data from one file to another ++ * ++ * Performs an optimized copy between two file descriptors without the ++ * additional cost of transferring data through the FUSE kernel module ++ * to user space (glibc) and then back into the FUSE filesystem again. ++ * ++ * In case this method is not implemented, glibc falls back to reading ++ * data from the source and writing to the destination. Effectively ++ * doing an inefficient copy of the data. 
++ */ ++ ssize_t (*copy_file_range) (const char *path_in, ++ struct fuse_file_info *fi_in, ++ off_t offset_in, const char *path_out, ++ struct fuse_file_info *fi_out, ++ off_t offset_out, size_t size, int flags); ++ ++ /** ++ * Find next data or hole after the specified offset ++ */ ++ off_t (*lseek) (const char *, off_t off, int whence, struct fuse_file_info *); ++}; ++ ++/** Extra context that may be needed by some filesystems ++ * ++ * The uid, gid and pid fields are not filled in case of a writepage ++ * operation. ++ */ ++struct fuse_context { ++ /** Pointer to the fuse object */ ++ struct fuse *fuse; ++ ++ /** User ID of the calling process */ ++ uid_t uid; ++ ++ /** Group ID of the calling process */ ++ gid_t gid; ++ ++ /** Process ID of the calling thread */ ++ pid_t pid; ++ ++ /** Private filesystem data */ ++ void *private_data; ++ ++ /** Umask of the calling process */ ++ mode_t umask; ++}; ++ ++/** ++ * Main function of FUSE. ++ * ++ * This is for the lazy. This is all that has to be called from the ++ * main() function. ++ * ++ * This function does the following: ++ * - parses command line options, and handles --help and ++ * --version ++ * - installs signal handlers for INT, HUP, TERM and PIPE ++ * - registers an exit handler to unmount the filesystem on program exit ++ * - creates a fuse handle ++ * - registers the operations ++ * - calls either the single-threaded or the multi-threaded event loop ++ * ++ * Most file systems will have to parse some file-system specific ++ * arguments before calling this function. It is recommended to do ++ * this with fuse_opt_parse() and a processing function that passes ++ * through any unknown options (this can also be achieved by just ++ * passing NULL as the processing function). That way, the remaining ++ * options can be passed directly to fuse_main(). ++ * ++ * fuse_main() accepts all options that can be passed to ++ * fuse_parse_cmdline(), fuse_new(), or fuse_session_new(). 
++ * ++ * Option parsing skips argv[0], which is assumed to contain the ++ * program name. This element must always be present and is used to ++ * construct a basic ``usage: `` message for the --help ++ * output. argv[0] may also be set to the empty string. In this case ++ * the usage message is suppressed. This can be used by file systems ++ * to print their own usage line first. See hello.c for an example of ++ * how to do this. ++ * ++ * Note: this is currently implemented as a macro. ++ * ++ * The following error codes may be returned from fuse_main(): ++ * 1: Invalid option arguments ++ * 2: No mount point specified ++ * 3: FUSE setup failed ++ * 4: Mounting failed ++ * 5: Failed to daemonize (detach from session) ++ * 6: Failed to set up signal handlers ++ * 7: An error occurred during the life of the file system ++ * ++ * @param argc the argument counter passed to the main() function ++ * @param argv the argument vector passed to the main() function ++ * @param op the file system operation ++ * @param private_data Initial value for the `private_data` ++ * field of `struct fuse_context`. May be overridden by the ++ * `struct fuse_operations.init` handler. ++ * @return 0 on success, nonzero on failure ++ * ++ * Example usage, see hello.c ++ */ ++/* ++ int fuse_main(int argc, char *argv[], const struct fuse_operations *op, ++ void *private_data); ++*/ ++#define fuse_main(argc, argv, op, private_data) \ ++ fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) ++ ++/* ----------------------------------------------------------- * ++ * More detailed API * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Print available options (high- and low-level) to stdout. This is ++ * not an exhaustive list, but includes only those options that may be ++ * of interest to an end-user of a file system.
++ * ++ * The function looks at the argument vector only to determine if ++ * there are additional modules to be loaded (module=foo option), ++ * and attempts to call their help functions as well. ++ * ++ * @param args the argument vector. ++ */ ++void fuse_lib_help(struct fuse_args *args); ++ ++/** ++ * Create a new FUSE filesystem. ++ * ++ * This function accepts most file-system independent mount options ++ * (like context, nodev, ro - see mount(8)), as well as the ++ * FUSE-specific mount options from mount.fuse(8). ++ * ++ * If the --help option is specified, the function writes a help text ++ * to stdout and returns NULL. ++ * ++ * Option parsing skips argv[0], which is assumed to contain the ++ * program name. This element must always be present and is used to ++ * construct a basic ``usage: `` message for the --help output. If ++ * argv[0] is set to the empty string, no usage message is included in ++ * the --help output. ++ * ++ * If an unknown option is passed in, an error message is written to ++ * stderr and the function returns NULL. ++ * ++ * @param args argument vector ++ * @param op the filesystem operations ++ * @param op_size the size of the fuse_operations structure ++ * @param private_data Initial value for the `private_data` ++ * field of `struct fuse_context`. May be overridden by the ++ * `struct fuse_operations.init` handler. ++ * @return the created FUSE handle ++ */ ++#if FUSE_USE_VERSION == 30 ++struct fuse *fuse_new_30(struct fuse_args *args, const struct fuse_operations *op, ++ size_t op_size, void *private_data); ++#define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data) ++#else ++struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op, ++ size_t op_size, void *private_data); ++#endif ++ ++/** ++ * Mount a FUSE file system. ++ * ++ * @param mountpoint the mount point path ++ * @param f the FUSE handle ++ * ++ * @return 0 on success, -1 on failure. 
++ **/ ++int fuse_mount(struct fuse *f, const char *mountpoint); ++ ++/** ++ * Unmount a FUSE file system. ++ * ++ * See fuse_session_unmount() for additional information. ++ * ++ * @param f the FUSE handle ++ **/ ++void fuse_unmount(struct fuse *f); ++ ++/** ++ * Destroy the FUSE handle. ++ * ++ * NOTE: This function does not unmount the filesystem. If this is ++ * needed, call fuse_unmount() before calling this function. ++ * ++ * @param f the FUSE handle ++ */ ++void fuse_destroy(struct fuse *f); ++ ++/** ++ * FUSE event loop. ++ * ++ * Requests from the kernel are processed, and the appropriate ++ * operations are called. ++ * ++ * For a description of the return value and the conditions when the ++ * event loop exits, refer to the documentation of ++ * fuse_session_loop(). ++ * ++ * @param f the FUSE handle ++ * @return see fuse_session_loop() ++ * ++ * See also: fuse_loop_mt() ++ */ ++int fuse_loop(struct fuse *f); ++ ++/** ++ * Flag session as terminated ++ * ++ * This function will cause any running event loops to exit on ++ * the next opportunity. ++ * ++ * @param f the FUSE handle ++ */ ++void fuse_exit(struct fuse *f); ++ ++/** ++ * FUSE event loop with multiple threads ++ * ++ * Requests from the kernel are processed, and the appropriate ++ * operations are called. Requests are processed in parallel by ++ * distributing them between multiple threads. ++ * ++ * For a description of the return value and the conditions when the ++ * event loop exits, refer to the documentation of ++ * fuse_session_loop(). ++ * ++ * Note: using fuse_loop() instead of fuse_loop_mt() means you are running in ++ * single-threaded mode, and that you will not have to worry about reentrancy, ++ * though you will have to worry about recursive lookups. In single-threaded ++ * mode, FUSE will wait for one callback to return before calling another.
++ * ++ * Enabling multiple threads, by using fuse_loop_mt(), will cause FUSE to make ++ * multiple simultaneous calls into the various callback functions given by your ++ * fuse_operations record. ++ * ++ * If you are using multiple threads, you can enjoy all the parallel execution ++ * and interactive response benefits of threads, and you get to enjoy all the ++ * benefits of race conditions and locking bugs, too. Ensure that any code used ++ * in the callback function of fuse_operations is also thread-safe. ++ * ++ * @param f the FUSE handle ++ * @param config loop configuration ++ * @return see fuse_session_loop() ++ * ++ * See also: fuse_loop() ++ */ ++#if FUSE_USE_VERSION < 32 ++int fuse_loop_mt_31(struct fuse *f, int clone_fd); ++#define fuse_loop_mt(f, clone_fd) fuse_loop_mt_31(f, clone_fd) ++#else ++int fuse_loop_mt(struct fuse *f, struct fuse_loop_config *config); ++#endif ++ ++/** ++ * Get the current context ++ * ++ * The context is only valid for the duration of a filesystem ++ * operation, and thus must not be stored and used later. ++ * ++ * @return the context ++ */ ++struct fuse_context *fuse_get_context(void); ++ ++/** ++ * Get the current supplementary group IDs for the current request ++ * ++ * Similar to the getgroups(2) system call, except the return value is ++ * always the total number of group IDs, even if it is larger than the ++ * specified size. ++ * ++ * The current fuse kernel module in linux (as of 2.6.30) doesn't pass ++ * the group list to userspace, hence this function needs to parse ++ * "/proc/$TID/task/$TID/status" to get the group IDs. ++ * ++ * This feature may not be supported on all operating systems. In ++ * such a case this function will return -ENOSYS. 
++ * ++ * @param size size of given array ++ * @param list array of group IDs to be filled in ++ * @return the total number of supplementary group IDs or -errno on failure ++ */ ++int fuse_getgroups(int size, gid_t list[]); ++ ++/** ++ * Check if the current request has already been interrupted ++ * ++ * @return 1 if the request has been interrupted, 0 otherwise ++ */ ++int fuse_interrupted(void); ++ ++/** ++ * Invalidates cache for the given path. ++ * ++ * This calls fuse_lowlevel_notify_inval_inode internally. ++ * ++ * @return 0 on successful invalidation, negative error value otherwise. ++ * This routine may return -ENOENT to indicate that there was ++ * no entry to be invalidated, e.g., because the path has not ++ * been seen before or has been forgotten; this should not be ++ * considered to be an error. ++ */ ++int fuse_invalidate_path(struct fuse *f, const char *path); ++ ++/** ++ * The real main function ++ * ++ * Do not call this directly, use fuse_main() ++ */ ++int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, ++ size_t op_size, void *private_data); ++ ++/** ++ * Start the cleanup thread when using option "remember". ++ * ++ * This is done automatically by fuse_loop_mt() ++ * @param fuse struct fuse pointer for fuse instance ++ * @return 0 on success and -1 on error ++ */ ++int fuse_start_cleanup_thread(struct fuse *fuse); ++ ++/** ++ * Stop the cleanup thread when using option "remember". 
++ * ++ * This is done automatically by fuse_loop_mt() ++ * @param fuse struct fuse pointer for fuse instance ++ */ ++void fuse_stop_cleanup_thread(struct fuse *fuse); ++ ++/** ++ * Iterate over cache removing stale entries ++ * use in conjunction with "-oremember" ++ * ++ * NOTE: This is already done for the standard sessions ++ * ++ * @param fuse struct fuse pointer for fuse instance ++ * @return the number of seconds until the next cleanup ++ */ ++int fuse_clean_cache(struct fuse *fuse); ++ ++/* ++ * Stacking API ++ */ ++ ++/** ++ * Fuse filesystem object ++ * ++ * This opaque object represents a filesystem layer ++ */ ++struct fuse_fs; ++ ++/* ++ * These functions call the relevant filesystem operation, and return ++ * the result. ++ * ++ * If the operation is not defined, they return -ENOSYS, with the ++ * exception of fuse_fs_open, fuse_fs_release, fuse_fs_opendir, ++ * fuse_fs_releasedir and fuse_fs_statfs, which return 0. ++ */ ++ ++int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, ++ struct fuse_file_info *fi); ++int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, ++ const char *newpath, unsigned int flags); ++int fuse_fs_unlink(struct fuse_fs *fs, const char *path); ++int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); ++int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, ++ const char *path); ++int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); ++int fuse_fs_release(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_open(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, ++ off_t off, struct fuse_file_info *fi); ++int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, ++ struct fuse_bufvec **bufp, size_t size, off_t off, ++ struct fuse_file_info *fi); ++int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, ++ size_t size,
off_t off, struct fuse_file_info *fi); ++int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, ++ struct fuse_bufvec *buf, off_t off, ++ struct fuse_file_info *fi); ++int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, ++ struct fuse_file_info *fi); ++int fuse_fs_flush(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf); ++int fuse_fs_opendir(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, ++ fuse_fill_dir_t filler, off_t off, ++ struct fuse_file_info *fi, enum fuse_readdir_flags flags); ++int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, ++ struct fuse_file_info *fi); ++int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, ++ struct fuse_file_info *fi); ++int fuse_fs_lock(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi, int cmd, struct flock *lock); ++int fuse_fs_flock(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi, int op); ++int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, ++ struct fuse_file_info *fi); ++int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, ++ struct fuse_file_info *fi); ++int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, ++ struct fuse_file_info *fi); ++int fuse_fs_utimens(struct fuse_fs *fs, const char *path, ++ const struct timespec tv[2], struct fuse_file_info *fi); ++int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask); ++int fuse_fs_readlink(struct fuse_fs *fs, const char *path, char *buf, ++ size_t len); ++int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, ++ dev_t rdev); ++int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode); ++int 
fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, ++ const char *value, size_t size, int flags); ++int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, ++ char *value, size_t size); ++int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, ++ size_t size); ++int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, ++ const char *name); ++int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, ++ uint64_t *idx); ++int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, ++ void *arg, struct fuse_file_info *fi, unsigned int flags, ++ void *data); ++int fuse_fs_poll(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi, struct fuse_pollhandle *ph, ++ unsigned *reventsp); ++int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, ++ off_t offset, off_t length, struct fuse_file_info *fi); ++ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, ++ struct fuse_file_info *fi_in, off_t off_in, ++ const char *path_out, ++ struct fuse_file_info *fi_out, off_t off_out, ++ size_t len, int flags); ++off_t fuse_fs_lseek(struct fuse_fs *fs, const char *path, off_t off, int whence, ++ struct fuse_file_info *fi); ++void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, ++ struct fuse_config *cfg); ++void fuse_fs_destroy(struct fuse_fs *fs); ++ ++int fuse_notify_poll(struct fuse_pollhandle *ph); ++ ++/** ++ * Create a new fuse filesystem object ++ * ++ * This is usually called from the factory of a fuse module to create ++ * a new instance of a filesystem. ++ * ++ * @param op the filesystem operations ++ * @param op_size the size of the fuse_operations structure ++ * @param private_data Initial value for the `private_data` ++ * field of `struct fuse_context`. May be overridden by the ++ * `struct fuse_operations.init` handler. 
++ * @return a new filesystem object ++ */ ++struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, ++ void *private_data); ++ ++/** ++ * Factory for creating filesystem objects ++ * ++ * The function may use and remove options from 'args' that belong ++ * to this module. ++ * ++ * For now the 'fs' vector always contains exactly one filesystem. ++ * This is the filesystem which will be below the newly created ++ * filesystem in the stack. ++ * ++ * @param args the command line arguments ++ * @param fs NULL terminated filesystem object vector ++ * @return the new filesystem object ++ */ ++typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, ++ struct fuse_fs *fs[]); ++/** ++ * Register filesystem module ++ * ++ * If the "-omodules=*name*_:..." option is present, filesystem ++ * objects are created and pushed onto the stack with the *factory_* ++ * function. ++ * ++ * @param name_ the name of this filesystem module ++ * @param factory_ the factory function for this filesystem module ++ */ ++#define FUSE_REGISTER_MODULE(name_, factory_) \ ++ fuse_module_factory_t fuse_module_ ## name_ ## _factory = factory_ ++ ++/** Get session from fuse object */ ++struct fuse_session *fuse_get_session(struct fuse *f); ++ ++/** ++ * Open a FUSE file descriptor and set up the mount for the given ++ * mountpoint and flags. 
++ * ++ * @param mountpoint reference to the mount in the file system ++ * @param options mount options ++ * @return the FUSE file descriptor or -1 upon error ++ */ ++int fuse_open_channel(const char *mountpoint, const char *options); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* FUSE_H_ */ +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +new file mode 100644 +index 0000000..2d686b2 +--- /dev/null ++++ b/tools/virtiofsd/fuse_common.h +@@ -0,0 +1,823 @@ ++/* FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. ++*/ ++ ++/** @file */ ++ ++#if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_) ++#error "Never include <fuse_common.h> directly; use <fuse.h> or <fuse_lowlevel.h> instead." ++#endif ++ ++#ifndef FUSE_COMMON_H_ ++#define FUSE_COMMON_H_ ++ ++#include "fuse_opt.h" ++#include "fuse_log.h" ++#include <stdint.h> ++#include <sys/types.h> ++ ++/** Major version of FUSE library interface */ ++#define FUSE_MAJOR_VERSION 3 ++ ++/** Minor version of FUSE library interface */ ++#define FUSE_MINOR_VERSION 2 ++ ++#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) ++#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/** ++ * Information about an open file. ++ * ++ * File Handles are created by the open, opendir, and create methods and closed ++ * by the release and releasedir methods. Multiple file handles may be ++ * concurrently open for the same file. Generally, a client will create one ++ * file handle per file descriptor, though in some cases multiple file ++ * descriptors can share a single file handle. ++ */ ++struct fuse_file_info { ++ /** Open flags. Available in open() and release() */ ++ int flags; ++ ++ /** In case of a write operation indicates if this was caused ++ by a delayed write from the page cache.
If so, then the ++ context's pid, uid, and gid fields will not be valid, and ++ the *fh* value may not match the *fh* value that would ++ have been sent with the corresponding individual write ++ requests if write caching had been disabled. */ ++ unsigned int writepage : 1; ++ ++ /** Can be filled in by open, to use direct I/O on this file. */ ++ unsigned int direct_io : 1; ++ ++ /** Can be filled in by open. It signals the kernel that any ++ currently cached file data (ie., data that the filesystem ++ provided the last time the file was open) need not be ++ invalidated. Has no effect when set in other contexts (in ++ particular it does nothing when set by opendir()). */ ++ unsigned int keep_cache : 1; ++ ++ /** Indicates a flush operation. Set in flush operation, also ++ maybe set in highlevel lock operation and lowlevel release ++ operation. */ ++ unsigned int flush : 1; ++ ++ /** Can be filled in by open, to indicate that the file is not ++ seekable. */ ++ unsigned int nonseekable : 1; ++ ++ /* Indicates that flock locks for this file should be ++ released. If set, lock_owner shall contain a valid value. ++ May only be set in ->release(). */ ++ unsigned int flock_release : 1; ++ ++ /** Can be filled in by opendir. It signals the kernel to ++ enable caching of entries returned by readdir(). Has no ++ effect when set in other contexts (in particular it does ++ nothing when set by open()). */ ++ unsigned int cache_readdir : 1; ++ ++ /** Padding. Reserved for future use*/ ++ unsigned int padding : 25; ++ unsigned int padding2 : 32; ++ ++ /** File handle id. May be filled in by filesystem in create, ++ * open, and opendir(). Available in most other file operations on the ++ * same file handle. */ ++ uint64_t fh; ++ ++ /** Lock owner id. Available in locking operations and flush */ ++ uint64_t lock_owner; ++ ++ /** Requested poll events. Available in ->poll. Only set on kernels ++ which support it. If unsupported, this field is set to zero. 
*/ ++ uint32_t poll_events; ++}; ++ ++/** ++ * Configuration parameters passed to fuse_session_loop_mt() and ++ * fuse_loop_mt(). ++ */ ++struct fuse_loop_config { ++ /** ++ * whether to use separate device fds for each thread ++ * (may increase performance) ++ */ ++ int clone_fd; ++ ++ /** ++ * The maximum number of available worker threads before they ++ * start to get deleted when they become idle. If not ++ * specified, the default is 10. ++ * ++ * Adjusting this has performance implications; a very small number ++ * of threads in the pool will cause a lot of thread creation and ++ * deletion overhead and performance may suffer. When set to 0, a new ++ * thread will be created to service every operation. ++ */ ++ unsigned int max_idle_threads; ++}; ++ ++/************************************************************************** ++ * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * ++ **************************************************************************/ ++ ++/** ++ * Indicates that the filesystem supports asynchronous read requests. ++ * ++ * If this capability is not requested/available, the kernel will ++ * ensure that there is at most one pending read request per ++ * file-handle at any time, and will attempt to order read requests by ++ * increasing offset. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_ASYNC_READ (1 << 0) ++ ++/** ++ * Indicates that the filesystem supports "remote" locking. ++ * ++ * This feature is enabled by default when supported by the kernel, ++ * and if getlk() and setlk() handlers are implemented. ++ */ ++#define FUSE_CAP_POSIX_LOCKS (1 << 1) ++ ++/** ++ * Indicates that the filesystem supports the O_TRUNC open flag. If ++ * disabled, and an application specifies O_TRUNC, fuse first calls ++ * truncate() and then open() with O_TRUNC filtered out. ++ * ++ * This feature is enabled by default when supported by the kernel. 
++ */ ++#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) ++ ++/** ++ * Indicates that the filesystem supports lookups of "." and "..". ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) ++ ++/** ++ * Indicates that the kernel should not apply the umask to the ++ * file mode on create operations. ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_DONT_MASK (1 << 6) ++ ++/** ++ * Indicates that libfuse should try to use splice() when writing to ++ * the fuse device. This may improve performance. ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_SPLICE_WRITE (1 << 7) ++ ++/** ++ * Indicates that libfuse should try to move pages instead of copying when ++ * writing to / reading from the fuse device. This may improve performance. ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_SPLICE_MOVE (1 << 8) ++ ++/** ++ * Indicates that libfuse should try to use splice() when reading from ++ * the fuse device. This may improve performance. ++ * ++ * This feature is enabled by default when supported by the kernel and ++ * if the filesystem implements a write_buf() handler. ++ */ ++#define FUSE_CAP_SPLICE_READ (1 << 9) ++ ++/** ++ * If set, the calls to flock(2) will be emulated using POSIX locks and must ++ * then be handled by the filesystem's setlock() handler. ++ * ++ * If not set, flock(2) calls will be handled by the FUSE kernel module ++ * internally (so any access that does not go through the kernel cannot be taken ++ * into account). ++ * ++ * This feature is enabled by default when supported by the kernel and ++ * if the filesystem implements a flock() handler. ++ */ ++#define FUSE_CAP_FLOCK_LOCKS (1 << 10) ++ ++/** ++ * Indicates that the filesystem supports ioctl's on directories. ++ * ++ * This feature is enabled by default when supported by the kernel. 
++ */ ++#define FUSE_CAP_IOCTL_DIR (1 << 11) ++ ++/** ++ * Traditionally, while a file is open the FUSE kernel module only ++ * asks the filesystem for an update of the file's attributes when a ++ * client attempts to read beyond EOF. This is unsuitable for ++ * e.g. network filesystems, where the file contents may change ++ * without the kernel knowing about it. ++ * ++ * If this flag is set, FUSE will check the validity of the attributes ++ * on every read. If the attributes are no longer valid (i.e., if the ++ * *attr_timeout* passed to fuse_reply_attr() or set in `struct ++ * fuse_entry_param` has passed), it will first issue a `getattr` ++ * request. If the new mtime differs from the previous value, any ++ * cached file *contents* will be invalidated as well. ++ * ++ * This flag should always be set when available. If all file changes ++ * go through the kernel, *attr_timeout* should be set to a very large ++ * number to avoid unnecessary getattr() calls. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) ++ ++/** ++ * Indicates that the filesystem supports readdirplus. ++ * ++ * This feature is enabled by default when supported by the kernel and if the ++ * filesystem implements a readdirplus() handler. ++ */ ++#define FUSE_CAP_READDIRPLUS (1 << 13) ++ ++/** ++ * Indicates that the filesystem supports adaptive readdirplus. ++ * ++ * If FUSE_CAP_READDIRPLUS is not set, this flag has no effect. ++ * ++ * If FUSE_CAP_READDIRPLUS is set and this flag is not set, the kernel ++ * will always issue readdirplus() requests to retrieve directory ++ * contents. ++ * ++ * If FUSE_CAP_READDIRPLUS is set and this flag is set, the kernel ++ * will issue both readdir() and readdirplus() requests, depending on ++ * how much information is expected to be required. 
++ * ++ * As of Linux 4.20, the algorithm is as follows: when userspace ++ * starts to read directory entries, issue a READDIRPLUS request to ++ * the filesystem. If any entry attributes have been looked up by the ++ * time userspace requests the next batch of entries continue with ++ * READDIRPLUS, otherwise switch to plain READDIR. This will result ++ * in eg plain "ls" triggering READDIRPLUS first then READDIR after ++ * that because it doesn't do lookups. "ls -l" should result in all ++ * READDIRPLUS, except if dentries are already cached. ++ * ++ * This feature is enabled by default when supported by the kernel and ++ * if the filesystem implements both a readdirplus() and a readdir() ++ * handler. ++ */ ++#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) ++ ++/** ++ * Indicates that the filesystem supports asynchronous direct I/O submission. ++ * ++ * If this capability is not requested/available, the kernel will ensure that ++ * there is at most one pending read and one pending write request per direct ++ * I/O file-handle at any time. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_ASYNC_DIO (1 << 15) ++ ++/** ++ * Indicates that writeback caching should be enabled. This means that ++ * individual write requests may be buffered and merged in the kernel ++ * before they are sent to the filesystem. ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) ++ ++/** ++ * Indicates support for zero-message opens. If this flag is set in ++ * the `capable` field of the `fuse_conn_info` structure, then the ++ * filesystem may return `ENOSYS` from the open() handler to indicate ++ * success. Further attempts to open files will be handled in the ++ * kernel. (If this flag is not set, returning ENOSYS will be treated ++ * as an error and signaled to the caller). ++ * ++ * Setting (or unsetting) this flag in the `want` field has *no ++ * effect*.
++ */ ++#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) ++ ++/** ++ * Indicates support for parallel directory operations. If this flag ++ * is unset, the FUSE kernel module will ensure that lookup() and ++ * readdir() requests are never issued concurrently for the same ++ * directory. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) ++ ++/** ++ * Indicates support for POSIX ACLs. ++ * ++ * If this feature is enabled, the kernel will cache and have ++ * responsibility for enforcing ACLs. ACL will be stored as xattrs and ++ * passed to userspace, which is responsible for updating the ACLs in ++ * the filesystem, keeping the file mode in sync with the ACL, and ++ * ensuring inheritance of default ACLs when new filesystem nodes are ++ * created. Note that this requires that the file system is able to ++ * parse and interpret the xattr representation of ACLs. ++ * ++ * Enabling this feature implicitly turns on the ++ * ``default_permissions`` mount option (even if it was not passed to ++ * mount(2)). ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_POSIX_ACL (1 << 19) ++ ++/** ++ * Indicates that the filesystem is responsible for unsetting ++ * setuid and setgid bits when a file is written, truncated, or ++ * its owner is changed. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) ++ ++/** ++ * Indicates support for zero-message opendirs. If this flag is set in ++ * the `capable` field of the `fuse_conn_info` structure, then the filesystem ++ * may return `ENOSYS` from the opendir() handler to indicate success. Further ++ * opendir and releasedir messages will be handled in the kernel. (If this ++ * flag is not set, returning ENOSYS will be treated as an error and signalled ++ * to the caller.) ++ * ++ * Setting (or unsetting) this flag in the `want` field has *no effect*. 
++ */ ++#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) ++ ++/** ++ * Ioctl flags ++ * ++ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine ++ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed ++ * FUSE_IOCTL_RETRY: retry with new iovecs ++ * FUSE_IOCTL_DIR: is a directory ++ * ++ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs ++ */ ++#define FUSE_IOCTL_COMPAT (1 << 0) ++#define FUSE_IOCTL_UNRESTRICTED (1 << 1) ++#define FUSE_IOCTL_RETRY (1 << 2) ++#define FUSE_IOCTL_DIR (1 << 4) ++ ++#define FUSE_IOCTL_MAX_IOV 256 ++ ++/** ++ * Connection information, passed to the ->init() method ++ * ++ * Some of the elements are read-write, these can be changed to ++ * indicate the value requested by the filesystem. The requested ++ * value must usually be smaller than the indicated value. ++ */ ++struct fuse_conn_info { ++ /** ++ * Major version of the protocol (read-only) ++ */ ++ unsigned proto_major; ++ ++ /** ++ * Minor version of the protocol (read-only) ++ */ ++ unsigned proto_minor; ++ ++ /** ++ * Maximum size of the write buffer ++ */ ++ unsigned max_write; ++ ++ /** ++ * Maximum size of read requests. A value of zero indicates no ++ * limit. However, even if the filesystem does not specify a ++ * limit, the maximum size of read requests will still be ++ * limited by the kernel. ++ * ++ * NOTE: For the time being, the maximum size of read requests ++ * must be set both here *and* passed to fuse_session_new() ++ * using the ``-o max_read=`` mount option. At some point ++ * in the future, specifying the mount option will no longer ++ * be necessary. ++ */ ++ unsigned max_read; ++ ++ /** ++ * Maximum readahead ++ */ ++ unsigned max_readahead; ++ ++ /** ++ * Capability flags that the kernel supports (read-only) ++ */ ++ unsigned capable; ++ ++ /** ++ * Capability flags that the filesystem wants to enable. ++ * ++ * libfuse attempts to initialize this field with ++ * reasonable default values before calling the init() handler. 
++ */ ++ unsigned want; ++ ++ /** ++ * Maximum number of pending "background" requests. A ++ * background request is any type of request for which the ++ * total number is not limited by other means. As of kernel ++ * 4.8, only two types of requests fall into this category: ++ * ++ * 1. Read-ahead requests ++ * 2. Asynchronous direct I/O requests ++ * ++ * Read-ahead requests are generated (if max_readahead is ++ * non-zero) by the kernel to preemptively fill its caches ++ * when it anticipates that userspace will soon read more ++ * data. ++ * ++ * Asynchronous direct I/O requests are generated if ++ * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large ++ * direct I/O request. In this case the kernel will internally ++ * split it up into multiple smaller requests and submit them ++ * to the filesystem concurrently. ++ * ++ * Note that the following requests are *not* background ++ * requests: writeback requests (limited by the kernel's ++ * flusher algorithm), regular (i.e., synchronous and ++ * buffered) userspace read/write requests (limited to one per ++ * thread), asynchronous read requests (Linux's io_submit(2) ++ * call actually blocks, so these are also limited to one per ++ * thread). ++ */ ++ unsigned max_background; ++ ++ /** ++ * Kernel congestion threshold parameter. If the number of pending ++ * background requests exceeds this number, the FUSE kernel module will ++ * mark the filesystem as "congested". This instructs the kernel to ++ * expect that queued requests will take some time to complete, and to ++ * adjust its algorithms accordingly (e.g. by putting a waiting thread ++ * to sleep instead of using a busy-loop). ++ */ ++ unsigned congestion_threshold; ++ ++ /** ++ * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible ++ * for updating mtime and ctime when write requests are received. The ++ * updated values are passed to the filesystem with setattr() requests. 
++ * However, if the filesystem does not support the full resolution of ++ * the kernel timestamps (nanoseconds), the mtime and ctime values used ++ * by kernel and filesystem will differ (and result in an apparent ++ * change of times after a cache flush). ++ * ++ * To prevent this problem, this variable can be used to inform the ++ * kernel about the timestamp granularity supported by the file-system. ++ * The value should be power of 10. The default is 1, i.e. full ++ * nano-second resolution. Filesystems supporting only second resolution ++ * should set this to 1000000000. ++ */ ++ unsigned time_gran; ++ ++ /** ++ * For future use. ++ */ ++ unsigned reserved[22]; ++}; ++ ++struct fuse_session; ++struct fuse_pollhandle; ++struct fuse_conn_info_opts; ++ ++/** ++ * This function parses several command-line options that can be used ++ * to override elements of struct fuse_conn_info. The pointer returned ++ * by this function should be passed to the ++ * fuse_apply_conn_info_opts() method by the file system's init() ++ * handler. ++ * ++ * Before using this function, think twice if you really want these ++ * parameters to be adjustable from the command line. In most cases, ++ * they should be determined by the file system internally. 
++ * ++ * The following options are recognized: ++ * ++ * -o max_write=N sets conn->max_write ++ * -o max_readahead=N sets conn->max_readahead ++ * -o max_background=N sets conn->max_background ++ * -o congestion_threshold=N sets conn->congestion_threshold ++ * -o async_read sets FUSE_CAP_ASYNC_READ in conn->want ++ * -o sync_read unsets FUSE_CAP_ASYNC_READ in conn->want ++ * -o atomic_o_trunc sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want ++ * -o no_remote_lock Equivalent to -o no_remote_flock,no_remote_posix_lock ++ * -o no_remote_flock Unsets FUSE_CAP_FLOCK_LOCKS in conn->want ++ * -o no_remote_posix_lock Unsets FUSE_CAP_POSIX_LOCKS in conn->want ++ * -o [no_]splice_write (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want ++ * -o [no_]splice_move (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want ++ * -o [no_]splice_read (un-)sets FUSE_CAP_SPLICE_READ in conn->want ++ * -o [no_]auto_inval_data (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want ++ * -o readdirplus=no unsets FUSE_CAP_READDIRPLUS in conn->want ++ * -o readdirplus=yes sets FUSE_CAP_READDIRPLUS and unsets ++ * FUSE_CAP_READDIRPLUS_AUTO in conn->want ++ * -o readdirplus=auto sets FUSE_CAP_READDIRPLUS and ++ * FUSE_CAP_READDIRPLUS_AUTO in conn->want ++ * -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in conn->want ++ * -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want ++ * -o time_gran=N sets conn->time_gran ++ * ++ * Known options will be removed from *args*, unknown options will be ++ * passed through unchanged. ++ * ++ * @param args argument vector (input+output) ++ * @return parsed options ++ **/ ++struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); ++ ++/** ++ * This function applies the (parsed) parameters in *opts* to the ++ * *conn* pointer. It may modify the following fields: want, ++ * max_write, max_readahead, congestion_threshold, max_background, ++ * time_gran. A field is only set (or unset) if the corresponding ++ * option has been explicitly set.
++ */ ++void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, ++ struct fuse_conn_info *conn); ++ ++/** ++ * Go into the background ++ * ++ * @param foreground if true, stay in the foreground ++ * @return 0 on success, -1 on failure ++ */ ++int fuse_daemonize(int foreground); ++ ++/** ++ * Get the version of the library ++ * ++ * @return the version ++ */ ++int fuse_version(void); ++ ++/** ++ * Get the full package version string of the library ++ * ++ * @return the package version ++ */ ++const char *fuse_pkgversion(void); ++ ++/** ++ * Destroy poll handle ++ * ++ * @param ph the poll handle ++ */ ++void fuse_pollhandle_destroy(struct fuse_pollhandle *ph); ++ ++/* ----------------------------------------------------------- * ++ * Data buffer * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Buffer flags ++ */ ++enum fuse_buf_flags { ++ /** ++ * Buffer contains a file descriptor ++ * ++ * If this flag is set, the .fd field is valid, otherwise the ++ * .mem field is valid. ++ */ ++ FUSE_BUF_IS_FD = (1 << 1), ++ ++ /** ++ * Seek on the file descriptor ++ * ++ * If this flag is set then the .pos field is valid and is ++ * used to seek to the given offset before performing ++ * operation on file descriptor. ++ */ ++ FUSE_BUF_FD_SEEK = (1 << 2), ++ ++ /** ++ * Retry operation on file descriptor ++ * ++ * If this flag is set then retry operation on file descriptor ++ * until .size bytes have been copied or an error or EOF is ++ * detected. ++ */ ++ FUSE_BUF_FD_RETRY = (1 << 3), ++}; ++ ++/** ++ * Buffer copy flags ++ */ ++enum fuse_buf_copy_flags { ++ /** ++ * Don't use splice(2) ++ * ++ * Always fall back to using read and write instead of ++ * splice(2) to copy data from one file descriptor to another. ++ * ++ * If this flag is not set, then only fall back if splice is ++ * unavailable.
++ */ ++ FUSE_BUF_NO_SPLICE = (1 << 1), ++ ++ /** ++ * Force splice ++ * ++ * Always use splice(2) to copy data from one file descriptor ++ * to another. If splice is not available, return -EINVAL. ++ */ ++ FUSE_BUF_FORCE_SPLICE = (1 << 2), ++ ++ /** ++ * Try to move data with splice. ++ * ++ * If splice is used, try to move pages from the source to the ++ * destination instead of copying. See documentation of ++ * SPLICE_F_MOVE in splice(2) man page. ++ */ ++ FUSE_BUF_SPLICE_MOVE = (1 << 3), ++ ++ /** ++ * Don't block on the pipe when copying data with splice ++ * ++ * Makes the operations on the pipe non-blocking (if the pipe ++ * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) ++ * man page. ++ */ ++ FUSE_BUF_SPLICE_NONBLOCK= (1 << 4), ++}; ++ ++/** ++ * Single data buffer ++ * ++ * Generic data buffer for I/O, extended attributes, etc... Data may ++ * be supplied as a memory pointer or as a file descriptor ++ */ ++struct fuse_buf { ++ /** ++ * Size of data in bytes ++ */ ++ size_t size; ++ ++ /** ++ * Buffer flags ++ */ ++ enum fuse_buf_flags flags; ++ ++ /** ++ * Memory pointer ++ * ++ * Used unless FUSE_BUF_IS_FD flag is set. ++ */ ++ void *mem; ++ ++ /** ++ * File descriptor ++ * ++ * Used if FUSE_BUF_IS_FD flag is set. ++ */ ++ int fd; ++ ++ /** ++ * File position ++ * ++ * Used if FUSE_BUF_FD_SEEK flag is set. ++ */ ++ off_t pos; ++}; ++ ++/** ++ * Data buffer vector ++ * ++ * An array of data buffers, each containing a memory pointer or a ++ * file descriptor. ++ * ++ * Allocate dynamically to add more than one buffer. 
++ */ ++struct fuse_bufvec { ++ /** ++ * Number of buffers in the array ++ */ ++ size_t count; ++ ++ /** ++ * Index of current buffer within the array ++ */ ++ size_t idx; ++ ++ /** ++ * Current offset within the current buffer ++ */ ++ size_t off; ++ ++ /** ++ * Array of buffers ++ */ ++ struct fuse_buf buf[1]; ++}; ++ ++/* Initialize bufvec with a single buffer of given size */ ++#define FUSE_BUFVEC_INIT(size__) \ ++ ((struct fuse_bufvec) { \ ++ /* .count= */ 1, \ ++ /* .idx = */ 0, \ ++ /* .off = */ 0, \ ++ /* .buf = */ { /* [0] = */ { \ ++ /* .size = */ (size__), \ ++ /* .flags = */ (enum fuse_buf_flags) 0, \ ++ /* .mem = */ NULL, \ ++ /* .fd = */ -1, \ ++ /* .pos = */ 0, \ ++ } } \ ++ } ) ++ ++/** ++ * Get total size of data in a fuse buffer vector ++ * ++ * @param bufv buffer vector ++ * @return size of data ++ */ ++size_t fuse_buf_size(const struct fuse_bufvec *bufv); ++ ++/** ++ * Copy data from one buffer vector to another ++ * ++ * @param dst destination buffer vector ++ * @param src source buffer vector ++ * @param flags flags controlling the copy ++ * @return actual number of bytes copied or -errno on error ++ */ ++ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, ++ enum fuse_buf_copy_flags flags); ++ ++/* ----------------------------------------------------------- * ++ * Signal handling * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Exit session on HUP, TERM and INT signals and ignore PIPE signal ++ * ++ * Stores session in a global variable. May only be called once per ++ * process until fuse_remove_signal_handlers() is called. ++ * ++ * Once either of the POSIX signals arrives, the signal handler calls ++ * fuse_session_exit(). 
++ * ++ * @param se the session to exit ++ * @return 0 on success, -1 on failure ++ * ++ * See also: ++ * fuse_remove_signal_handlers() ++ */ ++int fuse_set_signal_handlers(struct fuse_session *se); ++ ++/** ++ * Restore default signal handlers ++ * ++ * Resets global session. After this fuse_set_signal_handlers() may ++ * be called again. ++ * ++ * @param se the same session as given in fuse_set_signal_handlers() ++ * ++ * See also: ++ * fuse_set_signal_handlers() ++ */ ++void fuse_remove_signal_handlers(struct fuse_session *se); ++ ++/* ----------------------------------------------------------- * ++ * Compatibility stuff * ++ * ----------------------------------------------------------- */ ++ ++#if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30 ++# error only API version 30 or greater is supported ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++ ++/* ++ * This interface uses 64 bit off_t. ++ * ++ * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags! ++ */ ++ ++#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus ++_Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit"); ++#else ++struct _fuse_off_t_must_be_64bit_dummy_struct \ ++ { unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); }; ++#endif ++ ++#endif /* FUSE_COMMON_H_ */ +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +new file mode 100644 +index 0000000..d38b630 +--- /dev/null ++++ b/tools/virtiofsd/fuse_i.h +@@ -0,0 +1,139 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. 
++ See the file COPYING.LIB ++*/ ++ ++#include "fuse.h" ++#include "fuse_lowlevel.h" ++ ++struct mount_opts; ++ ++struct fuse_req { ++ struct fuse_session *se; ++ uint64_t unique; ++ int ctr; ++ pthread_mutex_t lock; ++ struct fuse_ctx ctx; ++ struct fuse_chan *ch; ++ int interrupted; ++ unsigned int ioctl_64bit : 1; ++ union { ++ struct { ++ uint64_t unique; ++ } i; ++ struct { ++ fuse_interrupt_func_t func; ++ void *data; ++ } ni; ++ } u; ++ struct fuse_req *next; ++ struct fuse_req *prev; ++}; ++ ++struct fuse_notify_req { ++ uint64_t unique; ++ void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, ++ const void *, const struct fuse_buf *); ++ struct fuse_notify_req *next; ++ struct fuse_notify_req *prev; ++}; ++ ++struct fuse_session { ++ char *mountpoint; ++ volatile int exited; ++ int fd; ++ struct mount_opts *mo; ++ int debug; ++ int deny_others; ++ struct fuse_lowlevel_ops op; ++ int got_init; ++ struct cuse_data *cuse_data; ++ void *userdata; ++ uid_t owner; ++ struct fuse_conn_info conn; ++ struct fuse_req list; ++ struct fuse_req interrupts; ++ pthread_mutex_t lock; ++ int got_destroy; ++ pthread_key_t pipe_key; ++ int broken_splice_nonblock; ++ uint64_t notify_ctr; ++ struct fuse_notify_req notify_list; ++ size_t bufsize; ++ int error; ++}; ++ ++struct fuse_chan { ++ pthread_mutex_t lock; ++ int ctr; ++ int fd; ++}; ++ ++/** ++ * Filesystem module ++ * ++ * Filesystem modules are registered with the FUSE_REGISTER_MODULE() ++ * macro. 
++ * ++ */ ++struct fuse_module { ++ char *name; ++ fuse_module_factory_t factory; ++ struct fuse_module *next; ++ struct fusemod_so *so; ++ int ctr; ++}; ++ ++/* ----------------------------------------------------------- * ++ * Channel interface (when using -o clone_fd) * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Obtain counted reference to the channel ++ * ++ * @param ch the channel ++ * @return the channel ++ */ ++struct fuse_chan *fuse_chan_get(struct fuse_chan *ch); ++ ++/** ++ * Drop counted reference to a channel ++ * ++ * @param ch the channel ++ */ ++void fuse_chan_put(struct fuse_chan *ch); ++ ++struct mount_opts *parse_mount_opts(struct fuse_args *args); ++void destroy_mount_opts(struct mount_opts *mo); ++void fuse_mount_version(void); ++unsigned get_max_read(struct mount_opts *o); ++void fuse_kern_unmount(const char *mountpoint, int fd); ++int fuse_kern_mount(const char *mountpoint, struct mount_opts *mo); ++ ++int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, ++ int count); ++void fuse_free_req(fuse_req_t req); ++ ++void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg); ++ ++int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg); ++ ++int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, ++ struct fuse_chan *ch); ++void fuse_session_process_buf_int(struct fuse_session *se, ++ const struct fuse_buf *buf, struct fuse_chan *ch); ++ ++struct fuse *fuse_new_31(struct fuse_args *args, const struct fuse_operations *op, ++ size_t op_size, void *private_data); ++int fuse_loop_mt_32(struct fuse *f, struct fuse_loop_config *config); ++int fuse_session_loop_mt_32(struct fuse_session *se, struct fuse_loop_config *config); ++ ++#define FUSE_MAX_MAX_PAGES 256 ++#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 ++ ++/* room needed in buffer to accommodate header */ ++#define FUSE_BUFFER_HEADER_SIZE 0x1000 ++ +diff --git 
a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h +new file mode 100644 +index 0000000..5e112e0 +--- /dev/null ++++ b/tools/virtiofsd/fuse_log.h +@@ -0,0 +1,82 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2019 Red Hat, Inc. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. ++*/ ++ ++#ifndef FUSE_LOG_H_ ++#define FUSE_LOG_H_ ++ ++/** @file ++ * ++ * This file defines the logging interface of FUSE ++ */ ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/** ++ * Log severity level ++ * ++ * These levels correspond to syslog(2) log levels since they are widely used. ++ */ ++enum fuse_log_level { ++ FUSE_LOG_EMERG, ++ FUSE_LOG_ALERT, ++ FUSE_LOG_CRIT, ++ FUSE_LOG_ERR, ++ FUSE_LOG_WARNING, ++ FUSE_LOG_NOTICE, ++ FUSE_LOG_INFO, ++ FUSE_LOG_DEBUG ++}; ++ ++/** ++ * Log message handler function. ++ * ++ * This function must be thread-safe. It may be called from any libfuse ++ * function, including fuse_parse_cmdline() and other functions invoked before ++ * a FUSE filesystem is created. ++ * ++ * Install a custom log message handler function using fuse_set_log_func(). ++ * ++ * @param level log severity level ++ * @param fmt sprintf-style format string including newline ++ * @param ap format string arguments ++ */ ++typedef void (*fuse_log_func_t)(enum fuse_log_level level, ++ const char *fmt, va_list ap); ++ ++/** ++ * Install a custom log handler function. ++ * ++ * Log messages are emitted by libfuse functions to report errors and debug ++ * information. Messages are printed to stderr by default but this can be ++ * overridden by installing a custom log message handler function. ++ * ++ * The log message handler function is global and affects all FUSE filesystems ++ * created within this process. 
++ * ++ * @param func a custom log message handler function or NULL to revert to ++ * the default ++ */ ++void fuse_set_log_func(fuse_log_func_t func); ++ ++/** ++ * Emit a log message ++ * ++ * @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc) ++ * @param fmt sprintf-style format string including newline ++ */ ++void fuse_log(enum fuse_log_level level, const char *fmt, ...); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* FUSE_LOG_H_ */ +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +new file mode 100644 +index 0000000..18c6363 +--- /dev/null ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -0,0 +1,2089 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. ++*/ ++ ++#ifndef FUSE_LOWLEVEL_H_ ++#define FUSE_LOWLEVEL_H_ ++ ++/** @file ++ * ++ * Low level API ++ * ++ * IMPORTANT: you should define FUSE_USE_VERSION before including this ++ * header. To use the newest API define it to 31 (recommended for any ++ * new application). 
++ */ ++ ++#ifndef FUSE_USE_VERSION ++#error FUSE_USE_VERSION not defined ++#endif ++ ++#include "fuse_common.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ----------------------------------------------------------- * ++ * Miscellaneous definitions * ++ * ----------------------------------------------------------- */ ++ ++/** The node ID of the root inode */ ++#define FUSE_ROOT_ID 1 ++ ++/** Inode number type */ ++typedef uint64_t fuse_ino_t; ++ ++/** Request pointer type */ ++typedef struct fuse_req *fuse_req_t; ++ ++/** ++ * Session ++ * ++ * This provides hooks for processing requests, and exiting ++ */ ++struct fuse_session; ++ ++/** Directory entry parameters supplied to fuse_reply_entry() */ ++struct fuse_entry_param { ++ /** Unique inode number ++ * ++ * In lookup, zero means negative entry (from version 2.5) ++ * Returning ENOENT also means negative entry, but by setting zero ++ * ino the kernel may cache negative entries for entry_timeout ++ * seconds. ++ */ ++ fuse_ino_t ino; ++ ++ /** Generation number for this entry. ++ * ++ * If the file system will be exported over NFS, the ++ * ino/generation pairs need to be unique over the file ++ * system's lifetime (rather than just the mount time). So if ++ * the file system reuses an inode after it has been deleted, ++ * it must assign a new, previously unused generation number ++ * to the inode at the same time. ++ * ++ */ ++ uint64_t generation; ++ ++ /** Inode attributes. ++ * ++ * Even if attr_timeout == 0, attr must be correct. For example, ++ * for open(), FUSE uses attr.st_size from lookup() to determine ++ * how many bytes to request. If this value is not correct, ++ * incorrect data will be returned. ++ */ ++ struct stat attr; ++ ++ /** Validity timeout (in seconds) for inode attributes. If ++ attributes only change as a result of requests that come ++ through the kernel, this should be set to a very large ++ value. 
*/ ++ double attr_timeout; ++ ++ /** Validity timeout (in seconds) for the name. If directory ++ entries are changed/deleted only as a result of requests ++ that come through the kernel, this should be set to a very ++ large value. */ ++ double entry_timeout; ++}; ++ ++/** ++ * Additional context associated with requests. ++ * ++ * Note that the reported client uid, gid and pid may be zero in some ++ * situations. For example, if the FUSE file system is running in a ++ * PID or user namespace but then accessed from outside the namespace, ++ * there is no valid uid/pid/gid that could be reported. ++ */ ++struct fuse_ctx { ++ /** User ID of the calling process */ ++ uid_t uid; ++ ++ /** Group ID of the calling process */ ++ gid_t gid; ++ ++ /** Thread ID of the calling process */ ++ pid_t pid; ++ ++ /** Umask of the calling process */ ++ mode_t umask; ++}; ++ ++struct fuse_forget_data { ++ fuse_ino_t ino; ++ uint64_t nlookup; ++}; ++ ++/* 'to_set' flags in setattr */ ++#define FUSE_SET_ATTR_MODE (1 << 0) ++#define FUSE_SET_ATTR_UID (1 << 1) ++#define FUSE_SET_ATTR_GID (1 << 2) ++#define FUSE_SET_ATTR_SIZE (1 << 3) ++#define FUSE_SET_ATTR_ATIME (1 << 4) ++#define FUSE_SET_ATTR_MTIME (1 << 5) ++#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) ++#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) ++#define FUSE_SET_ATTR_CTIME (1 << 10) ++ ++/* ----------------------------------------------------------- * ++ * Request methods and replies * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Low level filesystem operations ++ * ++ * Most of the methods (with the exception of init and destroy) ++ * receive a request handle (fuse_req_t) as their first argument. ++ * This handle must be passed to one of the specified reply functions. ++ * ++ * This may be done inside the method invocation, or after the call ++ * has returned. The request handle is valid until one of the reply ++ * functions is called. 
++ * ++ * Other pointer arguments (name, fuse_file_info, etc) are not valid ++ * after the call has returned, so if they are needed later, their ++ * contents have to be copied. ++ * ++ * In general, all methods are expected to perform any necessary ++ * permission checking. However, a filesystem may delegate this task ++ * to the kernel by passing the `default_permissions` mount option to ++ * `fuse_session_new()`. In this case, methods will only be called if ++ * the kernel's permission check has succeeded. ++ * ++ * The filesystem sometimes needs to handle a return value of -ENOENT ++ * from the reply function, which means, that the request was ++ * interrupted, and the reply discarded. For example if ++ * fuse_reply_open() return -ENOENT means, that the release method for ++ * this file will not be called. ++ */ ++struct fuse_lowlevel_ops { ++ /** ++ * Initialize filesystem ++ * ++ * This function is called when libfuse establishes ++ * communication with the FUSE kernel module. The file system ++ * should use this module to inspect and/or modify the ++ * connection parameters provided in the `conn` structure. ++ * ++ * Note that some parameters may be overwritten by options ++ * passed to fuse_session_new() which take precedence over the ++ * values set in this handler. ++ * ++ * There's no reply to this function ++ * ++ * @param userdata the user data passed to fuse_session_new() ++ */ ++ void (*init) (void *userdata, struct fuse_conn_info *conn); ++ ++ /** ++ * Clean up filesystem. ++ * ++ * Called on filesystem exit. When this method is called, the ++ * connection to the kernel may be gone already, so that eg. calls ++ * to fuse_lowlevel_notify_* will fail. ++ * ++ * There's no reply to this function ++ * ++ * @param userdata the user data passed to fuse_session_new() ++ */ ++ void (*destroy) (void *userdata); ++ ++ /** ++ * Look up a directory entry by name and get its attributes. 
++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name the name to look up ++ */ ++ void (*lookup) (fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Forget about an inode ++ * ++ * This function is called when the kernel removes an inode ++ * from its internal caches. ++ * ++ * The inode's lookup count increases by one for every call to ++ * fuse_reply_entry and fuse_reply_create. The nlookup parameter ++ * indicates by how much the lookup count should be decreased. ++ * ++ * Inodes with a non-zero lookup count may receive requests from ++ * the kernel even after calls to unlink, rmdir or (when ++ * overwriting an existing file) rename. Filesystems must handle ++ * such requests properly and it is recommended to defer removal ++ * of the inode until the lookup count reaches zero. Calls to ++ * unlink, rmdir or rename will be followed closely by forget ++ * unless the file or directory is open, in which case the ++ * kernel issues forget only after the release or releasedir ++ * calls. ++ * ++ * Note that if a file system will be exported over NFS the ++ * inode's lifetime must extend even beyond forget. See the ++ * generation field in struct fuse_entry_param above. ++ * ++ * On unmount the lookup count for all inodes implicitly drops ++ * to zero. It is not guaranteed that the file system will ++ * receive corresponding forget messages for the affected ++ * inodes. ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param nlookup the number of lookups to forget ++ */ ++ void (*forget) (fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); ++ ++ /** ++ * Get file attributes.
++ * ++ * If writeback caching is enabled, the kernel may have a ++ * better idea of a file's length than the FUSE file system ++ * (eg if there has been a write that extended the file size, ++ * but that has not yet been passed to the filesystem). ++ * ++ * In this case, the st_size value provided by the file system ++ * will be ignored. ++ * ++ * Valid replies: ++ * fuse_reply_attr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi for future use, currently always NULL ++ */ ++ void (*getattr) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Set file attributes ++ * ++ * In the 'attr' argument only members indicated by the 'to_set' ++ * bitmask contain valid values. Other members contain undefined ++ * values. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits if the file ++ * size or owner is being changed. ++ * ++ * If the setattr was invoked from the ftruncate() system call ++ * under Linux kernel versions 2.6.15 or later, the fi->fh will ++ * contain the value set by the open method or will be undefined ++ * if the open method didn't set any value. Otherwise (not ++ * ftruncate call, or kernel version earlier than 2.6.15) the fi ++ * parameter will be NULL.
++ * ++ * Valid replies: ++ * fuse_reply_attr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param attr the attributes ++ * @param to_set bit mask of attributes which should be set ++ * @param fi file information, or NULL ++ */ ++ void (*setattr) (fuse_req_t req, fuse_ino_t ino, struct stat *attr, ++ int to_set, struct fuse_file_info *fi); ++ ++ /** ++ * Read symbolic link ++ * ++ * Valid replies: ++ * fuse_reply_readlink ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ */ ++ void (*readlink) (fuse_req_t req, fuse_ino_t ino); ++ ++ /** ++ * Create file node ++ * ++ * Create a regular file, character device, block device, fifo or ++ * socket node. ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode file type and mode with which to create the new file ++ * @param rdev the device number (only valid if created file is a device) ++ */ ++ void (*mknod) (fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, dev_t rdev); ++ ++ /** ++ * Create a directory ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode with which to create the new file ++ */ ++ void (*mkdir) (fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode); ++ ++ /** ++ * Remove a file ++ * ++ * If the file's inode's lookup count is non-zero, the file ++ * system is expected to postpone any removal of the inode ++ * until the lookup count reaches zero (see description of the ++ * forget function). 
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to remove ++ */ ++ void (*unlink) (fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Remove a directory ++ * ++ * If the directory's inode's lookup count is non-zero, the ++ * file system is expected to postpone any removal of the ++ * inode until the lookup count reaches zero (see description ++ * of the forget function). ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to remove ++ */ ++ void (*rmdir) (fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Create a symbolic link ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param link the contents of the symbolic link ++ * @param parent inode number of the parent directory ++ * @param name to create ++ */ ++ void (*symlink) (fuse_req_t req, const char *link, fuse_ino_t parent, ++ const char *name); ++ ++ /** Rename a file ++ * ++ * If the target exists it should be atomically replaced. If ++ * the target's inode's lookup count is non-zero, the file ++ * system is expected to postpone any removal of the inode ++ * until the lookup count reaches zero (see description of the ++ * forget function). ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EINVAL, i.e. all ++ * future rename requests will fail with EINVAL without being ++ * sent to the filesystem process. ++ * ++ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If ++ * RENAME_NOREPLACE is specified, the filesystem must not ++ * overwrite *newname* if it exists and return an error ++ * instead. If `RENAME_EXCHANGE` is specified, the filesystem ++ * must atomically exchange the two files, i.e.
both must ++ * exist and neither may be deleted. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the old parent directory ++ * @param name old name ++ * @param newparent inode number of the new parent directory ++ * @param newname new name ++ */ ++ void (*rename) (fuse_req_t req, fuse_ino_t parent, const char *name, ++ fuse_ino_t newparent, const char *newname, ++ unsigned int flags); ++ ++ /** ++ * Create a hard link ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the old inode number ++ * @param newparent inode number of the new parent directory ++ * @param newname new name to create ++ */ ++ void (*link) (fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, ++ const char *newname); ++ ++ /** ++ * Open a file ++ * ++ * Open flags are available in fi->flags. The following rules ++ * apply. ++ * ++ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be ++ * filtered out / handled by the kernel. ++ * ++ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used ++ * by the filesystem to check if the operation is ++ * permitted. If the ``-o default_permissions`` mount ++ * option is given, this check is already done by the ++ * kernel before calling open() and may thus be omitted by ++ * the filesystem. ++ * ++ * - When writeback caching is enabled, the kernel may send ++ * read requests even for files opened with O_WRONLY. The ++ * filesystem should be prepared to handle this. ++ * ++ * - When writeback caching is disabled, the filesystem is ++ * expected to properly handle the O_APPEND flag and ensure ++ * that each write is appending to the end of the file. ++ * ++ * - When writeback caching is enabled, the kernel will ++ * handle O_APPEND. However, unless all changes to the file ++ * come through the kernel this will not work reliably. 
The ++ * filesystem should thus either ignore the O_APPEND flag ++ * (and let the kernel handle it), or return an error ++ * (indicating that reliable O_APPEND is not available). ++ * ++ * Filesystem may store an arbitrary file handle (pointer, ++ * index, etc) in fi->fh, and use this in all other file ++ * operations (read, write, flush, release, fsync). ++ * ++ * Filesystem may also implement stateless file I/O and not store ++ * anything in fi->fh. ++ * ++ * There are also some flags (direct_io, keep_cache) which the ++ * filesystem may set in fi, to change the way the file is opened. ++ * See fuse_file_info structure in fuse_common.h for more details. ++ * ++ * If this request is answered with an error code of ENOSYS ++ * and FUSE_CAP_NO_OPEN_SUPPORT is set in ++ * `fuse_conn_info.capable`, this is treated as success and ++ * future calls to open and release will also succeed without being ++ * sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_open ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*open) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Read data ++ * ++ * Read should send exactly the number of bytes requested except ++ * on EOF or error, otherwise the rest of the data will be ++ * substituted with zeroes. An exception to this is when the file ++ * has been opened in 'direct_io' mode, in which case the return ++ * value of the read system call will reflect the return value of ++ * this operation. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value.
++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_iov ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size number of bytes to read ++ * @param off offset to read from ++ * @param fi file information ++ */ ++ void (*read) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Write data ++ * ++ * Write should return exactly the number of bytes requested ++ * except on error. An exception to this is when the file has ++ * been opened in 'direct_io' mode, in which case the return value ++ * of the write system call will reflect the return value of this ++ * operation. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param buf data to write ++ * @param size number of bytes to write ++ * @param off offset to write to ++ * @param fi file information ++ */ ++ void (*write) (fuse_req_t req, fuse_ino_t ino, const char *buf, ++ size_t size, off_t off, struct fuse_file_info *fi); ++ ++ /** ++ * Flush method ++ * ++ * This is called on each close() of the opened file. ++ * ++ * Since file descriptors can be duplicated (dup, dup2, fork), for ++ * one open call there may be many flush calls. ++ * ++ * Filesystems shouldn't assume that flush will always be called ++ * after some writes, or that it will be called at all. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * ++ * NOTE: the name of the method is misleading, since (unlike ++ * fsync) the filesystem is not forced to flush pending writes.
++ * One reason to flush data is if the filesystem wants to return ++ * write errors during close. However, such use is non-portable ++ * because POSIX does not require [close] to wait for delayed I/O to ++ * complete. ++ * ++ * If the filesystem supports file locking operations (setlk, ++ * getlk) it should remove all locks belonging to 'fi->owner'. ++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to flush() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * ++ * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html ++ */ ++ void (*flush) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Release an open file ++ * ++ * Release is called when there are no more references to an open ++ * file: all file descriptors are closed and all memory mappings ++ * are unmapped. ++ * ++ * For every open call there will be exactly one release call (unless ++ * the filesystem is force-unmounted). ++ * ++ * The filesystem may reply with an error, but error values are ++ * not returned to close() or munmap() which triggered the ++ * release. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * fi->flags will contain the same flags as for open. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*release) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Synchronize file contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data. 
++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to fsync() will ++ * succeed automatically without being sent to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param datasync flag indicating if only data should be flushed ++ * @param fi file information ++ */ ++ void (*fsync) (fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Open a directory ++ * ++ * Filesystem may store an arbitrary file handle (pointer, index, ++ * etc) in fi->fh, and use this in all other directory ++ * stream operations (readdir, releasedir, fsyncdir). ++ * ++ * If this request is answered with an error code of ENOSYS and ++ * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, ++ * this is treated as success and future calls to opendir and ++ * releasedir will also succeed without being sent to the filesystem ++ * process. In addition, the kernel will cache readdir results ++ * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. ++ * ++ * Valid replies: ++ * fuse_reply_open ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*opendir) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Read directory ++ * ++ * Send a buffer filled using fuse_add_direntry(), with size not ++ * exceeding the requested size. Send an empty buffer on end of ++ * stream. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * Returning a directory entry from readdir() does not affect ++ * its lookup count. ++ * ++ * If off_t is non-zero, then it will correspond to one of the off_t ++ * values that was previously returned by readdir() for the same ++ * directory handle.
In this case, readdir() should skip over entries ++ * coming before the position defined by the off_t value. If entries ++ * are added or removed while the directory handle is open, the filesystem ++ * may still include the entries that have been removed, and may not ++ * report the entries that have been created. However, addition or ++ * removal of entries must never cause readdir() to skip over unrelated ++ * entries or to report them more than once. This means ++ * that off_t can not be a simple index that enumerates the entries ++ * that have been returned but must contain sufficient information to ++ * uniquely determine the next directory entry to return even when the ++ * set of entries is changing. ++ * ++ * The function does not have to report the '.' and '..' ++ * entries, but is allowed to do so. Note that, if readdir does ++ * not return '.' or '..', they will not be implicitly returned, ++ * and this behavior is observable by the caller. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum number of bytes to send ++ * @param off offset to continue reading the directory stream ++ * @param fi file information ++ */ ++ void (*readdir) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Release an open directory ++ * ++ * For every opendir call there will be exactly one releasedir ++ * call (unless the filesystem is force-unmounted). ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value.
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*releasedir) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Synchronize directory contents ++ * ++ * If the datasync parameter is non-zero, then only the directory ++ * contents should be flushed, not the meta data. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to fsyncdir() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param datasync flag indicating if only data should be flushed ++ * @param fi file information ++ */ ++ void (*fsyncdir) (fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Get file system statistics ++ * ++ * Valid replies: ++ * fuse_reply_statfs ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number, zero means "undefined" ++ */ ++ void (*statfs) (fuse_req_t req, fuse_ino_t ino); ++ ++ /** ++ * Set an extended attribute ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future setxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ */ ++ void (*setxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, ++ const char *value, size_t size, int flags); ++ ++ /** ++ * Get an extended attribute ++ * ++ * If size is zero, the size of the value should be sent with ++ * fuse_reply_xattr. 
++ * ++ * If the size is non-zero, and the value fits in the buffer, the ++ * value should be sent with fuse_reply_buf. ++ * ++ * If the size is too small for the value, the ERANGE error should ++ * be sent. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future getxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_xattr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param name of the extended attribute ++ * @param size maximum size of the value to send ++ */ ++ void (*getxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, ++ size_t size); ++ ++ /** ++ * List extended attribute names ++ * ++ * If size is zero, the total size of the attribute list should be ++ * sent with fuse_reply_xattr. ++ * ++ * If the size is non-zero, and the null character separated ++ * attribute list fits in the buffer, the list should be sent with ++ * fuse_reply_buf. ++ * ++ * If the size is too small for the list, the ERANGE error should ++ * be sent. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future listxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_xattr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum size of the list to send ++ */ ++ void (*listxattr) (fuse_req_t req, fuse_ino_t ino, size_t size); ++ ++ /** ++ * Remove an extended attribute ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all ++ * future removexattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param name of the extended attribute ++ */ ++ void (*removexattr) (fuse_req_t req, fuse_ino_t ino, const char *name); ++ ++ /** ++ * Check file access permissions ++ * ++ * This will be called for the access() and chdir() system ++ * calls. If the 'default_permissions' mount option is given, ++ * this method is not called. ++ * ++ * This method is not called under Linux kernel versions 2.4.x ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent success, i.e. this and all future access() ++ * requests will succeed without being send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param mask requested access mode ++ */ ++ void (*access) (fuse_req_t req, fuse_ino_t ino, int mask); ++ ++ /** ++ * Create and open a file ++ * ++ * If the file does not exist, first create it with the specified ++ * mode, and then open it. ++ * ++ * See the description of the open handler for more ++ * information. ++ * ++ * If this method is not implemented or under Linux kernel ++ * versions earlier than 2.6.15, the mknod() and open() methods ++ * will be called instead. ++ * ++ * If this request is answered with an error code of ENOSYS, the handler ++ * is treated as not implemented (i.e., for this and future requests the ++ * mknod() and open() handlers will be called instead). 
++ * ++ * Valid replies: ++ * fuse_reply_create ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode file type and mode with which to create the new file ++ * @param fi file information ++ */ ++ void (*create) (fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, struct fuse_file_info *fi); ++ ++ /** ++ * Test for a POSIX file lock ++ * ++ * Valid replies: ++ * fuse_reply_lock ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param lock the region/type to test ++ */ ++ void (*getlk) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi, struct flock *lock); ++ ++ /** ++ * Acquire, modify or release a POSIX file lock ++ * ++ * For POSIX threads (NPTL) there's a 1-1 relation between pid and ++ * owner, but otherwise this is not always the case. For checking ++ * lock ownership, 'fi->owner' must be used. The l_pid field in ++ * 'struct flock' should only be used to fill in this field in ++ * getlk(). ++ * ++ * Note: if the locking methods are not implemented, the kernel ++ * will still allow file locking to work locally. Hence these are ++ * only interesting for network filesystems and similar. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param lock the region/type to set ++ * @param sleep locking operation may sleep ++ */ ++ void (*setlk) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi, ++ struct flock *lock, int sleep); ++ ++ /** ++ * Map block index within file to block index within device ++ * ++ * Note: This makes sense only for block device backed filesystems ++ * mounted with the 'blkdev' option ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure, i.e. 
all future bmap() requests will ++ * fail with the same error code without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_bmap ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param blocksize unit of block index ++ * @param idx block index within file ++ */ ++ void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize, ++ uint64_t idx); ++ ++ /** ++ * Ioctl ++ * ++ * Note: For unrestricted ioctls (not allowed for FUSE ++ * servers), data in and out areas can be discovered by giving ++ * iovs and setting FUSE_IOCTL_RETRY in *flags*. For ++ * restricted ioctls, kernel prepares in/out data area ++ * according to the information encoded in cmd. ++ * ++ * Valid replies: ++ * fuse_reply_ioctl_retry ++ * fuse_reply_ioctl ++ * fuse_reply_ioctl_iov ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param cmd ioctl command ++ * @param arg ioctl argument ++ * @param fi file information ++ * @param flags for FUSE_IOCTL_* flags ++ * @param in_buf data fetched from the caller ++ * @param in_bufsz number of fetched bytes ++ * @param out_bufsz maximum size of output data ++ * ++ * Note : the unsigned long request submitted by the application ++ * is truncated to 32 bits. ++ */ ++ void (*ioctl) (fuse_req_t req, fuse_ino_t ino, unsigned int cmd, ++ void *arg, struct fuse_file_info *fi, unsigned flags, ++ const void *in_buf, size_t in_bufsz, size_t out_bufsz); ++ ++ /** ++ * Poll for IO readiness ++ * ++ * Note: If ph is non-NULL, the client should notify ++ * when IO readiness events occur by calling ++ * fuse_lowlevel_notify_poll() with the specified ph. ++ * ++ * Regardless of the number of times poll with a non-NULL ph ++ * is received, single notification is enough to clear all. ++ * Notifying more times incurs overhead but doesn't harm ++ * correctness. 
++ * ++ * The callee is responsible for destroying ph with ++ * fuse_pollhandle_destroy() when no longer in use. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as success (with a kernel-defined default poll-mask) and ++ * future calls to poll() will succeed the same way without being sent ++ * to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_poll ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param ph poll handle to be used for notification ++ */ ++ void (*poll) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct fuse_pollhandle *ph); ++ ++ /** ++ * Write data made available in a buffer ++ * ++ * This is a more generic version of the ->write() method. If ++ * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the ++ * kernel supports splicing from the fuse device, then the ++ * data will be made available in a pipe for supporting zero ++ * copy data transfer. ++ * ++ * bufv->count is guaranteed to be one (and thus bufv->idx is ++ * always zero). The write_buf handler must ensure that ++ * bufv->off is correctly updated (reflecting the number of ++ * bytes read from bufv->buf[0]). ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits.
++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param bufv buffer containing the data ++ * @param off offset to write to ++ * @param fi file information ++ */ ++ void (*write_buf) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_bufvec *bufv, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Callback function for the retrieve request ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() ++ * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() ++ * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() ++ * @param bufv the buffer containing the returned data ++ */ ++ void (*retrieve_reply) (fuse_req_t req, void *cookie, fuse_ino_t ino, ++ off_t offset, struct fuse_bufvec *bufv); ++ ++ /** ++ * Forget about multiple inodes ++ * ++ * See description of the forget function for more ++ * information. ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ */ ++ void (*forget_multi) (fuse_req_t req, size_t count, ++ struct fuse_forget_data *forgets); ++ ++ /** ++ * Acquire, modify or release a BSD file lock ++ * ++ * Note: if the locking methods are not implemented, the kernel ++ * will still allow file locking to work locally. Hence these are ++ * only interesting for network filesystems and similar. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param op the locking operation, see flock(2) ++ */ ++ void (*flock) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi, int op); ++ ++ /** ++ * Allocate requested space. If this function returns success then ++ * subsequent writes to the specified range shall not fail due to the lack ++ * of free space on the file system storage media. 
++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future fallocate() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param offset starting point for allocated region ++ * @param length size of allocated region ++ * @param mode determines the operation to be performed on the given range, ++ * see fallocate(2) ++ */ ++ void (*fallocate) (fuse_req_t req, fuse_ino_t ino, int mode, ++ off_t offset, off_t length, struct fuse_file_info *fi); ++ ++ /** ++ * Read directory with attributes ++ * ++ * Send a buffer filled using fuse_add_direntry_plus(), with size not ++ * exceeding the requested size. Send an empty buffer on end of ++ * stream. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * In contrast to readdir() (which does not affect the lookup counts), ++ * the lookup count of every entry returned by readdirplus(), except "." ++ * and "..", is incremented by one. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum number of bytes to send ++ * @param off offset to continue reading the directory stream ++ * @param fi file information ++ */ ++ void (*readdirplus) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Copy a range of data from one file to another ++ * ++ * Performs an optimized copy between two file descriptors without the ++ * additional cost of transferring data through the FUSE kernel module ++ * to user space (glibc) and then back into the FUSE filesystem again. 
++ * ++ * In case this method is not implemented, glibc falls back to reading ++ * data from the source and writing to the destination, effectively ++ * doing an inefficient copy of the data. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future copy_file_range() requests will fail with EOPNOTSUPP without ++ * being sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino_in the inode number of the source file ++ * @param off_in starting point from where the data should be read ++ * @param fi_in file information of the source file ++ * @param ino_out the inode number of the destination file ++ * @param off_out starting point where the data should be written ++ * @param fi_out file information of the destination file ++ * @param len maximum size of the data to copy ++ * @param flags passed along with the copy_file_range() syscall ++ */ ++ void (*copy_file_range) (fuse_req_t req, fuse_ino_t ino_in, ++ off_t off_in, struct fuse_file_info *fi_in, ++ fuse_ino_t ino_out, off_t off_out, ++ struct fuse_file_info *fi_out, size_t len, ++ int flags); ++ ++ /** ++ * Find next data or hole after the specified offset ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure, i.e. all future lseek() requests will ++ * fail with the same error code without being sent to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_lseek ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param off offset to start search from ++ * @param whence either SEEK_DATA or SEEK_HOLE ++ * @param fi file information ++ */ ++ void (*lseek) (fuse_req_t req, fuse_ino_t ino, off_t off, int whence, ++ struct fuse_file_info *fi); ++}; ++ ++/** ++ * Reply with an error code or success.
++ * ++ * Possible requests: ++ * all except forget ++ * ++ * Wherever possible, error codes should be chosen from the list of ++ * documented error conditions in the corresponding system calls ++ * manpage. ++ * ++ * An error code of ENOSYS is sometimes treated specially. This is ++ * indicated in the documentation of the affected handler functions. ++ * ++ * The following requests may be answered with a zero error code: ++ * unlink, rmdir, rename, flush, release, fsync, fsyncdir, setxattr, ++ * removexattr, setlk. ++ * ++ * @param req request handle ++ * @param err the positive error value, or zero for success ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_err(fuse_req_t req, int err); ++ ++/** ++ * Don't send reply ++ * ++ * Possible requests: ++ * forget ++ * forget_multi ++ * retrieve_reply ++ * ++ * @param req request handle ++ */ ++void fuse_reply_none(fuse_req_t req); ++ ++/** ++ * Reply with a directory entry ++ * ++ * Possible requests: ++ * lookup, mknod, mkdir, symlink, link ++ * ++ * Side effects: ++ * increments the lookup count on success ++ * ++ * @param req request handle ++ * @param e the entry parameters ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); ++ ++/** ++ * Reply with a directory entry and open parameters ++ * ++ * currently the following members of 'fi' are used: ++ * fh, direct_io, keep_cache ++ * ++ * Possible requests: ++ * create ++ * ++ * Side effects: ++ * increments the lookup count on success ++ * ++ * @param req request handle ++ * @param e the entry parameters ++ * @param fi file information ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, ++ const struct fuse_file_info *fi); ++ ++/** ++ * Reply with attributes ++ * ++ * Possible requests: ++ * getattr, setattr ++ * ++ * @param req request handle ++
* @param attr the attributes ++ * @param attr_timeout validity timeout (in seconds) for the attributes ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_attr(fuse_req_t req, const struct stat *attr, ++ double attr_timeout); ++ ++/** ++ * Reply with the contents of a symbolic link ++ * ++ * Possible requests: ++ * readlink ++ * ++ * @param req request handle ++ * @param link symbolic link contents ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_readlink(fuse_req_t req, const char *link); ++ ++/** ++ * Reply with open parameters ++ * ++ * currently the following members of 'fi' are used: ++ * fh, direct_io, keep_cache ++ * ++ * Possible requests: ++ * open, opendir ++ * ++ * @param req request handle ++ * @param fi file information ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *fi); ++ ++/** ++ * Reply with number of bytes written ++ * ++ * Possible requests: ++ * write ++ * ++ * @param req request handle ++ * @param count the number of bytes written ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_write(fuse_req_t req, size_t count); ++ ++/** ++ * Reply with data ++ * ++ * Possible requests: ++ * read, readdir, getxattr, listxattr ++ * ++ * @param req request handle ++ * @param buf buffer containing data ++ * @param size the size of data in bytes ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); ++ ++/** ++ * Reply with data copied/moved from buffer(s) ++ * ++ * Zero copy data transfer ("splicing") will be used under ++ * the following circumstances: ++ * ++ * 1. FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.want, and ++ * 2. the kernel supports splicing from the fuse device ++ * (FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.capable), and ++ * 3. 
*flags* does not contain FUSE_BUF_NO_SPLICE ++ * 4. The amount of data that is provided in file-descriptor backed ++ * buffers (i.e., buffers for which bufv[n].flags == FUSE_BUF_FD) ++ * is at least twice the page size. ++ * ++ * In order for SPLICE_F_MOVE to be used, the following additional ++ * conditions have to be fulfilled: ++ * ++ * 1. FUSE_CAP_SPLICE_MOVE is set in fuse_conn_info.want, and ++ * 2. the kernel supports it (i.e, FUSE_CAP_SPLICE_MOVE is set in ++ fuse_conn_info.capable), and ++ * 3. *flags* contains FUSE_BUF_SPLICE_MOVE ++ * ++ * Note that, if splice is used, the data is actually spliced twice: ++ * once into a temporary pipe (to prepend header data), and then again ++ * into the kernel. If some of the provided buffers are memory-backed, ++ * the data in them is copied in step one and spliced in step two. ++ * ++ * The FUSE_BUF_SPLICE_FORCE_SPLICE and FUSE_BUF_SPLICE_NONBLOCK flags ++ * are silently ignored. ++ * ++ * Possible requests: ++ * read, readdir, getxattr, listxattr ++ * ++ * Side effects: ++ * when used to return data from a readdirplus() (but not readdir()) ++ * call, increments the lookup count of each returned entry by one ++ * on success. 
++ * ++ * @param req request handle ++ * @param bufv buffer vector ++ * @param flags flags controlling the copy ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags); ++ ++/** ++ * Reply with data vector ++ * ++ * Possible requests: ++ * read, readdir, getxattr, listxattr ++ * ++ * @param req request handle ++ * @param iov the vector containing the data ++ * @param count the size of vector ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count); ++ ++/** ++ * Reply with filesystem statistics ++ * ++ * Possible requests: ++ * statfs ++ * ++ * @param req request handle ++ * @param stbuf filesystem statistics ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf); ++ ++/** ++ * Reply with needed buffer size ++ * ++ * Possible requests: ++ * getxattr, listxattr ++ * ++ * @param req request handle ++ * @param count the buffer size needed in bytes ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_xattr(fuse_req_t req, size_t count); ++ ++/** ++ * Reply with file lock information ++ * ++ * Possible requests: ++ * getlk ++ * ++ * @param req request handle ++ * @param lock the lock information ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_lock(fuse_req_t req, const struct flock *lock); ++ ++/** ++ * Reply with block index ++ * ++ * Possible requests: ++ * bmap ++ * ++ * @param req request handle ++ * @param idx block index within device ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_bmap(fuse_req_t req, uint64_t idx); ++ ++/* ----------------------------------------------------------- * ++ * Filling a buffer in readdir * ++ * 
----------------------------------------------------------- */ ++ ++/** ++ * Add a directory entry to the buffer ++ * ++ * Buffer needs to be large enough to hold the entry. If it's not, ++ * then the entry is not filled in but the size of the entry is still ++ * returned. The caller can check this by comparing the bufsize ++ * parameter with the returned entry size. If the entry size is ++ * larger than the buffer size, the operation failed. ++ * ++ * From the 'stbuf' argument the st_ino field and bits 12-15 of the ++ * st_mode field are used. The other fields are ignored. ++ * ++ * *off* should be any non-zero value that the filesystem can use to ++ * identify the current point in the directory stream. It does not ++ * need to be the actual physical position. A value of zero is ++ * reserved to mean "from the beginning", and should therefore never ++ * be used (the first call to fuse_add_direntry should be passed the ++ * offset of the second directory entry). ++ * ++ * @param req request handle ++ * @param buf the point where the new entry will be added to the buffer ++ * @param bufsize remaining size of the buffer ++ * @param name the name of the entry ++ * @param stbuf the file attributes ++ * @param off the offset of the next entry ++ * @return the space needed for the entry ++ */ ++size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, ++ const char *name, const struct stat *stbuf, ++ off_t off); ++ ++/** ++ * Add a directory entry to the buffer with the attributes ++ * ++ * See documentation of `fuse_add_direntry()` for more details. 
++ * ++ * @param req request handle ++ * @param buf the point where the new entry will be added to the buffer ++ * @param bufsize remaining size of the buffer ++ * @param name the name of the entry ++ * @param e the directory entry ++ * @param off the offset of the next entry ++ * @return the space needed for the entry ++ */ ++size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, ++ const char *name, ++ const struct fuse_entry_param *e, off_t off); ++ ++/** ++ * Reply to ask for data fetch and output buffer preparation. ioctl ++ * will be retried with the specified input data fetched and output ++ * buffer prepared. ++ * ++ * Possible requests: ++ * ioctl ++ * ++ * @param req request handle ++ * @param in_iov iovec specifying data to fetch from the caller ++ * @param in_count number of entries in in_iov ++ * @param out_iov iovec specifying addresses to write output to ++ * @param out_count number of entries in out_iov ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_ioctl_retry(fuse_req_t req, ++ const struct iovec *in_iov, size_t in_count, ++ const struct iovec *out_iov, size_t out_count); ++ ++/** ++ * Reply to finish ioctl ++ * ++ * Possible requests: ++ * ioctl ++ * ++ * @param req request handle ++ * @param result result to be passed to the caller ++ * @param buf buffer containing output data ++ * @param size length of output data ++ */ ++int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size); ++ ++/** ++ * Reply to finish ioctl with iov buffer ++ * ++ * Possible requests: ++ * ioctl ++ * ++ * @param req request handle ++ * @param result result to be passed to the caller ++ * @param iov the vector containing the data ++ * @param count the size of vector ++ */ ++int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, ++ int count); ++ ++/** ++ * Reply with poll result event mask ++ * ++ * @param req request handle ++ * @param revents poll result event mask ++ 
*/ ++int fuse_reply_poll(fuse_req_t req, unsigned revents); ++ ++/** ++ * Reply with offset ++ * ++ * Possible requests: ++ * lseek ++ * ++ * @param req request handle ++ * @param off offset of next data or hole ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_lseek(fuse_req_t req, off_t off); ++ ++/* ----------------------------------------------------------- * ++ * Notification * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Notify IO readiness event ++ * ++ * For more information, please read comment for poll operation. ++ * ++ * @param ph poll handle to notify IO readiness event for ++ */ ++int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph); ++ ++/** ++ * Notify to invalidate cache for an inode. ++ * ++ * Added in FUSE protocol version 7.12. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing. ++ * ++ * If the filesystem has writeback caching enabled, invalidating an ++ * inode will first trigger a writeback of all dirty pages. The call ++ * will block until all writeback requests have completed and the ++ * inode has been invalidated. It will, however, not wait for ++ * completion of pending writeback requests that have been issued ++ * before. ++ * ++ * If there are no dirty pages, this function will never block. 
++ * ++ * @param se the session object ++ * @param ino the inode number ++ * @param off the offset in the inode where to start invalidating ++ * or negative to invalidate attributes only ++ * @param len the amount of cache to invalidate or 0 for all ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, ++ off_t off, off_t len); ++ ++/** ++ * Notify to invalidate parent attributes and the dentry matching ++ * parent/name ++ * ++ * To avoid a deadlock this function must not be called in the ++ * execution path of a related filesystem operation or within any code ++ * that could hold a lock that could be needed to execute such an ++ * operation. As of kernel 4.18, a "related operation" is a lookup(), ++ * symlink(), mknod(), mkdir(), unlink(), rename(), link() or create() ++ * request for the parent, and a setattr(), unlink(), rmdir(), ++ * rename(), setxattr(), removexattr(), readdir() or readdirplus() ++ * request for the inode itself. ++ * ++ * When called correctly, this function will never block. ++ * ++ * Added in FUSE protocol version 7.12. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing. ++ * ++ * @param se the session object ++ * @param parent inode number ++ * @param name file name ++ * @param namelen strlen() of file name ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, ++ const char *name, size_t namelen); ++ ++/** ++ * This function behaves like fuse_lowlevel_notify_inval_entry() with ++ * the following additional effect (at least as of Linux kernel 4.8): ++ * ++ * If the provided *child* inode matches the inode that is currently ++ * associated with the cached dentry, and if there are any inotify ++ * watches registered for the dentry, then the watchers are informed ++ * that the dentry has been deleted.
++ * ++ * To avoid a deadlock this function must not be called while ++ * executing a related filesystem operation or while holding a lock ++ * that could be needed to execute such an operation (see the ++ * description of fuse_lowlevel_notify_inval_entry() for more ++ * details). ++ * ++ * When called correctly, this function will never block. ++ * ++ * Added in FUSE protocol version 7.18. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing. ++ * ++ * @param se the session object ++ * @param parent inode number ++ * @param child inode number ++ * @param name file name ++ * @param namelen strlen() of file name ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_delete(struct fuse_session *se, ++ fuse_ino_t parent, fuse_ino_t child, ++ const char *name, size_t namelen); ++ ++/** ++ * Store data to the kernel buffers ++ * ++ * Synchronously store data in the kernel buffers belonging to the ++ * given inode. The stored data is marked up-to-date (no read will be ++ * performed against it, unless it's invalidated or evicted from the ++ * cache). ++ * ++ * If the stored data overflows the current file size, then the size ++ * is extended, similarly to a write(2) on the filesystem. ++ * ++ * If this function returns an error, then the store wasn't fully ++ * completed, but it may have been partially completed. ++ * ++ * Added in FUSE protocol version 7.15. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing.
++ * ++ * @param se the session object ++ * @param ino the inode number ++ * @param offset the starting offset into the file to store to ++ * @param bufv buffer vector ++ * @param flags flags controlling the copy ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, ++ off_t offset, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags); ++/** ++ * Retrieve data from the kernel buffers ++ * ++ * Retrieve data in the kernel buffers belonging to the given inode. ++ * If successful then the retrieve_reply() method will be called with ++ * the returned data. ++ * ++ * Only present pages are returned in the retrieve reply. Retrieving ++ * stops when it finds a non-present page and only data prior to that ++ * is returned. ++ * ++ * If this function returns an error, then the retrieve will not be ++ * completed and no reply will be sent. ++ * ++ * This function doesn't change the dirty state of pages in the kernel ++ * buffer. For dirty pages the write() method will be called ++ * regardless of having been retrieved previously. ++ * ++ * Added in FUSE protocol version 7.15. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing. 
++ * ++ * @param se the session object ++ * @param ino the inode number ++ * @param size the number of bytes to retrieve ++ * @param offset the starting offset into the file to retrieve from ++ * @param cookie user data to supply to the reply callback ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, ++ size_t size, off_t offset, void *cookie); ++ ++ ++/* ----------------------------------------------------------- * ++ * Utility functions * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Get the userdata from the request ++ * ++ * @param req request handle ++ * @return the user data passed to fuse_session_new() ++ */ ++void *fuse_req_userdata(fuse_req_t req); ++ ++/** ++ * Get the context from the request ++ * ++ * The pointer returned by this function will only be valid for the ++ * request's lifetime ++ * ++ * @param req request handle ++ * @return the context structure ++ */ ++const struct fuse_ctx *fuse_req_ctx(fuse_req_t req); ++ ++/** ++ * Get the current supplementary group IDs for the specified request ++ * ++ * Similar to the getgroups(2) system call, except the return value is ++ * always the total number of group IDs, even if it is larger than the ++ * specified size. ++ * ++ * The current fuse kernel module in linux (as of 2.6.30) doesn't pass ++ * the group list to userspace, hence this function needs to parse ++ * "/proc/$TID/task/$TID/status" to get the group IDs. ++ * ++ * This feature may not be supported on all operating systems. In ++ * such a case this function will return -ENOSYS. 
++ * ++ * @param req request handle ++ * @param size size of given array ++ * @param list array of group IDs to be filled in ++ * @return the total number of supplementary group IDs or -errno on failure ++ */ ++int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]); ++ ++/** ++ * Callback function for an interrupt ++ * ++ * @param req interrupted request ++ * @param data user data ++ */ ++typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data); ++ ++/** ++ * Register/unregister callback for an interrupt ++ * ++ * If an interrupt has already happened, then the callback function is ++ * called from within this function, hence it's not possible for ++ * interrupts to be lost. ++ * ++ * @param req request handle ++ * @param func the callback function or NULL for unregister ++ * @param data user data passed to the callback function ++ */ ++void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, ++ void *data); ++ ++/** ++ * Check if a request has already been interrupted ++ * ++ * @param req request handle ++ * @return 1 if the request has been interrupted, 0 otherwise ++ */ ++int fuse_req_interrupted(fuse_req_t req); ++ ++ ++/* ----------------------------------------------------------- * ++ * Inquiry functions * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Print low-level version information to stdout. ++ */ ++void fuse_lowlevel_version(void); ++ ++/** ++ * Print available low-level options to stdout. This is not an ++ * exhaustive list, but includes only those options that may be of ++ * interest to an end-user of a file system. ++ */ ++void fuse_lowlevel_help(void); ++ ++/** ++ * Print available options for `fuse_parse_cmdline()`. 
++ */ ++void fuse_cmdline_help(void); ++ ++/* ----------------------------------------------------------- * ++ * Filesystem setup & teardown * ++ * ----------------------------------------------------------- */ ++ ++struct fuse_cmdline_opts { ++ int singlethread; ++ int foreground; ++ int debug; ++ int nodefault_subtype; ++ char *mountpoint; ++ int show_version; ++ int show_help; ++ int clone_fd; ++ unsigned int max_idle_threads; ++}; ++ ++/** ++ * Utility function to parse common options for simple file systems ++ * using the low-level API. A help text that describes the available ++ * options can be printed with `fuse_cmdline_help`. A single ++ * non-option argument is treated as the mountpoint. Multiple ++ * non-option arguments will result in an error. ++ * ++ * If neither -o subtype= or -o fsname= options are given, a new ++ * subtype option will be added and set to the basename of the program ++ * (the fsname will remain unset, and then defaults to "fuse"). ++ * ++ * Known options will be removed from *args*, unknown options will ++ * remain. ++ * ++ * @param args argument vector (input+output) ++ * @param opts output argument for parsed options ++ * @return 0 on success, -1 on failure ++ */ ++int fuse_parse_cmdline(struct fuse_args *args, ++ struct fuse_cmdline_opts *opts); ++ ++/** ++ * Create a low level session. ++ * ++ * Returns a session structure suitable for passing to ++ * fuse_session_mount() and fuse_session_loop(). ++ * ++ * This function accepts most file-system independent mount options ++ * (like context, nodev, ro - see mount(8)), as well as the general ++ * fuse mount options listed in mount.fuse(8) (e.g. -o allow_root and ++ * -o default_permissions, but not ``-o use_ino``). Instead of `-o ++ * debug`, debugging may also enabled with `-d` or `--debug`. ++ * ++ * If not all options are known, an error message is written to stderr ++ * and the function returns NULL. 
++ * ++ * Option parsing skips argv[0], which is assumed to contain the ++ * program name. To prevent accidentally passing an option in ++ * argv[0], this element must always be present (even if no options ++ * are specified). It may be set to the empty string ('\0') if no ++ * reasonable value can be provided. ++ * ++ * @param args argument vector ++ * @param op the (low-level) filesystem operations ++ * @param op_size sizeof(struct fuse_lowlevel_ops) ++ * @param userdata user data ++ * ++ * @return the fuse session on success, NULL on failure ++ **/ ++struct fuse_session *fuse_session_new(struct fuse_args *args, ++ const struct fuse_lowlevel_ops *op, ++ size_t op_size, void *userdata); ++ ++/** ++ * Mount a FUSE file system. ++ * ++ * @param mountpoint the mount point path ++ * @param se session object ++ * ++ * @return 0 on success, -1 on failure. ++ **/ ++int fuse_session_mount(struct fuse_session *se, const char *mountpoint); ++ ++/** ++ * Enter a single threaded, blocking event loop. ++ * ++ * When the event loop terminates because the connection to the FUSE ++ * kernel module has been closed, this function returns zero. This ++ * happens when the filesystem is unmounted regularly (by the ++ * filesystem owner or root running the umount(8) or fusermount(1) ++ * command), or if the connection is explicitly severed by writing ``1`` ++ * to the ``abort`` file in ``/sys/fs/fuse/connections/NNN``. The only ++ * way to distinguish between these two conditions is to check if the ++ * filesystem is still mounted after the session loop returns. ++ * ++ * When some error occurs during request processing, the function ++ * returns a negated errno(3) value. ++ * ++ * If the loop has been terminated because of a signal handler ++ * installed by fuse_set_signal_handlers(), this function returns the ++ * (positive) signal value that triggered the exit.
++ * ++ * @param se the session ++ * @return 0, -errno, or a signal value ++ */ ++int fuse_session_loop(struct fuse_session *se); ++ ++/** ++ * Enter a multi-threaded event loop. ++ * ++ * For a description of the return value and the conditions when the ++ * event loop exits, refer to the documentation of ++ * fuse_session_loop(). ++ * ++ * @param se the session ++ * @param config session loop configuration ++ * @return see fuse_session_loop() ++ */ ++#if FUSE_USE_VERSION < 32 ++int fuse_session_loop_mt_31(struct fuse_session *se, int clone_fd); ++#define fuse_session_loop_mt(se, clone_fd) fuse_session_loop_mt_31(se, clone_fd) ++#else ++int fuse_session_loop_mt(struct fuse_session *se, struct fuse_loop_config *config); ++#endif ++ ++/** ++ * Flag a session as terminated. ++ * ++ * This function is invoked by the POSIX signal handlers, when ++ * registered using fuse_set_signal_handlers(). It will cause any ++ * running event loops to terminate on the next opportunity. ++ * ++ * @param se the session ++ */ ++void fuse_session_exit(struct fuse_session *se); ++ ++/** ++ * Reset the terminated flag of a session ++ * ++ * @param se the session ++ */ ++void fuse_session_reset(struct fuse_session *se); ++ ++/** ++ * Query the terminated flag of a session ++ * ++ * @param se the session ++ * @return 1 if exited, 0 if not exited ++ */ ++int fuse_session_exited(struct fuse_session *se); ++ ++/** ++ * Ensure that file system is unmounted. ++ * ++ * In regular operation, the file system is typically unmounted by the ++ * user calling umount(8) or fusermount(1), which then terminates the ++ * FUSE session loop. However, the session loop may also terminate as ++ * a result of an explicit call to fuse_session_exit() (e.g. by a ++ * signal handler installed by fuse_set_signal_handlers()).
In this ++ * case the filesystem remains mounted, but any attempt to access it ++ * will block (while the filesystem process is still running) or give ++ * an ESHUTDOWN error (after the filesystem process has terminated). ++ * ++ * If the communication channel with the FUSE kernel module is still ++ * open (i.e., if the session loop was terminated by an explicit call ++ * to fuse_session_exit()), this function will close it and unmount ++ * the filesystem. If the communication channel has been closed by the ++ * kernel, this method will do (almost) nothing. ++ * ++ * NOTE: The above semantics mean that if the connection to the kernel ++ * is terminated via the ``/sys/fs/fuse/connections/NNN/abort`` file, ++ * this method will *not* unmount the filesystem. ++ * ++ * @param se the session ++ */ ++void fuse_session_unmount(struct fuse_session *se); ++ ++/** ++ * Destroy a session ++ * ++ * @param se the session ++ */ ++void fuse_session_destroy(struct fuse_session *se); ++ ++/* ----------------------------------------------------------- * ++ * Custom event loop support * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Return file descriptor for communication with kernel. ++ * ++ * The file selector can be used to integrate FUSE with a custom event ++ * loop. Whenever data is available for reading on the provided fd, ++ * the event loop should call `fuse_session_receive_buf` followed by ++ * `fuse_session_process_buf` to process the request. ++ * ++ * The returned file descriptor is valid until `fuse_session_unmount` ++ * is called. ++ * ++ * @param se the session ++ * @return a file descriptor ++ */ ++int fuse_session_fd(struct fuse_session *se); ++ ++/** ++ * Process a raw request supplied in a generic buffer ++ * ++ * The fuse_buf may contain a memory buffer or a pipe file descriptor. 
++ * ++ * @param se the session ++ * @param buf the fuse_buf containing the request ++ */ ++void fuse_session_process_buf(struct fuse_session *se, ++ const struct fuse_buf *buf); ++ ++/** ++ * Read a raw request from the kernel into the supplied buffer. ++ * ++ * Depending on file system options, system capabilities, and request ++ * size the request is either read into a memory buffer or spliced ++ * into a temporary pipe. ++ * ++ * @param se the session ++ * @param buf the fuse_buf to store the request in ++ * @return the actual size of the raw request, or -errno on error ++ */ ++int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* FUSE_LOWLEVEL_H_ */ +diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h +new file mode 100644 +index 0000000..2f6663e +--- /dev/null ++++ b/tools/virtiofsd/fuse_misc.h +@@ -0,0 +1,59 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB ++*/ ++ ++#include <pthread.h> ++ ++/* ++ Versioned symbols cannot be used in some cases because they ++ - confuse the dynamic linker in uClibc ++ - are not supported on MacOSX (in MachO binary format) ++*/ ++#if (!defined(__UCLIBC__) && !defined(__APPLE__)) ++#define FUSE_SYMVER(x) __asm__(x) ++#else ++#define FUSE_SYMVER(x) ++#endif ++ ++#ifndef USE_UCLIBC ++#define fuse_mutex_init(mut) pthread_mutex_init(mut, NULL) ++#else ++/* Is this hack still needed?
*/ ++static inline void fuse_mutex_init(pthread_mutex_t *mut) ++{ ++ pthread_mutexattr_t attr; ++ pthread_mutexattr_init(&attr); ++ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); ++ pthread_mutex_init(mut, &attr); ++ pthread_mutexattr_destroy(&attr); ++} ++#endif ++ ++#ifdef HAVE_STRUCT_STAT_ST_ATIM ++/* Linux */ ++#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec) ++#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec) ++#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec) ++#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atim.tv_nsec = (val) ++#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctim.tv_nsec = (val) ++#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtim.tv_nsec = (val) ++#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC) ++/* FreeBSD */ ++#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec) ++#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec) ++#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec) ++#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atimespec.tv_nsec = (val) ++#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctimespec.tv_nsec = (val) ++#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtimespec.tv_nsec = (val) ++#else ++#define ST_ATIM_NSEC(stbuf) 0 ++#define ST_CTIM_NSEC(stbuf) 0 ++#define ST_MTIM_NSEC(stbuf) 0 ++#define ST_ATIM_NSEC_SET(stbuf, val) do { } while (0) ++#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0) ++#define ST_MTIM_NSEC_SET(stbuf, val) do { } while (0) ++#endif +diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h +new file mode 100644 +index 0000000..d8573e7 +--- /dev/null ++++ b/tools/virtiofsd/fuse_opt.h +@@ -0,0 +1,271 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. 
++*/ ++ ++#ifndef FUSE_OPT_H_ ++#define FUSE_OPT_H_ ++ ++/** @file ++ * ++ * This file defines the option parsing interface of FUSE ++ */ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/** ++ * Option description ++ * ++ * This structure describes a single option, and action associated ++ * with it, in case it matches. ++ * ++ * More than one such match may occur, in which case the action for ++ * each match is executed. ++ * ++ * There are three possible actions in case of a match: ++ * ++ * i) An integer (int or unsigned) variable determined by 'offset' is ++ * set to 'value' ++ * ++ * ii) The processing function is called, with 'value' as the key ++ * ++ * iii) An integer (any) or string (char *) variable determined by ++ * 'offset' is set to the value of an option parameter ++ * ++ * 'offset' should normally be either set to ++ * ++ * - 'offsetof(struct foo, member)' actions i) and iii) ++ * ++ * - -1 action ii) ++ * ++ * The 'offsetof()' macro is defined in the <stddef.h> header. ++ * ++ * The template determines which options match, and also has an ++ * effect on the action. Normally the action is either i) or ii), but ++ * if a format is present in the template, then action iii) is ++ * performed. ++ * ++ * The types of templates are: ++ * ++ * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only ++ * themselves. Invalid values are "--" and anything beginning ++ * with "-o" ++ * ++ * 2) "foo", "foo-bar", etc. These match "-ofoo", "-ofoo-bar" or ++ * the relevant option in a comma separated option list ++ * ++ * 3) "bar=", "--foo=", etc. These are variations of 1) and 2) ++ * which have a parameter ++ * ++ * 4) "bar=%s", "--foo=%lu", etc. Same matching as above but perform ++ * action iii). ++ * ++ * 5) "-x ", etc. Matches either "-xparam" or "-x param" as ++ * two separate arguments ++ * ++ * 6) "-x %s", etc. Combination of 4) and 5) ++ * ++ * If the format is "%s", memory is allocated for the string unlike with ++ * scanf().
The previous value (if non-NULL) stored at the this location is ++ * freed. ++ */ ++struct fuse_opt { ++ /** Matching template and optional parameter formatting */ ++ const char *templ; ++ ++ /** ++ * Offset of variable within 'data' parameter of fuse_opt_parse() ++ * or -1 ++ */ ++ unsigned long offset; ++ ++ /** ++ * Value to set the variable to, or to be passed as 'key' to the ++ * processing function. Ignored if template has a format ++ */ ++ int value; ++}; ++ ++/** ++ * Key option. In case of a match, the processing function will be ++ * called with the specified key. ++ */ ++#define FUSE_OPT_KEY(templ, key) { templ, -1U, key } ++ ++/** ++ * Last option. An array of 'struct fuse_opt' must end with a NULL ++ * template value ++ */ ++#define FUSE_OPT_END { NULL, 0, 0 } ++ ++/** ++ * Argument list ++ */ ++struct fuse_args { ++ /** Argument count */ ++ int argc; ++ ++ /** Argument vector. NULL terminated */ ++ char **argv; ++ ++ /** Is 'argv' allocated? */ ++ int allocated; ++}; ++ ++/** ++ * Initializer for 'struct fuse_args' ++ */ ++#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 } ++ ++/** ++ * Key value passed to the processing function if an option did not ++ * match any template ++ */ ++#define FUSE_OPT_KEY_OPT -1 ++ ++/** ++ * Key value passed to the processing function for all non-options ++ * ++ * Non-options are the arguments beginning with a character other than ++ * '-' or all arguments after the special '--' option ++ */ ++#define FUSE_OPT_KEY_NONOPT -2 ++ ++/** ++ * Special key value for options to keep ++ * ++ * Argument is not passed to processing function, but behave as if the ++ * processing function returned 1 ++ */ ++#define FUSE_OPT_KEY_KEEP -3 ++ ++/** ++ * Special key value for options to discard ++ * ++ * Argument is not passed to processing function, but behave as if the ++ * processing function returned zero ++ */ ++#define FUSE_OPT_KEY_DISCARD -4 ++ ++/** ++ * Processing function ++ * ++ * This function is called if ++ * - option did 
not match any 'struct fuse_opt' ++ * - argument is a non-option ++ * - option did match and offset was set to -1 ++ * ++ * The 'arg' parameter will always contain the whole argument or ++ * option including the parameter if exists. A two-argument option ++ * ("-x foo") is always converted to single argument option of the ++ * form "-xfoo" before this function is called. ++ * ++ * Options of the form '-ofoo' are passed to this function without the ++ * '-o' prefix. ++ * ++ * The return value of this function determines whether this argument ++ * is to be inserted into the output argument vector, or discarded. ++ * ++ * @param data is the user data passed to the fuse_opt_parse() function ++ * @param arg is the whole argument or option ++ * @param key determines why the processing function was called ++ * @param outargs the current output argument list ++ * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept ++ */ ++typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, ++ struct fuse_args *outargs); ++ ++/** ++ * Option parsing function ++ * ++ * If 'args' was returned from a previous call to fuse_opt_parse() or ++ * it was constructed from ++ * ++ * A NULL 'args' is equivalent to an empty argument vector ++ * ++ * A NULL 'opts' is equivalent to an 'opts' array containing a single ++ * end marker ++ * ++ * A NULL 'proc' is equivalent to a processing function always ++ * returning '1' ++ * ++ * @param args is the input and output argument list ++ * @param data is the user data ++ * @param opts is the option description array ++ * @param proc is the processing function ++ * @return -1 on error, 0 on success ++ */ ++int fuse_opt_parse(struct fuse_args *args, void *data, ++ const struct fuse_opt opts[], fuse_opt_proc_t proc); ++ ++/** ++ * Add an option to a comma separated option list ++ * ++ * @param opts is a pointer to an option list, may point to a NULL value ++ * @param opt is the option to add ++ * @return -1 on allocation 
error, 0 on success ++ */ ++int fuse_opt_add_opt(char **opts, const char *opt); ++ ++/** ++ * Add an option, escaping commas, to a comma separated option list ++ * ++ * @param opts is a pointer to an option list, may point to a NULL value ++ * @param opt is the option to add ++ * @return -1 on allocation error, 0 on success ++ */ ++int fuse_opt_add_opt_escaped(char **opts, const char *opt); ++ ++/** ++ * Add an argument to a NULL terminated argument vector ++ * ++ * @param args is the structure containing the current argument list ++ * @param arg is the new argument to add ++ * @return -1 on allocation error, 0 on success ++ */ ++int fuse_opt_add_arg(struct fuse_args *args, const char *arg); ++ ++/** ++ * Add an argument at the specified position in a NULL terminated ++ * argument vector ++ * ++ * Adds the argument to the N-th position. This is useful for adding ++ * options at the beginning of the array which must not come after the ++ * special '--' option. ++ * ++ * @param args is the structure containing the current argument list ++ * @param pos is the position at which to add the argument ++ * @param arg is the new argument to add ++ * @return -1 on allocation error, 0 on success ++ */ ++int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg); ++ ++/** ++ * Free the contents of argument list ++ * ++ * The structure itself is not freed ++ * ++ * @param args is the structure containing the argument list ++ */ ++void fuse_opt_free_args(struct fuse_args *args); ++ ++ ++/** ++ * Check if an option matches ++ * ++ * @param opts is the option description array ++ * @param opt is the option to match ++ * @return 1 if a match is found, 0 if not ++ */ ++int fuse_opt_match(const struct fuse_opt opts[], const char *opt); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* FUSE_OPT_H_ */ +diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h +new file mode 100644 +index 0000000..6b77c33 +--- /dev/null ++++ 
b/tools/virtiofsd/passthrough_helpers.h +@@ -0,0 +1,76 @@ ++/* ++ * FUSE: Filesystem in Userspace ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE ++ */ ++ ++/* ++ * Creates files on the underlying file system in response to a FUSE_MKNOD ++ * operation ++ */ ++static int mknod_wrapper(int dirfd, const char *path, const char *link, ++ int mode, dev_t rdev) ++{ ++ int res; ++ ++ if (S_ISREG(mode)) { ++ res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); ++ if (res >= 0) ++ res = close(res); ++ } else if (S_ISDIR(mode)) { ++ res = mkdirat(dirfd, path, mode); ++ } else if (S_ISLNK(mode) && link != NULL) { ++ res = symlinkat(link, dirfd, path); ++ } else if (S_ISFIFO(mode)) { ++ res = mkfifoat(dirfd, path, mode); 
++#ifdef __FreeBSD__ ++ } else if (S_ISSOCK(mode)) { ++ struct sockaddr_un su; ++ int fd; ++ ++ if (strlen(path) >= sizeof(su.sun_path)) { ++ errno = ENAMETOOLONG; ++ return -1; ++ } ++ fd = socket(AF_UNIX, SOCK_STREAM, 0); ++ if (fd >= 0) { ++ /* ++ * We must bind the socket to the underlying file ++ * system to create the socket file, even though ++ * we'll never listen on this socket. ++ */ ++ su.sun_family = AF_UNIX; ++ strncpy(su.sun_path, path, sizeof(su.sun_path)); ++ res = bindat(dirfd, fd, (struct sockaddr*)&su, ++ sizeof(su)); ++ if (res == 0) ++ close(fd); ++ } else { ++ res = -1; ++ } ++#endif ++ } else { ++ res = mknodat(dirfd, path, mode, rdev); ++ } ++ ++ return res; ++} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Remove-fuse_req_getgroups.patch b/SOURCES/kvm-virtiofsd-Remove-fuse_req_getgroups.patch new file mode 100644 index 0000000..27e71f2 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Remove-fuse_req_getgroups.patch @@ -0,0 +1,193 @@ +From 7a1860c83ff042f3e796c449e780ee0528107213 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:08 +0000 +Subject: [PATCH 12/18] virtiofsd: Remove fuse_req_getgroups +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-2-dgilbert@redhat.com> +Patchwork-id: 94122 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/7] virtiofsd: Remove fuse_req_getgroups +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: "Dr. David Alan Gilbert" + +Remove fuse_req_getgroups that's unused in virtiofsd; it came in +from libfuse but we don't actually use it. It was called from +fuse_getgroups which we previously removed (but had left it's header +in). + +Coverity had complained about null termination in it, but removing +it is the easiest answer. + +Fixes: Coverity CID: 1413117 (String not null terminated) +Signed-off-by: Dr. 
David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 988717b46b6424907618cb845ace9d69062703af) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/fuse.h | 20 ----------- + tools/virtiofsd/fuse_lowlevel.c | 77 ----------------------------------------- + tools/virtiofsd/fuse_lowlevel.h | 21 ----------- + 3 files changed, 118 deletions(-) + +diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h +index 7a4c713..aba13fe 100644 +--- a/tools/virtiofsd/fuse.h ++++ b/tools/virtiofsd/fuse.h +@@ -1007,26 +1007,6 @@ void fuse_exit(struct fuse *f); + struct fuse_context *fuse_get_context(void); + + /** +- * Get the current supplementary group IDs for the current request +- * +- * Similar to the getgroups(2) system call, except the return value is +- * always the total number of group IDs, even if it is larger than the +- * specified size. +- * +- * The current fuse kernel module in linux (as of 2.6.30) doesn't pass +- * the group list to userspace, hence this function needs to parse +- * "/proc/$TID/task/$TID/status" to get the group IDs. +- * +- * This feature may not be supported on all operating systems. In +- * such a case this function will return -ENOSYS. 
+- * +- * @param size size of given array +- * @param list array of group IDs to be filled in +- * @return the total number of supplementary group IDs or -errno on failure +- */ +-int fuse_getgroups(int size, gid_t list[]); +- +-/** + * Check if the current request has already been interrupted + * + * @return 1 if the request has been interrupted, 0 otherwise +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index de2e2e0..01c418a 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2667,83 +2667,6 @@ int fuse_lowlevel_is_virtio(struct fuse_session *se) + return !!se->virtio_dev; + } + +-#ifdef linux +-int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) +-{ +- char *buf; +- size_t bufsize = 1024; +- char path[128]; +- int ret; +- int fd; +- unsigned long pid = req->ctx.pid; +- char *s; +- +- sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); +- +-retry: +- buf = malloc(bufsize); +- if (buf == NULL) { +- return -ENOMEM; +- } +- +- ret = -EIO; +- fd = open(path, O_RDONLY); +- if (fd == -1) { +- goto out_free; +- } +- +- ret = read(fd, buf, bufsize); +- close(fd); +- if (ret < 0) { +- ret = -EIO; +- goto out_free; +- } +- +- if ((size_t)ret == bufsize) { +- free(buf); +- bufsize *= 4; +- goto retry; +- } +- +- ret = -EIO; +- s = strstr(buf, "\nGroups:"); +- if (s == NULL) { +- goto out_free; +- } +- +- s += 8; +- ret = 0; +- while (1) { +- char *end; +- unsigned long val = strtoul(s, &end, 0); +- if (end == s) { +- break; +- } +- +- s = end; +- if (ret < size) { +- list[ret] = val; +- } +- ret++; +- } +- +-out_free: +- free(buf); +- return ret; +-} +-#else /* linux */ +-/* +- * This is currently not implemented on other than Linux... 
+- */ +-int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) +-{ +- (void)req; +- (void)size; +- (void)list; +- return -ENOSYS; +-} +-#endif +- + void fuse_session_exit(struct fuse_session *se) + { + se->exited = 1; +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 138041e..8f6d705 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1705,27 +1705,6 @@ void *fuse_req_userdata(fuse_req_t req); + const struct fuse_ctx *fuse_req_ctx(fuse_req_t req); + + /** +- * Get the current supplementary group IDs for the specified request +- * +- * Similar to the getgroups(2) system call, except the return value is +- * always the total number of group IDs, even if it is larger than the +- * specified size. +- * +- * The current fuse kernel module in linux (as of 2.6.30) doesn't pass +- * the group list to userspace, hence this function needs to parse +- * "/proc/$TID/task/$TID/status" to get the group IDs. +- * +- * This feature may not be supported on all operating systems. In +- * such a case this function will return -ENOSYS. +- * +- * @param req request handle +- * @param size size of given array +- * @param list array of group IDs to be filled in +- * @return the total number of supplementary group IDs or -errno on failure +- */ +-int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]); +- +-/** + * Callback function for an interrupt + * + * @param req interrupted request +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch b/SOURCES/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch new file mode 100644 index 0000000..7f9c5bb --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch @@ -0,0 +1,271 @@ +From 80237df2b22eca685037456e65d149fed4654165 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:48 +0100 +Subject: [PATCH 017/116] virtiofsd: Remove unused enum fuse_buf_copy_flags +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-14-dgilbert@redhat.com> +Patchwork-id: 93465 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 013/112] virtiofsd: Remove unused enum fuse_buf_copy_flags +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Xiao Yang + +Signed-off-by: Xiao Yang +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 8c3fe75e0308ba2f01d160ace534b7e386cea808) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 7 +++--- + tools/virtiofsd/fuse_common.h | 46 +--------------------------------------- + tools/virtiofsd/fuse_lowlevel.c | 13 +++++------- + tools/virtiofsd/fuse_lowlevel.h | 35 ++---------------------------- + tools/virtiofsd/passthrough_ll.c | 4 ++-- + 5 files changed, 13 insertions(+), 92 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 5df946c..4d507f3 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -171,7 +171,7 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, + + static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, + const struct fuse_buf *src, size_t src_off, +- size_t len, enum fuse_buf_copy_flags flags) ++ size_t len) + { + int src_is_fd = src->flags & FUSE_BUF_IS_FD; + int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; +@@ -224,8 +224,7 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) + return 1; + } + +-ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, +- enum fuse_buf_copy_flags flags) ++ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) + { + size_t copied = 0; + +@@ 
-249,7 +248,7 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, + dst_len = dst->size - dstv->off; + len = min_size(src_len, dst_len); + +- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); ++ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len); + if (res < 0) { + if (!copied) { + return res; +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index bd9bf86..0cb33ac 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -605,48 +605,6 @@ enum fuse_buf_flags { + }; + + /** +- * Buffer copy flags +- */ +-enum fuse_buf_copy_flags { +- /** +- * Don't use splice(2) +- * +- * Always fall back to using read and write instead of +- * splice(2) to copy data from one file descriptor to another. +- * +- * If this flag is not set, then only fall back if splice is +- * unavailable. +- */ +- FUSE_BUF_NO_SPLICE = (1 << 1), +- +- /** +- * Force splice +- * +- * Always use splice(2) to copy data from one file descriptor +- * to another. If splice is not available, return -EINVAL. +- */ +- FUSE_BUF_FORCE_SPLICE = (1 << 2), +- +- /** +- * Try to move data with splice. +- * +- * If splice is used, try to move pages from the source to the +- * destination instead of copying. See documentation of +- * SPLICE_F_MOVE in splice(2) man page. +- */ +- FUSE_BUF_SPLICE_MOVE = (1 << 3), +- +- /** +- * Don't block on the pipe when copying data with splice +- * +- * Makes the operations on the pipe non-blocking (if the pipe +- * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) +- * man page. +- */ +- FUSE_BUF_SPLICE_NONBLOCK = (1 << 4), +-}; +- +-/** + * Single data buffer + * + * Generic data buffer for I/O, extended attributes, etc... 
Data may +@@ -741,11 +699,9 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); + * + * @param dst destination buffer vector + * @param src source buffer vector +- * @param flags flags controlling the copy + * @return actual number of bytes copied or -errno on error + */ +-ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, +- enum fuse_buf_copy_flags flags); ++ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src); + + /* + * Signal handling +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index eb0ec49..3da80de 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -490,16 +490,14 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, + + static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int iov_count, +- struct fuse_bufvec *buf, unsigned int flags) ++ struct fuse_bufvec *buf) + { + size_t len = fuse_buf_size(buf); +- (void)flags; + + return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); + } + +-int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags) ++int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) + { + struct iovec iov[2]; + struct fuse_out_header out; +@@ -511,7 +509,7 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, + out.unique = req->unique; + out.error = 0; + +- res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); ++ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv); + if (res <= 0) { + fuse_free_req(req); + return res; +@@ -1969,8 +1967,7 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + } + + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags) ++ off_t offset, struct fuse_bufvec *bufv) + { + struct fuse_out_header out; + struct fuse_notify_store_out outarg; 
+@@ -1999,7 +1996,7 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + +- res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); ++ res = fuse_send_data_iov(se, NULL, iov, 2, bufv); + if (res > 0) { + res = -res; + } +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 12a84b4..2fa225d 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1363,33 +1363,6 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); + /** + * Reply with data copied/moved from buffer(s) + * +- * Zero copy data transfer ("splicing") will be used under +- * the following circumstances: +- * +- * 1. FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.want, and +- * 2. the kernel supports splicing from the fuse device +- * (FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.capable), and +- * 3. *flags* does not contain FUSE_BUF_NO_SPLICE +- * 4. The amount of data that is provided in file-descriptor backed +- * buffers (i.e., buffers for which bufv[n].flags == FUSE_BUF_FD) +- * is at least twice the page size. +- * +- * In order for SPLICE_F_MOVE to be used, the following additional +- * conditions have to be fulfilled: +- * +- * 1. FUSE_CAP_SPLICE_MOVE is set in fuse_conn_info.want, and +- * 2. the kernel supports it (i.e, FUSE_CAP_SPLICE_MOVE is set in +- fuse_conn_info.capable), and +- * 3. *flags* contains FUSE_BUF_SPLICE_MOVE +- * +- * Note that, if splice is used, the data is actually spliced twice: +- * once into a temporary pipe (to prepend header data), and then again +- * into the kernel. If some of the provided buffers are memory-backed, +- * the data in them is copied in step one and spliced in step two. +- * +- * The FUSE_BUF_SPLICE_FORCE_SPLICE and FUSE_BUF_SPLICE_NONBLOCK flags +- * are silently ignored. 
+- * + * Possible requests: + * read, readdir, getxattr, listxattr + * +@@ -1400,11 +1373,9 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); + * + * @param req request handle + * @param bufv buffer vector +- * @param flags flags controlling the copy + * @return zero for success, -errno for failure to send reply + */ +-int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags); ++int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv); + + /** + * Reply with data vector +@@ -1705,12 +1676,10 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + * @param ino the inode number + * @param offset the starting offset into the file to store to + * @param bufv buffer vector +- * @param flags flags controlling the copy + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags); ++ off_t offset, struct fuse_bufvec *bufv); + + /* + * Utility functions +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9377718..126a56c 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -931,7 +931,7 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, + buf.buf[0].fd = fi->fh; + buf.buf[0].pos = offset; + +- fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); ++ fuse_reply_data(req, &buf); + } + + static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, +@@ -952,7 +952,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + out_buf.buf[0].size, (unsigned long)off); + } + +- res = fuse_buf_copy(&out_buf, in_buf, 0); ++ res = fuse_buf_copy(&out_buf, in_buf); + if (res < 0) { + fuse_reply_err(req, -res); + } else { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch 
b/SOURCES/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch new file mode 100644 index 0000000..e1a3cd1 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch @@ -0,0 +1,72 @@ +From b8d62021f28114f054571b96ec0cd4dad4476923 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:14 +0100 +Subject: [PATCH 103/116] virtiofsd: Reset O_DIRECT flag during file open +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-100-dgilbert@redhat.com> +Patchwork-id: 93553 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 099/112] virtiofsd: Reset O_DIRECT flag during file open +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +If an application wants to do direct IO and opens a file with O_DIRECT +in guest, that does not necessarily mean that we need to bypass page +cache on host as well. So reset this flag on host. + +If somebody needs to bypass page cache on host as well (and it is safe to +do so), we can add a knob in daemon later to control this behavior. + +I check virtio-9p and they do reset O_DIRECT flag. + +Signed-off-by: Vivek Goyal +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 65da4539803373ec4eec97ffc49ee90083e56efd) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index ccbbec1..948cb19 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1721,6 +1721,13 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out; + } + ++ /* ++ * O_DIRECT in guest should not necessarily mean bypassing page ++ * cache on host as well. 
If somebody needs that behavior, it ++ * probably should be a configuration knob in daemon. ++ */ ++ fi->flags &= ~O_DIRECT; ++ + fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); + err = fd == -1 ? errno : 0; +@@ -1950,6 +1957,13 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + fi->flags &= ~O_APPEND; + } + ++ /* ++ * O_DIRECT in guest should not necessarily mean bypassing page ++ * cache on host as well. If somebody needs that behavior, it ++ * probably should be a configuration knob in daemon. ++ */ ++ fi->flags &= ~O_DIRECT; ++ + sprintf(buf, "%i", lo_fd(req, ino)); + fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); + if (fd == -1) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Send-replies-to-messages.patch b/SOURCES/kvm-virtiofsd-Send-replies-to-messages.patch new file mode 100644 index 0000000..5453fda --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Send-replies-to-messages.patch @@ -0,0 +1,199 @@ +From bb1f691dc410ce11ac9675ced70e78a3ce2511b0 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:03 +0100 +Subject: [PATCH 032/116] virtiofsd: Send replies to messages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-29-dgilbert@redhat.com> +Patchwork-id: 93485 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 028/112] virtiofsd: Send replies to messages +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Route fuse out messages back through the same queue elements +that had the command that triggered the request. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit df57ba919ec3edef9cc208d35685095e6e92713e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 4 ++ + tools/virtiofsd/fuse_virtio.c | 107 ++++++++++++++++++++++++++++++++++++++-- + tools/virtiofsd/fuse_virtio.h | 4 ++ + 3 files changed, 111 insertions(+), 4 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index af09fa2..380d93b 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -171,6 +171,10 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, + } + } + ++ if (fuse_lowlevel_is_virtio(se)) { ++ return virtio_send_msg(se, ch, iov, count); ++ } ++ + abort(); /* virtio should have taken it before here */ + return 0; + } +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 3841b20..05d0e29 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -41,6 +41,9 @@ struct fv_QueueInfo { + /* Our queue index, corresponds to array position */ + int qidx; + int kick_fd; ++ ++ /* The element for the command currently being processed */ ++ VuVirtqElement *qe; + }; + + /* +@@ -121,6 +124,105 @@ static void copy_from_iov(struct fuse_buf *buf, size_t out_num, + } + } + ++/* ++ * Copy from one iov to another, the given number of bytes ++ * The caller must have checked sizes. ++ */ ++static void copy_iov(struct iovec *src_iov, int src_count, ++ struct iovec *dst_iov, int dst_count, size_t to_copy) ++{ ++ size_t dst_offset = 0; ++ /* Outer loop copies 'src' elements */ ++ while (to_copy) { ++ assert(src_count); ++ size_t src_len = src_iov[0].iov_len; ++ size_t src_offset = 0; ++ ++ if (src_len > to_copy) { ++ src_len = to_copy; ++ } ++ /* Inner loop copies contents of one 'src' to maybe multiple dst. 
*/ ++ while (src_len) { ++ assert(dst_count); ++ size_t dst_len = dst_iov[0].iov_len - dst_offset; ++ if (dst_len > src_len) { ++ dst_len = src_len; ++ } ++ ++ memcpy(dst_iov[0].iov_base + dst_offset, ++ src_iov[0].iov_base + src_offset, dst_len); ++ src_len -= dst_len; ++ to_copy -= dst_len; ++ src_offset += dst_len; ++ dst_offset += dst_len; ++ ++ assert(dst_offset <= dst_iov[0].iov_len); ++ if (dst_offset == dst_iov[0].iov_len) { ++ dst_offset = 0; ++ dst_iov++; ++ dst_count--; ++ } ++ } ++ src_iov++; ++ src_count--; ++ } ++} ++ ++/* ++ * Called back by ll whenever it wants to send a reply/message back ++ * The 1st element of the iov starts with the fuse_out_header ++ * 'unique'==0 means it's a notify message. ++ */ ++int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count) ++{ ++ VuVirtqElement *elem; ++ VuVirtq *q; ++ ++ assert(count >= 1); ++ assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); ++ ++ struct fuse_out_header *out = iov[0].iov_base; ++ /* TODO: Endianness! */ ++ ++ size_t tosend_len = iov_size(iov, count); ++ ++ /* unique == 0 is notification, which we don't support */ ++ assert(out->unique); ++ /* For virtio we always have ch */ ++ assert(ch); ++ elem = ch->qi->qe; ++ q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; ++ ++ /* The 'in' part of the elem is to qemu */ ++ unsigned int in_num = elem->in_num; ++ struct iovec *in_sg = elem->in_sg; ++ size_t in_len = iov_size(in_sg, in_num); ++ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", ++ __func__, elem->index, in_num, in_len); ++ ++ /* ++ * The elem should have room for a 'fuse_out_header' (out from fuse) ++ * plus the data based on the len in the header. 
++ */ ++ if (in_len < sizeof(struct fuse_out_header)) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", ++ __func__, elem->index); ++ return -E2BIG; ++ } ++ if (in_len < tosend_len) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", ++ __func__, elem->index, tosend_len); ++ return -E2BIG; ++ } ++ ++ copy_iov(iov, count, in_sg, in_num, tosend_len); ++ vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); ++ vu_queue_notify(&se->virtio_dev->dev, q); ++ ++ return 0; ++} ++ + /* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { +@@ -226,13 +328,10 @@ static void *fv_queue_thread(void *opaque) + + /* TODO! Endianness of header */ + +- /* TODO: Fixup fuse_send_msg */ + /* TODO: Add checks for fuse_session_exited */ + fuse_session_process_buf_int(se, &fbuf, &ch); + +- /* TODO: vu_queue_push(dev, q, elem, qi->write_count); */ +- vu_queue_notify(dev, q); +- ++ qi->qe = NULL; + free(elem); + elem = NULL; + } +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +index 23026d6..135a148 100644 +--- a/tools/virtiofsd/fuse_virtio.h ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -22,4 +22,8 @@ int virtio_session_mount(struct fuse_session *se); + + int virtio_loop(struct fuse_session *se); + ++ ++int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count); ++ + #endif +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Start-queue-threads.patch b/SOURCES/kvm-virtiofsd-Start-queue-threads.patch new file mode 100644 index 0000000..8b03cd6 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Start-queue-threads.patch @@ -0,0 +1,165 @@ +From 38282d996cde61261211160577b366b83cad8012 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:00 +0100 +Subject: [PATCH 029/116] virtiofsd: Start queue threads +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-26-dgilbert@redhat.com> +Patchwork-id: 93479 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 025/112] virtiofsd: Start queue threads +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Start a thread for each queue when we get notified it's been started. + +Signed-off-by: Dr. David Alan Gilbert +fix by: +Signed-off-by: Jun Piao +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e4c55a3c144493b436e40031e2eed61a84eca47b) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 89 +++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 89 insertions(+) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 4819e56..2a94bb3 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -11,6 +11,7 @@ + * See the file COPYING.LIB + */ + ++#include "qemu/osdep.h" + #include "fuse_virtio.h" + #include "fuse_i.h" + #include "standard-headers/linux/fuse.h" +@@ -30,6 +31,15 @@ + + #include "contrib/libvhost-user/libvhost-user.h" + ++struct fv_QueueInfo { ++ pthread_t thread; ++ struct fv_VuDev *virtio_dev; ++ ++ /* Our queue index, corresponds to array position */ ++ int qidx; ++ int kick_fd; ++}; ++ + /* + * We pass the dev element into libvhost-user + * and then use it to get back to the outer +@@ -38,6 +48,13 @@ + struct fv_VuDev { + VuDev dev; + struct fuse_session *se; ++ ++ /* ++ * The following pair of fields are only accessed in the main ++ * virtio_loop ++ */ ++ size_t nqueues; ++ struct fv_QueueInfo **qi; + }; + + /* From spec */ +@@ -83,6 
+100,75 @@ static void fv_panic(VuDev *dev, const char *err) + exit(EXIT_FAILURE); + } + ++static void *fv_queue_thread(void *opaque) ++{ ++ struct fv_QueueInfo *qi = opaque; ++ fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, ++ qi->qidx, qi->kick_fd); ++ while (1) { ++ /* TODO */ ++ } ++ ++ return NULL; ++} ++ ++/* Callback from libvhost-user on start or stop of a queue */ ++static void fv_queue_set_started(VuDev *dev, int qidx, bool started) ++{ ++ struct fv_VuDev *vud = container_of(dev, struct fv_VuDev, dev); ++ struct fv_QueueInfo *ourqi; ++ ++ fuse_log(FUSE_LOG_INFO, "%s: qidx=%d started=%d\n", __func__, qidx, ++ started); ++ assert(qidx >= 0); ++ ++ /* ++ * Ignore additional request queues for now. passthrough_ll.c must be ++ * audited for thread-safety issues first. It was written with a ++ * well-behaved client in mind and may not protect against all types of ++ * races yet. ++ */ ++ if (qidx > 1) { ++ fuse_log(FUSE_LOG_ERR, ++ "%s: multiple request queues not yet implemented, please only " ++ "configure 1 request queue\n", ++ __func__); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (started) { ++ /* Fire up a thread to watch this queue */ ++ if (qidx >= vud->nqueues) { ++ vud->qi = realloc(vud->qi, (qidx + 1) * sizeof(vud->qi[0])); ++ assert(vud->qi); ++ memset(vud->qi + vud->nqueues, 0, ++ sizeof(vud->qi[0]) * (1 + (qidx - vud->nqueues))); ++ vud->nqueues = qidx + 1; ++ } ++ if (!vud->qi[qidx]) { ++ vud->qi[qidx] = calloc(sizeof(struct fv_QueueInfo), 1); ++ assert(vud->qi[qidx]); ++ vud->qi[qidx]->virtio_dev = vud; ++ vud->qi[qidx]->qidx = qidx; ++ } else { ++ /* Shouldn't have been started */ ++ assert(vud->qi[qidx]->kick_fd == -1); ++ } ++ ourqi = vud->qi[qidx]; ++ ourqi->kick_fd = dev->vq[qidx].kick_fd; ++ if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { ++ fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", ++ __func__, qidx); ++ assert(0); ++ } ++ } else { ++ /* TODO: Kill the thread */ ++ 
assert(qidx < vud->nqueues); ++ ourqi = vud->qi[qidx]; ++ ourqi->kick_fd = -1; ++ } ++} ++ + static bool fv_queue_order(VuDev *dev, int qidx) + { + return false; +@@ -92,6 +178,9 @@ static const VuDevIface fv_iface = { + .get_features = fv_get_features, + .set_features = fv_set_features, + ++ /* Don't need process message, we've not got any at vhost-user level */ ++ .queue_set_started = fv_queue_set_started, ++ + .queue_is_processed_in_order = fv_queue_order, + }; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Start-reading-commands-from-queue.patch b/SOURCES/kvm-virtiofsd-Start-reading-commands-from-queue.patch new file mode 100644 index 0000000..2022480 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Start-reading-commands-from-queue.patch @@ -0,0 +1,200 @@ +From b4af2eff8ecadb4e2c9520602455f77fac2cb943 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:02 +0100 +Subject: [PATCH 031/116] virtiofsd: Start reading commands from queue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-28-dgilbert@redhat.com> +Patchwork-id: 93484 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 027/112] virtiofsd: Start reading commands from queue +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Pop queue elements off queues, copy the data from them and +pass that to fuse. + + Note: 'out' in a VuVirtqElement is from QEMU + 'in' in libfuse is into the daemon + + So we read from the out iov's to get a fuse_in_header + +When we get a kick we've got to read all the elements until the queue +is empty. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit b509e1228b3e5eb83c14819045988999fc2dbd1b) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 2 + + tools/virtiofsd/fuse_virtio.c | 99 +++++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 98 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index ec04449..1126723 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -14,6 +14,7 @@ + #include "fuse_lowlevel.h" + + struct fv_VuDev; ++struct fv_QueueInfo; + + struct fuse_req { + struct fuse_session *se; +@@ -75,6 +76,7 @@ struct fuse_chan { + pthread_mutex_t lock; + int ctr; + int fd; ++ struct fv_QueueInfo *qi; + }; + + /** +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 05e7258..3841b20 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -12,6 +12,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/iov.h" + #include "fuse_virtio.h" + #include "fuse_i.h" + #include "standard-headers/linux/fuse.h" +@@ -32,6 +33,7 @@ + + #include "contrib/libvhost-user/libvhost-user.h" + ++struct fv_VuDev; + struct fv_QueueInfo { + pthread_t thread; + struct fv_VuDev *virtio_dev; +@@ -101,10 +103,41 @@ static void fv_panic(VuDev *dev, const char *err) + exit(EXIT_FAILURE); + } + ++/* ++ * Copy from an iovec into a fuse_buf (memory only) ++ * Caller must ensure there is space ++ */ ++static void copy_from_iov(struct fuse_buf *buf, size_t out_num, ++ const struct iovec *out_sg) ++{ ++ void *dest = buf->mem; ++ ++ while (out_num) { ++ size_t onelen = out_sg->iov_len; ++ memcpy(dest, out_sg->iov_base, onelen); ++ dest += onelen; ++ out_sg++; ++ out_num--; ++ } ++} ++ + /* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { + struct fv_QueueInfo *qi = opaque; ++ struct VuDev *dev = &qi->virtio_dev->dev; ++ struct VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ 
struct fuse_session *se = qi->virtio_dev->se; ++ struct fuse_chan ch; ++ struct fuse_buf fbuf; ++ ++ fbuf.mem = NULL; ++ fbuf.flags = 0; ++ ++ fuse_mutex_init(&ch.lock); ++ ch.fd = (int)0xdaff0d111; ++ ch.qi = qi; ++ + fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, + qi->qidx, qi->kick_fd); + while (1) { +@@ -141,11 +174,71 @@ static void *fv_queue_thread(void *opaque) + fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); + break; + } +- if (qi->virtio_dev->se->debug) { +- fprintf(stderr, "%s: Queue %d gave evalue: %zx\n", __func__, +- qi->qidx, (size_t)evalue); ++ /* out is from guest, in is too guest */ ++ unsigned int in_bytes, out_bytes; ++ vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); ++ ++ fuse_log(FUSE_LOG_DEBUG, ++ "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", ++ __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); ++ ++ while (1) { ++ /* ++ * An element contains one request and the space to send our ++ * response They're spread over multiple descriptors in a ++ * scatter/gather set and we can't trust the guest to keep them ++ * still; so copy in/out. ++ */ ++ VuVirtqElement *elem = vu_queue_pop(dev, q, sizeof(VuVirtqElement)); ++ if (!elem) { ++ break; ++ } ++ ++ if (!fbuf.mem) { ++ fbuf.mem = malloc(se->bufsize); ++ assert(fbuf.mem); ++ assert(se->bufsize > sizeof(struct fuse_in_header)); ++ } ++ /* The 'out' part of the elem is from qemu */ ++ unsigned int out_num = elem->out_num; ++ struct iovec *out_sg = elem->out_sg; ++ size_t out_len = iov_size(out_sg, out_num); ++ fuse_log(FUSE_LOG_DEBUG, ++ "%s: elem %d: with %d out desc of length %zd\n", __func__, ++ elem->index, out_num, out_len); ++ ++ /* ++ * The elem should contain a 'fuse_in_header' (in to fuse) ++ * plus the data based on the len in the header. 
++ */ ++ if (out_len < sizeof(struct fuse_in_header)) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", ++ __func__, elem->index); ++ assert(0); /* TODO */ ++ } ++ if (out_len > se->bufsize) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", ++ __func__, elem->index); ++ assert(0); /* TODO */ ++ } ++ copy_from_iov(&fbuf, out_num, out_sg); ++ fbuf.size = out_len; ++ ++ /* TODO! Endianness of header */ ++ ++ /* TODO: Fixup fuse_send_msg */ ++ /* TODO: Add checks for fuse_session_exited */ ++ fuse_session_process_buf_int(se, &fbuf, &ch); ++ ++ /* TODO: vu_queue_push(dev, q, elem, qi->write_count); */ ++ vu_queue_notify(dev, q); ++ ++ free(elem); ++ elem = NULL; + } + } ++ pthread_mutex_destroy(&ch.lock); ++ free(fbuf.mem); + + return NULL; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Start-wiring-up-vhost-user.patch b/SOURCES/kvm-virtiofsd-Start-wiring-up-vhost-user.patch new file mode 100644 index 0000000..7b50118 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Start-wiring-up-vhost-user.patch @@ -0,0 +1,247 @@ +From 020f593031b0b54e4c35faffea489b700aed6a72 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:57 +0100 +Subject: [PATCH 026/116] virtiofsd: Start wiring up vhost-user +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-23-dgilbert@redhat.com> +Patchwork-id: 93477 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 022/112] virtiofsd: Start wiring up vhost-user +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Listen on our unix socket for the connection from QEMU, when we get it +initialise vhost-user and dive into our own loop variant (currently +dummy). + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit f6f3573c6f271af5ded63ce28589a113f7205c72) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 4 ++ + tools/virtiofsd/fuse_lowlevel.c | 5 +++ + tools/virtiofsd/fuse_lowlevel.h | 7 ++++ + tools/virtiofsd/fuse_virtio.c | 87 +++++++++++++++++++++++++++++++++++++++- + tools/virtiofsd/fuse_virtio.h | 2 + + tools/virtiofsd/passthrough_ll.c | 7 +--- + 6 files changed, 106 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 82d6ac7..ec04449 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -13,6 +13,8 @@ + #include "fuse.h" + #include "fuse_lowlevel.h" + ++struct fv_VuDev; ++ + struct fuse_req { + struct fuse_session *se; + uint64_t unique; +@@ -65,6 +67,8 @@ struct fuse_session { + size_t bufsize; + int error; + char *vu_socket_path; ++ int vu_socketfd; ++ struct fv_VuDev *virtio_dev; + }; + + struct fuse_chan { +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 5df124e..af09fa2 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2242,6 +2242,11 @@ void fuse_session_unmount(struct fuse_session *se) + { + } + ++int fuse_lowlevel_is_virtio(struct fuse_session *se) ++{ ++ return se->vu_socket_path != NULL; ++} ++ + #ifdef linux + int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) + { +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 2fa225d..f6b3470 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1755,6 +1755,13 @@ void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, + */ + int fuse_req_interrupted(fuse_req_t req); + ++/** ++ * Check if the session is connected via virtio ++ * ++ * @param se session object ++ * @return 1 if the session is a virtio session ++ */ ++int fuse_lowlevel_is_virtio(struct fuse_session *se); + + /* + * Inquiry functions +diff --git 
a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index cbef6ff..2ae3c76 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -19,18 +19,78 @@ + + #include + #include ++#include + #include + #include + #include + #include + #include + ++#include "contrib/libvhost-user/libvhost-user.h" ++ ++/* ++ * We pass the dev element into libvhost-user ++ * and then use it to get back to the outer ++ * container for other data. ++ */ ++struct fv_VuDev { ++ VuDev dev; ++ struct fuse_session *se; ++}; ++ + /* From spec */ + struct virtio_fs_config { + char tag[36]; + uint32_t num_queues; + }; + ++/* ++ * Callback from libvhost-user if there's a new fd we're supposed to listen ++ * to, typically a queue kick? ++ */ ++static void fv_set_watch(VuDev *dev, int fd, int condition, vu_watch_cb cb, ++ void *data) ++{ ++ fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd); ++} ++ ++/* ++ * Callback from libvhost-user if we're no longer supposed to listen on an fd ++ */ ++static void fv_remove_watch(VuDev *dev, int fd) ++{ ++ fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd); ++} ++ ++/* Callback from libvhost-user to panic */ ++static void fv_panic(VuDev *dev, const char *err) ++{ ++ fuse_log(FUSE_LOG_ERR, "%s: libvhost-user: %s\n", __func__, err); ++ /* TODO: Allow reconnects?? 
*/ ++ exit(EXIT_FAILURE); ++} ++ ++static bool fv_queue_order(VuDev *dev, int qidx) ++{ ++ return false; ++} ++ ++static const VuDevIface fv_iface = { ++ /* TODO: Add other callbacks */ ++ .queue_is_processed_in_order = fv_queue_order, ++}; ++ ++int virtio_loop(struct fuse_session *se) ++{ ++ fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__); ++ ++ while (1) { ++ /* TODO: Add stuffing */ ++ } ++ ++ fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); ++} ++ + int virtio_session_mount(struct fuse_session *se) + { + struct sockaddr_un un; +@@ -75,5 +135,30 @@ int virtio_session_mount(struct fuse_session *se) + return -1; + } + +- return -1; ++ fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n", ++ __func__); ++ int data_sock = accept(listen_sock, NULL, NULL); ++ if (data_sock == -1) { ++ fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n"); ++ close(listen_sock); ++ return -1; ++ } ++ close(listen_sock); ++ fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n", ++ __func__); ++ ++ /* TODO: Some cleanup/deallocation! 
*/ ++ se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1); ++ if (!se->virtio_dev) { ++ fuse_log(FUSE_LOG_ERR, "%s: virtio_dev calloc failed\n", __func__); ++ close(data_sock); ++ return -1; ++ } ++ ++ se->vu_socketfd = data_sock; ++ se->virtio_dev->se = se; ++ vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch, ++ fv_remove_watch, &fv_iface); ++ ++ return 0; + } +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +index 8f2edb6..23026d6 100644 +--- a/tools/virtiofsd/fuse_virtio.h ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -20,4 +20,6 @@ struct fuse_session; + + int virtio_session_mount(struct fuse_session *se); + ++int virtio_loop(struct fuse_session *se); ++ + #endif +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index fc9b264..037c5d7 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -36,6 +36,7 @@ + */ + + #include "qemu/osdep.h" ++#include "fuse_virtio.h" + #include "fuse_lowlevel.h" + #include + #include +@@ -1395,11 +1396,7 @@ int main(int argc, char *argv[]) + fuse_daemonize(opts.foreground); + + /* Block until ctrl+c or fusermount -u */ +- if (opts.singlethread) { +- ret = fuse_session_loop(se); +- } else { +- ret = fuse_session_loop_mt(se, opts.clone_fd); +- } ++ ret = virtio_loop(se); + + fuse_session_unmount(se); + err_out3: +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Support-remote-posix-locks.patch b/SOURCES/kvm-virtiofsd-Support-remote-posix-locks.patch new file mode 100644 index 0000000..e60364a --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Support-remote-posix-locks.patch @@ -0,0 +1,355 @@ +From 8e46d0862c4c204f92c08ce2ae961921f270efb5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:03 +0100 +Subject: [PATCH 092/116] virtiofsd: Support remote posix locks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-89-dgilbert@redhat.com> +Patchwork-id: 93537 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 088/112] virtiofsd: Support remote posix locks +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +Doing posix locks within the guest kernel is not sufficient if a file/dir +is being shared by multiple guests. So we need the notion of daemon doing +the locks which are visible to rest of the guests. + +Given posix locks are per process, one can not call posix lock API on host, +otherwise bunch of basic posix locks properties are broken. For example, +If two processes (A and B) in guest open the file and take locks on different +sections of file, if one of the processes closes the fd, it will close +fd on virtiofsd and all posix locks on file will go away. This means if +process A closes the fd, then locks of process B will go away too. + +Similar other problems exist too. + +This patch set tries to emulate posix locks while using open file +description locks provided on Linux. + +Daemon provides two options (-o posix_lock, -o no_posix_lock) to enable +or disable posix locking in daemon. By default it is enabled. + +There are few issues though. + +- GETLK() returns pid of process holding lock. As we are emulating locks + using OFD, and these locks are not per process and don't return pid + of process, so GETLK() in guest does not return process pid. + +- As of now only F_SETLK is supported and not F_SETLKW. We can't block + the thread in virtiofsd for arbitrary long duration as there is only + one thread serving the queue. That means unlock request will not make + it to daemon and F_SETLKW will block infinitely and bring virtio-fs + to a halt. This is a solvable problem though and will require significant + changes in virtiofsd and kernel. Left as a TODO item for now. 
+ +Signed-off-by: Vivek Goyal +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 0e81414c54161296212f6bc8a1c70526c4a9755a) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 3 + + tools/virtiofsd/passthrough_ll.c | 189 +++++++++++++++++++++++++++++++++++++++ + 2 files changed, 192 insertions(+) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5672024..33749bf 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -156,6 +156,9 @@ void fuse_cmdline_help(void) + " allowed (default: 10)\n" + " -o norace disable racy fallback\n" + " default: false\n" ++ " -o posix_lock|no_posix_lock\n" ++ " enable/disable remote posix lock\n" ++ " default: posix_lock\n" + " -o readdirplus|no_readdirplus\n" + " enable/disable readirplus\n" + " default: readdirplus except with " +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 05b5f89..9414935 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -67,6 +67,12 @@ + #include "passthrough_helpers.h" + #include "seccomp.h" + ++/* Keep track of inode posix locks for each owner. 
*/ ++struct lo_inode_plock { ++ uint64_t lock_owner; ++ int fd; /* fd for OFD locks */ ++}; ++ + struct lo_map_elem { + union { + struct lo_inode *inode; +@@ -95,6 +101,8 @@ struct lo_inode { + struct lo_key key; + uint64_t refcount; /* protected by lo->mutex */ + fuse_ino_t fuse_ino; ++ pthread_mutex_t plock_mutex; ++ GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ + }; + + struct lo_cred { +@@ -114,6 +122,7 @@ struct lo_data { + int norace; + int writeback; + int flock; ++ int posix_lock; + int xattr; + char *source; + double timeout; +@@ -137,6 +146,8 @@ static const struct fuse_opt lo_opts[] = { + { "source=%s", offsetof(struct lo_data, source), 0 }, + { "flock", offsetof(struct lo_data, flock), 1 }, + { "no_flock", offsetof(struct lo_data, flock), 0 }, ++ { "posix_lock", offsetof(struct lo_data, posix_lock), 1 }, ++ { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 }, + { "xattr", offsetof(struct lo_data, xattr), 1 }, + { "no_xattr", offsetof(struct lo_data, xattr), 0 }, + { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, +@@ -485,6 +496,17 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } ++ ++ if (conn->capable & FUSE_CAP_POSIX_LOCKS) { ++ if (lo->posix_lock) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n"); ++ conn->want |= FUSE_CAP_POSIX_LOCKS; ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n"); ++ conn->want &= ~FUSE_CAP_POSIX_LOCKS; ++ } ++ } ++ + if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || + lo->readdirplus_clear) { + fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); +@@ -772,6 +794,19 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + return p; + } + ++/* value_destroy_func for posix_locks GHashTable */ ++static void posix_locks_value_destroy(gpointer data) ++{ ++ struct lo_inode_plock *plock = data; ++ ++ 
/* ++ * We had used open() for locks and had only one fd. So ++ * closing this fd should release all OFD locks. ++ */ ++ close(plock->fd); ++ free(plock); ++} ++ + static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct fuse_entry_param *e) + { +@@ -825,6 +860,9 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + newfd = -1; + inode->key.ino = e->attr.st_ino; + inode->key.dev = e->attr.st_dev; ++ pthread_mutex_init(&inode->plock_mutex, NULL); ++ inode->posix_locks = g_hash_table_new_full( ++ g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); + + pthread_mutex_lock(&lo->mutex); + inode->fuse_ino = lo_add_inode_mapping(req, inode); +@@ -1160,6 +1198,11 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + if (!inode->refcount) { + lo_map_remove(&lo->ino_map, inode->fuse_ino); + g_hash_table_remove(lo->inodes, &inode->key); ++ if (g_hash_table_size(inode->posix_locks)) { ++ fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n"); ++ } ++ g_hash_table_destroy(inode->posix_locks); ++ pthread_mutex_destroy(&inode->plock_mutex); + pthread_mutex_unlock(&lo->mutex); + close(inode->fd); + free(inode); +@@ -1516,6 +1559,136 @@ out: + } + } + ++/* Should be called with inode->plock_mutex held */ ++static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, ++ struct lo_inode *inode, ++ uint64_t lock_owner, ++ pid_t pid, int *err) ++{ ++ struct lo_inode_plock *plock; ++ char procname[64]; ++ int fd; ++ ++ plock = ++ g_hash_table_lookup(inode->posix_locks, GUINT_TO_POINTER(lock_owner)); ++ ++ if (plock) { ++ return plock; ++ } ++ ++ plock = malloc(sizeof(struct lo_inode_plock)); ++ if (!plock) { ++ *err = ENOMEM; ++ return NULL; ++ } ++ ++ /* Open another instance of file which can be used for ofd locks. */ ++ sprintf(procname, "%i", inode->fd); ++ ++ /* TODO: What if file is not writable? 
*/ ++ fd = openat(lo->proc_self_fd, procname, O_RDWR); ++ if (fd == -1) { ++ *err = errno; ++ free(plock); ++ return NULL; ++ } ++ ++ plock->lock_owner = lock_owner; ++ plock->fd = fd; ++ g_hash_table_insert(inode->posix_locks, GUINT_TO_POINTER(plock->lock_owner), ++ plock); ++ return plock; ++} ++ ++static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct flock *lock) ++{ ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode; ++ struct lo_inode_plock *plock; ++ int ret, saverr = 0; ++ ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_getlk(ino=%" PRIu64 ", flags=%d)" ++ " owner=0x%lx, l_type=%d l_start=0x%lx" ++ " l_len=0x%lx\n", ++ ino, fi->flags, fi->lock_owner, lock->l_type, lock->l_start, ++ lock->l_len); ++ ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ pthread_mutex_lock(&inode->plock_mutex); ++ plock = ++ lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); ++ if (!plock) { ++ pthread_mutex_unlock(&inode->plock_mutex); ++ fuse_reply_err(req, ret); ++ return; ++ } ++ ++ ret = fcntl(plock->fd, F_OFD_GETLK, lock); ++ if (ret == -1) { ++ saverr = errno; ++ } ++ pthread_mutex_unlock(&inode->plock_mutex); ++ ++ if (saverr) { ++ fuse_reply_err(req, saverr); ++ } else { ++ fuse_reply_lock(req, lock); ++ } ++} ++ ++static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct flock *lock, int sleep) ++{ ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode; ++ struct lo_inode_plock *plock; ++ int ret, saverr = 0; ++ ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_setlk(ino=%" PRIu64 ", flags=%d)" ++ " cmd=%d pid=%d owner=0x%lx sleep=%d l_whence=%d" ++ " l_start=0x%lx l_len=0x%lx\n", ++ ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep, ++ lock->l_whence, lock->l_start, lock->l_len); ++ ++ if (sleep) { ++ fuse_reply_err(req, EOPNOTSUPP); ++ return; ++ } ++ ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, 
EBADF); ++ return; ++ } ++ ++ pthread_mutex_lock(&inode->plock_mutex); ++ plock = ++ lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); ++ ++ if (!plock) { ++ pthread_mutex_unlock(&inode->plock_mutex); ++ fuse_reply_err(req, ret); ++ return; ++ } ++ ++ /* TODO: Is it alright to modify flock? */ ++ lock->l_pid = 0; ++ ret = fcntl(plock->fd, F_OFD_SETLK, lock); ++ if (ret == -1) { ++ saverr = errno; ++ } ++ pthread_mutex_unlock(&inode->plock_mutex); ++ fuse_reply_err(req, saverr); ++} ++ + static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) + { +@@ -1617,6 +1790,19 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { + int res; + (void)ino; ++ struct lo_inode *inode; ++ ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ /* An fd is going away. Cleanup associated posix locks */ ++ pthread_mutex_lock(&inode->plock_mutex); ++ g_hash_table_remove(inode->posix_locks, GUINT_TO_POINTER(fi->lock_owner)); ++ pthread_mutex_unlock(&inode->plock_mutex); ++ + res = close(dup(lo_fi_fd(req, fi))); + fuse_reply_err(req, res == -1 ? 
errno : 0); + } +@@ -2080,6 +2266,8 @@ static struct fuse_lowlevel_ops lo_oper = { + .releasedir = lo_releasedir, + .fsyncdir = lo_fsyncdir, + .create = lo_create, ++ .getlk = lo_getlk, ++ .setlk = lo_setlk, + .open = lo_open, + .release = lo_release, + .flush = lo_flush, +@@ -2434,6 +2622,7 @@ int main(int argc, char *argv[]) + struct lo_data lo = { + .debug = 0, + .writeback = 0, ++ .posix_lock = 1, + .proc_self_fd = -1, + }; + struct lo_map_elem *root_elem; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Trim-down-imported-files.patch b/SOURCES/kvm-virtiofsd-Trim-down-imported-files.patch new file mode 100644 index 0000000..f3f1e85 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Trim-down-imported-files.patch @@ -0,0 +1,1582 @@ +From 9d3788b1c2fa5cb4f14e292232a05c6a5217802d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:44 +0100 +Subject: [PATCH 013/116] virtiofsd: Trim down imported files +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-10-dgilbert@redhat.com> +Patchwork-id: 93463 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 009/112] virtiofsd: Trim down imported files +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +There's a lot of the original fuse code we don't need; trim them down. + +Signed-off-by: Dr. David Alan Gilbert +with additional trimming by: +Signed-off-by: Misono Tomohiro +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Xiao Yang +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit a3e23f325439a290c504d6bbc48c2e742149ecab) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 71 +--- + tools/virtiofsd/fuse.h | 46 --- + tools/virtiofsd/fuse_common.h | 32 -- + tools/virtiofsd/fuse_i.h | 41 --- + tools/virtiofsd/fuse_log.h | 8 - + tools/virtiofsd/fuse_lowlevel.c | 675 +--------------------------------- + tools/virtiofsd/fuse_lowlevel.h | 28 -- + tools/virtiofsd/fuse_opt.h | 8 - + tools/virtiofsd/helper.c | 143 ------- + tools/virtiofsd/passthrough_helpers.h | 26 -- + tools/virtiofsd/passthrough_ll.c | 1 - + 11 files changed, 8 insertions(+), 1071 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 5ab9b87..aefb7db 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -157,73 +157,6 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, + return copied; + } + +-#ifdef HAVE_SPLICE +-static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len, enum fuse_buf_copy_flags flags) +-{ +- int splice_flags = 0; +- off_t *srcpos = NULL; +- off_t *dstpos = NULL; +- off_t srcpos_val; +- off_t dstpos_val; +- ssize_t res; +- size_t copied = 0; +- +- if (flags & FUSE_BUF_SPLICE_MOVE) +- splice_flags |= SPLICE_F_MOVE; +- if (flags & FUSE_BUF_SPLICE_NONBLOCK) +- splice_flags |= SPLICE_F_NONBLOCK; +- +- if (src->flags & FUSE_BUF_FD_SEEK) { +- srcpos_val = src->pos + src_off; +- srcpos = &srcpos_val; +- } +- if (dst->flags & FUSE_BUF_FD_SEEK) { +- dstpos_val = dst->pos + dst_off; +- dstpos = &dstpos_val; +- } +- +- while (len) { +- res = splice(src->fd, srcpos, dst->fd, dstpos, len, +- splice_flags); +- if (res == -1) { +- if (copied) +- break; +- +- if (errno != EINVAL || (flags & FUSE_BUF_FORCE_SPLICE)) +- return -errno; +- +- /* Maybe splice is not supported for this combination */ +- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, +- len); 
+- } +- if (res == 0) +- break; +- +- copied += res; +- if (!(src->flags & FUSE_BUF_FD_RETRY) && +- !(dst->flags & FUSE_BUF_FD_RETRY)) { +- break; +- } +- +- len -= res; +- } +- +- return copied; +-} +-#else +-static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len, enum fuse_buf_copy_flags flags) +-{ +- (void) flags; +- +- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); +-} +-#endif +- +- + static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, + const struct fuse_buf *src, size_t src_off, + size_t len, enum fuse_buf_copy_flags flags) +@@ -247,10 +180,8 @@ static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, + return fuse_buf_write(dst, dst_off, src, src_off, len); + } else if (!dst_is_fd) { + return fuse_buf_read(dst, dst_off, src, src_off, len); +- } else if (flags & FUSE_BUF_NO_SPLICE) { +- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); + } else { +- return fuse_buf_splice(dst, dst_off, src, src_off, len, flags); ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); + } + } + +diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h +index 883f6e5..3202fba 100644 +--- a/tools/virtiofsd/fuse.h ++++ b/tools/virtiofsd/fuse.h +@@ -25,10 +25,6 @@ + #include + #include + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /* ----------------------------------------------------------- * + * Basic FUSE API * + * ----------------------------------------------------------- */ +@@ -979,44 +975,6 @@ int fuse_loop(struct fuse *f); + void fuse_exit(struct fuse *f); + + /** +- * FUSE event loop with multiple threads +- * +- * Requests from the kernel are processed, and the appropriate +- * operations are called. Request are processed in parallel by +- * distributing them between multiple threads. 
+- * +- * For a description of the return value and the conditions when the +- * event loop exits, refer to the documentation of +- * fuse_session_loop(). +- * +- * Note: using fuse_loop() instead of fuse_loop_mt() means you are running in +- * single-threaded mode, and that you will not have to worry about reentrancy, +- * though you will have to worry about recursive lookups. In single-threaded +- * mode, FUSE will wait for one callback to return before calling another. +- * +- * Enabling multiple threads, by using fuse_loop_mt(), will cause FUSE to make +- * multiple simultaneous calls into the various callback functions given by your +- * fuse_operations record. +- * +- * If you are using multiple threads, you can enjoy all the parallel execution +- * and interactive response benefits of threads, and you get to enjoy all the +- * benefits of race conditions and locking bugs, too. Ensure that any code used +- * in the callback function of fuse_operations is also thread-safe. +- * +- * @param f the FUSE handle +- * @param config loop configuration +- * @return see fuse_session_loop() +- * +- * See also: fuse_loop() +- */ +-#if FUSE_USE_VERSION < 32 +-int fuse_loop_mt_31(struct fuse *f, int clone_fd); +-#define fuse_loop_mt(f, clone_fd) fuse_loop_mt_31(f, clone_fd) +-#else +-int fuse_loop_mt(struct fuse *f, struct fuse_loop_config *config); +-#endif +- +-/** + * Get the current context + * + * The context is only valid for the duration of a filesystem +@@ -1268,8 +1226,4 @@ struct fuse_session *fuse_get_session(struct fuse *f); + */ + int fuse_open_channel(const char *mountpoint, const char *options); + +-#ifdef __cplusplus +-} +-#endif +- + #endif /* FUSE_H_ */ +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index 2d686b2..bf8f8cc 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -28,10 +28,6 @@ + #define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) + #define FUSE_VERSION 
FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /** + * Information about an open file. + * +@@ -100,30 +96,6 @@ struct fuse_file_info { + uint32_t poll_events; + }; + +-/** +- * Configuration parameters passed to fuse_session_loop_mt() and +- * fuse_loop_mt(). +- */ +-struct fuse_loop_config { +- /** +- * whether to use separate device fds for each thread +- * (may increase performance) +- */ +- int clone_fd; +- +- /** +- * The maximum number of available worker threads before they +- * start to get deleted when they become idle. If not +- * specified, the default is 10. +- * +- * Adjusting this has performance implications; a very small number +- * of threads in the pool will cause a lot of thread creation and +- * deletion overhead and performance may suffer. When set to 0, a new +- * thread will be created to service every operation. +- */ +- unsigned int max_idle_threads; +-}; +- + /************************************************************************** + * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * + **************************************************************************/ +@@ -802,10 +774,6 @@ void fuse_remove_signal_handlers(struct fuse_session *se); + # error only API version 30 or greater is supported + #endif + +-#ifdef __cplusplus +-} +-#endif +- + + /* + * This interface uses 64 bit off_t. 
+diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index d38b630..b39522e 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -9,8 +9,6 @@ + #include "fuse.h" + #include "fuse_lowlevel.h" + +-struct mount_opts; +- + struct fuse_req { + struct fuse_session *se; + uint64_t unique; +@@ -45,7 +43,6 @@ struct fuse_session { + char *mountpoint; + volatile int exited; + int fd; +- struct mount_opts *mo; + int debug; + int deny_others; + struct fuse_lowlevel_ops op; +@@ -58,7 +55,6 @@ struct fuse_session { + struct fuse_req interrupts; + pthread_mutex_t lock; + int got_destroy; +- pthread_key_t pipe_key; + int broken_splice_nonblock; + uint64_t notify_ctr; + struct fuse_notify_req notify_list; +@@ -87,53 +83,16 @@ struct fuse_module { + int ctr; + }; + +-/* ----------------------------------------------------------- * +- * Channel interface (when using -o clone_fd) * +- * ----------------------------------------------------------- */ +- +-/** +- * Obtain counted reference to the channel +- * +- * @param ch the channel +- * @return the channel +- */ +-struct fuse_chan *fuse_chan_get(struct fuse_chan *ch); +- +-/** +- * Drop counted reference to a channel +- * +- * @param ch the channel +- */ +-void fuse_chan_put(struct fuse_chan *ch); +- +-struct mount_opts *parse_mount_opts(struct fuse_args *args); +-void destroy_mount_opts(struct mount_opts *mo); +-void fuse_mount_version(void); +-unsigned get_max_read(struct mount_opts *o); +-void fuse_kern_unmount(const char *mountpoint, int fd); +-int fuse_kern_mount(const char *mountpoint, struct mount_opts *mo); +- + int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, + int count); + void fuse_free_req(fuse_req_t req); + +-void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg); +- +-int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg); +- +-int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, +- 
struct fuse_chan *ch); + void fuse_session_process_buf_int(struct fuse_session *se, + const struct fuse_buf *buf, struct fuse_chan *ch); + +-struct fuse *fuse_new_31(struct fuse_args *args, const struct fuse_operations *op, +- size_t op_size, void *private_data); +-int fuse_loop_mt_32(struct fuse *f, struct fuse_loop_config *config); +-int fuse_session_loop_mt_32(struct fuse_session *se, struct fuse_loop_config *config); + + #define FUSE_MAX_MAX_PAGES 256 + #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 + + /* room needed in buffer to accommodate header */ + #define FUSE_BUFFER_HEADER_SIZE 0x1000 +- +diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h +index 5e112e0..0af700d 100644 +--- a/tools/virtiofsd/fuse_log.h ++++ b/tools/virtiofsd/fuse_log.h +@@ -16,10 +16,6 @@ + + #include + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /** + * Log severity level + * +@@ -75,8 +71,4 @@ void fuse_set_log_func(fuse_log_func_t func); + */ + void fuse_log(enum fuse_log_level level, const char *fmt, ...); + +-#ifdef __cplusplus +-} +-#endif +- + #endif /* FUSE_LOG_H_ */ +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index f2d7038..e6fa247 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -16,7 +16,6 @@ + #include "fuse_kernel.h" + #include "fuse_opt.h" + #include "fuse_misc.h" +-#include "mount_util.h" + + #include + #include +@@ -28,12 +27,6 @@ + #include + #include + +-#ifndef F_LINUX_SPECIFIC_BASE +-#define F_LINUX_SPECIFIC_BASE 1024 +-#endif +-#ifndef F_SETPIPE_SZ +-#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) +-#endif + + + #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) +@@ -137,7 +130,6 @@ void fuse_free_req(fuse_req_t req) + req->u.ni.data = NULL; + list_del_req(req); + ctr = --req->ctr; +- fuse_chan_put(req->ch); + req->ch = NULL; + pthread_mutex_unlock(&se->lock); + if (!ctr) +@@ -184,19 +176,7 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan 
*ch, + } + } + +- ssize_t res = writev(ch ? ch->fd : se->fd, +- iov, count); +- int err = errno; +- +- if (res == -1) { +- assert(se != NULL); +- +- /* ENOENT means the operation was interrupted */ +- if (!fuse_session_exited(se) && err != ENOENT) +- perror("fuse: writing device"); +- return -err; +- } +- ++ abort(); /* virtio should have taken it before here */ + return 0; + } + +@@ -480,10 +460,6 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, + struct fuse_bufvec *buf, + size_t len) + { +- struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); +- void *mbuf; +- int res; +- + /* Optimize common case */ + if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && + !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { +@@ -496,350 +472,10 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, + return fuse_send_msg(se, ch, iov, iov_count); + } + +- res = posix_memalign(&mbuf, pagesize, len); +- if (res != 0) +- return res; +- +- mem_buf.buf[0].mem = mbuf; +- res = fuse_buf_copy(&mem_buf, buf, 0); +- if (res < 0) { +- free(mbuf); +- return -res; +- } +- len = res; +- +- iov[iov_count].iov_base = mbuf; +- iov[iov_count].iov_len = len; +- iov_count++; +- res = fuse_send_msg(se, ch, iov, iov_count); +- free(mbuf); +- +- return res; +-} +- +-struct fuse_ll_pipe { +- size_t size; +- int can_grow; +- int pipe[2]; +-}; +- +-static void fuse_ll_pipe_free(struct fuse_ll_pipe *llp) +-{ +- close(llp->pipe[0]); +- close(llp->pipe[1]); +- free(llp); +-} +- +-#ifdef HAVE_SPLICE +-#if !defined(HAVE_PIPE2) || !defined(O_CLOEXEC) +-static int fuse_pipe(int fds[2]) +-{ +- int rv = pipe(fds); +- +- if (rv == -1) +- return rv; +- +- if (fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1 || +- fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1 || +- fcntl(fds[0], F_SETFD, FD_CLOEXEC) == -1 || +- fcntl(fds[1], F_SETFD, FD_CLOEXEC) == -1) { +- close(fds[0]); +- close(fds[1]); +- rv = -1; +- } +- return rv; +-} +-#else +-static int fuse_pipe(int fds[2]) +-{ +- return pipe2(fds, O_CLOEXEC | 
O_NONBLOCK); +-} +-#endif +- +-static struct fuse_ll_pipe *fuse_ll_get_pipe(struct fuse_session *se) +-{ +- struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); +- if (llp == NULL) { +- int res; +- +- llp = malloc(sizeof(struct fuse_ll_pipe)); +- if (llp == NULL) +- return NULL; +- +- res = fuse_pipe(llp->pipe); +- if (res == -1) { +- free(llp); +- return NULL; +- } +- +- /* +- *the default size is 16 pages on linux +- */ +- llp->size = pagesize * 16; +- llp->can_grow = 1; +- +- pthread_setspecific(se->pipe_key, llp); +- } +- +- return llp; +-} +-#endif +- +-static void fuse_ll_clear_pipe(struct fuse_session *se) +-{ +- struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); +- if (llp) { +- pthread_setspecific(se->pipe_key, NULL); +- fuse_ll_pipe_free(llp); +- } +-} +- +-#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) +-static int read_back(int fd, char *buf, size_t len) +-{ +- int res; +- +- res = read(fd, buf, len); +- if (res == -1) { +- fuse_log(FUSE_LOG_ERR, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); +- return -EIO; +- } +- if (res != len) { +- fuse_log(FUSE_LOG_ERR, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); +- return -EIO; +- } ++ abort(); /* Will have taken vhost path */ + return 0; + } + +-static int grow_pipe_to_max(int pipefd) +-{ +- int max; +- int res; +- int maxfd; +- char buf[32]; +- +- maxfd = open("/proc/sys/fs/pipe-max-size", O_RDONLY); +- if (maxfd < 0) +- return -errno; +- +- res = read(maxfd, buf, sizeof(buf) - 1); +- if (res < 0) { +- int saved_errno; +- +- saved_errno = errno; +- close(maxfd); +- return -saved_errno; +- } +- close(maxfd); +- buf[res] = '\0'; +- +- max = atoi(buf); +- res = fcntl(pipefd, F_SETPIPE_SZ, max); +- if (res < 0) +- return -errno; +- return max; +-} +- +-static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, +- struct iovec *iov, int iov_count, +- struct fuse_bufvec *buf, unsigned int flags) +-{ +- int res; 
+- size_t len = fuse_buf_size(buf); +- struct fuse_out_header *out = iov[0].iov_base; +- struct fuse_ll_pipe *llp; +- int splice_flags; +- size_t pipesize; +- size_t total_fd_size; +- size_t idx; +- size_t headerlen; +- struct fuse_bufvec pipe_buf = FUSE_BUFVEC_INIT(len); +- +- if (se->broken_splice_nonblock) +- goto fallback; +- +- if (flags & FUSE_BUF_NO_SPLICE) +- goto fallback; +- +- total_fd_size = 0; +- for (idx = buf->idx; idx < buf->count; idx++) { +- if (buf->buf[idx].flags & FUSE_BUF_IS_FD) { +- total_fd_size = buf->buf[idx].size; +- if (idx == buf->idx) +- total_fd_size -= buf->off; +- } +- } +- if (total_fd_size < 2 * pagesize) +- goto fallback; +- +- if (se->conn.proto_minor < 14 || +- !(se->conn.want & FUSE_CAP_SPLICE_WRITE)) +- goto fallback; +- +- llp = fuse_ll_get_pipe(se); +- if (llp == NULL) +- goto fallback; +- +- +- headerlen = iov_length(iov, iov_count); +- +- out->len = headerlen + len; +- +- /* +- * Heuristic for the required pipe size, does not work if the +- * source contains less than page size fragments +- */ +- pipesize = pagesize * (iov_count + buf->count + 1) + out->len; +- +- if (llp->size < pipesize) { +- if (llp->can_grow) { +- res = fcntl(llp->pipe[0], F_SETPIPE_SZ, pipesize); +- if (res == -1) { +- res = grow_pipe_to_max(llp->pipe[0]); +- if (res > 0) +- llp->size = res; +- llp->can_grow = 0; +- goto fallback; +- } +- llp->size = res; +- } +- if (llp->size < pipesize) +- goto fallback; +- } +- +- +- res = vmsplice(llp->pipe[1], iov, iov_count, SPLICE_F_NONBLOCK); +- if (res == -1) +- goto fallback; +- +- if (res != headerlen) { +- res = -EIO; +- fuse_log(FUSE_LOG_ERR, "fuse: short vmsplice to pipe: %u/%zu\n", res, +- headerlen); +- goto clear_pipe; +- } +- +- pipe_buf.buf[0].flags = FUSE_BUF_IS_FD; +- pipe_buf.buf[0].fd = llp->pipe[1]; +- +- res = fuse_buf_copy(&pipe_buf, buf, +- FUSE_BUF_FORCE_SPLICE | FUSE_BUF_SPLICE_NONBLOCK); +- if (res < 0) { +- if (res == -EAGAIN || res == -EINVAL) { +- /* +- * Should only get EAGAIN on 
kernels with +- * broken SPLICE_F_NONBLOCK support (<= +- * 2.6.35) where this error or a short read is +- * returned even if the pipe itself is not +- * full +- * +- * EINVAL might mean that splice can't handle +- * this combination of input and output. +- */ +- if (res == -EAGAIN) +- se->broken_splice_nonblock = 1; +- +- pthread_setspecific(se->pipe_key, NULL); +- fuse_ll_pipe_free(llp); +- goto fallback; +- } +- res = -res; +- goto clear_pipe; +- } +- +- if (res != 0 && res < len) { +- struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); +- void *mbuf; +- size_t now_len = res; +- /* +- * For regular files a short count is either +- * 1) due to EOF, or +- * 2) because of broken SPLICE_F_NONBLOCK (see above) +- * +- * For other inputs it's possible that we overflowed +- * the pipe because of small buffer fragments. +- */ +- +- res = posix_memalign(&mbuf, pagesize, len); +- if (res != 0) +- goto clear_pipe; +- +- mem_buf.buf[0].mem = mbuf; +- mem_buf.off = now_len; +- res = fuse_buf_copy(&mem_buf, buf, 0); +- if (res > 0) { +- char *tmpbuf; +- size_t extra_len = res; +- /* +- * Trickiest case: got more data. Need to get +- * back the data from the pipe and then fall +- * back to regular write. 
+- */ +- tmpbuf = malloc(headerlen); +- if (tmpbuf == NULL) { +- free(mbuf); +- res = ENOMEM; +- goto clear_pipe; +- } +- res = read_back(llp->pipe[0], tmpbuf, headerlen); +- free(tmpbuf); +- if (res != 0) { +- free(mbuf); +- goto clear_pipe; +- } +- res = read_back(llp->pipe[0], mbuf, now_len); +- if (res != 0) { +- free(mbuf); +- goto clear_pipe; +- } +- len = now_len + extra_len; +- iov[iov_count].iov_base = mbuf; +- iov[iov_count].iov_len = len; +- iov_count++; +- res = fuse_send_msg(se, ch, iov, iov_count); +- free(mbuf); +- return res; +- } +- free(mbuf); +- res = now_len; +- } +- len = res; +- out->len = headerlen + len; +- +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, +- " unique: %llu, success, outsize: %i (splice)\n", +- (unsigned long long) out->unique, out->len); +- } +- +- splice_flags = 0; +- if ((flags & FUSE_BUF_SPLICE_MOVE) && +- (se->conn.want & FUSE_CAP_SPLICE_MOVE)) +- splice_flags |= SPLICE_F_MOVE; +- +- res = splice(llp->pipe[0], NULL, ch ? ch->fd : se->fd, +- NULL, out->len, splice_flags); +- if (res == -1) { +- res = -errno; +- perror("fuse: splice from pipe"); +- goto clear_pipe; +- } +- if (res != out->len) { +- res = -EIO; +- fuse_log(FUSE_LOG_ERR, "fuse: short splice from pipe: %u/%u\n", +- res, out->len); +- goto clear_pipe; +- } +- return 0; +- +-clear_pipe: +- fuse_ll_clear_pipe(se); +- return res; +- +-fallback: +- return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); +-} +-#else + static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int iov_count, + struct fuse_bufvec *buf, unsigned int flags) +@@ -849,7 +485,6 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + + return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); + } +-#endif + + int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, + enum fuse_buf_copy_flags flags) +@@ -1408,16 +1043,11 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void 
*inarg, + if (bufv.buf[0].size < arg->size) { + fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); + fuse_reply_err(req, EIO); +- goto out; ++ return; + } + bufv.buf[0].size = arg->size; + + se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); +- +-out: +- /* Need to reset the pipe if ->write_buf() didn't consume all data */ +- if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) +- fuse_ll_clear_pipe(se); + } + + static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +@@ -2038,17 +1668,6 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + return; + } + +- unsigned max_read_mo = get_max_read(se->mo); +- if (se->conn.max_read != max_read_mo) { +- fuse_log(FUSE_LOG_ERR, "fuse: error: init() and fuse_session_new() " +- "requested different maximum read size (%u vs %u)\n", +- se->conn.max_read, max_read_mo); +- fuse_reply_err(req, EPROTO); +- se->error = -EPROTO; +- fuse_session_exit(se); +- return; +- } +- + if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { + se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; + } +@@ -2364,8 +1983,6 @@ static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, + } + out: + free(rreq); +- if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) +- fuse_ll_clear_pipe(se); + } + + int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +@@ -2496,7 +2113,6 @@ static struct { + [FUSE_RENAME2] = { do_rename2, "RENAME2" }, + [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, + [FUSE_LSEEK] = { do_lseek, "LSEEK" }, +- [CUSE_INIT] = { cuse_lowlevel_init, "CUSE_INIT" }, + }; + + #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) +@@ -2509,21 +2125,6 @@ static const char *opname(enum fuse_opcode opcode) + return fuse_ll_ops[opcode].name; + } + +-static int fuse_ll_copy_from_pipe(struct fuse_bufvec *dst, +- struct fuse_bufvec *src) +-{ +- ssize_t res = fuse_buf_copy(dst, src, 0); +- if 
(res < 0) { +- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", strerror(-res)); +- return res; +- } +- if ((size_t)res < fuse_buf_size(dst)) { +- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); +- return -1; +- } +- return 0; +-} +- + void fuse_session_process_buf(struct fuse_session *se, + const struct fuse_buf *buf) + { +@@ -2533,36 +2134,12 @@ void fuse_session_process_buf(struct fuse_session *se, + void fuse_session_process_buf_int(struct fuse_session *se, + const struct fuse_buf *buf, struct fuse_chan *ch) + { +- const size_t write_header_size = sizeof(struct fuse_in_header) + +- sizeof(struct fuse_write_in); +- struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 }; +- struct fuse_bufvec tmpbuf = FUSE_BUFVEC_INIT(write_header_size); + struct fuse_in_header *in; + const void *inarg; + struct fuse_req *req; +- void *mbuf = NULL; + int err; +- int res; +- +- if (buf->flags & FUSE_BUF_IS_FD) { +- if (buf->size < tmpbuf.buf[0].size) +- tmpbuf.buf[0].size = buf->size; + +- mbuf = malloc(tmpbuf.buf[0].size); +- if (mbuf == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate header\n"); +- goto clear_pipe; +- } +- tmpbuf.buf[0].mem = mbuf; +- +- res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); +- if (res < 0) +- goto clear_pipe; +- +- in = mbuf; +- } else { +- in = buf->mem; +- } ++ in = buf->mem; + + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, +@@ -2584,14 +2161,14 @@ void fuse_session_process_buf_int(struct fuse_session *se, + }; + + fuse_send_msg(se, ch, &iov, 1); +- goto clear_pipe; ++ return; + } + + req->unique = in->unique; + req->ctx.uid = in->uid; + req->ctx.gid = in->gid; + req->ctx.pid = in->pid; +- req->ch = ch ? 
fuse_chan_get(ch) : NULL; ++ req->ch = ch; + + err = EIO; + if (!se->got_init) { +@@ -2627,28 +2204,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, + fuse_reply_err(intr, EAGAIN); + } + +- if ((buf->flags & FUSE_BUF_IS_FD) && write_header_size < buf->size && +- (in->opcode != FUSE_WRITE || !se->op.write_buf) && +- in->opcode != FUSE_NOTIFY_REPLY) { +- void *newmbuf; +- +- err = ENOMEM; +- newmbuf = realloc(mbuf, buf->size); +- if (newmbuf == NULL) +- goto reply_err; +- mbuf = newmbuf; +- +- tmpbuf = FUSE_BUFVEC_INIT(buf->size - write_header_size); +- tmpbuf.buf[0].mem = (char *)mbuf + write_header_size; +- +- res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); +- err = -res; +- if (res < 0) +- goto reply_err; +- +- in = mbuf; +- } +- + inarg = (void *) &in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) + do_write_buf(req, in->nodeid, inarg, buf); +@@ -2657,16 +2212,10 @@ void fuse_session_process_buf_int(struct fuse_session *se, + else + fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); + +-out_free: +- free(mbuf); + return; + + reply_err: + fuse_reply_err(req, err); +-clear_pipe: +- if (buf->flags & FUSE_BUF_IS_FD) +- fuse_ll_clear_pipe(se); +- goto out_free; + } + + #define LL_OPTION(n,o,v) \ +@@ -2684,7 +2233,6 @@ void fuse_lowlevel_version(void) + { + printf("using FUSE kernel interface version %i.%i\n", + FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); +- fuse_mount_version(); + } + + void fuse_lowlevel_help(void) +@@ -2692,204 +2240,29 @@ void fuse_lowlevel_help(void) + /* These are not all options, but the ones that are + potentially of interest to an end-user */ + printf( +-" -o allow_other allow access by all users\n" + " -o allow_root allow access by root\n" +-" -o auto_unmount auto unmount on process termination\n"); ++); + } + + void fuse_session_destroy(struct fuse_session *se) + { +- struct fuse_ll_pipe *llp; +- + if (se->got_init && !se->got_destroy) { + if (se->op.destroy) + se->op.destroy(se->userdata); + } +- llp = 
pthread_getspecific(se->pipe_key); +- if (llp != NULL) +- fuse_ll_pipe_free(llp); +- pthread_key_delete(se->pipe_key); + pthread_mutex_destroy(&se->lock); + free(se->cuse_data); + if (se->fd != -1) + close(se->fd); +- destroy_mount_opts(se->mo); + free(se); + } + + +-static void fuse_ll_pipe_destructor(void *data) +-{ +- struct fuse_ll_pipe *llp = data; +- fuse_ll_pipe_free(llp); +-} +- +-int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf) +-{ +- return fuse_session_receive_buf_int(se, buf, NULL); +-} +- +-int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, +- struct fuse_chan *ch) +-{ +- int err; +- ssize_t res; +-#ifdef HAVE_SPLICE +- size_t bufsize = se->bufsize; +- struct fuse_ll_pipe *llp; +- struct fuse_buf tmpbuf; +- +- if (se->conn.proto_minor < 14 || !(se->conn.want & FUSE_CAP_SPLICE_READ)) +- goto fallback; +- +- llp = fuse_ll_get_pipe(se); +- if (llp == NULL) +- goto fallback; +- +- if (llp->size < bufsize) { +- if (llp->can_grow) { +- res = fcntl(llp->pipe[0], F_SETPIPE_SZ, bufsize); +- if (res == -1) { +- llp->can_grow = 0; +- res = grow_pipe_to_max(llp->pipe[0]); +- if (res > 0) +- llp->size = res; +- goto fallback; +- } +- llp->size = res; +- } +- if (llp->size < bufsize) +- goto fallback; +- } +- +- res = splice(ch ? 
ch->fd : se->fd, +- NULL, llp->pipe[1], NULL, bufsize, 0); +- err = errno; +- +- if (fuse_session_exited(se)) +- return 0; +- +- if (res == -1) { +- if (err == ENODEV) { +- /* Filesystem was unmounted, or connection was aborted +- via /sys/fs/fuse/connections */ +- fuse_session_exit(se); +- return 0; +- } +- if (err != EINTR && err != EAGAIN) +- perror("fuse: splice from device"); +- return -err; +- } +- +- if (res < sizeof(struct fuse_in_header)) { +- fuse_log(FUSE_LOG_ERR, "short splice from fuse device\n"); +- return -EIO; +- } +- +- tmpbuf = (struct fuse_buf) { +- .size = res, +- .flags = FUSE_BUF_IS_FD, +- .fd = llp->pipe[0], +- }; +- +- /* +- * Don't bother with zero copy for small requests. +- * fuse_loop_mt() needs to check for FORGET so this more than +- * just an optimization. +- */ +- if (res < sizeof(struct fuse_in_header) + +- sizeof(struct fuse_write_in) + pagesize) { +- struct fuse_bufvec src = { .buf[0] = tmpbuf, .count = 1 }; +- struct fuse_bufvec dst = { .count = 1 }; +- +- if (!buf->mem) { +- buf->mem = malloc(se->bufsize); +- if (!buf->mem) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: failed to allocate read buffer\n"); +- return -ENOMEM; +- } +- } +- buf->size = se->bufsize; +- buf->flags = 0; +- dst.buf[0] = *buf; +- +- res = fuse_buf_copy(&dst, &src, 0); +- if (res < 0) { +- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", +- strerror(-res)); +- fuse_ll_clear_pipe(se); +- return res; +- } +- if (res < tmpbuf.size) { +- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); +- fuse_ll_clear_pipe(se); +- return -EIO; +- } +- assert(res == tmpbuf.size); +- +- } else { +- /* Don't overwrite buf->mem, as that would cause a leak */ +- buf->fd = tmpbuf.fd; +- buf->flags = tmpbuf.flags; +- } +- buf->size = tmpbuf.size; +- +- return res; +- +-fallback: +-#endif +- if (!buf->mem) { +- buf->mem = malloc(se->bufsize); +- if (!buf->mem) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: failed to allocate read buffer\n"); +- return -ENOMEM; +- } +- } +- +-restart: 
+- res = read(ch ? ch->fd : se->fd, buf->mem, se->bufsize); +- err = errno; +- +- if (fuse_session_exited(se)) +- return 0; +- if (res == -1) { +- /* ENOENT means the operation was interrupted, it's safe +- to restart */ +- if (err == ENOENT) +- goto restart; +- +- if (err == ENODEV) { +- /* Filesystem was unmounted, or connection was aborted +- via /sys/fs/fuse/connections */ +- fuse_session_exit(se); +- return 0; +- } +- /* Errors occurring during normal operation: EINTR (read +- interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem +- umounted) */ +- if (err != EINTR && err != EAGAIN) +- perror("fuse: reading device"); +- return -err; +- } +- if ((size_t) res < sizeof(struct fuse_in_header)) { +- fuse_log(FUSE_LOG_ERR, "short read on fuse device\n"); +- return -EIO; +- } +- +- buf->size = res; +- +- return res; +-} +- + struct fuse_session *fuse_session_new(struct fuse_args *args, + const struct fuse_lowlevel_ops *op, + size_t op_size, void *userdata) + { +- int err; + struct fuse_session *se; +- struct mount_opts *mo; + + if (sizeof(struct fuse_lowlevel_ops) < op_size) { + fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); +@@ -2913,20 +2286,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + /* Parse options */ + if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) + goto out2; +- if(se->deny_others) { +- /* Allowing access only by root is done by instructing +- * kernel to allow access by everyone, and then restricting +- * access to root and mountpoint owner in libfuse. +- */ +- // We may be adding the option a second time, but +- // that doesn't hurt. 
+- if(fuse_opt_add_arg(args, "-oallow_other") == -1) +- goto out2; +- } +- mo = parse_mount_opts(args); +- if (mo == NULL) +- goto out3; +- + if(args->argc == 1 && + args->argv[0][0] == '-') { + fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " +@@ -2940,9 +2299,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + goto out4; + } + +- if (se->debug) +- fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION); +- + se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + + FUSE_BUFFER_HEADER_SIZE; + +@@ -2952,26 +2308,14 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + se->notify_ctr = 1; + fuse_mutex_init(&se->lock); + +- err = pthread_key_create(&se->pipe_key, fuse_ll_pipe_destructor); +- if (err) { +- fuse_log(FUSE_LOG_ERR, "fuse: failed to create thread specific key: %s\n", +- strerror(err)); +- goto out5; +- } +- + memcpy(&se->op, op, op_size); + se->owner = getuid(); + se->userdata = userdata; + +- se->mo = mo; + return se; + +-out5: +- pthread_mutex_destroy(&se->lock); + out4: + fuse_opt_free_args(args); +-out3: +- free(mo); + out2: + free(se); + out1: +@@ -3035,11 +2379,6 @@ int fuse_session_fd(struct fuse_session *se) + + void fuse_session_unmount(struct fuse_session *se) + { +- if (se->mountpoint != NULL) { +- fuse_kern_unmount(se->mountpoint, se->fd); +- free(se->mountpoint); +- se->mountpoint = NULL; +- } + } + + #ifdef linux +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 18c6363..6b1adfc 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -31,10 +31,6 @@ + #include + #include + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /* ----------------------------------------------------------- * + * Miscellaneous definitions * + * ----------------------------------------------------------- */ +@@ -1863,14 +1859,12 @@ void fuse_cmdline_help(void); + * ----------------------------------------------------------- */ + + 
struct fuse_cmdline_opts { +- int singlethread; + int foreground; + int debug; + int nodefault_subtype; + char *mountpoint; + int show_version; + int show_help; +- int clone_fd; + unsigned int max_idle_threads; + }; + +@@ -1962,24 +1956,6 @@ int fuse_session_mount(struct fuse_session *se, const char *mountpoint); + int fuse_session_loop(struct fuse_session *se); + + /** +- * Enter a multi-threaded event loop. +- * +- * For a description of the return value and the conditions when the +- * event loop exits, refer to the documentation of +- * fuse_session_loop(). +- * +- * @param se the session +- * @param config session loop configuration +- * @return see fuse_session_loop() +- */ +-#if FUSE_USE_VERSION < 32 +-int fuse_session_loop_mt_31(struct fuse_session *se, int clone_fd); +-#define fuse_session_loop_mt(se, clone_fd) fuse_session_loop_mt_31(se, clone_fd) +-#else +-int fuse_session_loop_mt(struct fuse_session *se, struct fuse_loop_config *config); +-#endif +- +-/** + * Flag a session as terminated. 
+ * + * This function is invoked by the POSIX signal handlers, when +@@ -2082,8 +2058,4 @@ void fuse_session_process_buf(struct fuse_session *se, + */ + int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); + +-#ifdef __cplusplus +-} +-#endif +- + #endif /* FUSE_LOWLEVEL_H_ */ +diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h +index d8573e7..6910255 100644 +--- a/tools/virtiofsd/fuse_opt.h ++++ b/tools/virtiofsd/fuse_opt.h +@@ -14,10 +14,6 @@ + * This file defines the option parsing interface of FUSE + */ + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /** + * Option description + * +@@ -264,8 +260,4 @@ void fuse_opt_free_args(struct fuse_args *args); + */ + int fuse_opt_match(const struct fuse_opt opts[], const char *opt); + +-#ifdef __cplusplus +-} +-#endif +- + #endif /* FUSE_OPT_H_ */ +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 64ff7ad..5a2e64c 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -41,14 +41,10 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), + FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("-f", foreground), +- FUSE_HELPER_OPT("-s", singlethread), + FUSE_HELPER_OPT("fsname=", nodefault_subtype), + FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), +-#ifndef __FreeBSD__ + FUSE_HELPER_OPT("subtype=", nodefault_subtype), + FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), +-#endif +- FUSE_HELPER_OPT("clone_fd", clone_fd), + FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), + FUSE_OPT_END + }; +@@ -132,9 +128,6 @@ void fuse_cmdline_help(void) + " -V --version print version\n" + " -d -o debug enable debug output (implies -f)\n" + " -f foreground operation\n" +- " -s disable multi-threaded operation\n" +- " -o clone_fd use separate fuse device fd for each thread\n" +- " (may improve performance)\n" + " -o max_idle_threads the maximum number of idle worker threads\n" + " allowed (default: 10)\n"); + 
} +@@ -171,34 +164,6 @@ static int fuse_helper_opt_proc(void *data, const char *arg, int key, + } + } + +-/* Under FreeBSD, there is no subtype option so this +- function actually sets the fsname */ +-static int add_default_subtype(const char *progname, struct fuse_args *args) +-{ +- int res; +- char *subtype_opt; +- +- const char *basename = strrchr(progname, '/'); +- if (basename == NULL) +- basename = progname; +- else if (basename[1] != '\0') +- basename++; +- +- subtype_opt = (char *) malloc(strlen(basename) + 64); +- if (subtype_opt == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); +- return -1; +- } +-#ifdef __FreeBSD__ +- sprintf(subtype_opt, "-ofsname=%s", basename); +-#else +- sprintf(subtype_opt, "-osubtype=%s", basename); +-#endif +- res = fuse_opt_add_arg(args, subtype_opt); +- free(subtype_opt); +- return res; +-} +- + int fuse_parse_cmdline(struct fuse_args *args, + struct fuse_cmdline_opts *opts) + { +@@ -210,14 +175,6 @@ int fuse_parse_cmdline(struct fuse_args *args, + fuse_helper_opt_proc) == -1) + return -1; + +- /* *Linux*: if neither -o subtype nor -o fsname are specified, +- set subtype to program's basename. +- *FreeBSD*: if fsname is not specified, set to program's +- basename. 
*/ +- if (!opts->nodefault_subtype) +- if (add_default_subtype(args->argv[0], args) == -1) +- return -1; +- + return 0; + } + +@@ -276,88 +233,6 @@ int fuse_daemonize(int foreground) + return 0; + } + +-int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, +- size_t op_size, void *user_data) +-{ +- struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +- struct fuse *fuse; +- struct fuse_cmdline_opts opts; +- int res; +- +- if (fuse_parse_cmdline(&args, &opts) != 0) +- return 1; +- +- if (opts.show_version) { +- printf("FUSE library version %s\n", PACKAGE_VERSION); +- fuse_lowlevel_version(); +- res = 0; +- goto out1; +- } +- +- if (opts.show_help) { +- if(args.argv[0][0] != '\0') +- printf("usage: %s [options] \n\n", +- args.argv[0]); +- printf("FUSE options:\n"); +- fuse_cmdline_help(); +- fuse_lib_help(&args); +- res = 0; +- goto out1; +- } +- +- if (!opts.show_help && +- !opts.mountpoint) { +- fuse_log(FUSE_LOG_ERR, "error: no mountpoint specified\n"); +- res = 2; +- goto out1; +- } +- +- +- fuse = fuse_new_31(&args, op, op_size, user_data); +- if (fuse == NULL) { +- res = 3; +- goto out1; +- } +- +- if (fuse_mount(fuse,opts.mountpoint) != 0) { +- res = 4; +- goto out2; +- } +- +- if (fuse_daemonize(opts.foreground) != 0) { +- res = 5; +- goto out3; +- } +- +- struct fuse_session *se = fuse_get_session(fuse); +- if (fuse_set_signal_handlers(se) != 0) { +- res = 6; +- goto out3; +- } +- +- if (opts.singlethread) +- res = fuse_loop(fuse); +- else { +- struct fuse_loop_config loop_config; +- loop_config.clone_fd = opts.clone_fd; +- loop_config.max_idle_threads = opts.max_idle_threads; +- res = fuse_loop_mt_32(fuse, &loop_config); +- } +- if (res) +- res = 7; +- +- fuse_remove_signal_handlers(se); +-out3: +- fuse_unmount(fuse); +-out2: +- fuse_destroy(fuse); +-out1: +- free(opts.mountpoint); +- fuse_opt_free_args(&args); +- return res; +-} +- +- + void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, + struct fuse_conn_info *conn) + 
{ +@@ -420,21 +295,3 @@ struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) + } + return opts; + } +- +-int fuse_open_channel(const char *mountpoint, const char* options) +-{ +- struct mount_opts *opts = NULL; +- int fd = -1; +- const char *argv[] = { "", "-o", options }; +- int argc = sizeof(argv) / sizeof(argv[0]); +- struct fuse_args args = FUSE_ARGS_INIT(argc, (char**) argv); +- +- opts = parse_mount_opts(&args); +- if (opts == NULL) +- return -1; +- +- fd = fuse_kern_mount(mountpoint, opts); +- destroy_mount_opts(opts); +- +- return fd; +-} +diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h +index 6b77c33..7c5f561 100644 +--- a/tools/virtiofsd/passthrough_helpers.h ++++ b/tools/virtiofsd/passthrough_helpers.h +@@ -42,32 +42,6 @@ static int mknod_wrapper(int dirfd, const char *path, const char *link, + res = symlinkat(link, dirfd, path); + } else if (S_ISFIFO(mode)) { + res = mkfifoat(dirfd, path, mode); +-#ifdef __FreeBSD__ +- } else if (S_ISSOCK(mode)) { +- struct sockaddr_un su; +- int fd; +- +- if (strlen(path) >= sizeof(su.sun_path)) { +- errno = ENAMETOOLONG; +- return -1; +- } +- fd = socket(AF_UNIX, SOCK_STREAM, 0); +- if (fd >= 0) { +- /* +- * We must bind the socket to the underlying file +- * system to create the socket file, even though +- * we'll never listen on this socket. 
+- */ +- su.sun_family = AF_UNIX; +- strncpy(su.sun_path, path, sizeof(su.sun_path)); +- res = bindat(dirfd, fd, (struct sockaddr*)&su, +- sizeof(su)); +- if (res == 0) +- close(fd); +- } else { +- res = -1; +- } +-#endif + } else { + res = mknodat(dirfd, path, mode, rdev); + } +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e1a6056..e5f7115 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1240,7 +1240,6 @@ int main(int argc, char *argv[]) + ret = 0; + goto err_out1; + } else if (opts.show_version) { +- printf("FUSE library version %s\n", fuse_pkgversion()); + fuse_lowlevel_version(); + ret = 0; + goto err_out1; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-Trim-out-compatibility-code.patch b/SOURCES/kvm-virtiofsd-Trim-out-compatibility-code.patch new file mode 100644 index 0000000..411af77 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Trim-out-compatibility-code.patch @@ -0,0 +1,545 @@ +From ff16b837e402de773581f77ca188f8806c0b500f Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:51 +0100 +Subject: [PATCH 020/116] virtiofsd: Trim out compatibility code +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-17-dgilbert@redhat.com> +Patchwork-id: 93468 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 016/112] virtiofsd: Trim out compatibility code +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +virtiofsd only supports major=7, minor>=31; trim out a lot of +old compatibility code. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 72c42e2d65510e073cf78fdc924d121c77fa0080) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 330 +++++++++++++++------------------------- + 1 file changed, 119 insertions(+), 211 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 07fb8a6..514d79c 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -387,16 +387,7 @@ static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) + int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) + { + struct fuse_entry_out arg; +- size_t size = req->se->conn.proto_minor < 9 ? FUSE_COMPAT_ENTRY_OUT_SIZE : +- sizeof(arg); +- +- /* +- * before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant +- * negative entry +- */ +- if (!e->ino && req->se->conn.proto_minor < 4) { +- return fuse_reply_err(req, ENOENT); +- } ++ size_t size = sizeof(arg); + + memset(&arg, 0, sizeof(arg)); + fill_entry(&arg, e); +@@ -407,9 +398,7 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, + const struct fuse_file_info *f) + { + char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; +- size_t entrysize = req->se->conn.proto_minor < 9 ? +- FUSE_COMPAT_ENTRY_OUT_SIZE : +- sizeof(struct fuse_entry_out); ++ size_t entrysize = sizeof(struct fuse_entry_out); + struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; + struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); + +@@ -423,8 +412,7 @@ int fuse_reply_attr(fuse_req_t req, const struct stat *attr, + double attr_timeout) + { + struct fuse_attr_out arg; +- size_t size = +- req->se->conn.proto_minor < 9 ? 
FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); ++ size_t size = sizeof(arg); + + memset(&arg, 0, sizeof(arg)); + arg.attr_valid = calc_timeout_sec(attr_timeout); +@@ -519,8 +507,7 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) + int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) + { + struct fuse_statfs_out arg; +- size_t size = +- req->se->conn.proto_minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(arg); ++ size_t size = sizeof(arg); + + memset(&arg, 0, sizeof(arg)); + convert_statfs(stbuf, &arg.st); +@@ -604,45 +591,31 @@ int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, + iov[count].iov_len = sizeof(arg); + count++; + +- if (req->se->conn.proto_minor < 16) { +- if (in_count) { +- iov[count].iov_base = (void *)in_iov; +- iov[count].iov_len = sizeof(in_iov[0]) * in_count; +- count++; +- } ++ /* Can't handle non-compat 64bit ioctls on 32bit */ ++ if (sizeof(void *) == 4 && req->ioctl_64bit) { ++ res = fuse_reply_err(req, EINVAL); ++ goto out; ++ } + +- if (out_count) { +- iov[count].iov_base = (void *)out_iov; +- iov[count].iov_len = sizeof(out_iov[0]) * out_count; +- count++; ++ if (in_count) { ++ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); ++ if (!in_fiov) { ++ goto enomem; + } +- } else { +- /* Can't handle non-compat 64bit ioctls on 32bit */ +- if (sizeof(void *) == 4 && req->ioctl_64bit) { +- res = fuse_reply_err(req, EINVAL); +- goto out; +- } +- +- if (in_count) { +- in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); +- if (!in_fiov) { +- goto enomem; +- } + +- iov[count].iov_base = (void *)in_fiov; +- iov[count].iov_len = sizeof(in_fiov[0]) * in_count; +- count++; ++ iov[count].iov_base = (void *)in_fiov; ++ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; ++ count++; ++ } ++ if (out_count) { ++ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); ++ if (!out_fiov) { ++ goto enomem; + } +- if (out_count) { +- out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); +- if (!out_fiov) { +- goto enomem; +- } 
+ +- iov[count].iov_base = (void *)out_fiov; +- iov[count].iov_len = sizeof(out_fiov[0]) * out_count; +- count++; +- } ++ iov[count].iov_base = (void *)out_fiov; ++ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; ++ count++; + } + + res = send_reply_iov(req, 0, iov, count); +@@ -784,14 +757,12 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + struct fuse_file_info *fip = NULL; + struct fuse_file_info fi; + +- if (req->se->conn.proto_minor >= 9) { +- struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; ++ struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; + +- if (arg->getattr_flags & FUSE_GETATTR_FH) { +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fip = &fi; +- } ++ if (arg->getattr_flags & FUSE_GETATTR_FH) { ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fip = &fi; + } + + if (req->se->op.getattr) { +@@ -856,11 +827,7 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; + char *name = PARAM(arg); + +- if (req->se->conn.proto_minor >= 12) { +- req->ctx.umask = arg->umask; +- } else { +- name = (char *)inarg + FUSE_COMPAT_MKNOD_IN_SIZE; +- } ++ req->ctx.umask = arg->umask; + + if (req->se->op.mknod) { + req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); +@@ -873,9 +840,7 @@ static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { + struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; + +- if (req->se->conn.proto_minor >= 12) { +- req->ctx.umask = arg->umask; +- } ++ req->ctx.umask = arg->umask; + + if (req->se->op.mkdir) { + req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); +@@ -967,11 +932,7 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + +- if (req->se->conn.proto_minor >= 12) { +- req->ctx.umask = arg->umask; +- } else { +- name = (char *)inarg + sizeof(struct fuse_open_in); +- 
} ++ req->ctx.umask = arg->umask; + + req->se->op.create(req, nodeid, name, arg->mode, &fi); + } else { +@@ -1003,10 +964,8 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +- if (req->se->conn.proto_minor >= 9) { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- } ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; + req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); + } else { + fuse_reply_err(req, ENOSYS); +@@ -1023,13 +982,9 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + fi.fh = arg->fh; + fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; + +- if (req->se->conn.proto_minor < 9) { +- param = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; +- } else { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- param = PARAM(arg); +- } ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ param = PARAM(arg); + + if (req->se->op.write) { + req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); +@@ -1053,21 +1008,14 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, + fi.fh = arg->fh; + fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; + +- if (se->conn.proto_minor < 9) { +- bufv.buf[0].mem = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; +- bufv.buf[0].size -= +- sizeof(struct fuse_in_header) + FUSE_COMPAT_WRITE_IN_SIZE; +- assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); +- } else { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { +- bufv.buf[0].mem = PARAM(arg); +- } +- +- bufv.buf[0].size -= +- sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { ++ bufv.buf[0].mem = PARAM(arg); + } ++ ++ bufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); + if 
(bufv.buf[0].size < arg->size) { + fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); + fuse_reply_err(req, EIO); +@@ -1086,9 +1034,7 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.flush = 1; +- if (req->se->conn.proto_minor >= 7) { +- fi.lock_owner = arg->lock_owner; +- } ++ fi.lock_owner = arg->lock_owner; + + if (req->se->op.flush) { + req->se->op.flush(req, nodeid, &fi); +@@ -1105,10 +1051,8 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + fi.fh = arg->fh; +- if (req->se->conn.proto_minor >= 8) { +- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; +- fi.lock_owner = arg->lock_owner; +- } ++ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; ++ fi.lock_owner = arg->lock_owner; + if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { + fi.flock_release = 1; + fi.lock_owner = arg->lock_owner; +@@ -1477,8 +1421,7 @@ static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +- if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && +- !(flags & FUSE_IOCTL_32BIT)) { ++ if (sizeof(void *) == 4 && !(flags & FUSE_IOCTL_32BIT)) { + req->ioctl_64bit = 1; + } + +@@ -1603,7 +1546,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + outarg.major = FUSE_KERNEL_VERSION; + outarg.minor = FUSE_KERNEL_MINOR_VERSION; + +- if (arg->major < 7) { ++ if (arg->major < 7 || (arg->major == 7 && arg->minor < 31)) { + fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", + arg->major, arg->minor); + fuse_reply_err(req, EPROTO); +@@ -1616,81 +1559,71 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + return; + } + +- if (arg->minor >= 6) { +- if (arg->max_readahead < se->conn.max_readahead) { +- se->conn.max_readahead = 
arg->max_readahead; +- } +- if (arg->flags & FUSE_ASYNC_READ) { +- se->conn.capable |= FUSE_CAP_ASYNC_READ; +- } +- if (arg->flags & FUSE_POSIX_LOCKS) { +- se->conn.capable |= FUSE_CAP_POSIX_LOCKS; +- } +- if (arg->flags & FUSE_ATOMIC_O_TRUNC) { +- se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; +- } +- if (arg->flags & FUSE_EXPORT_SUPPORT) { +- se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; +- } +- if (arg->flags & FUSE_DONT_MASK) { +- se->conn.capable |= FUSE_CAP_DONT_MASK; +- } +- if (arg->flags & FUSE_FLOCK_LOCKS) { +- se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; +- } +- if (arg->flags & FUSE_AUTO_INVAL_DATA) { +- se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; +- } +- if (arg->flags & FUSE_DO_READDIRPLUS) { +- se->conn.capable |= FUSE_CAP_READDIRPLUS; +- } +- if (arg->flags & FUSE_READDIRPLUS_AUTO) { +- se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; +- } +- if (arg->flags & FUSE_ASYNC_DIO) { +- se->conn.capable |= FUSE_CAP_ASYNC_DIO; +- } +- if (arg->flags & FUSE_WRITEBACK_CACHE) { +- se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; +- } +- if (arg->flags & FUSE_NO_OPEN_SUPPORT) { +- se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; +- } +- if (arg->flags & FUSE_PARALLEL_DIROPS) { +- se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; +- } +- if (arg->flags & FUSE_POSIX_ACL) { +- se->conn.capable |= FUSE_CAP_POSIX_ACL; +- } +- if (arg->flags & FUSE_HANDLE_KILLPRIV) { +- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; +- } +- if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { +- se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; +- } +- if (!(arg->flags & FUSE_MAX_PAGES)) { +- size_t max_bufsize = +- FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + +- FUSE_BUFFER_HEADER_SIZE; +- if (bufsize > max_bufsize) { +- bufsize = max_bufsize; +- } ++ if (arg->max_readahead < se->conn.max_readahead) { ++ se->conn.max_readahead = arg->max_readahead; ++ } ++ if (arg->flags & FUSE_ASYNC_READ) { ++ se->conn.capable |= FUSE_CAP_ASYNC_READ; ++ } ++ if (arg->flags & FUSE_POSIX_LOCKS) { ++ se->conn.capable |= 
FUSE_CAP_POSIX_LOCKS; ++ } ++ if (arg->flags & FUSE_ATOMIC_O_TRUNC) { ++ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; ++ } ++ if (arg->flags & FUSE_EXPORT_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; ++ } ++ if (arg->flags & FUSE_DONT_MASK) { ++ se->conn.capable |= FUSE_CAP_DONT_MASK; ++ } ++ if (arg->flags & FUSE_FLOCK_LOCKS) { ++ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; ++ } ++ if (arg->flags & FUSE_AUTO_INVAL_DATA) { ++ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; ++ } ++ if (arg->flags & FUSE_DO_READDIRPLUS) { ++ se->conn.capable |= FUSE_CAP_READDIRPLUS; ++ } ++ if (arg->flags & FUSE_READDIRPLUS_AUTO) { ++ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; ++ } ++ if (arg->flags & FUSE_ASYNC_DIO) { ++ se->conn.capable |= FUSE_CAP_ASYNC_DIO; ++ } ++ if (arg->flags & FUSE_WRITEBACK_CACHE) { ++ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; ++ } ++ if (arg->flags & FUSE_NO_OPEN_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; ++ } ++ if (arg->flags & FUSE_PARALLEL_DIROPS) { ++ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; ++ } ++ if (arg->flags & FUSE_POSIX_ACL) { ++ se->conn.capable |= FUSE_CAP_POSIX_ACL; ++ } ++ if (arg->flags & FUSE_HANDLE_KILLPRIV) { ++ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; ++ } ++ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; ++ } ++ if (!(arg->flags & FUSE_MAX_PAGES)) { ++ size_t max_bufsize = FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + ++ FUSE_BUFFER_HEADER_SIZE; ++ if (bufsize > max_bufsize) { ++ bufsize = max_bufsize; + } +- } else { +- se->conn.max_readahead = 0; + } +- +- if (se->conn.proto_minor >= 14) { + #ifdef HAVE_SPLICE + #ifdef HAVE_VMSPLICE +- se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; ++ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; + #endif +- se->conn.capable |= FUSE_CAP_SPLICE_READ; ++ se->conn.capable |= FUSE_CAP_SPLICE_READ; + #endif +- } +- if (se->conn.proto_minor >= 18) { +- se->conn.capable |= 
FUSE_CAP_IOCTL_DIR; +- } ++ se->conn.capable |= FUSE_CAP_IOCTL_DIR; + + /* + * Default settings for modern filesystems. +@@ -1797,24 +1730,20 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + outarg.max_readahead = se->conn.max_readahead; + outarg.max_write = se->conn.max_write; +- if (se->conn.proto_minor >= 13) { +- if (se->conn.max_background >= (1 << 16)) { +- se->conn.max_background = (1 << 16) - 1; +- } +- if (se->conn.congestion_threshold > se->conn.max_background) { +- se->conn.congestion_threshold = se->conn.max_background; +- } +- if (!se->conn.congestion_threshold) { +- se->conn.congestion_threshold = se->conn.max_background * 3 / 4; +- } +- +- outarg.max_background = se->conn.max_background; +- outarg.congestion_threshold = se->conn.congestion_threshold; ++ if (se->conn.max_background >= (1 << 16)) { ++ se->conn.max_background = (1 << 16) - 1; ++ } ++ if (se->conn.congestion_threshold > se->conn.max_background) { ++ se->conn.congestion_threshold = se->conn.max_background; + } +- if (se->conn.proto_minor >= 23) { +- outarg.time_gran = se->conn.time_gran; ++ if (!se->conn.congestion_threshold) { ++ se->conn.congestion_threshold = se->conn.max_background * 3 / 4; + } + ++ outarg.max_background = se->conn.max_background; ++ outarg.congestion_threshold = se->conn.congestion_threshold; ++ outarg.time_gran = se->conn.time_gran; ++ + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, + outarg.minor); +@@ -1828,11 +1757,6 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + outarg.congestion_threshold); + fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); + } +- if (arg->minor < 5) { +- outargsize = FUSE_COMPAT_INIT_OUT_SIZE; +- } else if (arg->minor < 23) { +- outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; +- } + + send_reply_ok(req, &outarg, outargsize); + } +@@ -1896,10 +1820,6 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, + return 
-EINVAL; + } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { +- return -ENOSYS; +- } +- + outarg.ino = ino; + outarg.off = off; + outarg.len = len; +@@ -1920,10 +1840,6 @@ int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, + return -EINVAL; + } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { +- return -ENOSYS; +- } +- + outarg.parent = parent; + outarg.namelen = namelen; + outarg.padding = 0; +@@ -1947,10 +1863,6 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + return -EINVAL; + } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) { +- return -ENOSYS; +- } +- + outarg.parent = parent; + outarg.child = child; + outarg.namelen = namelen; +@@ -1977,10 +1889,6 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + return -EINVAL; + } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { +- return -ENOSYS; +- } +- + out.unique = 0; + out.error = FUSE_NOTIFY_STORE; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch b/SOURCES/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch new file mode 100644 index 0000000..a0882d5 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch @@ -0,0 +1,93 @@ +From e4c8fd1060fb69a093064851ebf66dd82533ec0e Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:17 +0100 +Subject: [PATCH 106/116] virtiofsd: add definition of fuse_buf_writev() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-103-dgilbert@redhat.com> +Patchwork-id: 93557 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 102/112] virtiofsd: add definition of fuse_buf_writev() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: piaojun + +Define fuse_buf_writev() which use pwritev and writev to improve io +bandwidth. Especially, the src bufs with 0 size should be skipped as +their mems are not *block_size* aligned which will cause writev failed +in direct io mode. + +Signed-off-by: Jun Piao +Suggested-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 9ceaaa15cf21073c2b23058c374f61c30cd39c31) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 38 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 38 insertions(+) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 42a608f..37befeb 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -14,6 +14,7 @@ + #include "fuse_lowlevel.h" + #include + #include ++#include + #include + #include + +@@ -33,6 +34,43 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv) + return size; + } + ++__attribute__((unused)) ++static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, ++ struct fuse_bufvec *in_buf) ++{ ++ ssize_t res, i, j; ++ size_t iovcnt = in_buf->count; ++ struct iovec *iov; ++ int fd = out_buf->fd; ++ ++ iov = calloc(iovcnt, sizeof(struct iovec)); ++ if (!iov) { ++ return -ENOMEM; ++ } ++ ++ for (i = 0, j = 0; i < iovcnt; i++) { ++ /* Skip the buf with 0 size */ ++ if (in_buf->buf[i].size) { ++ iov[j].iov_base = in_buf->buf[i].mem; ++ iov[j].iov_len = in_buf->buf[i].size; ++ j++; ++ } ++ } ++ ++ if (out_buf->flags & FUSE_BUF_FD_SEEK) { ++ res = pwritev(fd, iov, iovcnt, out_buf->pos); ++ } else { ++ res = writev(fd, iov, iovcnt); ++ } ++ ++ if (res == -1) { ++ res = -errno; ++ } ++ ++ 
free(iov); ++ return res; ++} ++ + static size_t min_size(size_t s1, size_t s2) + { + return s1 < s2 ? s1 : s2; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch b/SOURCES/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch new file mode 100644 index 0000000..451f12b --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch @@ -0,0 +1,170 @@ +From f91a9bdc171142174110e9ff1716b611f6fb0039 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:07 +0100 +Subject: [PATCH 036/116] virtiofsd: add --fd=FDNUM fd passing option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-33-dgilbert@redhat.com> +Patchwork-id: 93487 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 032/112] virtiofsd: add --fd=FDNUM fd passing option +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Although --socket-path=PATH is useful for manual invocations, management +tools typically create the UNIX domain socket themselves and pass it to +the vhost-user device backend. This way QEMU can be launched +immediately with a valid socket. No waiting for the vhost-user device +backend is required when fd passing is used. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit cee8e35d4386e34bf79c3ca2aab7f7b1bb48cf8d) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 1 + + tools/virtiofsd/fuse_lowlevel.c | 16 ++++++++++++---- + tools/virtiofsd/fuse_virtio.c | 31 +++++++++++++++++++++++++------ + 3 files changed, 38 insertions(+), 10 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 1126723..45995f3 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -68,6 +68,7 @@ struct fuse_session { + size_t bufsize; + int error; + char *vu_socket_path; ++ int vu_listen_fd; + int vu_socketfd; + struct fv_VuDev *virtio_dev; + }; +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 4f4684d..95f4db8 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2130,6 +2130,7 @@ static const struct fuse_opt fuse_ll_opts[] = { + LL_OPTION("--debug", debug, 1), + LL_OPTION("allow_root", deny_others, 1), + LL_OPTION("--socket-path=%s", vu_socket_path, 0), ++ LL_OPTION("--fd=%d", vu_listen_fd, 0), + FUSE_OPT_END + }; + +@@ -2147,7 +2148,8 @@ void fuse_lowlevel_help(void) + */ + printf( + " -o allow_root allow access by root\n" +- " --socket-path=PATH path for the vhost-user socket\n"); ++ " --socket-path=PATH path for the vhost-user socket\n" ++ " --fd=FDNUM fd number of vhost-user socket\n"); + } + + void fuse_session_destroy(struct fuse_session *se) +@@ -2191,6 +2193,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + goto out1; + } + se->fd = -1; ++ se->vu_listen_fd = -1; + se->conn.max_write = UINT_MAX; + se->conn.max_readahead = UINT_MAX; + +@@ -2212,8 +2215,13 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + goto out4; + } + +- if (!se->vu_socket_path) { +- fprintf(stderr, "fuse: missing -o vhost_user_socket option\n"); ++ if (!se->vu_socket_path && se->vu_listen_fd < 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: missing --socket-path or --fd 
option\n"); ++ goto out4; ++ } ++ if (se->vu_socket_path && se->vu_listen_fd >= 0) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: --socket-path and --fd cannot be given together\n"); + goto out4; + } + +@@ -2253,7 +2261,7 @@ void fuse_session_unmount(struct fuse_session *se) + + int fuse_lowlevel_is_virtio(struct fuse_session *se) + { +- return se->vu_socket_path != NULL; ++ return !!se->virtio_dev; + } + + #ifdef linux +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 7e2711b..635f877 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -638,18 +638,21 @@ int virtio_loop(struct fuse_session *se) + return 0; + } + +-int virtio_session_mount(struct fuse_session *se) ++static int fv_create_listen_socket(struct fuse_session *se) + { + struct sockaddr_un un; + mode_t old_umask; + ++ /* Nothing to do if fd is already initialized */ ++ if (se->vu_listen_fd >= 0) { ++ return 0; ++ } ++ + if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) { + fuse_log(FUSE_LOG_ERR, "Socket path too long\n"); + return -1; + } + +- se->fd = -1; +- + /* + * Create the Unix socket to communicate with qemu + * based on QEMU's vhost-user-bridge +@@ -682,15 +685,31 @@ int virtio_session_mount(struct fuse_session *se) + return -1; + } + ++ se->vu_listen_fd = listen_sock; ++ return 0; ++} ++ ++int virtio_session_mount(struct fuse_session *se) ++{ ++ int ret; ++ ++ ret = fv_create_listen_socket(se); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ se->fd = -1; ++ + fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n", + __func__); +- int data_sock = accept(listen_sock, NULL, NULL); ++ int data_sock = accept(se->vu_listen_fd, NULL, NULL); + if (data_sock == -1) { + fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n"); +- close(listen_sock); ++ close(se->vu_listen_fd); + return -1; + } +- close(listen_sock); ++ close(se->vu_listen_fd); ++ se->vu_listen_fd = -1; + fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket 
connection\n", + __func__); + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch b/SOURCES/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch new file mode 100644 index 0000000..b874dc9 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch @@ -0,0 +1,134 @@ +From 1b0edd3d0a2ee5c097bcf3501c1dfa937f02e473 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:21 +0100 +Subject: [PATCH 050/116] virtiofsd: add fuse_mbuf_iter API +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-47-dgilbert@redhat.com> +Patchwork-id: 93502 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 046/112] virtiofsd: add fuse_mbuf_iter API +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Introduce an API for consuming bytes from a buffer with size checks. +All FUSE operations will be converted to use this safe API instead of +void *inarg. + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit dad157e880416ab3a0e45beaa0e81977516568bc) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 28 +++++++++++++++++++++++++ + tools/virtiofsd/fuse_common.h | 49 ++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 76 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 772efa9..42a608f 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -267,3 +267,31 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) + + return copied; + } ++ ++void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len) ++{ ++ void *ptr; ++ ++ if (len > iter->size - iter->pos) { ++ return NULL; ++ } ++ ++ ptr = iter->mem + iter->pos; ++ iter->pos += len; ++ return ptr; ++} ++ ++const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter) ++{ ++ const char *str = iter->mem + iter->pos; ++ size_t remaining = iter->size - iter->pos; ++ size_t i; ++ ++ for (i = 0; i < remaining; i++) { ++ if (str[i] == '\0') { ++ iter->pos += i + 1; ++ return str; ++ } ++ } ++ return NULL; ++} +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index 0cb33ac..f8f6433 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -703,10 +703,57 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); + */ + ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src); + ++/** ++ * Memory buffer iterator ++ * ++ */ ++struct fuse_mbuf_iter { ++ /** ++ * Data pointer ++ */ ++ void *mem; ++ ++ /** ++ * Total length, in bytes ++ */ ++ size_t size; ++ ++ /** ++ * Offset from start of buffer ++ */ ++ size_t pos; ++}; ++ ++/* Initialize memory buffer iterator from a fuse_buf */ ++#define FUSE_MBUF_ITER_INIT(fbuf) \ ++ ((struct fuse_mbuf_iter){ \ ++ .mem = fbuf->mem, \ ++ .size = fbuf->size, \ ++ .pos = 0, \ ++ }) ++ ++/** ++ * Consume bytes from a memory buffer iterator ++ * ++ * 
@param iter memory buffer iterator ++ * @param len number of bytes to consume ++ * @return pointer to start of consumed bytes or ++ * NULL if advancing beyond end of buffer ++ */ ++void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len); ++ ++/** ++ * Consume a NUL-terminated string from a memory buffer iterator ++ * ++ * @param iter memory buffer iterator ++ * @return pointer to the string or ++ * NULL if advancing beyond end of buffer or there is no NUL-terminator ++ */ ++const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter); ++ + /* + * Signal handling + */ +- + /** + * Exit session on HUP, TERM and INT signals and ignore PIPE signal + * +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch b/SOURCES/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch new file mode 100644 index 0000000..bdef115 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch @@ -0,0 +1,88 @@ +From 7a3c94e10b087c06635ef72aadb1550184dd5c58 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:58 +0100 +Subject: [PATCH 087/116] virtiofsd: add helper for lo_data cleanup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-84-dgilbert@redhat.com> +Patchwork-id: 93538 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 083/112] virtiofsd: add helper for lo_data cleanup +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +This offers an helper function for lo_data's cleanup. + +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 18a69cbbb6a4caa7c2040c6db4a33b044a32be7e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 37 +++++++++++++++++++++---------------- + 1 file changed, 21 insertions(+), 16 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 056ebe8..e8dc5c7 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2407,6 +2407,26 @@ static gboolean lo_key_equal(gconstpointer a, gconstpointer b) + return la->ino == lb->ino && la->dev == lb->dev; + } + ++static void fuse_lo_data_cleanup(struct lo_data *lo) ++{ ++ if (lo->inodes) { ++ g_hash_table_destroy(lo->inodes); ++ } ++ lo_map_destroy(&lo->fd_map); ++ lo_map_destroy(&lo->dirp_map); ++ lo_map_destroy(&lo->ino_map); ++ ++ if (lo->proc_self_fd >= 0) { ++ close(lo->proc_self_fd); ++ } ++ ++ if (lo->root.fd >= 0) { ++ close(lo->root.fd); ++ } ++ ++ free(lo->source); ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2554,22 +2574,7 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + +- if (lo.inodes) { +- g_hash_table_destroy(lo.inodes); +- } +- lo_map_destroy(&lo.fd_map); +- lo_map_destroy(&lo.dirp_map); +- lo_map_destroy(&lo.ino_map); +- +- if (lo.proc_self_fd >= 0) { +- close(lo.proc_self_fd); +- } +- +- if (lo.root.fd >= 0) { +- close(lo.root.fd); +- } +- +- free(lo.source); ++ fuse_lo_data_cleanup(&lo); + + return ret ? 1 : 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch b/SOURCES/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch new file mode 100644 index 0000000..5e81663 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch @@ -0,0 +1,46 @@ +From c55995c25f60168e3cb6b5bae1bf9a47813383d0 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:55 +0100 +Subject: [PATCH 024/116] virtiofsd: add -o source=PATH to help output +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-21-dgilbert@redhat.com> +Patchwork-id: 93474 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 020/112] virtiofsd: add -o source=PATH to help output +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +The -o source=PATH option will be used by most command-line invocations. +Let's document it! + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 4ff075f72be2f489c8998ae492ec5cdbbbd73e07) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 26ac870..fc9b264 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1319,6 +1319,7 @@ int main(int argc, char *argv[]) + if (opts.show_help) { + printf("usage: %s [options]\n\n", argv[0]); + fuse_cmdline_help(); ++ printf(" -o source=PATH shared directory tree\n"); + fuse_lowlevel_help(); + ret = 0; + goto err_out1; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-print-capabilities-option.patch b/SOURCES/kvm-virtiofsd-add-print-capabilities-option.patch new file mode 100644 index 0000000..b57e408 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-print-capabilities-option.patch @@ -0,0 +1,121 @@ +From 23d81ee7564084f29e32fedaed5196ae1a5a3240 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:10 +0100 +Subject: [PATCH 039/116] virtiofsd: add --print-capabilities option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-36-dgilbert@redhat.com> +Patchwork-id: 93486 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 035/112] virtiofsd: add --print-capabilities option +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Add the --print-capabilities option as per vhost-user.rst "Backend +programs conventions". Currently there are no advertised features. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 45018fbb0a73ce66fd3dd87ecd2872b45658add4) +Signed-off-by: Miroslav Rezanina +--- + docs/interop/vhost-user.json | 4 +++- + tools/virtiofsd/fuse_lowlevel.h | 1 + + tools/virtiofsd/helper.c | 2 ++ + tools/virtiofsd/passthrough_ll.c | 12 ++++++++++++ + 4 files changed, 18 insertions(+), 1 deletion(-) + +diff --git a/docs/interop/vhost-user.json b/docs/interop/vhost-user.json +index da6aaf5..d4ea1f7 100644 +--- a/docs/interop/vhost-user.json ++++ b/docs/interop/vhost-user.json +@@ -31,6 +31,7 @@ + # @rproc-serial: virtio remoteproc serial link + # @scsi: virtio scsi + # @vsock: virtio vsock transport ++# @fs: virtio fs (since 4.2) + # + # Since: 4.0 + ## +@@ -50,7 +51,8 @@ + 'rpmsg', + 'rproc-serial', + 'scsi', +- 'vsock' ++ 'vsock', ++ 'fs' + ] + } + +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index f6b3470..0d61df8 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1794,6 +1794,7 @@ struct fuse_cmdline_opts { + int nodefault_subtype; + int show_version; + int show_help; ++ int print_capabilities; + unsigned int max_idle_threads; + }; + +diff --git 
a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index a3645fc..b8ec5ac 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -40,6 +40,7 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_HELPER_OPT("--help", show_help), + FUSE_HELPER_OPT("-V", show_version), + FUSE_HELPER_OPT("--version", show_version), ++ FUSE_HELPER_OPT("--print-capabilities", print_capabilities), + FUSE_HELPER_OPT("-d", debug), + FUSE_HELPER_OPT("debug", debug), + FUSE_HELPER_OPT("-d", foreground), +@@ -135,6 +136,7 @@ void fuse_cmdline_help(void) + { + printf(" -h --help print help\n" + " -V --version print version\n" ++ " --print-capabilities print vhost-user.json\n" + " -d -o debug enable debug output (implies -f)\n" + " -f foreground operation\n" + " --daemonize run in background\n" +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 037c5d7..cd27c09 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1298,6 +1298,14 @@ static struct fuse_lowlevel_ops lo_oper = { + .lseek = lo_lseek, + }; + ++/* Print vhost-user.json backend program capabilities */ ++static void print_capabilities(void) ++{ ++ printf("{\n"); ++ printf(" \"type\": \"fs\"\n"); ++ printf("}\n"); ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -1328,6 +1336,10 @@ int main(int argc, char *argv[]) + fuse_lowlevel_version(); + ret = 0; + goto err_out1; ++ } else if (opts.print_capabilities) { ++ print_capabilities(); ++ ret = 0; ++ goto err_out1; + } + + if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-seccomp-whitelist.patch b/SOURCES/kvm-virtiofsd-add-seccomp-whitelist.patch new file mode 100644 index 0000000..b34108e --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-seccomp-whitelist.patch @@ -0,0 +1,285 @@ +From 58c4e9473b364fb62aac797b0d69fd8ddb02c8c7 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:30 +0100 +Subject: [PATCH 059/116] virtiofsd: add seccomp whitelist +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-56-dgilbert@redhat.com> +Patchwork-id: 93511 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 055/112] virtiofsd: add seccomp whitelist +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Only allow system calls that are needed by virtiofsd. All other system +calls cause SIGSYS to be directed at the thread and the process will +coredump. + +Restricting system calls reduces the kernel attack surface and limits +what the process can do when compromised. + +Signed-off-by: Stefan Hajnoczi +with additional entries by: +Signed-off-by: Ganesh Maharaj Mahalingam +Signed-off-by: Masayoshi Mizuma +Signed-off-by: Misono Tomohiro +Signed-off-by: piaojun +Signed-off-by: Vivek Goyal +Signed-off-by: Eric Ren +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 4f8bde99c175ffd86b5125098a4707d43f5e80c6) + +Signed-off-by: Miroslav Rezanina +--- + Makefile | 5 +- + tools/virtiofsd/Makefile.objs | 5 +- + tools/virtiofsd/passthrough_ll.c | 2 + + tools/virtiofsd/seccomp.c | 151 +++++++++++++++++++++++++++++++++++++++ + tools/virtiofsd/seccomp.h | 14 ++++ + 5 files changed, 174 insertions(+), 3 deletions(-) + create mode 100644 tools/virtiofsd/seccomp.c + create mode 100644 tools/virtiofsd/seccomp.h + +diff --git a/Makefile b/Makefile +index 0e9755d..6879a06 100644 +--- a/Makefile ++++ b/Makefile +@@ -330,7 +330,7 @@ endif + endif + endif + +-ifdef CONFIG_LINUX ++ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) + HELPERS-y += virtiofsd$(EXESUF) + vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json + endif +@@ -681,7 +681,8 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad" + rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) + $(call LINK, $^) + +-ifdef CONFIG_LINUX # relies on Linux-specific syscalls ++# relies on Linux-specific syscalls ++ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) + virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) + $(call LINK, $^) + endif +diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs +index 45a8075..076f667 100644 +--- a/tools/virtiofsd/Makefile.objs ++++ b/tools/virtiofsd/Makefile.objs +@@ -5,5 +5,8 @@ virtiofsd-obj-y = buffer.o \ + fuse_signals.o \ + fuse_virtio.o \ + helper.o \ +- passthrough_ll.o ++ passthrough_ll.o \ ++ seccomp.o + ++seccomp.o-cflags := $(SECCOMP_CFLAGS) ++seccomp.o-libs := $(SECCOMP_LIBS) +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 0947d14..bd8925b 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -59,6 +59,7 @@ + #include + + #include "passthrough_helpers.h" ++#include "seccomp.h" + + struct lo_map_elem { + union { +@@ -2091,6 +2092,7 @@ static void setup_sandbox(struct lo_data *lo, struct 
fuse_session *se) + { + setup_namespaces(lo, se); + setup_mounts(lo->source); ++ setup_seccomp(); + } + + int main(int argc, char *argv[]) +diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c +new file mode 100644 +index 0000000..691fb63 +--- /dev/null ++++ b/tools/virtiofsd/seccomp.c +@@ -0,0 +1,151 @@ ++/* ++ * Seccomp sandboxing for virtiofsd ++ * ++ * Copyright (C) 2019 Red Hat, Inc. ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include "seccomp.h" ++#include "fuse_i.h" ++#include "fuse_log.h" ++#include ++#include ++#include ++#include ++ ++/* Bodge for libseccomp 2.4.2 which broke ppoll */ ++#if !defined(__SNR_ppoll) && defined(__SNR_brk) ++#ifdef __NR_ppoll ++#define __SNR_ppoll __NR_ppoll ++#else ++#define __SNR_ppoll __PNR_ppoll ++#endif ++#endif ++ ++static const int syscall_whitelist[] = { ++ /* TODO ireg sem*() syscalls */ ++ SCMP_SYS(brk), ++ SCMP_SYS(capget), /* For CAP_FSETID */ ++ SCMP_SYS(capset), ++ SCMP_SYS(clock_gettime), ++ SCMP_SYS(clone), ++#ifdef __NR_clone3 ++ SCMP_SYS(clone3), ++#endif ++ SCMP_SYS(close), ++ SCMP_SYS(copy_file_range), ++ SCMP_SYS(dup), ++ SCMP_SYS(eventfd2), ++ SCMP_SYS(exit), ++ SCMP_SYS(exit_group), ++ SCMP_SYS(fallocate), ++ SCMP_SYS(fchmodat), ++ SCMP_SYS(fchownat), ++ SCMP_SYS(fcntl), ++ SCMP_SYS(fdatasync), ++ SCMP_SYS(fgetxattr), ++ SCMP_SYS(flistxattr), ++ SCMP_SYS(flock), ++ SCMP_SYS(fremovexattr), ++ SCMP_SYS(fsetxattr), ++ SCMP_SYS(fstat), ++ SCMP_SYS(fstatfs), ++ SCMP_SYS(fsync), ++ SCMP_SYS(ftruncate), ++ SCMP_SYS(futex), ++ SCMP_SYS(getdents), ++ SCMP_SYS(getdents64), ++ SCMP_SYS(getegid), ++ SCMP_SYS(geteuid), ++ SCMP_SYS(getpid), ++ SCMP_SYS(gettid), ++ SCMP_SYS(gettimeofday), ++ SCMP_SYS(linkat), ++ SCMP_SYS(lseek), ++ SCMP_SYS(madvise), ++ SCMP_SYS(mkdirat), ++ SCMP_SYS(mknodat), ++ SCMP_SYS(mmap), ++ SCMP_SYS(mprotect), ++ SCMP_SYS(mremap), ++ SCMP_SYS(munmap), ++ SCMP_SYS(newfstatat), ++ SCMP_SYS(open), ++ SCMP_SYS(openat), ++ SCMP_SYS(ppoll), 
++ SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */ ++ SCMP_SYS(preadv), ++ SCMP_SYS(pread64), ++ SCMP_SYS(pwritev), ++ SCMP_SYS(pwrite64), ++ SCMP_SYS(read), ++ SCMP_SYS(readlinkat), ++ SCMP_SYS(recvmsg), ++ SCMP_SYS(renameat), ++ SCMP_SYS(renameat2), ++ SCMP_SYS(rt_sigaction), ++ SCMP_SYS(rt_sigprocmask), ++ SCMP_SYS(rt_sigreturn), ++ SCMP_SYS(sendmsg), ++ SCMP_SYS(setresgid), ++ SCMP_SYS(setresuid), ++#ifdef __NR_setresgid32 ++ SCMP_SYS(setresgid32), ++#endif ++#ifdef __NR_setresuid32 ++ SCMP_SYS(setresuid32), ++#endif ++ SCMP_SYS(set_robust_list), ++ SCMP_SYS(symlinkat), ++ SCMP_SYS(time), /* Rarely needed, except on static builds */ ++ SCMP_SYS(tgkill), ++ SCMP_SYS(unlinkat), ++ SCMP_SYS(utimensat), ++ SCMP_SYS(write), ++ SCMP_SYS(writev), ++}; ++ ++void setup_seccomp(void) ++{ ++ scmp_filter_ctx ctx; ++ size_t i; ++ ++#ifdef SCMP_ACT_KILL_PROCESS ++ ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); ++ /* Handle a newer libseccomp but an older kernel */ ++ if (!ctx && errno == EOPNOTSUPP) { ++ ctx = seccomp_init(SCMP_ACT_TRAP); ++ } ++#else ++ ctx = seccomp_init(SCMP_ACT_TRAP); ++#endif ++ if (!ctx) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n"); ++ exit(1); ++ } ++ ++ for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) { ++ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, ++ syscall_whitelist[i], 0) != 0) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d", ++ syscall_whitelist[i]); ++ exit(1); ++ } ++ } ++ ++ /* libvhost-user calls this for post-copy migration, we don't need it */ ++ if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS), ++ SCMP_SYS(userfaultfd), 0) != 0) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n"); ++ exit(1); ++ } ++ ++ if (seccomp_load(ctx) < 0) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n"); ++ exit(1); ++ } ++ ++ seccomp_release(ctx); ++} +diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h +new file mode 100644 +index 0000000..86bce72 +--- /dev/null ++++ b/tools/virtiofsd/seccomp.h 
+@@ -0,0 +1,14 @@ ++/* ++ * Seccomp sandboxing for virtiofsd ++ * ++ * Copyright (C) 2019 Red Hat, Inc. ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#ifndef VIRTIOFSD_SECCOMP_H ++#define VIRTIOFSD_SECCOMP_H ++ ++void setup_seccomp(void); ++ ++#endif /* VIRTIOFSD_SECCOMP_H */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-some-options-to-the-help-message.patch b/SOURCES/kvm-virtiofsd-add-some-options-to-the-help-message.patch new file mode 100644 index 0000000..ac6dc54 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-some-options-to-the-help-message.patch @@ -0,0 +1,74 @@ +From 6d62abb99b6b918f05f099b01a99f4326a69d650 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:26 +0100 +Subject: [PATCH 115/116] virtiofsd: add some options to the help message +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-112-dgilbert@redhat.com> +Patchwork-id: 93565 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 111/112] virtiofsd: add some options to the help message +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Masayoshi Mizuma + +Add following options to the help message: +- cache +- flock|no_flock +- norace +- posix_lock|no_posix_lock +- readdirplus|no_readdirplus +- timeout +- writeback|no_writeback +- xattr|no_xattr + +Signed-off-by: Masayoshi Mizuma + +dgilbert: Split cache, norace, posix_lock, readdirplus off + into our own earlier patches that added the options + +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 1d59b1b210d7c3b0bdf4b10ebe0bb1fccfcb8b95) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index f98d8f2..0801cf7 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -148,6 +148,8 @@ void fuse_cmdline_help(void) + " -o cache= cache mode. could be one of \"auto, " + "always, none\"\n" + " default: auto\n" ++ " -o flock|no_flock enable/disable flock\n" ++ " default: no_flock\n" + " -o log_level= log level, default to \"info\"\n" + " level could be one of \"debug, " + "info, warn, err\"\n" +@@ -163,7 +165,13 @@ void fuse_cmdline_help(void) + " enable/disable readirplus\n" + " default: readdirplus except with " + "cache=none\n" +- ); ++ " -o timeout= I/O timeout (second)\n" ++ " default: depends on cache= option.\n" ++ " -o writeback|no_writeback enable/disable writeback cache\n" ++ " default: no_writeback\n" ++ " -o xattr|no_xattr enable/disable xattr\n" ++ " default: no_xattr\n" ++ ); + } + + static int fuse_helper_opt_proc(void *data, const char *arg, int key, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-syslog-command-line-option.patch b/SOURCES/kvm-virtiofsd-add-syslog-command-line-option.patch new file mode 100644 index 0000000..5b55342 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-syslog-command-line-option.patch @@ -0,0 +1,239 @@ +From 6f5cf644bebc189bdb16f1caf3d7c47835d7c287 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:36 +0100 +Subject: [PATCH 065/116] virtiofsd: add --syslog command-line option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-62-dgilbert@redhat.com> +Patchwork-id: 93509 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 061/112] virtiofsd: add --syslog command-line option +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Sometimes collecting output from stderr is inconvenient or does not fit +within the overall logging architecture. Add syslog(3) support for +cases where stderr cannot be used. + +Signed-off-by: Stefan Hajnoczi +dgilbert: Reworked as a logging function +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit f185621d41f03a23b55795b89e6584253fa23505) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.h | 1 + + tools/virtiofsd/helper.c | 2 ++ + tools/virtiofsd/passthrough_ll.c | 50 +++++++++++++++++++++++++++++++++++++--- + tools/virtiofsd/seccomp.c | 32 +++++++++++++++++-------- + tools/virtiofsd/seccomp.h | 4 +++- + 5 files changed, 76 insertions(+), 13 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 0d61df8..f2750bc 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1795,6 +1795,7 @@ struct fuse_cmdline_opts { + int show_version; + int show_help; + int print_capabilities; ++ int syslog; + unsigned int max_idle_threads; + }; + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5531425..9692ef9 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -54,6 +54,7 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_HELPER_OPT("subtype=", nodefault_subtype), + FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), ++ FUSE_HELPER_OPT("--syslog", syslog), + FUSE_OPT_END + }; + +@@ -138,6 +139,7 @@ void fuse_cmdline_help(void) + " -V --version print 
version\n" + " --print-capabilities print vhost-user.json\n" + " -d -o debug enable debug output (implies -f)\n" ++ " --syslog log to syslog (default stderr)\n" + " -f foreground operation\n" + " --daemonize run in background\n" + " -o max_idle_threads the maximum number of idle worker " +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index c281d81..0372aca 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -58,6 +58,7 @@ + #include + #include + #include ++#include + #include + + #include "passthrough_helpers.h" +@@ -138,6 +139,7 @@ static const struct fuse_opt lo_opts[] = { + { "norace", offsetof(struct lo_data, norace), 1 }, + FUSE_OPT_END + }; ++static bool use_syslog = false; + + static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); + +@@ -2262,11 +2264,12 @@ static void setup_mounts(const char *source) + * Lock down this process to prevent access to other processes or files outside + * source directory. This reduces the impact of arbitrary code execution bugs. 
+ */ +-static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) ++static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, ++ bool enable_syslog) + { + setup_namespaces(lo, se); + setup_mounts(lo->source); +- setup_seccomp(); ++ setup_seccomp(enable_syslog); + } + + /* Raise the maximum number of open file descriptors */ +@@ -2298,6 +2301,42 @@ static void setup_nofile_rlimit(void) + } + } + ++static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) ++{ ++ if (use_syslog) { ++ int priority = LOG_ERR; ++ switch (level) { ++ case FUSE_LOG_EMERG: ++ priority = LOG_EMERG; ++ break; ++ case FUSE_LOG_ALERT: ++ priority = LOG_ALERT; ++ break; ++ case FUSE_LOG_CRIT: ++ priority = LOG_CRIT; ++ break; ++ case FUSE_LOG_ERR: ++ priority = LOG_ERR; ++ break; ++ case FUSE_LOG_WARNING: ++ priority = LOG_WARNING; ++ break; ++ case FUSE_LOG_NOTICE: ++ priority = LOG_NOTICE; ++ break; ++ case FUSE_LOG_INFO: ++ priority = LOG_INFO; ++ break; ++ case FUSE_LOG_DEBUG: ++ priority = LOG_DEBUG; ++ break; ++ } ++ vsyslog(priority, fmt, ap); ++ } else { ++ vfprintf(stderr, fmt, ap); ++ } ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2336,6 +2375,11 @@ int main(int argc, char *argv[]) + if (fuse_parse_cmdline(&args, &opts) != 0) { + return 1; + } ++ fuse_set_log_func(log_func); ++ use_syslog = opts.syslog; ++ if (use_syslog) { ++ openlog("virtiofsd", LOG_PID, LOG_DAEMON); ++ } + if (opts.show_help) { + printf("usage: %s [options]\n\n", argv[0]); + fuse_cmdline_help(); +@@ -2424,7 +2468,7 @@ int main(int argc, char *argv[]) + /* Must be before sandbox since it wants /proc */ + setup_capng(); + +- setup_sandbox(&lo, se); ++ setup_sandbox(&lo, se, opts.syslog); + + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); +diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c +index 691fb63..2d9d4a7 100644 +--- a/tools/virtiofsd/seccomp.c ++++ 
b/tools/virtiofsd/seccomp.c +@@ -107,11 +107,28 @@ static const int syscall_whitelist[] = { + SCMP_SYS(writev), + }; + +-void setup_seccomp(void) ++/* Syscalls used when --syslog is enabled */ ++static const int syscall_whitelist_syslog[] = { ++ SCMP_SYS(sendto), ++}; ++ ++static void add_whitelist(scmp_filter_ctx ctx, const int syscalls[], size_t len) + { +- scmp_filter_ctx ctx; + size_t i; + ++ for (i = 0; i < len; i++) { ++ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscalls[i], 0) != 0) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d failed\n", ++ syscalls[i]); ++ exit(1); ++ } ++ } ++} ++ ++void setup_seccomp(bool enable_syslog) ++{ ++ scmp_filter_ctx ctx; ++ + #ifdef SCMP_ACT_KILL_PROCESS + ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); + /* Handle a newer libseccomp but an older kernel */ +@@ -126,13 +143,10 @@ void setup_seccomp(void) + exit(1); + } + +- for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) { +- if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, +- syscall_whitelist[i], 0) != 0) { +- fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d", +- syscall_whitelist[i]); +- exit(1); +- } ++ add_whitelist(ctx, syscall_whitelist, G_N_ELEMENTS(syscall_whitelist)); ++ if (enable_syslog) { ++ add_whitelist(ctx, syscall_whitelist_syslog, ++ G_N_ELEMENTS(syscall_whitelist_syslog)); + } + + /* libvhost-user calls this for post-copy migration, we don't need it */ +diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h +index 86bce72..d47c8ea 100644 +--- a/tools/virtiofsd/seccomp.h ++++ b/tools/virtiofsd/seccomp.h +@@ -9,6 +9,8 @@ + #ifndef VIRTIOFSD_SECCOMP_H + #define VIRTIOFSD_SECCOMP_H + +-void setup_seccomp(void); ++#include ++ ++void setup_seccomp(bool enable_syslog); + + #endif /* VIRTIOFSD_SECCOMP_H */ +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch b/SOURCES/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch new file mode 100644 index 0000000..0241a9d --- /dev/null +++ 
b/SOURCES/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch @@ -0,0 +1,106 @@ +From 3dbfb932288eb5a55dfdc0eebca7e4c7f0cf6f33 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:22 +0100 +Subject: [PATCH 111/116] virtiofsd: add --thread-pool-size=NUM option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-108-dgilbert@redhat.com> +Patchwork-id: 93561 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 107/112] virtiofsd: add --thread-pool-size=NUM option +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Add an option to control the size of the thread pool. Requests are now +processed in parallel by default. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 951b3120dbc971f08681e1d860360e4a1e638902) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 1 + + tools/virtiofsd/fuse_lowlevel.c | 7 ++++++- + tools/virtiofsd/fuse_virtio.c | 5 +++-- + 3 files changed, 10 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 1447d86..4e47e58 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -72,6 +72,7 @@ struct fuse_session { + int vu_listen_fd; + int vu_socketfd; + struct fv_VuDev *virtio_dev; ++ int thread_pool_size; + }; + + struct fuse_chan { +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 79a4031..de2e2e0 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -28,6 +28,7 @@ + #include + #include + ++#define THREAD_POOL_SIZE 64 + + #define OFFSET_MAX 0x7fffffffffffffffLL + +@@ -2519,6 +2520,7 @@ static const struct fuse_opt fuse_ll_opts[] = { + 
LL_OPTION("allow_root", deny_others, 1), + LL_OPTION("--socket-path=%s", vu_socket_path, 0), + LL_OPTION("--fd=%d", vu_listen_fd, 0), ++ LL_OPTION("--thread-pool-size=%d", thread_pool_size, 0), + FUSE_OPT_END + }; + +@@ -2537,7 +2539,9 @@ void fuse_lowlevel_help(void) + printf( + " -o allow_root allow access by root\n" + " --socket-path=PATH path for the vhost-user socket\n" +- " --fd=FDNUM fd number of vhost-user socket\n"); ++ " --fd=FDNUM fd number of vhost-user socket\n" ++ " --thread-pool-size=NUM thread pool size limit (default %d)\n", ++ THREAD_POOL_SIZE); + } + + void fuse_session_destroy(struct fuse_session *se) +@@ -2591,6 +2595,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + } + se->fd = -1; + se->vu_listen_fd = -1; ++ se->thread_pool_size = THREAD_POOL_SIZE; + se->conn.max_write = UINT_MAX; + se->conn.max_readahead = UINT_MAX; + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 0dcf2ef..9f65823 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -572,10 +572,11 @@ static void *fv_queue_thread(void *opaque) + struct fv_QueueInfo *qi = opaque; + struct VuDev *dev = &qi->virtio_dev->dev; + struct VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ struct fuse_session *se = qi->virtio_dev->se; + GThreadPool *pool; + +- pool = g_thread_pool_new(fv_queue_worker, qi, 1 /* TODO max_threads */, +- TRUE, NULL); ++ pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, TRUE, ++ NULL); + if (!pool) { + fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); + return NULL; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-add-vhost-user.json-file.patch b/SOURCES/kvm-virtiofsd-add-vhost-user.json-file.patch new file mode 100644 index 0000000..a24b24f --- /dev/null +++ b/SOURCES/kvm-virtiofsd-add-vhost-user.json-file.patch @@ -0,0 +1,73 @@ +From 77eb3258e76a1ac240503572d4f41d45cb832ba2 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:09 +0100 +Subject: [PATCH 038/116] virtiofsd: add vhost-user.json file +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-35-dgilbert@redhat.com> +Patchwork-id: 93490 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 034/112] virtiofsd: add vhost-user.json file +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Install a vhost-user.json file describing virtiofsd. This allows +libvirt and other management tools to enumerate vhost-user backend +programs. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 315616ed50ba15a5d7236ade8a402a93898202de) +Signed-off-by: Miroslav Rezanina +--- + .gitignore | 1 + + Makefile | 1 + + tools/virtiofsd/50-qemu-virtiofsd.json.in | 5 +++++ + 3 files changed, 7 insertions(+) + create mode 100644 tools/virtiofsd/50-qemu-virtiofsd.json.in + +diff --git a/.gitignore b/.gitignore +index aefad32..d7a4f99 100644 +--- a/.gitignore ++++ b/.gitignore +@@ -6,6 +6,7 @@ + /config-target.* + /config.status + /config-temp ++/tools/virtiofsd/50-qemu-virtiofsd.json + /elf2dmp + /trace-events-all + /trace/generated-events.h +diff --git a/Makefile b/Makefile +index 1526775..0e9755d 100644 +--- a/Makefile ++++ b/Makefile +@@ -332,6 +332,7 @@ endif + + ifdef CONFIG_LINUX + HELPERS-y += virtiofsd$(EXESUF) ++vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json + endif + + # Sphinx does not allow building manuals into the same directory as +diff --git a/tools/virtiofsd/50-qemu-virtiofsd.json.in b/tools/virtiofsd/50-qemu-virtiofsd.json.in +new file mode 100644 +index 0000000..9bcd86f +--- /dev/null ++++ b/tools/virtiofsd/50-qemu-virtiofsd.json.in +@@ -0,0 +1,5 @@ ++{ ++ "description": "QEMU virtiofsd vhost-user-fs", ++ 
"type": "fs", ++ "binary": "@libexecdir@/virtiofsd" ++} +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-cap-ng-helpers.patch b/SOURCES/kvm-virtiofsd-cap-ng-helpers.patch new file mode 100644 index 0000000..305745d --- /dev/null +++ b/SOURCES/kvm-virtiofsd-cap-ng-helpers.patch @@ -0,0 +1,175 @@ +From f62613d8058bcb60b26727d980a37537103b0033 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:32 +0100 +Subject: [PATCH 061/116] virtiofsd: cap-ng helpers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-58-dgilbert@redhat.com> +Patchwork-id: 93512 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 057/112] virtiofsd: cap-ng helpers +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +libcap-ng reads /proc during capng_get_caps_process, and virtiofsd's +sandboxing doesn't have /proc mounted; thus we have to do the +caps read before we sandbox it and save/restore the state. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 2405f3c0d19eb4d516a88aa4e5c54e5f9c6bbea3) +Signed-off-by: Miroslav Rezanina +--- + Makefile | 4 +-- + tools/virtiofsd/passthrough_ll.c | 72 ++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 74 insertions(+), 2 deletions(-) + +diff --git a/Makefile b/Makefile +index 6879a06..ff05c30 100644 +--- a/Makefile ++++ b/Makefile +@@ -330,7 +330,7 @@ endif + endif + endif + +-ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) ++ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) + HELPERS-y += virtiofsd$(EXESUF) + vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json + endif +@@ -682,7 +682,7 @@ rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) + $(call LINK, $^) + + # relies on Linux-specific syscalls +-ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) ++ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) + virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) + $(call LINK, $^) + endif +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index bd8925b..97e7c75 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -39,6 +39,7 @@ + #include "fuse_virtio.h" + #include "fuse_lowlevel.h" + #include ++#include + #include + #include + #include +@@ -139,6 +140,13 @@ static const struct fuse_opt lo_opts[] = { + + static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); + ++static struct { ++ pthread_mutex_t mutex; ++ void *saved; ++} cap; ++/* That we loaded cap-ng in the current thread from the saved */ ++static __thread bool cap_loaded = 0; ++ + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); + + static int is_dot_or_dotdot(const char *name) +@@ -162,6 +170,37 @@ static struct lo_data *lo_data(fuse_req_t req) + return (struct lo_data *)fuse_req_userdata(req); + } + ++/* ++ * Load capng's state from our saved state if the current thread ++ * hadn't previously been loaded. 
++ * returns 0 on success ++ */ ++static int load_capng(void) ++{ ++ if (!cap_loaded) { ++ pthread_mutex_lock(&cap.mutex); ++ capng_restore_state(&cap.saved); ++ /* ++ * restore_state free's the saved copy ++ * so make another. ++ */ ++ cap.saved = capng_save_state(); ++ if (!cap.saved) { ++ fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); ++ return -EINVAL; ++ } ++ pthread_mutex_unlock(&cap.mutex); ++ ++ /* ++ * We want to use the loaded state for our pid, ++ * not the original ++ */ ++ capng_setpid(syscall(SYS_gettid)); ++ cap_loaded = true; ++ } ++ return 0; ++} ++ + static void lo_map_init(struct lo_map *map) + { + map->elems = NULL; +@@ -2024,6 +2063,35 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) + } + + /* ++ * Capture the capability state, we'll need to restore this for individual ++ * threads later; see load_capng. ++ */ ++static void setup_capng(void) ++{ ++ /* Note this accesses /proc so has to happen before the sandbox */ ++ if (capng_get_caps_process()) { ++ fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n"); ++ exit(1); ++ } ++ pthread_mutex_init(&cap.mutex, NULL); ++ pthread_mutex_lock(&cap.mutex); ++ cap.saved = capng_save_state(); ++ if (!cap.saved) { ++ fuse_log(FUSE_LOG_ERR, "capng_save_state\n"); ++ exit(1); ++ } ++ pthread_mutex_unlock(&cap.mutex); ++} ++ ++static void cleanup_capng(void) ++{ ++ free(cap.saved); ++ cap.saved = NULL; ++ pthread_mutex_destroy(&cap.mutex); ++} ++ ++ ++/* + * Make the source directory our root so symlinks cannot escape and no other + * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. 
+ */ +@@ -2216,12 +2284,16 @@ int main(int argc, char *argv[]) + + fuse_daemonize(opts.foreground); + ++ /* Must be before sandbox since it wants /proc */ ++ setup_capng(); ++ + setup_sandbox(&lo, se); + + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); + + fuse_session_unmount(se); ++ cleanup_capng(); + err_out3: + fuse_remove_signal_handlers(se); + err_out2: +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch b/SOURCES/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch new file mode 100644 index 0000000..caa4560 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch @@ -0,0 +1,1111 @@ +From d6a0067e6c08523a8f605f775be980eaf0a23690 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:23 +0100 +Subject: [PATCH 052/116] virtiofsd: check input buffer size in fuse_lowlevel.c + ops +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-49-dgilbert@redhat.com> +Patchwork-id: 93503 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 048/112] virtiofsd: check input buffer size in fuse_lowlevel.c ops +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Each FUSE operation involves parsing the input buffer. Currently the +code assumes the input buffer is large enough for the expected +arguments. This patch uses fuse_mbuf_iter to check the size. + +Most operations are simple to convert. Some are more complicated due to +variable-length inputs or different sizes depending on the protocol +version. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 70995754416eb4491c31607fe380a83cfd25a087) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 581 +++++++++++++++++++++++++++++++--------- + 1 file changed, 456 insertions(+), 125 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 611e8b0..02e1d83 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -27,7 +28,6 @@ + #include + + +-#define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) + #define OFFSET_MAX 0x7fffffffffffffffLL + + struct fuse_pollhandle { +@@ -706,9 +706,14 @@ int fuse_reply_lseek(fuse_req_t req, off_t off) + return send_reply_ok(req, &arg, sizeof(arg)); + } + +-static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ if (!name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.lookup) { + req->se->op.lookup(req, nodeid, name); +@@ -717,9 +722,16 @@ static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_forget(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_forget_in *arg = (struct fuse_forget_in *)inarg; ++ struct fuse_forget_in *arg; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.forget) { + req->se->op.forget(req, nodeid, arg->nlookup); +@@ -729,20 +741,48 @@ static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + + static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, +- const void 
*inarg) ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_batch_forget_in *arg = (void *)inarg; +- struct fuse_forget_one *param = (void *)PARAM(arg); +- unsigned int i; ++ struct fuse_batch_forget_in *arg; ++ struct fuse_forget_data *forgets; ++ size_t scount; + + (void)nodeid; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_none(req); ++ return; ++ } ++ ++ /* ++ * Prevent integer overflow. The compiler emits the following warning ++ * unless we use the scount local variable: ++ * ++ * error: comparison is always false due to limited range of data type ++ * [-Werror=type-limits] ++ * ++ * This may be true on 64-bit hosts but we need this check for 32-bit ++ * hosts. ++ */ ++ scount = arg->count; ++ if (scount > SIZE_MAX / sizeof(forgets[0])) { ++ fuse_reply_none(req); ++ return; ++ } ++ ++ forgets = fuse_mbuf_iter_advance(iter, arg->count * sizeof(forgets[0])); ++ if (!forgets) { ++ fuse_reply_none(req); ++ return; ++ } ++ + if (req->se->op.forget_multi) { +- req->se->op.forget_multi(req, arg->count, +- (struct fuse_forget_data *)param); ++ req->se->op.forget_multi(req, arg->count, forgets); + } else if (req->se->op.forget) { ++ unsigned int i; ++ + for (i = 0; i < arg->count; i++) { +- struct fuse_forget_one *forget = ¶m[i]; + struct fuse_req *dummy_req; + + dummy_req = fuse_ll_alloc_req(req->se); +@@ -754,7 +794,7 @@ static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, + dummy_req->ctx = req->ctx; + dummy_req->ch = NULL; + +- req->se->op.forget(dummy_req, forget->nodeid, forget->nlookup); ++ req->se->op.forget(dummy_req, forgets[i].ino, forgets[i].nlookup); + } + fuse_reply_none(req); + } else { +@@ -762,12 +802,19 @@ static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, + } + } + +-static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { + struct fuse_file_info *fip = NULL; + struct 
fuse_file_info fi; + +- struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; ++ struct fuse_getattr_in *arg; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (arg->getattr_flags & FUSE_GETATTR_FH) { + memset(&fi, 0, sizeof(fi)); +@@ -782,14 +829,21 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_setattr_in *arg = (struct fuse_setattr_in *)inarg; +- + if (req->se->op.setattr) { ++ struct fuse_setattr_in *arg; + struct fuse_file_info *fi = NULL; + struct fuse_file_info fi_store; + struct stat stbuf; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&stbuf, 0, sizeof(stbuf)); + convert_attr(arg, &stbuf); + if (arg->valid & FATTR_FH) { +@@ -810,9 +864,16 @@ static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_access(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_access_in *arg = (struct fuse_access_in *)inarg; ++ struct fuse_access_in *arg; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.access) { + req->se->op.access(req, nodeid, arg->mask); +@@ -821,9 +882,10 @@ static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- (void)inarg; ++ (void)iter; + + if (req->se->op.readlink) { + req->se->op.readlink(req, nodeid); 
+@@ -832,10 +894,18 @@ static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; +- char *name = PARAM(arg); ++ struct fuse_mknod_in *arg; ++ const char *name; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + req->ctx.umask = arg->umask; + +@@ -846,22 +916,37 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; ++ struct fuse_mkdir_in *arg; ++ const char *name; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + req->ctx.umask = arg->umask; + + if (req->se->op.mkdir) { +- req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); ++ req->se->op.mkdir(req, nodeid, name, arg->mode); + } else { + fuse_reply_err(req, ENOSYS); + } + } + +-static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.unlink) { + req->se->op.unlink(req, nodeid, name); +@@ -870,9 +955,15 @@ static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void 
*inarg) ++static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.rmdir) { + req->se->op.rmdir(req, nodeid, name); +@@ -881,10 +972,16 @@ static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; +- char *linkname = ((char *)inarg) + strlen((char *)inarg) + 1; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ const char *linkname = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!name || !linkname) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.symlink) { + req->se->op.symlink(req, linkname, nodeid, name); +@@ -893,11 +990,20 @@ static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_rename(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_rename_in *arg = (struct fuse_rename_in *)inarg; +- char *oldname = PARAM(arg); +- char *newname = oldname + strlen(oldname) + 1; ++ struct fuse_rename_in *arg; ++ const char *oldname; ++ const char *newname; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ oldname = fuse_mbuf_iter_advance_str(iter); ++ newname = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !oldname || !newname) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.rename) { + req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0); +@@ -906,11 +1012,20 @@ static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, 
const void *inarg) ++static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_rename2_in *arg = (struct fuse_rename2_in *)inarg; +- char *oldname = PARAM(arg); +- char *newname = oldname + strlen(oldname) + 1; ++ struct fuse_rename2_in *arg; ++ const char *oldname; ++ const char *newname; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ oldname = fuse_mbuf_iter_advance_str(iter); ++ newname = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !oldname || !newname) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.rename) { + req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, +@@ -920,24 +1035,38 @@ static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_link(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_link_in *arg = (struct fuse_link_in *)inarg; ++ struct fuse_link_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.link) { +- req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); ++ req->se->op.link(req, arg->oldnodeid, nodeid, name); + } else { + fuse_reply_err(req, ENOSYS); + } + } + +-static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_create(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_create_in *arg = (struct fuse_create_in *)inarg; +- + if (req->se->op.create) { ++ struct fuse_create_in *arg; + struct fuse_file_info fi; +- char *name = PARAM(arg); ++ const char *name; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + memset(&fi, 0, sizeof(fi)); + 
fi.flags = arg->flags; +@@ -950,11 +1079,18 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_open(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_open_in *arg = (struct fuse_open_in *)inarg; ++ struct fuse_open_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + +@@ -965,13 +1101,15 @@ static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_read(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; +- + if (req->se->op.read) { ++ struct fuse_read_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.lock_owner = arg->lock_owner; +@@ -982,11 +1120,24 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_write(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_write_in *arg = (struct fuse_write_in *)inarg; ++ struct fuse_write_in *arg; + struct fuse_file_info fi; +- char *param; ++ const char *param; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ param = fuse_mbuf_iter_advance(iter, arg->size); ++ if (!param) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +@@ -994,7 +1145,6 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + + 
fi.lock_owner = arg->lock_owner; + fi.flags = arg->flags; +- param = PARAM(arg); + + if (req->se->op.write) { + req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); +@@ -1052,11 +1202,18 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, + se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); + } + +-static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_flush(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_flush_in *arg = (struct fuse_flush_in *)inarg; ++ struct fuse_flush_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.flush = 1; +@@ -1069,19 +1226,26 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_release(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_release_in *arg = (struct fuse_release_in *)inarg; ++ struct fuse_release_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + fi.fh = arg->fh; + fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 
1 : 0; + fi.lock_owner = arg->lock_owner; ++ + if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { + fi.flock_release = 1; +- fi.lock_owner = arg->lock_owner; + } + + if (req->se->op.release) { +@@ -1091,11 +1255,19 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; ++ struct fuse_fsync_in *arg; + struct fuse_file_info fi; +- int datasync = arg->fsync_flags & 1; ++ int datasync; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ datasync = arg->fsync_flags & 1; + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +@@ -1111,11 +1283,18 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_open_in *arg = (struct fuse_open_in *)inarg; ++ struct fuse_open_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + +@@ -1126,11 +1305,18 @@ static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; ++ struct fuse_read_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + 
fi.fh = arg->fh; + +@@ -1141,11 +1327,18 @@ static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; ++ struct fuse_read_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +@@ -1156,11 +1349,18 @@ static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_release_in *arg = (struct fuse_release_in *)inarg; ++ struct fuse_release_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + fi.fh = arg->fh; +@@ -1172,11 +1372,19 @@ static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; ++ struct fuse_fsync_in *arg; + struct fuse_file_info fi; +- int datasync = arg->fsync_flags & 1; ++ int datasync; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ datasync = arg->fsync_flags & 1; + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +@@ -1188,10 +1396,11 @@ static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) 
+ } + } + +-static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { + (void)nodeid; +- (void)inarg; ++ (void)iter; + + if (req->se->op.statfs) { + req->se->op.statfs(req, nodeid); +@@ -1204,11 +1413,25 @@ static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *)inarg; +- char *name = PARAM(arg); +- char *value = name + strlen(name) + 1; ++ struct fuse_setxattr_in *arg; ++ const char *name; ++ const char *value; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ value = fuse_mbuf_iter_advance(iter, arg->size); ++ if (!value) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.setxattr) { + req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags); +@@ -1217,20 +1440,36 @@ static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; ++ struct fuse_getxattr_in *arg; ++ const char *name; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.getxattr) { +- req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); ++ req->se->op.getxattr(req, nodeid, name, arg->size); + } else { + fuse_reply_err(req, ENOSYS); + } + } + +-static void 
do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; ++ struct fuse_getxattr_in *arg; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.listxattr) { + req->se->op.listxattr(req, nodeid, arg->size); +@@ -1239,9 +1478,15 @@ static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.removexattr) { + req->se->op.removexattr(req, nodeid, name); +@@ -1265,12 +1510,19 @@ static void convert_fuse_file_lock(struct fuse_file_lock *fl, + flock->l_pid = fl->pid; + } + +-static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; ++ struct fuse_lk_in *arg; + struct fuse_file_info fi; + struct flock flock; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.lock_owner = arg->owner; +@@ -1284,12 +1536,18 @@ static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + + static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg, int sleep) ++ struct fuse_mbuf_iter *iter, int sleep) + { +- struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; ++ struct fuse_lk_in *arg; + struct fuse_file_info fi; + struct flock 
flock; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.lock_owner = arg->owner; +@@ -1327,14 +1585,16 @@ static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, + } + } + +-static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- do_setlk_common(req, nodeid, inarg, 0); ++ do_setlk_common(req, nodeid, iter, 0); + } + +-static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- do_setlk_common(req, nodeid, inarg, 1); ++ do_setlk_common(req, nodeid, iter, 1); + } + + static int find_interrupted(struct fuse_session *se, struct fuse_req *req) +@@ -1379,12 +1639,20 @@ static int find_interrupted(struct fuse_session *se, struct fuse_req *req) + return 0; + } + +-static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *)inarg; ++ struct fuse_interrupt_in *arg; + struct fuse_session *se = req->se; + + (void)nodeid; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", + (unsigned long long)arg->unique); +@@ -1425,9 +1693,15 @@ static struct fuse_req *check_interrupt(struct fuse_session *se, + } + } + +-static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_bmap_in *arg = (struct fuse_bmap_in *)inarg; ++ struct fuse_bmap_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ ++ if 
(!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.bmap) { + req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); +@@ -1436,18 +1710,34 @@ static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *)inarg; +- unsigned int flags = arg->flags; +- void *in_buf = arg->in_size ? PARAM(arg) : NULL; ++ struct fuse_ioctl_in *arg; ++ unsigned int flags; ++ void *in_buf = NULL; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ flags = arg->flags; + if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { + fuse_reply_err(req, ENOTTY); + return; + } + ++ if (arg->in_size) { ++ in_buf = fuse_mbuf_iter_advance(iter, arg->in_size); ++ if (!in_buf) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +@@ -1468,11 +1758,18 @@ void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) + free(ph); + } + +-static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_poll(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_poll_in *arg = (struct fuse_poll_in *)inarg; ++ struct fuse_poll_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.poll_events = arg->events; +@@ -1496,11 +1793,18 @@ static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, ++ 
struct fuse_mbuf_iter *iter) + { +- struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *)inarg; ++ struct fuse_fallocate_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +@@ -1513,12 +1817,17 @@ static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + + static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, +- const void *inarg) ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_copy_file_range_in *arg = +- (struct fuse_copy_file_range_in *)inarg; ++ struct fuse_copy_file_range_in *arg; + struct fuse_file_info fi_in, fi_out; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi_in, 0, sizeof(fi_in)); + fi_in.fh = arg->fh_in; + +@@ -1535,11 +1844,17 @@ static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, + } + } + +-static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_lseek_in *arg = (struct fuse_lseek_in *)inarg; ++ struct fuse_lseek_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +@@ -1550,15 +1865,33 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_init(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_init_in *arg = (struct fuse_init_in *)inarg; ++ size_t compat_size = offsetof(struct fuse_init_in, max_readahead); ++ struct fuse_init_in *arg; + struct fuse_init_out outarg; + struct fuse_session *se = req->se; + 
size_t bufsize = se->bufsize; + size_t outargsize = sizeof(outarg); + + (void)nodeid; ++ ++ /* First consume the old fields... */ ++ arg = fuse_mbuf_iter_advance(iter, compat_size); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ /* ...and now consume the new fields. */ ++ if (arg->major == 7 && arg->minor >= 6) { ++ if (!fuse_mbuf_iter_advance(iter, sizeof(*arg) - compat_size)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ } ++ + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); + if (arg->major == 7 && arg->minor >= 6) { +@@ -1791,12 +2124,13 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + send_reply_ok(req, &outarg, outargsize); + } + +-static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { + struct fuse_session *se = req->se; + + (void)nodeid; +- (void)inarg; ++ (void)iter; + + se->got_destroy = 1; + if (se->op.destroy) { +@@ -1976,7 +2310,7 @@ int fuse_req_interrupted(fuse_req_t req) + } + + static struct { +- void (*func)(fuse_req_t, fuse_ino_t, const void *); ++ void (*func)(fuse_req_t, fuse_ino_t, struct fuse_mbuf_iter *); + const char *name; + } fuse_ll_ops[] = { + [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, +@@ -2060,7 +2394,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, + const struct fuse_buf *buf = bufv->buf; + struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf); + struct fuse_in_header *in; +- const void *inarg; + struct fuse_req *req; + int err; + +@@ -2138,13 +2471,11 @@ void fuse_session_process_buf_int(struct fuse_session *se, + } + } + +- inarg = (void *)&in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) { + do_write_buf(req, in->nodeid, &iter, bufv); + } else { +- fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); ++ fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter); + } +- + return; + + reply_err: +-- 
+1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch b/SOURCES/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch new file mode 100644 index 0000000..b6de0a9 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch @@ -0,0 +1,82 @@ +From 99ff67682ef7c5659bdc9836008541861ae313d5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:56 +0100 +Subject: [PATCH 085/116] virtiofsd: cleanup allocated resource in se +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-82-dgilbert@redhat.com> +Patchwork-id: 93533 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 081/112] virtiofsd: cleanup allocated resource in se +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +This cleans up unfreed resources in se on quiting, including +se->virtio_dev, se->vu_socket_path, se->vu_socketfd. + +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 61cfc44982e566c33b9d5df17858e4d5ae373873) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 7 +++++++ + tools/virtiofsd/fuse_virtio.c | 7 +++++++ + tools/virtiofsd/fuse_virtio.h | 2 +- + 3 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 65f91da..440508a 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2532,6 +2532,13 @@ void fuse_session_destroy(struct fuse_session *se) + if (se->fd != -1) { + close(se->fd); + } ++ ++ if (se->vu_socket_path) { ++ virtio_session_close(se); ++ free(se->vu_socket_path); ++ se->vu_socket_path = NULL; ++ } ++ + free(se); + } + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 7a8774a..e7bd772 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -833,3 +833,10 @@ int virtio_session_mount(struct fuse_session *se) + + return 0; + } ++ ++void virtio_session_close(struct fuse_session *se) ++{ ++ close(se->vu_socketfd); ++ free(se->virtio_dev); ++ se->virtio_dev = NULL; ++} +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +index cc676b9..1116840 100644 +--- a/tools/virtiofsd/fuse_virtio.h ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -19,7 +19,7 @@ + struct fuse_session; + + int virtio_session_mount(struct fuse_session *se); +- ++void virtio_session_close(struct fuse_session *se); + int virtio_loop(struct fuse_session *se); + + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch b/SOURCES/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch new file mode 100644 index 0000000..d01b000 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch @@ -0,0 +1,99 @@ +From e00543b0384fba61a9c7274c73e11a25e7ab2946 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:13 +0100 +Subject: [PATCH 102/116] virtiofsd: convert more fprintf and perror to use + fuse log infra +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-99-dgilbert@redhat.com> +Patchwork-id: 93552 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 098/112] virtiofsd: convert more fprintf and perror to use fuse log infra +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Eryu Guan + +Signed-off-by: Eryu Guan +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Misono Tomohiro +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit fc1aed0bf96259d0b46b1cfea7497b7762c4ee3d) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_signals.c | 7 +++++-- + tools/virtiofsd/helper.c | 9 ++++++--- + 2 files changed, 11 insertions(+), 5 deletions(-) + +diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c +index dc7c8ac..f18625b 100644 +--- a/tools/virtiofsd/fuse_signals.c ++++ b/tools/virtiofsd/fuse_signals.c +@@ -12,6 +12,7 @@ + #include "fuse_i.h" + #include "fuse_lowlevel.h" + ++#include + #include + #include + #include +@@ -47,13 +48,15 @@ static int set_one_signal_handler(int sig, void (*handler)(int), int remove) + sa.sa_flags = 0; + + if (sigaction(sig, NULL, &old_sa) == -1) { +- perror("fuse: cannot get old signal handler"); ++ fuse_log(FUSE_LOG_ERR, "fuse: cannot get old signal handler: %s\n", ++ strerror(errno)); + return -1; + } + + if (old_sa.sa_handler == (remove ? 
handler : SIG_DFL) && + sigaction(sig, &sa, NULL) == -1) { +- perror("fuse: cannot set signal handler"); ++ fuse_log(FUSE_LOG_ERR, "fuse: cannot set signal handler: %s\n", ++ strerror(errno)); + return -1; + } + return 0; +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 33749bf..f98d8f2 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -208,7 +208,8 @@ int fuse_daemonize(int foreground) + char completed; + + if (pipe(waiter)) { +- perror("fuse_daemonize: pipe"); ++ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: pipe: %s\n", ++ strerror(errno)); + return -1; + } + +@@ -218,7 +219,8 @@ int fuse_daemonize(int foreground) + */ + switch (fork()) { + case -1: +- perror("fuse_daemonize: fork"); ++ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: fork: %s\n", ++ strerror(errno)); + return -1; + case 0: + break; +@@ -228,7 +230,8 @@ int fuse_daemonize(int foreground) + } + + if (setsid() == -1) { +- perror("fuse_daemonize: setsid"); ++ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: setsid: %s\n", ++ strerror(errno)); + return -1; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch b/SOURCES/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch new file mode 100644 index 0000000..8c1022a --- /dev/null +++ b/SOURCES/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch @@ -0,0 +1,57 @@ +From 8e6473e906dfc7d2a62abaf1ec80ff461e4d201d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:12 +0100 +Subject: [PATCH 101/116] virtiofsd: do not always set FUSE_FLOCK_LOCKS +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-98-dgilbert@redhat.com> +Patchwork-id: 93551 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 097/112] virtiofsd: do not always set FUSE_FLOCK_LOCKS +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Peng Tao + +Right now we always enable it regardless of given commandlines. +Fix it by setting the flag relying on the lo->flock bit. + +Signed-off-by: Peng Tao +Reviewed-by: Misono Tomohiro +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e468d4af5f5192ab33283464a9f6933044ce47f7) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index ab16135..ccbbec1 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -546,9 +546,14 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); + conn->want |= FUSE_CAP_WRITEBACK_CACHE; + } +- if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); +- conn->want |= FUSE_CAP_FLOCK_LOCKS; ++ if (conn->capable & FUSE_CAP_FLOCK_LOCKS) { ++ if (lo->flock) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); ++ conn->want |= FUSE_CAP_FLOCK_LOCKS; ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling flock locks\n"); ++ conn->want &= ~FUSE_CAP_FLOCK_LOCKS; ++ } + } + + if (conn->capable & FUSE_CAP_POSIX_LOCKS) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-do_read-missing-NULL-check.patch b/SOURCES/kvm-virtiofsd-do_read-missing-NULL-check.patch new file mode 100644 index 0000000..4f8e5ef --- /dev/null +++ b/SOURCES/kvm-virtiofsd-do_read-missing-NULL-check.patch @@ -0,0 +1,49 @@ +From 
901c005299b0316bbca7bc190de56f6c7a2a9880 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:11 +0000 +Subject: [PATCH 15/18] virtiofsd: do_read missing NULL check +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-5-dgilbert@redhat.com> +Patchwork-id: 94127 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/7] virtiofsd: do_read missing NULL check +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: "Dr. David Alan Gilbert" + +Missing a NULL check if the argument fetch fails. + +Fixes: Coverity CID 1413119 +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 99ce9a7e60fd12b213b985343ff8fcc172de59fd) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/fuse_lowlevel.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 01c418a..704c036 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1116,6 +1116,10 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, + struct fuse_file_info fi; + + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch b/SOURCES/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch new file mode 100644 index 0000000..3279a5e --- /dev/null +++ b/SOURCES/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch @@ -0,0 +1,47 @@ +From bc127914b29f2e4163bc7ca786e04ed955d96016 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:00 +0100 +Subject: [PATCH 089/116] virtiofsd: enable PARALLEL_DIROPS during INIT +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-86-dgilbert@redhat.com> +Patchwork-id: 93539 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 085/112] virtiofsd: enable PARALLEL_DIROPS during INIT +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +lookup is a RO operations, PARALLEL_DIROPS can be enabled. + +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit b7ed733a3841c4d489d3bd6ca7ed23c84db119c2) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index aac282f..70568d2 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2062,6 +2062,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + if (se->conn.want & FUSE_CAP_ASYNC_READ) { + outarg.flags |= FUSE_ASYNC_READ; + } ++ if (se->conn.want & FUSE_CAP_PARALLEL_DIROPS) { ++ outarg.flags |= FUSE_PARALLEL_DIROPS; ++ } + if (se->conn.want & FUSE_CAP_POSIX_LOCKS) { + outarg.flags |= FUSE_POSIX_LOCKS; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch b/SOURCES/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch new file mode 100644 index 0000000..96f91a1 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch @@ -0,0 +1,111 @@ +From 983b383bc4a92a9f7ecff0332cadefed2f58f502 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:50 +0100 +Subject: [PATCH 079/116] virtiofsd: extract root inode init into setup_root() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-76-dgilbert@redhat.com> +Patchwork-id: 93527 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 075/112] virtiofsd: extract root inode init into setup_root() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Inititialize the root inode in a single place. + +Signed-off-by: Miklos Szeredi +Signed-off-by: Stefan Hajnoczi +dgilbert: +with fix suggested by Misono Tomohiro +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 3ca8a2b1c83eb185c232a4e87abbb65495263756) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 35 +++++++++++++++++++++++++---------- + 1 file changed, 25 insertions(+), 10 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 33bfb4d..9e7191e 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2351,6 +2351,30 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + } + } + ++static void setup_root(struct lo_data *lo, struct lo_inode *root) ++{ ++ int fd, res; ++ struct stat stat; ++ ++ fd = open("/", O_PATH); ++ if (fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", lo->source); ++ exit(1); ++ } ++ ++ res = fstatat(fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source); ++ exit(1); ++ } ++ ++ root->is_symlink = false; ++ root->fd = fd; ++ root->ino = stat.st_ino; ++ root->dev = stat.st_dev; ++ root->refcount = 2; ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = 
FUSE_ARGS_INIT(argc, argv); +@@ -2426,8 +2450,6 @@ int main(int argc, char *argv[]) + if (lo.debug) { + current_log_level = FUSE_LOG_DEBUG; + } +- lo.root.refcount = 2; +- + if (lo.source) { + struct stat stat; + int res; +@@ -2446,7 +2468,6 @@ int main(int argc, char *argv[]) + } else { + lo.source = "/"; + } +- lo.root.is_symlink = false; + if (!lo.timeout_set) { + switch (lo.cache) { + case CACHE_NEVER: +@@ -2466,13 +2487,6 @@ int main(int argc, char *argv[]) + exit(1); + } + +- lo.root.fd = open(lo.source, O_PATH); +- +- if (lo.root.fd == -1) { +- fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); +- exit(1); +- } +- + se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); + if (se == NULL) { + goto err_out1; +@@ -2495,6 +2509,7 @@ int main(int argc, char *argv[]) + + setup_sandbox(&lo, se, opts.syslog); + ++ setup_root(&lo, &lo.root); + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch b/SOURCES/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch new file mode 100644 index 0000000..4860bec --- /dev/null +++ b/SOURCES/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch @@ -0,0 +1,85 @@ +From b3cd18ab58e331d3610cf00f857d6a945f11a030 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:49 +0100 +Subject: [PATCH 078/116] virtiofsd: fail when parent inode isn't known in + lo_do_lookup() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-75-dgilbert@redhat.com> +Patchwork-id: 93529 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 074/112] virtiofsd: fail when parent inode isn't known in lo_do_lookup() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +The Linux file handle APIs (struct export_operations) can access inodes +that are not attached to parents because path name traversal is not +performed. Refuse if there is no parent in lo_do_lookup(). + +Also clean up lo_do_lookup() while we're here. + +Signed-off-by: Miklos Szeredi +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 9de4fab5995d115f8ebfb41d8d94a866d80a1708) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index de12e75..33bfb4d 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -777,6 +777,15 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct lo_data *lo = lo_data(req); + struct lo_inode *inode, *dir = lo_inode(req, parent); + ++ /* ++ * name_to_handle_at() and open_by_handle_at() can reach here with fuse ++ * mount point in guest, but we don't have its inode info in the ++ * ino_map. 
++ */ ++ if (!dir) { ++ return ENOENT; ++ } ++ + memset(e, 0, sizeof(*e)); + e->attr_timeout = lo->timeout; + e->entry_timeout = lo->timeout; +@@ -786,7 +795,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + name = "."; + } + +- newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); ++ newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW); + if (newfd == -1) { + goto out_err; + } +@@ -796,7 +805,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out_err; + } + +- inode = lo_find(lo_data(req), &e->attr); ++ inode = lo_find(lo, &e->attr); + if (inode) { + close(newfd); + newfd = -1; +@@ -812,6 +821,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + inode->is_symlink = S_ISLNK(e->attr.st_mode); + inode->refcount = 1; + inode->fd = newfd; ++ newfd = -1; + inode->ino = e->attr.st_ino; + inode->dev = e->attr.st_dev; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-fix-error-handling-in-main.patch b/SOURCES/kvm-virtiofsd-fix-error-handling-in-main.patch new file mode 100644 index 0000000..a831992 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-fix-error-handling-in-main.patch @@ -0,0 +1,63 @@ +From 0ea1c7375d6509367399c706eb9d1e8cf79a5830 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:55 +0100 +Subject: [PATCH 084/116] virtiofsd: fix error handling in main() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-81-dgilbert@redhat.com> +Patchwork-id: 93534 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 080/112] virtiofsd: fix error handling in main() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +Neither fuse_parse_cmdline() nor fuse_opt_parse() goes to the right place +to do cleanup. + +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. 
Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit c6de804670f2255ce776263124c37f3370dc5ac1) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9ed77a1..af050c6 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2443,13 +2443,14 @@ int main(int argc, char *argv[]) + lo_map_init(&lo.fd_map); + + if (fuse_parse_cmdline(&args, &opts) != 0) { +- return 1; ++ goto err_out1; + } + fuse_set_log_func(log_func); + use_syslog = opts.syslog; + if (use_syslog) { + openlog("virtiofsd", LOG_PID, LOG_DAEMON); + } ++ + if (opts.show_help) { + printf("usage: %s [options]\n\n", argv[0]); + fuse_cmdline_help(); +@@ -2468,7 +2469,7 @@ int main(int argc, char *argv[]) + } + + if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { +- return 1; ++ goto err_out1; + } + + /* +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch b/SOURCES/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch new file mode 100644 index 0000000..420a8a6 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch @@ -0,0 +1,44 @@ +From 9c291ca8624318613ede6e4174d08cf45aae8384 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:01 +0100 +Subject: [PATCH 090/116] virtiofsd: fix incorrect error handling in + lo_do_lookup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-87-dgilbert@redhat.com> +Patchwork-id: 93543 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 086/112] virtiofsd: fix incorrect error handling in lo_do_lookup +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Eric Ren + +Signed-off-by: Eric Ren +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit fc3f0041b43b6c64aa97b3558a6abe1a10028354) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e8dc5c7..05b5f89 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -814,7 +814,6 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + close(newfd); + newfd = -1; + } else { +- saverr = ENOMEM; + inode = calloc(1, sizeof(struct lo_inode)); + if (!inode) { + goto out_err; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-fix-libfuse-information-leaks.patch b/SOURCES/kvm-virtiofsd-fix-libfuse-information-leaks.patch new file mode 100644 index 0000000..90debb0 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-fix-libfuse-information-leaks.patch @@ -0,0 +1,322 @@ +From e0d64e481e5a9fab5ff90d2a8f84afcd3311d13b Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:35 +0100 +Subject: [PATCH 064/116] virtiofsd: fix libfuse information leaks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-61-dgilbert@redhat.com> +Patchwork-id: 93515 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 060/112] virtiofsd: fix libfuse information leaks +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Some FUSE message replies contain padding fields that are not +initialized by libfuse. This is fine in traditional FUSE applications +because the kernel is trusted. virtiofsd does not trust the guest and +must not expose uninitialized memory. + +Use C struct initializers to automatically zero out memory. Not all of +these code changes are strictly necessary but they will prevent future +information leaks if the structs are extended. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 3db2876a0153ac7103c077c53090e020faffb3ea) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 150 ++++++++++++++++++++-------------------- + 1 file changed, 76 insertions(+), 74 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 2d6dc5a..6ceb33d 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -44,21 +44,23 @@ static __attribute__((constructor)) void fuse_ll_init_pagesize(void) + + static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) + { +- attr->ino = stbuf->st_ino; +- attr->mode = stbuf->st_mode; +- attr->nlink = stbuf->st_nlink; +- attr->uid = stbuf->st_uid; +- attr->gid = stbuf->st_gid; +- attr->rdev = stbuf->st_rdev; +- attr->size = stbuf->st_size; +- attr->blksize = stbuf->st_blksize; +- attr->blocks = stbuf->st_blocks; +- attr->atime = stbuf->st_atime; +- attr->mtime = stbuf->st_mtime; +- attr->ctime = stbuf->st_ctime; +- attr->atimensec = ST_ATIM_NSEC(stbuf); +- attr->mtimensec = 
ST_MTIM_NSEC(stbuf); +- attr->ctimensec = ST_CTIM_NSEC(stbuf); ++ *attr = (struct fuse_attr){ ++ .ino = stbuf->st_ino, ++ .mode = stbuf->st_mode, ++ .nlink = stbuf->st_nlink, ++ .uid = stbuf->st_uid, ++ .gid = stbuf->st_gid, ++ .rdev = stbuf->st_rdev, ++ .size = stbuf->st_size, ++ .blksize = stbuf->st_blksize, ++ .blocks = stbuf->st_blocks, ++ .atime = stbuf->st_atime, ++ .mtime = stbuf->st_mtime, ++ .ctime = stbuf->st_ctime, ++ .atimensec = ST_ATIM_NSEC(stbuf), ++ .mtimensec = ST_MTIM_NSEC(stbuf), ++ .ctimensec = ST_CTIM_NSEC(stbuf), ++ }; + } + + static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) +@@ -183,16 +185,16 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, + int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, + int count) + { +- struct fuse_out_header out; ++ struct fuse_out_header out = { ++ .unique = req->unique, ++ .error = error, ++ }; + + if (error <= -1000 || error > 0) { + fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); + error = -ERANGE; + } + +- out.unique = req->unique; +- out.error = error; +- + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(struct fuse_out_header); + +@@ -277,14 +279,16 @@ size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, + static void convert_statfs(const struct statvfs *stbuf, + struct fuse_kstatfs *kstatfs) + { +- kstatfs->bsize = stbuf->f_bsize; +- kstatfs->frsize = stbuf->f_frsize; +- kstatfs->blocks = stbuf->f_blocks; +- kstatfs->bfree = stbuf->f_bfree; +- kstatfs->bavail = stbuf->f_bavail; +- kstatfs->files = stbuf->f_files; +- kstatfs->ffree = stbuf->f_ffree; +- kstatfs->namelen = stbuf->f_namemax; ++ *kstatfs = (struct fuse_kstatfs){ ++ .bsize = stbuf->f_bsize, ++ .frsize = stbuf->f_frsize, ++ .blocks = stbuf->f_blocks, ++ .bfree = stbuf->f_bfree, ++ .bavail = stbuf->f_bavail, ++ .files = stbuf->f_files, ++ .ffree = stbuf->f_ffree, ++ .namelen = stbuf->f_namemax, ++ }; + } + + static int 
send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) +@@ -328,12 +332,14 @@ static unsigned int calc_timeout_nsec(double t) + static void fill_entry(struct fuse_entry_out *arg, + const struct fuse_entry_param *e) + { +- arg->nodeid = e->ino; +- arg->generation = e->generation; +- arg->entry_valid = calc_timeout_sec(e->entry_timeout); +- arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); +- arg->attr_valid = calc_timeout_sec(e->attr_timeout); +- arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); ++ *arg = (struct fuse_entry_out){ ++ .nodeid = e->ino, ++ .generation = e->generation, ++ .entry_valid = calc_timeout_sec(e->entry_timeout), ++ .entry_valid_nsec = calc_timeout_nsec(e->entry_timeout), ++ .attr_valid = calc_timeout_sec(e->attr_timeout), ++ .attr_valid_nsec = calc_timeout_nsec(e->attr_timeout), ++ }; + convert_stat(&e->attr, &arg->attr); + } + +@@ -362,10 +368,12 @@ size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, + fill_entry(&dp->entry_out, e); + + struct fuse_dirent *dirent = &dp->dirent; +- dirent->ino = e->attr.st_ino; +- dirent->off = off; +- dirent->namelen = namelen; +- dirent->type = (e->attr.st_mode & S_IFMT) >> 12; ++ *dirent = (struct fuse_dirent){ ++ .ino = e->attr.st_ino, ++ .off = off, ++ .namelen = namelen, ++ .type = (e->attr.st_mode & S_IFMT) >> 12, ++ }; + memcpy(dirent->name, name, namelen); + memset(dirent->name + namelen, 0, entlen_padded - entlen); + +@@ -496,15 +504,14 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) + { + struct iovec iov[2]; +- struct fuse_out_header out; ++ struct fuse_out_header out = { ++ .unique = req->unique, ++ }; + int res; + + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(struct fuse_out_header); + +- out.unique = req->unique; +- out.error = 0; +- + res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv); + if (res <= 0) { + fuse_free_req(req); +@@ -2145,14 
+2152,14 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, + static int send_notify_iov(struct fuse_session *se, int notify_code, + struct iovec *iov, int count) + { +- struct fuse_out_header out; ++ struct fuse_out_header out = { ++ .error = notify_code, ++ }; + + if (!se->got_init) { + return -ENOTCONN; + } + +- out.unique = 0; +- out.error = notify_code; + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(struct fuse_out_header); + +@@ -2162,11 +2169,11 @@ static int send_notify_iov(struct fuse_session *se, int notify_code, + int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) + { + if (ph != NULL) { +- struct fuse_notify_poll_wakeup_out outarg; ++ struct fuse_notify_poll_wakeup_out outarg = { ++ .kh = ph->kh, ++ }; + struct iovec iov[2]; + +- outarg.kh = ph->kh; +- + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + +@@ -2179,17 +2186,17 @@ int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) + int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, + off_t off, off_t len) + { +- struct fuse_notify_inval_inode_out outarg; ++ struct fuse_notify_inval_inode_out outarg = { ++ .ino = ino, ++ .off = off, ++ .len = len, ++ }; + struct iovec iov[2]; + + if (!se) { + return -EINVAL; + } + +- outarg.ino = ino; +- outarg.off = off; +- outarg.len = len; +- + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + +@@ -2199,17 +2206,16 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, + int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, + const char *name, size_t namelen) + { +- struct fuse_notify_inval_entry_out outarg; ++ struct fuse_notify_inval_entry_out outarg = { ++ .parent = parent, ++ .namelen = namelen, ++ }; + struct iovec iov[3]; + + if (!se) { + return -EINVAL; + } + +- outarg.parent = parent; +- outarg.namelen = namelen; +- outarg.padding = 0; +- + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + iov[2].iov_base = (void 
*)name; +@@ -2222,18 +2228,17 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + fuse_ino_t child, const char *name, + size_t namelen) + { +- struct fuse_notify_delete_out outarg; ++ struct fuse_notify_delete_out outarg = { ++ .parent = parent, ++ .child = child, ++ .namelen = namelen, ++ }; + struct iovec iov[3]; + + if (!se) { + return -EINVAL; + } + +- outarg.parent = parent; +- outarg.child = child; +- outarg.namelen = namelen; +- outarg.padding = 0; +- + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + iov[2].iov_base = (void *)name; +@@ -2245,24 +2250,21 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + off_t offset, struct fuse_bufvec *bufv) + { +- struct fuse_out_header out; +- struct fuse_notify_store_out outarg; ++ struct fuse_out_header out = { ++ .error = FUSE_NOTIFY_STORE, ++ }; ++ struct fuse_notify_store_out outarg = { ++ .nodeid = ino, ++ .offset = offset, ++ .size = fuse_buf_size(bufv), ++ }; + struct iovec iov[3]; +- size_t size = fuse_buf_size(bufv); + int res; + + if (!se) { + return -EINVAL; + } + +- out.unique = 0; +- out.error = FUSE_NOTIFY_STORE; +- +- outarg.nodeid = ino; +- outarg.offset = offset; +- outarg.size = size; +- outarg.padding = 0; +- + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(out); + iov[1].iov_base = &outarg; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch b/SOURCES/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch new file mode 100644 index 0000000..6243037 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch @@ -0,0 +1,94 @@ +From 9a44d78f5019280b006bb5b3de7164336289d639 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:21 +0100 +Subject: [PATCH 110/116] virtiofsd: fix lo_destroy() resource leaks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-107-dgilbert@redhat.com> +Patchwork-id: 93560 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 106/112] virtiofsd: fix lo_destroy() resource leaks +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Now that lo_destroy() is serialized we can call unref_inode() so that +all inode resources are freed. + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 28f7a3b026f231bfe8de5fed6a18a8d27b1dfcee) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 41 ++++++++++++++++++++-------------------- + 1 file changed, 20 insertions(+), 21 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 79b8b71..eb001b9 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1371,26 +1371,6 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + } + } + +-static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) +-{ +- struct lo_inode *inode = value; +- struct lo_data *lo = user_data; +- +- inode->nlookup = 0; +- lo_map_remove(&lo->ino_map, inode->fuse_ino); +- close(inode->fd); +- lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */ +- +- return TRUE; +-} +- +-static void unref_all_inodes(struct lo_data *lo) +-{ +- pthread_mutex_lock(&lo->mutex); +- g_hash_table_foreach_remove(lo->inodes, unref_all_inodes_cb, lo); +- pthread_mutex_unlock(&lo->mutex); +-} +- + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, 
uint64_t nlookup) + { + struct lo_data *lo = lo_data(req); +@@ -2477,7 +2457,26 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, + static void lo_destroy(void *userdata) + { + struct lo_data *lo = (struct lo_data *)userdata; +- unref_all_inodes(lo); ++ ++ /* ++ * Normally lo->mutex must be taken when traversing lo->inodes but ++ * lo_destroy() is a serialized request so no races are possible here. ++ * ++ * In addition, we cannot acquire lo->mutex since unref_inode() takes it ++ * too and this would result in a recursive lock. ++ */ ++ while (true) { ++ GHashTableIter iter; ++ gpointer key, value; ++ ++ g_hash_table_iter_init(&iter, lo->inodes); ++ if (!g_hash_table_iter_next(&iter, &key, &value)) { ++ break; ++ } ++ ++ struct lo_inode *inode = value; ++ unref_inode_lolocked(lo, inode, inode->nlookup); ++ } + } + + static struct fuse_lowlevel_ops lo_oper = { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch b/SOURCES/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch new file mode 100644 index 0000000..4d7d6dc --- /dev/null +++ b/SOURCES/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch @@ -0,0 +1,66 @@ +From 9e0f5b64f30c2f841f297e25c2f3a6d82c8a16b8 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:57 +0100 +Subject: [PATCH 086/116] virtiofsd: fix memory leak on lo.source +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-83-dgilbert@redhat.com> +Patchwork-id: 93536 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 082/112] virtiofsd: fix memory leak on lo.source +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +valgrind reported that lo.source is leaked on quiting, but it was defined +as (const char*) as it may point to a const string "/". 
+ +Signed-off-by: Liu Bo +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit eb68a33b5fc5dde87bd9b99b94e7c33a5d8ea82e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index af050c6..056ebe8 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -115,7 +115,7 @@ struct lo_data { + int writeback; + int flock; + int xattr; +- const char *source; ++ char *source; + double timeout; + int cache; + int timeout_set; +@@ -2497,9 +2497,8 @@ int main(int argc, char *argv[]) + fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); + exit(1); + } +- + } else { +- lo.source = "/"; ++ lo.source = strdup("/"); + } + if (!lo.timeout_set) { + switch (lo.cache) { +@@ -2570,5 +2569,7 @@ err_out1: + close(lo.root.fd); + } + ++ free(lo.source); ++ + return ret ? 1 : 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch b/SOURCES/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch new file mode 100644 index 0000000..b17d93c --- /dev/null +++ b/SOURCES/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch @@ -0,0 +1,56 @@ +From 3b6461ee08654b2cbb6d4e0cc15c02f89a6610d5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:09 +0000 +Subject: [PATCH 13/18] virtiofsd: fv_create_listen_socket error path socket + leak +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200303184314.155564-3-dgilbert@redhat.com> +Patchwork-id: 94124 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/7] virtiofsd: fv_create_listen_socket error path socket leak +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: "Dr. David Alan Gilbert" + +If we fail when bringing up the socket we can leak the listen_fd; +in practice the daemon will exit so it's not really a problem. + +Fixes: Coverity CID 1413121 +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 6fa249027f97e3080f3d9c0fab3f94f8f80828fe) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/fuse_virtio.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 80a6e92..dd1c605 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -916,6 +916,7 @@ static int fv_create_listen_socket(struct fuse_session *se) + old_umask = umask(0077); + if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) { + fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n"); ++ close(listen_sock); + umask(old_umask); + return -1; + } +@@ -923,6 +924,7 @@ static int fv_create_listen_socket(struct fuse_session *se) + + if (listen(listen_sock, 1) == -1) { + fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n"); ++ close(listen_sock); + return -1; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-get-set-features-callbacks.patch b/SOURCES/kvm-virtiofsd-get-set-features-callbacks.patch new file mode 100644 index 0000000..fcb5ca2 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-get-set-features-callbacks.patch @@ -0,0 +1,66 @@ +From 59bfe3ad924d00dc9c7a4363fcd3db36ea247988 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:59 +0100 +Subject: [PATCH 028/116] virtiofsd: get/set features callbacks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-25-dgilbert@redhat.com> +Patchwork-id: 93478 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 024/112] virtiofsd: get/set features callbacks +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Add the get/set features callbacks. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit f2cef5fb9ae20136ca18d16328787b69b3abfa18) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 1928a20..4819e56 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -46,6 +46,17 @@ struct virtio_fs_config { + uint32_t num_queues; + }; + ++/* Callback from libvhost-user */ ++static uint64_t fv_get_features(VuDev *dev) ++{ ++ return 1ULL << VIRTIO_F_VERSION_1; ++} ++ ++/* Callback from libvhost-user */ ++static void fv_set_features(VuDev *dev, uint64_t features) ++{ ++} ++ + /* + * Callback from libvhost-user if there's a new fd we're supposed to listen + * to, typically a queue kick? 
+@@ -78,7 +89,9 @@ static bool fv_queue_order(VuDev *dev, int qidx) + } + + static const VuDevIface fv_iface = { +- /* TODO: Add other callbacks */ ++ .get_features = fv_get_features, ++ .set_features = fv_set_features, ++ + .queue_is_processed_in_order = fv_queue_order, + }; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch b/SOURCES/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch new file mode 100644 index 0000000..68d20e7 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch @@ -0,0 +1,589 @@ +From da6ee5c24397d2ca93dfaf275fdd9dafc922da15 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:11 +0100 +Subject: [PATCH 100/116] virtiofsd: introduce inode refcount to prevent + use-after-free +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-97-dgilbert@redhat.com> +Patchwork-id: 93550 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 096/112] virtiofsd: introduce inode refcount to prevent use-after-free +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +If thread A is using an inode it must not be deleted by thread B when +processing a FUSE_FORGET request. + +The FUSE protocol itself already has a counter called nlookup that is +used in FUSE_FORGET messages. We cannot trust this counter since the +untrusted client can manipulate it via FUSE_FORGET messages. + +Introduce a new refcount to keep inodes alive for the required lifespan. +lo_inode_put() must be called to release a reference. FUSE's nlookup +counter holds exactly one reference so that the inode stays alive as +long as the client still wants to remember it. 
+ +Note that the lo_inode->is_symlink field is moved to avoid creating a +hole in the struct due to struct field alignment. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Misono Tomohiro +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit c241aa9457d88c6a0d027f48fadfed131646bce3) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 169 +++++++++++++++++++++++++++++++++------ + 1 file changed, 146 insertions(+), 23 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e3a6d6b..ab16135 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -97,7 +97,13 @@ struct lo_key { + + struct lo_inode { + int fd; +- bool is_symlink; ++ ++ /* ++ * Atomic reference count for this object. The nlookup field holds a ++ * reference and release it when nlookup reaches 0. ++ */ ++ gint refcount; ++ + struct lo_key key; + + /* +@@ -116,6 +122,8 @@ struct lo_inode { + fuse_ino_t fuse_ino; + pthread_mutex_t plock_mutex; + GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ ++ ++ bool is_symlink; + }; + + struct lo_cred { +@@ -471,6 +479,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) + return elem - lo_data(req)->ino_map.elems; + } + ++static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) ++{ ++ struct lo_inode *inode = *inodep; ++ ++ if (!inode) { ++ return; ++ } ++ ++ *inodep = NULL; ++ ++ if (g_atomic_int_dec_and_test(&inode->refcount)) { ++ close(inode->fd); ++ free(inode); ++ } ++} ++ ++/* Caller must release refcount using lo_inode_put() */ + static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + { + struct lo_data *lo = lo_data(req); +@@ -478,6 +503,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + + pthread_mutex_lock(&lo->mutex); + elem = lo_map_get(&lo->ino_map, ino); ++ if (elem) { ++ g_atomic_int_inc(&elem->inode->refcount); ++ } 
+ pthread_mutex_unlock(&lo->mutex); + + if (!elem) { +@@ -487,10 +515,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + return elem->inode; + } + ++/* ++ * TODO Remove this helper and force callers to hold an inode refcount until ++ * they are done with the fd. This will be done in a later patch to make ++ * review easier. ++ */ + static int lo_fd(fuse_req_t req, fuse_ino_t ino) + { + struct lo_inode *inode = lo_inode(req, ino); +- return inode ? inode->fd : -1; ++ int fd; ++ ++ if (!inode) { ++ return -1; ++ } ++ ++ fd = inode->fd; ++ lo_inode_put(lo_data(req), &inode); ++ return fd; + } + + static void lo_init(void *userdata, struct fuse_conn_info *conn) +@@ -545,6 +586,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, + fuse_reply_attr(req, &buf, lo->timeout); + } + ++/* ++ * Increments parent->nlookup and caller must release refcount using ++ * lo_inode_put(&parent). ++ */ + static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, + char path[PATH_MAX], struct lo_inode **parent) + { +@@ -582,6 +627,7 @@ retry: + p = &lo->root; + pthread_mutex_lock(&lo->mutex); + p->nlookup++; ++ g_atomic_int_inc(&p->refcount); + pthread_mutex_unlock(&lo->mutex); + } else { + *last = '\0'; +@@ -625,6 +671,7 @@ retry: + + fail_unref: + unref_inode_lolocked(lo, p, 1); ++ lo_inode_put(lo, &p); + fail: + if (retries) { + retries--; +@@ -663,6 +710,7 @@ fallback: + if (res != -1) { + res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); + unref_inode_lolocked(lo, parent, 1); ++ lo_inode_put(lo, &parent); + } + + return res; +@@ -780,11 +828,13 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + goto out_err; + } + } ++ lo_inode_put(lo, &inode); + + return lo_getattr(req, ino, fi); + + out_err: + saverr = errno; ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + } + +@@ -801,6 +851,7 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + if (p) { + assert(p->nlookup > 0); 
+ p->nlookup++; ++ g_atomic_int_inc(&p->refcount); + } + pthread_mutex_unlock(&lo->mutex); + +@@ -820,6 +871,10 @@ static void posix_locks_value_destroy(gpointer data) + free(plock); + } + ++/* ++ * Increments nlookup and caller must release refcount using ++ * lo_inode_put(&parent). ++ */ + static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct fuse_entry_param *e) + { +@@ -827,7 +882,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + int res; + int saverr; + struct lo_data *lo = lo_data(req); +- struct lo_inode *inode, *dir = lo_inode(req, parent); ++ struct lo_inode *inode = NULL; ++ struct lo_inode *dir = lo_inode(req, parent); + + /* + * name_to_handle_at() and open_by_handle_at() can reach here with fuse +@@ -868,6 +924,13 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + inode->is_symlink = S_ISLNK(e->attr.st_mode); ++ ++ /* ++ * One for the caller and one for nlookup (released in ++ * unref_inode_lolocked()) ++ */ ++ g_atomic_int_set(&inode->refcount, 2); ++ + inode->nlookup = 1; + inode->fd = newfd; + newfd = -1; +@@ -883,6 +946,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + pthread_mutex_unlock(&lo->mutex); + } + e->ino = inode->fuse_ino; ++ lo_inode_put(lo, &inode); ++ lo_inode_put(lo, &dir); + + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, + name, (unsigned long long)e->ino); +@@ -894,6 +959,8 @@ out_err: + if (newfd != -1) { + close(newfd); + } ++ lo_inode_put(lo, &inode); ++ lo_inode_put(lo, &dir); + return saverr; + } + +@@ -991,6 +1058,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + { + int res; + int saverr; ++ struct lo_data *lo = lo_data(req); + struct lo_inode *dir; + struct fuse_entry_param e; + struct lo_cred old = {}; +@@ -1032,9 +1100,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + name, (unsigned long long)e.ino); + + 
fuse_reply_entry(req, &e); ++ lo_inode_put(lo, &dir); + return; + + out: ++ lo_inode_put(lo, &dir); + fuse_reply_err(req, saverr); + } + +@@ -1085,6 +1155,7 @@ fallback: + if (res != -1) { + res = linkat(parent->fd, path, dfd, name, 0); + unref_inode_lolocked(lo, parent, 1); ++ lo_inode_put(lo, &parent); + } + + return res; +@@ -1095,6 +1166,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + { + int res; + struct lo_data *lo = lo_data(req); ++ struct lo_inode *parent_inode; + struct lo_inode *inode; + struct fuse_entry_param e; + int saverr; +@@ -1104,17 +1176,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + return; + } + ++ parent_inode = lo_inode(req, parent); + inode = lo_inode(req, ino); +- if (!inode) { +- fuse_reply_err(req, EBADF); +- return; ++ if (!parent_inode || !inode) { ++ errno = EBADF; ++ goto out_err; + } + + memset(&e, 0, sizeof(struct fuse_entry_param)); + e.attr_timeout = lo->timeout; + e.entry_timeout = lo->timeout; + +- res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name); ++ res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name); + if (res == -1) { + goto out_err; + } +@@ -1133,13 +1206,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + name, (unsigned long long)e.ino); + + fuse_reply_entry(req, &e); ++ lo_inode_put(lo, &parent_inode); ++ lo_inode_put(lo, &inode); + return; + + out_err: + saverr = errno; ++ lo_inode_put(lo, &parent_inode); ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + } + ++/* Increments nlookup and caller must release refcount using lo_inode_put() */ + static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, + const char *name) + { +@@ -1176,6 +1254,7 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) + + fuse_reply_err(req, res == -1 ? 
errno : 0); + unref_inode_lolocked(lo, inode, 1); ++ lo_inode_put(lo, &inode); + } + + static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, +@@ -1183,8 +1262,10 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + unsigned int flags) + { + int res; +- struct lo_inode *oldinode; +- struct lo_inode *newinode; ++ struct lo_inode *parent_inode; ++ struct lo_inode *newparent_inode; ++ struct lo_inode *oldinode = NULL; ++ struct lo_inode *newinode = NULL; + struct lo_data *lo = lo_data(req); + + if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { +@@ -1192,6 +1273,13 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + return; + } + ++ parent_inode = lo_inode(req, parent); ++ newparent_inode = lo_inode(req, newparent); ++ if (!parent_inode || !newparent_inode) { ++ fuse_reply_err(req, EBADF); ++ goto out; ++ } ++ + oldinode = lookup_name(req, parent, name); + newinode = lookup_name(req, newparent, newname); + +@@ -1204,8 +1292,8 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + #ifndef SYS_renameat2 + fuse_reply_err(req, EINVAL); + #else +- res = syscall(SYS_renameat2, lo_fd(req, parent), name, +- lo_fd(req, newparent), newname, flags); ++ res = syscall(SYS_renameat2, parent_inode->fd, name, ++ newparent_inode->fd, newname, flags); + if (res == -1 && errno == ENOSYS) { + fuse_reply_err(req, EINVAL); + } else { +@@ -1215,12 +1303,16 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out; + } + +- res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); ++ res = renameat(parent_inode->fd, name, newparent_inode->fd, newname); + + fuse_reply_err(req, res == -1 ? 
errno : 0); + out: + unref_inode_lolocked(lo, oldinode, 1); + unref_inode_lolocked(lo, newinode, 1); ++ lo_inode_put(lo, &oldinode); ++ lo_inode_put(lo, &newinode); ++ lo_inode_put(lo, &parent_inode); ++ lo_inode_put(lo, &newparent_inode); + } + + static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) +@@ -1244,6 +1336,7 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + + fuse_reply_err(req, res == -1 ? errno : 0); + unref_inode_lolocked(lo, inode, 1); ++ lo_inode_put(lo, &inode); + } + + static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, +@@ -1265,8 +1358,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + g_hash_table_destroy(inode->posix_locks); + pthread_mutex_destroy(&inode->plock_mutex); + pthread_mutex_unlock(&lo->mutex); +- close(inode->fd); +- free(inode); ++ ++ /* Drop our refcount from lo_do_lookup() */ ++ lo_inode_put(lo, &inode); + } else { + pthread_mutex_unlock(&lo->mutex); + } +@@ -1280,6 +1374,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) + inode->nlookup = 0; + lo_map_remove(&lo->ino_map, inode->fuse_ino); + close(inode->fd); ++ lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */ + + return TRUE; + } +@@ -1306,6 +1401,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + (unsigned long long)nlookup); + + unref_inode_lolocked(lo, inode, nlookup); ++ lo_inode_put(lo, &inode); + } + + static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) +@@ -1537,6 +1633,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + err = 0; + error: + lo_dirp_put(&d); ++ lo_inode_put(lo, &dinode); + + /* + * If there's an error, we can only signal it if we haven't stored +@@ -1595,6 +1692,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + { + int fd; + struct lo_data *lo = lo_data(req); ++ struct lo_inode 
*parent_inode; + struct fuse_entry_param e; + int err; + struct lo_cred old = {}; +@@ -1607,12 +1705,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + return; + } + ++ parent_inode = lo_inode(req, parent); ++ if (!parent_inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + err = lo_change_cred(req, &old); + if (err) { + goto out; + } + +- fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, ++ fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); + err = fd == -1 ? errno : 0; + lo_restore_cred(&old); +@@ -1625,8 +1729,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + pthread_mutex_unlock(&lo->mutex); + if (fh == -1) { + close(fd); +- fuse_reply_err(req, ENOMEM); +- return; ++ err = ENOMEM; ++ goto out; + } + + fi->fh = fh; +@@ -1639,6 +1743,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + out: ++ lo_inode_put(lo, &parent_inode); ++ + if (err) { + fuse_reply_err(req, err); + } else { +@@ -1712,16 +1818,18 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + plock = + lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); + if (!plock) { +- pthread_mutex_unlock(&inode->plock_mutex); +- fuse_reply_err(req, ret); +- return; ++ saverr = ret; ++ goto out; + } + + ret = fcntl(plock->fd, F_OFD_GETLK, lock); + if (ret == -1) { + saverr = errno; + } ++ ++out: + pthread_mutex_unlock(&inode->plock_mutex); ++ lo_inode_put(lo, &inode); + + if (saverr) { + fuse_reply_err(req, saverr); +@@ -1761,9 +1869,8 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); + + if (!plock) { +- pthread_mutex_unlock(&inode->plock_mutex); +- fuse_reply_err(req, ret); +- return; ++ saverr = ret; ++ goto out; + } + + /* TODO: Is it alright to modify flock? 
*/ +@@ -1772,7 +1879,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + if (ret == -1) { + saverr = errno; + } ++ ++out: + pthread_mutex_unlock(&inode->plock_mutex); ++ lo_inode_put(lo, &inode); ++ + fuse_reply_err(req, saverr); + } + +@@ -1898,6 +2009,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + pthread_mutex_unlock(&inode->plock_mutex); + + res = close(dup(lo_fi_fd(req, fi))); ++ lo_inode_put(lo_data(req), &inode); + fuse_reply_err(req, res == -1 ? errno : 0); + } + +@@ -2115,11 +2227,14 @@ out_free: + if (fd >= 0) { + close(fd); + } ++ ++ lo_inode_put(lo, &inode); + return; + + out_err: + saverr = errno; + out: ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + goto out_free; + } +@@ -2190,11 +2305,14 @@ out_free: + if (fd >= 0) { + close(fd); + } ++ ++ lo_inode_put(lo, &inode); + return; + + out_err: + saverr = errno; + out: ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + goto out_free; + } +@@ -2243,6 +2361,8 @@ out: + if (fd >= 0) { + close(fd); + } ++ ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + } + +@@ -2289,6 +2409,8 @@ out: + if (fd >= 0) { + close(fd); + } ++ ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + } + +@@ -2671,6 +2793,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) + root->key.ino = stat.st_ino; + root->key.dev = stat.st_dev; + root->nlookup = 2; ++ g_atomic_int_set(&root->refcount, 2); + } + + static guint lo_key_hash(gconstpointer key) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-load_capng-missing-unlock.patch b/SOURCES/kvm-virtiofsd-load_capng-missing-unlock.patch new file mode 100644 index 0000000..bc04f6b --- /dev/null +++ b/SOURCES/kvm-virtiofsd-load_capng-missing-unlock.patch @@ -0,0 +1,46 @@ +From ece7649025fbdbde48ff0b954e8ec2e42c4a8b3d Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:10 +0000 +Subject: [PATCH 14/18] virtiofsd: load_capng missing unlock +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-4-dgilbert@redhat.com> +Patchwork-id: 94126 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/7] virtiofsd: load_capng missing unlock +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: "Dr. David Alan Gilbert" + +Missing unlock in error path. + +Fixes: Coverity CID 1413123 +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 686391112fd42c615bcc4233472887a66a9b5a4a) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/passthrough_ll.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e6f2399..c635fc8 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -232,6 +232,7 @@ static int load_capng(void) + */ + cap.saved = capng_save_state(); + if (!cap.saved) { ++ pthread_mutex_unlock(&cap.mutex); + fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); + return -EINVAL; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-make-f-foreground-the-default.patch b/SOURCES/kvm-virtiofsd-make-f-foreground-the-default.patch new file mode 100644 index 0000000..d6cb0e3 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-make-f-foreground-the-default.patch @@ -0,0 +1,76 @@ +From 7f2e1f79a3addb242c3018c7a80e2e57589119f0 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:08 +0100 +Subject: [PATCH 037/116] virtiofsd: make -f (foreground) the default +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr.
David Alan Gilbert +Message-id: <20200127190227.40942-34-dgilbert@redhat.com> +Patchwork-id: 93489 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 033/112] virtiofsd: make -f (foreground) the default +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +According to vhost-user.rst "Backend program conventions", backend +programs should run in the foreground by default. Follow the +conventions so libvirt and other management tools can control virtiofsd +in a standard way. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 0bbd31753714ac2899efda0f0de31e353e965789) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 676032e..a3645fc 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -29,6 +29,11 @@ + { \ + t, offsetof(struct fuse_cmdline_opts, p), 1 \ + } ++#define FUSE_HELPER_OPT_VALUE(t, p, v) \ ++ { \ ++ t, offsetof(struct fuse_cmdline_opts, p), v \ ++ } ++ + + static const struct fuse_opt fuse_helper_opts[] = { + FUSE_HELPER_OPT("-h", show_help), +@@ -42,6 +47,7 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), + FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("-f", foreground), ++ FUSE_HELPER_OPT_VALUE("--daemonize", foreground, 0), + FUSE_HELPER_OPT("fsname=", nodefault_subtype), + FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("subtype=", nodefault_subtype), +@@ -131,6 +137,7 @@ void fuse_cmdline_help(void) + " -V --version print version\n" + " -d -o debug enable debug output (implies -f)\n" + " -f foreground operation\n" ++ " --daemonize run in background\n" + " -o max_idle_threads the maximum number of idle worker " + "threads\n" + " allowed (default: 10)\n"); +@@
-158,6 +165,7 @@ int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) + memset(opts, 0, sizeof(struct fuse_cmdline_opts)); + + opts->max_idle_threads = 10; ++ opts->foreground = 1; + + if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == + -1) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-make-lo_release-atomic.patch b/SOURCES/kvm-virtiofsd-make-lo_release-atomic.patch new file mode 100644 index 0000000..6d88549 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-make-lo_release-atomic.patch @@ -0,0 +1,62 @@ +From 4ebabb66f4132186152edf8e1907fce436bf5c69 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:06 +0100 +Subject: [PATCH 095/116] virtiofsd: make lo_release() atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-92-dgilbert@redhat.com> +Patchwork-id: 93545 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 091/112] virtiofsd: make lo_release() atomic +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Hold the lock across both lo_map_get() and lo_map_remove() to prevent +races between two FUSE_RELEASE requests. In this case I don't see a +serious bug but it's safer to do things atomically. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit baed65c060c0e524530bc243eec427fb408bd477) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9414935..690edbc 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1772,14 +1772,18 @@ static void lo_release(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) + { + struct lo_data *lo = lo_data(req); +- int fd; ++ struct lo_map_elem *elem; ++ int fd = -1; + + (void)ino; + +- fd = lo_fi_fd(req, fi); +- + pthread_mutex_lock(&lo->mutex); +- lo_map_remove(&lo->fd_map, fi->fh); ++ elem = lo_map_get(&lo->fd_map, fi->fh); ++ if (elem) { ++ fd = elem->fd; ++ elem = NULL; ++ lo_map_remove(&lo->fd_map, fi->fh); ++ } + pthread_mutex_unlock(&lo->mutex); + + close(fd); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-move-to-a-new-pid-namespace.patch b/SOURCES/kvm-virtiofsd-move-to-a-new-pid-namespace.patch new file mode 100644 index 0000000..9a33d1b --- /dev/null +++ b/SOURCES/kvm-virtiofsd-move-to-a-new-pid-namespace.patch @@ -0,0 +1,223 @@ +From a7a87a751a9893830d031a957a751b7622b71fb2 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:29 +0100 +Subject: [PATCH 058/116] virtiofsd: move to a new pid namespace +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-55-dgilbert@redhat.com> +Patchwork-id: 93510 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 054/112] virtiofsd: move to a new pid namespace +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +virtiofsd needs access to /proc/self/fd. 
Let's move to a new pid +namespace so that a compromised process cannot see another other +processes running on the system. + +One wrinkle in this approach: unshare(CLONE_NEWPID) affects *child* +processes and not the current process. Therefore we need to fork the +pid 1 process that will actually run virtiofsd and leave a parent in +waitpid(2). This is not the same thing as daemonization and parent +processes should not notice a difference. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 8e1d4ef231d8327be219f7aea7aa15d181375bbc) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 134 +++++++++++++++++++++++++-------------- + 1 file changed, 86 insertions(+), 48 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 27ab328..0947d14 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -51,7 +51,10 @@ + #include + #include + #include ++#include + #include ++#include ++#include + #include + #include + +@@ -1945,24 +1948,95 @@ static void print_capabilities(void) + } + + /* +- * Called after our UNIX domain sockets have been created, now we can move to +- * an empty network namespace to prevent TCP/IP and other network activity in +- * case this process is compromised. ++ * Move to a new mount, net, and pid namespaces to isolate this process. + */ +-static void setup_net_namespace(void) ++static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) + { +- if (unshare(CLONE_NEWNET) != 0) { +- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n"); ++ pid_t child; ++ ++ /* ++ * Create a new pid namespace for *child* processes. We'll have to ++ * fork in order to enter the new pid namespace. A new mount namespace ++ * is also needed so that we can remount /proc for the new pid ++ * namespace. ++ * ++ * Our UNIX domain sockets have been created. 
Now we can move to ++ * an empty network namespace to prevent TCP/IP and other network ++ * activity in case this process is compromised. ++ */ ++ if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) { ++ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n"); ++ exit(1); ++ } ++ ++ child = fork(); ++ if (child < 0) { ++ fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n"); ++ exit(1); ++ } ++ if (child > 0) { ++ pid_t waited; ++ int wstatus; ++ ++ /* The parent waits for the child */ ++ do { ++ waited = waitpid(child, &wstatus, 0); ++ } while (waited < 0 && errno == EINTR && !se->exited); ++ ++ /* We were terminated by a signal, see fuse_signals.c */ ++ if (se->exited) { ++ exit(0); ++ } ++ ++ if (WIFEXITED(wstatus)) { ++ exit(WEXITSTATUS(wstatus)); ++ } ++ ++ exit(1); ++ } ++ ++ /* Send us SIGTERM when the parent thread terminates, see prctl(2) */ ++ prctl(PR_SET_PDEATHSIG, SIGTERM); ++ ++ /* ++ * If the mounts have shared propagation then we want to opt out so our ++ * mount changes don't affect the parent mount namespace. ++ */ ++ if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n"); ++ exit(1); ++ } ++ ++ /* The child must remount /proc to use the new pid namespace */ ++ if (mount("proc", "/proc", "proc", ++ MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n"); ++ exit(1); ++ } ++ ++ /* Now we can get our /proc/self/fd directory file descriptor */ ++ lo->proc_self_fd = open("/proc/self/fd", O_PATH); ++ if (lo->proc_self_fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); + exit(1); + } + } + +-/* This magic is based on lxc's lxc_pivot_root() */ +-static void setup_pivot_root(const char *source) ++/* ++ * Make the source directory our root so symlinks cannot escape and no other ++ * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. 
++ */ ++static void setup_mounts(const char *source) + { + int oldroot; + int newroot; + ++ if (mount(source, source, NULL, MS_BIND, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); ++ exit(1); ++ } ++ ++ /* This magic is based on lxc's lxc_pivot_root() */ + oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); + if (oldroot < 0) { + fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); +@@ -2009,47 +2083,14 @@ static void setup_pivot_root(const char *source) + close(oldroot); + } + +-static void setup_proc_self_fd(struct lo_data *lo) +-{ +- lo->proc_self_fd = open("/proc/self/fd", O_PATH); +- if (lo->proc_self_fd == -1) { +- fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); +- exit(1); +- } +-} +- +-/* +- * Make the source directory our root so symlinks cannot escape and no other +- * files are accessible. +- */ +-static void setup_mount_namespace(const char *source) +-{ +- if (unshare(CLONE_NEWNS) != 0) { +- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n"); +- exit(1); +- } +- +- if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { +- fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n"); +- exit(1); +- } +- +- if (mount(source, source, NULL, MS_BIND, NULL) < 0) { +- fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); +- exit(1); +- } +- +- setup_pivot_root(source); +-} +- + /* + * Lock down this process to prevent access to other processes or files outside + * source directory. This reduces the impact of arbitrary code execution bugs. 
+ */ +-static void setup_sandbox(struct lo_data *lo) ++static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) + { +- setup_net_namespace(); +- setup_mount_namespace(lo->source); ++ setup_namespaces(lo, se); ++ setup_mounts(lo->source); + } + + int main(int argc, char *argv[]) +@@ -2173,10 +2214,7 @@ int main(int argc, char *argv[]) + + fuse_daemonize(opts.foreground); + +- /* Must be after daemonize to get the right /proc/self/fd */ +- setup_proc_self_fd(&lo); +- +- setup_sandbox(&lo); ++ setup_sandbox(&lo, se); + + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-move-to-an-empty-network-namespace.patch b/SOURCES/kvm-virtiofsd-move-to-an-empty-network-namespace.patch new file mode 100644 index 0000000..69a7c20 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-move-to-an-empty-network-namespace.patch @@ -0,0 +1,66 @@ +From 19a16f26bdeb6302159736e182a18b06160a3f42 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:28 +0100 +Subject: [PATCH 057/116] virtiofsd: move to an empty network namespace +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-54-dgilbert@redhat.com> +Patchwork-id: 93508 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 053/112] virtiofsd: move to an empty network namespace +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +If the process is compromised there should be no network access. Use an +empty network namespace to sandbox networking. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit d74830d12ae233186ff74ddf64c552d26bb39e50) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 0570453..27ab328 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1944,6 +1944,19 @@ static void print_capabilities(void) + printf("}\n"); + } + ++/* ++ * Called after our UNIX domain sockets have been created, now we can move to ++ * an empty network namespace to prevent TCP/IP and other network activity in ++ * case this process is compromised. ++ */ ++static void setup_net_namespace(void) ++{ ++ if (unshare(CLONE_NEWNET) != 0) { ++ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n"); ++ exit(1); ++ } ++} ++ + /* This magic is based on lxc's lxc_pivot_root() */ + static void setup_pivot_root(const char *source) + { +@@ -2035,6 +2048,7 @@ static void setup_mount_namespace(const char *source) + */ + static void setup_sandbox(struct lo_data *lo) + { ++ setup_net_namespace(); + setup_mount_namespace(lo->source); + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch new file mode 100644 index 0000000..e3d5773 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch @@ -0,0 +1,54 @@ +From fe031dbbf5e287f64de9fcc9aec361e8ab492109 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:24 +0100 +Subject: [PATCH 113/116] virtiofsd/passthrough_ll: Pass errno to + fuse_reply_err() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-110-dgilbert@redhat.com> +Patchwork-id: 93559 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 109/112] virtiofsd/passthrough_ll: Pass errno to fuse_reply_err() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Xiao Yang + +lo_copy_file_range() passes -errno to fuse_reply_err() and then fuse_reply_err() +changes it to errno again, so that subsequent fuse_send_reply_iov_nofree() catches +the wrong errno.(i.e. reports "fuse: bad error value: ..."). + +Make fuse_send_reply_iov_nofree() accept the correct -errno by passing errno +directly in lo_copy_file_range(). + +Signed-off-by: Xiao Yang +Reviewed-by: Eryu Guan + +dgilbert: Sent upstream and now Merged as aa1185e153f774f1df65 +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit a931b6861e59c78d861017e9c6a9c161ff49a163) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index fc15d61..e6f2399 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2441,7 +2441,7 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, + + res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); + if (res < 0) { +- fuse_reply_err(req, -errno); ++ fuse_reply_err(req, errno); + } else { + fuse_reply_write(req, res); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch new file mode 100644 index 0000000..ddacdbe --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch @@ -0,0 +1,48 @@ +From 83b03fc4a3ecf6086394363488bbebc8d55428c0 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:16 +0100 +Subject: [PATCH 105/116] virtiofsd: passthrough_ll: Use cache_readdir for + directory open +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-102-dgilbert@redhat.com> +Patchwork-id: 93555 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 101/112] virtiofsd: passthrough_ll: Use cache_readdir for directory open +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Misono Tomohiro + +Since keep_cache(FOPEN_KEEP_CACHE) has no effect for directory as +described in fuse_common.h, use cache_readdir(FOPEN_CACHE_DIR) for +directory open in cache=always mode. + +Signed-off-by: Misono Tomohiro +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 9b610b09b49b1aada256097b338d49da805da6ae) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 4c61ac5..79b8b71 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1523,7 +1523,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, + + fi->fh = fh; + if (lo->cache == CACHE_ALWAYS) { +- fi->keep_cache = 1; ++ fi->cache_readdir = 1; + } + fuse_reply_open(req, fi); + return; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch new file mode 100644 index 0000000..0506574 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch @@ -0,0 +1,238 @@ +From 474d0adafed4d73720d6413b2903d6c4b529e5e6 Mon Sep 17 00:00:00 2001 +From: "Dr.
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:15 +0100 +Subject: [PATCH 044/116] virtiofsd: passthrough_ll: add dirp_map to hide + lo_dirp pointers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-41-dgilbert@redhat.com> +Patchwork-id: 93495 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 040/112] virtiofsd: passthrough_ll: add dirp_map to hide lo_dirp pointers +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Do not expose lo_dirp pointers to clients. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit b39bce121bfad8757eec0ee41f14607b883935d3) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 103 +++++++++++++++++++++++++++++---------- + 1 file changed, 76 insertions(+), 27 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index a3ebf74..5f5a72f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -56,27 +56,10 @@ + + #include "passthrough_helpers.h" + +-/* +- * We are re-using pointers to our `struct lo_inode` +- * elements as inodes. This means that we must be able to +- * store uintptr_t values in a fuse_ino_t variable. The following +- * incantation checks this condition at compile time. +- */ +-#if defined(__GNUC__) && \ +- (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ +- !defined __cplusplus +-_Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), +- "fuse_ino_t too small to hold uintptr_t values!"); +-#else +-struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { +- unsigned _uintptr_to_must_hold_fuse_ino_t +- : ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 
1 : -1); +-}; +-#endif +- + struct lo_map_elem { + union { + struct lo_inode *inode; ++ struct lo_dirp *dirp; + ssize_t freelist; + }; + bool in_use; +@@ -123,6 +106,7 @@ struct lo_data { + int timeout_set; + struct lo_inode root; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ ++ struct lo_map dirp_map; /* protected by lo->mutex */ + }; + + static const struct fuse_opt lo_opts[] = { +@@ -253,6 +237,20 @@ static void lo_map_remove(struct lo_map *map, size_t key) + } + + /* Assumes lo->mutex is held */ ++static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) ++{ ++ struct lo_map_elem *elem; ++ ++ elem = lo_map_alloc_elem(&lo_data(req)->dirp_map); ++ if (!elem) { ++ return -1; ++ } ++ ++ elem->dirp = dirp; ++ return elem - lo_data(req)->dirp_map.elems; ++} ++ ++/* Assumes lo->mutex is held */ + static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) + { + struct lo_map_elem *elem; +@@ -861,9 +859,19 @@ struct lo_dirp { + off_t offset; + }; + +-static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) ++static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) + { +- return (struct lo_dirp *)(uintptr_t)fi->fh; ++ struct lo_data *lo = lo_data(req); ++ struct lo_map_elem *elem; ++ ++ pthread_mutex_lock(&lo->mutex); ++ elem = lo_map_get(&lo->dirp_map, fi->fh); ++ pthread_mutex_unlock(&lo->mutex); ++ if (!elem) { ++ return NULL; ++ } ++ ++ return elem->dirp; + } + + static void lo_opendir(fuse_req_t req, fuse_ino_t ino, +@@ -873,6 +881,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, + struct lo_data *lo = lo_data(req); + struct lo_dirp *d; + int fd; ++ ssize_t fh; + + d = calloc(1, sizeof(struct lo_dirp)); + if (d == NULL) { +@@ -892,7 +901,14 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, + d->offset = 0; + d->entry = NULL; + +- fi->fh = (uintptr_t)d; ++ pthread_mutex_lock(&lo->mutex); ++ fh = lo_add_dirp_mapping(req, d); ++ pthread_mutex_unlock(&lo->mutex); ++ 
if (fh == -1) { ++ goto out_err; ++ } ++ ++ fi->fh = fh; + if (lo->cache == CACHE_ALWAYS) { + fi->keep_cache = 1; + } +@@ -903,6 +919,9 @@ out_errno: + error = errno; + out_err: + if (d) { ++ if (d->dp) { ++ closedir(d->dp); ++ } + if (fd != -1) { + close(fd); + } +@@ -920,17 +939,21 @@ static int is_dot_or_dotdot(const char *name) + static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, struct fuse_file_info *fi, int plus) + { +- struct lo_dirp *d = lo_dirp(fi); +- char *buf; ++ struct lo_dirp *d; ++ char *buf = NULL; + char *p; + size_t rem = size; +- int err; ++ int err = ENOMEM; + + (void)ino; + ++ d = lo_dirp(req, fi); ++ if (!d) { ++ goto error; ++ } ++ + buf = calloc(1, size); + if (!buf) { +- err = ENOMEM; + goto error; + } + p = buf; +@@ -1028,8 +1051,21 @@ static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, + static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) + { +- struct lo_dirp *d = lo_dirp(fi); ++ struct lo_data *lo = lo_data(req); ++ struct lo_dirp *d; ++ + (void)ino; ++ ++ d = lo_dirp(req, fi); ++ if (!d) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ lo_map_remove(&lo->dirp_map, fi->fh); ++ pthread_mutex_unlock(&lo->mutex); ++ + closedir(d->dp); + free(d); + fuse_reply_err(req, 0); +@@ -1081,8 +1117,18 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) + { + int res; +- int fd = dirfd(lo_dirp(fi)->dp); ++ struct lo_dirp *d; ++ int fd; ++ + (void)ino; ++ ++ d = lo_dirp(req, fi); ++ if (!d) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ fd = dirfd(d->dp); + if (datasync) { + res = fdatasync(fd); + } else { +@@ -1614,6 +1660,8 @@ int main(int argc, char *argv[]) + root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); + root_elem->inode = &lo.root; + ++ lo_map_init(&lo.dirp_map); ++ + if (fuse_parse_cmdline(&args, &opts) != 0) { + return 1; + } +@@ -1710,6 
+1758,7 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + ++ lo_map_destroy(&lo.dirp_map); + lo_map_destroy(&lo.ino_map); + + if (lo.root.fd >= 0) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch new file mode 100644 index 0000000..b8de3d8 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch @@ -0,0 +1,303 @@ +From 03effbc021064bb77d231ae5ca02d1a579c71ee1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:17 +0100 +Subject: [PATCH 046/116] virtiofsd: passthrough_ll: add fallback for racy ops +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-43-dgilbert@redhat.com> +Patchwork-id: 93496 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 042/112] virtiofsd: passthrough_ll: add fallback for racy ops +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +We have two operations that cannot be done race-free on a symlink in +certain cases: utimes and link. + +Add racy fallback for these if the race-free method doesn't work. We do +our best to avoid races even in this case: + + - get absolute path by reading /proc/self/fd/NN symlink + + - lookup parent directory: after this we are safe against renames in + ancestors + + - lookup name in parent directory, and verify that we got to the original + inode, if not retry the whole thing + +Both utimes(2) and link(2) hold i_lock on the inode across the operation, +so a racing rename/delete by this fuse instance is not possible, only from +other entities changing the filesystem. + +If the "norace" option is given, then disable the racy fallbacks. + +Signed-off-by: Miklos Szeredi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 5fe319a7b19c9c328e6e061bffcf1ff6cc8b89ce) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 5 +- + tools/virtiofsd/passthrough_ll.c | 157 +++++++++++++++++++++++++++++++++++---- + 2 files changed, 145 insertions(+), 17 deletions(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index b8ec5ac..5531425 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -142,7 +142,10 @@ void fuse_cmdline_help(void) + " --daemonize run in background\n" + " -o max_idle_threads the maximum number of idle worker " + "threads\n" +- " allowed (default: 10)\n"); ++ " allowed (default: 10)\n" ++ " -o norace disable racy fallback\n" ++ " default: false\n" ++ ); + } + + static int fuse_helper_opt_proc(void *data, const char *arg, int key, +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9815bfa..ac380ef 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -98,6 +98,7 @@ enum { + struct lo_data { + pthread_mutex_t mutex; + int debug; ++ int norace; + int writeback; + int flock; + int xattr; +@@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = { + { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, + { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, + { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, +- ++ { "norace", offsetof(struct lo_data, norace), 1 }, + FUSE_OPT_END + }; + ++static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); ++ ++static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); ++ ++ + static struct lo_data *lo_data(fuse_req_t req) + { + return (struct lo_data *)fuse_req_userdata(req); +@@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, + fuse_reply_attr(req, &buf, lo->timeout); + } + +-static int utimensat_empty_nofollow(struct lo_inode *inode, +- const struct timespec 
*tv) ++static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, ++ char path[PATH_MAX], struct lo_inode **parent) + { +- int res; + char procname[64]; ++ char *last; ++ struct stat stat; ++ struct lo_inode *p; ++ int retries = 2; ++ int res; ++ ++retry: ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ res = readlink(procname, path, PATH_MAX); ++ if (res < 0) { ++ fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__); ++ goto fail_noretry; ++ } ++ ++ if (res >= PATH_MAX) { ++ fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__); ++ goto fail_noretry; ++ } ++ path[res] = '\0'; ++ ++ last = strrchr(path, '/'); ++ if (last == NULL) { ++ /* Shouldn't happen */ ++ fuse_log( ++ FUSE_LOG_WARNING, ++ "%s: INTERNAL ERROR: bad path read from proc\n", __func__); ++ goto fail_noretry; ++ } ++ if (last == path) { ++ p = &lo->root; ++ pthread_mutex_lock(&lo->mutex); ++ p->refcount++; ++ pthread_mutex_unlock(&lo->mutex); ++ } else { ++ *last = '\0'; ++ res = fstatat(AT_FDCWD, last == path ? 
"/" : path, &stat, 0); ++ if (res == -1) { ++ if (!retries) { ++ fuse_log(FUSE_LOG_WARNING, ++ "%s: failed to stat parent: %m\n", __func__); ++ } ++ goto fail; ++ } ++ p = lo_find(lo, &stat); ++ if (p == NULL) { ++ if (!retries) { ++ fuse_log(FUSE_LOG_WARNING, ++ "%s: failed to find parent\n", __func__); ++ } ++ goto fail; ++ } ++ } ++ last++; ++ res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ if (!retries) { ++ fuse_log(FUSE_LOG_WARNING, ++ "%s: failed to stat last\n", __func__); ++ } ++ goto fail_unref; ++ } ++ if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) { ++ if (!retries) { ++ fuse_log(FUSE_LOG_WARNING, ++ "%s: failed to match last\n", __func__); ++ } ++ goto fail_unref; ++ } ++ *parent = p; ++ memmove(path, last, strlen(last) + 1); ++ ++ return 0; ++ ++fail_unref: ++ unref_inode(lo, p, 1); ++fail: ++ if (retries) { ++ retries--; ++ goto retry; ++ } ++fail_noretry: ++ errno = EIO; ++ return -1; ++} ++ ++static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, ++ const struct timespec *tv) ++{ ++ int res; ++ struct lo_inode *parent; ++ char path[PATH_MAX]; + + if (inode->is_symlink) { +- res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); + if (res == -1 && errno == EINVAL) { + /* Sorry, no race free way to set times on symlink. 
*/ +- errno = EPERM; ++ if (lo->norace) { ++ errno = EPERM; ++ } else { ++ goto fallback; ++ } + } + return res; + } +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(path, "/proc/self/fd/%i", inode->fd); + +- return utimensat(AT_FDCWD, procname, tv, 0); ++ return utimensat(AT_FDCWD, path, tv, 0); ++ ++fallback: ++ res = lo_parent_and_name(lo, inode, path, &parent); ++ if (res != -1) { ++ res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); ++ unref_inode(lo, parent, 1); ++ } ++ ++ return res; + } + + static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) +@@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + { + int saverr; + char procname[64]; ++ struct lo_data *lo = lo_data(req); + struct lo_inode *inode; + int ifd; + int res; +@@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + if (fi) { + res = futimens(fd, tv); + } else { +- res = utimensat_empty_nofollow(inode, tv); ++ res = utimensat_empty(lo, inode, tv); + } + if (res == -1) { + goto out_err; +@@ -709,24 +820,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, + lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); + } + +-static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, +- const char *name) ++static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, ++ int dfd, const char *name) + { + int res; +- char procname[64]; ++ struct lo_inode *parent; ++ char path[PATH_MAX]; + + if (inode->is_symlink) { + res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); + if (res == -1 && (errno == ENOENT || errno == EINVAL)) { + /* Sorry, no race free way to hard-link a symlink. 
*/ +- errno = EPERM; ++ if (lo->norace) { ++ errno = EPERM; ++ } else { ++ goto fallback; ++ } + } + return res; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(path, "/proc/self/fd/%i", inode->fd); ++ ++ return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW); ++ ++fallback: ++ res = lo_parent_and_name(lo, inode, path, &parent); ++ if (res != -1) { ++ res = linkat(parent->fd, path, dfd, name, 0); ++ unref_inode(lo, parent, 1); ++ } + +- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); ++ return res; + } + + static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, +@@ -748,7 +873,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + e.attr_timeout = lo->timeout; + e.entry_timeout = lo->timeout; + +- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); ++ res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name); + if (res == -1) { + goto out_err; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch new file mode 100644 index 0000000..24b2a6e --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch @@ -0,0 +1,328 @@ +From 35337e604e9149d6d8fcf74b8b82ac33a8611ebb Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:16 +0100 +Subject: [PATCH 045/116] virtiofsd: passthrough_ll: add fd_map to hide file + descriptors +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-42-dgilbert@redhat.com> +Patchwork-id: 93494 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 041/112] virtiofsd: passthrough_ll: add fd_map to hide file descriptors +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Do not expose file descriptor numbers to clients. This prevents the +abuse of internal file descriptors (like stdin/stdout). + +Signed-off-by: Stefan Hajnoczi +Fix from: +Signed-off-by: Xiao Yang +dgilbert: + Added lseek +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 73b4d19dfc4248a74c1f3e511cfa934681d9c602) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 116 +++++++++++++++++++++++++++++++-------- + 1 file changed, 94 insertions(+), 22 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 5f5a72f..9815bfa 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -60,6 +60,7 @@ struct lo_map_elem { + union { + struct lo_inode *inode; + struct lo_dirp *dirp; ++ int fd; + ssize_t freelist; + }; + bool in_use; +@@ -107,6 +108,7 @@ struct lo_data { + struct lo_inode root; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ + struct lo_map dirp_map; /* protected by lo->mutex */ ++ struct lo_map fd_map; /* protected by lo->mutex */ + }; + + static const struct fuse_opt lo_opts[] = { +@@ -237,6 +239,20 @@ static void lo_map_remove(struct lo_map *map, size_t key) + } + + /* Assumes lo->mutex is held */ ++static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) ++{ ++ struct lo_map_elem *elem; ++ ++ elem = lo_map_alloc_elem(&lo_data(req)->fd_map); ++ if (!elem) { ++ return -1; ++ } ++ ++ elem->fd = fd; ++ return elem - lo_data(req)->fd_map.elems; ++} ++ ++/* Assumes lo->mutex is held */ + static ssize_t 
lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) + { + struct lo_map_elem *elem; +@@ -350,6 +366,22 @@ static int utimensat_empty_nofollow(struct lo_inode *inode, + return utimensat(AT_FDCWD, procname, tv, 0); + } + ++static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) ++{ ++ struct lo_data *lo = lo_data(req); ++ struct lo_map_elem *elem; ++ ++ pthread_mutex_lock(&lo->mutex); ++ elem = lo_map_get(&lo->fd_map, fi->fh); ++ pthread_mutex_unlock(&lo->mutex); ++ ++ if (!elem) { ++ return -1; ++ } ++ ++ return elem->fd; ++} ++ + static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + int valid, struct fuse_file_info *fi) + { +@@ -358,6 +390,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + struct lo_inode *inode; + int ifd; + int res; ++ int fd; + + inode = lo_inode(req, ino); + if (!inode) { +@@ -367,9 +400,14 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + + ifd = inode->fd; + ++ /* If fi->fh is invalid we'll report EBADF later */ ++ if (fi) { ++ fd = lo_fi_fd(req, fi); ++ } ++ + if (valid & FUSE_SET_ATTR_MODE) { + if (fi) { +- res = fchmod(fi->fh, attr->st_mode); ++ res = fchmod(fd, attr->st_mode); + } else { + sprintf(procname, "/proc/self/fd/%i", ifd); + res = chmod(procname, attr->st_mode); +@@ -389,7 +427,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + } + if (valid & FUSE_SET_ATTR_SIZE) { + if (fi) { +- res = ftruncate(fi->fh, attr->st_size); ++ res = ftruncate(fd, attr->st_size); + } else { + sprintf(procname, "/proc/self/fd/%i", ifd); + res = truncate(procname, attr->st_size); +@@ -419,7 +457,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + } + + if (fi) { +- res = futimens(fi->fh, tv); ++ res = futimens(fd, tv); + } else { + res = utimensat_empty_nofollow(inode, tv); + } +@@ -1096,7 +1134,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + lo_restore_cred(&old); + + 
if (!err) { +- fi->fh = fd; ++ ssize_t fh; ++ ++ pthread_mutex_lock(&lo->mutex); ++ fh = lo_add_fd_mapping(req, fd); ++ pthread_mutex_unlock(&lo->mutex); ++ if (fh == -1) { ++ close(fd); ++ fuse_reply_err(req, ENOMEM); ++ return; ++ } ++ ++ fi->fh = fh; + err = lo_do_lookup(req, parent, name, &e); + } + if (lo->cache == CACHE_NEVER) { +@@ -1140,6 +1189,7 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { + int fd; ++ ssize_t fh; + char buf[64]; + struct lo_data *lo = lo_data(req); + +@@ -1175,7 +1225,16 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + return (void)fuse_reply_err(req, errno); + } + +- fi->fh = fd; ++ pthread_mutex_lock(&lo->mutex); ++ fh = lo_add_fd_mapping(req, fd); ++ pthread_mutex_unlock(&lo->mutex); ++ if (fh == -1) { ++ close(fd); ++ fuse_reply_err(req, ENOMEM); ++ return; ++ } ++ ++ fi->fh = fh; + if (lo->cache == CACHE_NEVER) { + fi->direct_io = 1; + } else if (lo->cache == CACHE_ALWAYS) { +@@ -1187,9 +1246,18 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + static void lo_release(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) + { ++ struct lo_data *lo = lo_data(req); ++ int fd; ++ + (void)ino; + +- close(fi->fh); ++ fd = lo_fi_fd(req, fi); ++ ++ pthread_mutex_lock(&lo->mutex); ++ lo_map_remove(&lo->fd_map, fi->fh); ++ pthread_mutex_unlock(&lo->mutex); ++ ++ close(fd); + fuse_reply_err(req, 0); + } + +@@ -1197,7 +1265,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { + int res; + (void)ino; +- res = close(dup(fi->fh)); ++ res = close(dup(lo_fi_fd(req, fi))); + fuse_reply_err(req, res == -1 ? 
errno : 0); + } + +@@ -1224,7 +1292,7 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + return (void)fuse_reply_err(req, errno); + } + } else { +- fd = fi->fh; ++ fd = lo_fi_fd(req, fi); + } + + if (datasync) { +@@ -1251,7 +1319,7 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, + } + + buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; +- buf.buf[0].fd = fi->fh; ++ buf.buf[0].fd = lo_fi_fd(req, fi); + buf.buf[0].pos = offset; + + fuse_reply_data(req, &buf); +@@ -1266,7 +1334,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); + + out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; +- out_buf.buf[0].fd = fi->fh; ++ out_buf.buf[0].fd = lo_fi_fd(req, fi); + out_buf.buf[0].pos = off; + + if (lo_debug(req)) { +@@ -1303,7 +1371,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, + (void)ino; + + #ifdef CONFIG_FALLOCATE +- err = fallocate(fi->fh, mode, offset, length); ++ err = fallocate(lo_fi_fd(req, fi), mode, offset, length); + if (err < 0) { + err = errno; + } +@@ -1314,7 +1382,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, + return; + } + +- err = posix_fallocate(fi->fh, offset, length); ++ err = posix_fallocate(lo_fi_fd(req, fi), offset, length); + #endif + + fuse_reply_err(req, err); +@@ -1326,7 +1394,7 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + int res; + (void)ino; + +- res = flock(fi->fh, op); ++ res = flock(lo_fi_fd(req, fi), op); + + fuse_reply_err(req, res == -1 ? 
errno : 0); + } +@@ -1551,17 +1619,19 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, + off_t off_out, struct fuse_file_info *fi_out, + size_t len, int flags) + { ++ int in_fd, out_fd; + ssize_t res; + +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, +- "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " +- "off=%lu, ino=%" PRIu64 "/fd=%lu, " +- "off=%lu, size=%zd, flags=0x%x)\n", +- ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, len, +- flags); ++ in_fd = lo_fi_fd(req, fi_in); ++ out_fd = lo_fi_fd(req, fi_out); ++ ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, " ++ "off=%lu, ino=%" PRIu64 "/fd=%d, " ++ "off=%lu, size=%zd, flags=0x%x)\n", ++ ino_in, in_fd, off_in, ino_out, out_fd, off_out, len, flags); + +- res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, flags); ++ res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); + if (res < 0) { + fuse_reply_err(req, -errno); + } else { +@@ -1576,7 +1646,7 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, + off_t res; + + (void)ino; +- res = lseek(fi->fh, off, whence); ++ res = lseek(lo_fi_fd(req, fi), off, whence); + if (res != -1) { + fuse_reply_lseek(req, res); + } else { +@@ -1661,6 +1731,7 @@ int main(int argc, char *argv[]) + root_elem->inode = &lo.root; + + lo_map_init(&lo.dirp_map); ++ lo_map_init(&lo.fd_map); + + if (fuse_parse_cmdline(&args, &opts) != 0) { + return 1; +@@ -1758,6 +1829,7 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + ++ lo_map_destroy(&lo.fd_map); + lo_map_destroy(&lo.dirp_map); + lo_map_destroy(&lo.ino_map); + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch new file mode 100644 index 0000000..ba8b730 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch @@ -0,0 +1,395 @@ +From 
d81396cc3d9815730903b0755c9d2e67d6954d54 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:14 +0100 +Subject: [PATCH 043/116] virtiofsd: passthrough_ll: add ino_map to hide + lo_inode pointers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-40-dgilbert@redhat.com> +Patchwork-id: 93493 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 039/112] virtiofsd: passthrough_ll: add ino_map to hide lo_inode pointers +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Do not expose lo_inode pointers to clients. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 92fb57b83cdbfc4bf53c0c46a3d0bcbc36e64126) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 144 +++++++++++++++++++++++++++++++-------- + 1 file changed, 114 insertions(+), 30 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e83a976..a3ebf74 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -57,8 +57,8 @@ + #include "passthrough_helpers.h" + + /* +- * We are re-using pointers to our `struct lo_inode` and `struct +- * lo_dirp` elements as inodes. This means that we must be able to ++ * We are re-using pointers to our `struct lo_inode` ++ * elements as inodes. This means that we must be able to + * store uintptr_t values in a fuse_ino_t variable. The following + * incantation checks this condition at compile time. + */ +@@ -76,7 +76,7 @@ struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { + + struct lo_map_elem { + union { +- /* Element values will go here... 
*/ ++ struct lo_inode *inode; + ssize_t freelist; + }; + bool in_use; +@@ -97,6 +97,7 @@ struct lo_inode { + ino_t ino; + dev_t dev; + uint64_t refcount; /* protected by lo->mutex */ ++ fuse_ino_t fuse_ino; + }; + + struct lo_cred { +@@ -121,6 +122,7 @@ struct lo_data { + int cache; + int timeout_set; + struct lo_inode root; /* protected by lo->mutex */ ++ struct lo_map ino_map; /* protected by lo->mutex */ + }; + + static const struct fuse_opt lo_opts[] = { +@@ -145,14 +147,14 @@ static struct lo_data *lo_data(fuse_req_t req) + return (struct lo_data *)fuse_req_userdata(req); + } + +-__attribute__((unused)) static void lo_map_init(struct lo_map *map) ++static void lo_map_init(struct lo_map *map) + { + map->elems = NULL; + map->nelems = 0; + map->freelist = -1; + } + +-__attribute__((unused)) static void lo_map_destroy(struct lo_map *map) ++static void lo_map_destroy(struct lo_map *map) + { + free(map->elems); + } +@@ -183,8 +185,7 @@ static int lo_map_grow(struct lo_map *map, size_t new_nelems) + return 1; + } + +-__attribute__((unused)) static struct lo_map_elem * +-lo_map_alloc_elem(struct lo_map *map) ++static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map) + { + struct lo_map_elem *elem; + +@@ -200,8 +201,7 @@ lo_map_alloc_elem(struct lo_map *map) + return elem; + } + +-__attribute__((unused)) static struct lo_map_elem * +-lo_map_reserve(struct lo_map *map, size_t key) ++static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key) + { + ssize_t *prev; + +@@ -222,8 +222,7 @@ lo_map_reserve(struct lo_map *map, size_t key) + return NULL; + } + +-__attribute__((unused)) static struct lo_map_elem * +-lo_map_get(struct lo_map *map, size_t key) ++static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key) + { + if (key >= map->nelems) { + return NULL; +@@ -234,8 +233,7 @@ lo_map_get(struct lo_map *map, size_t key) + return &map->elems[key]; + } + +-__attribute__((unused)) static void lo_map_remove(struct lo_map *map, +- size_t key) 
++static void lo_map_remove(struct lo_map *map, size_t key) + { + struct lo_map_elem *elem; + +@@ -254,18 +252,40 @@ __attribute__((unused)) static void lo_map_remove(struct lo_map *map, + map->freelist = key; + } + ++/* Assumes lo->mutex is held */ ++static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) ++{ ++ struct lo_map_elem *elem; ++ ++ elem = lo_map_alloc_elem(&lo_data(req)->ino_map); ++ if (!elem) { ++ return -1; ++ } ++ ++ elem->inode = inode; ++ return elem - lo_data(req)->ino_map.elems; ++} ++ + static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + { +- if (ino == FUSE_ROOT_ID) { +- return &lo_data(req)->root; +- } else { +- return (struct lo_inode *)(uintptr_t)ino; ++ struct lo_data *lo = lo_data(req); ++ struct lo_map_elem *elem; ++ ++ pthread_mutex_lock(&lo->mutex); ++ elem = lo_map_get(&lo->ino_map, ino); ++ pthread_mutex_unlock(&lo->mutex); ++ ++ if (!elem) { ++ return NULL; + } ++ ++ return elem->inode; + } + + static int lo_fd(fuse_req_t req, fuse_ino_t ino) + { +- return lo_inode(req, ino)->fd; ++ struct lo_inode *inode = lo_inode(req, ino); ++ return inode ? 
inode->fd : -1; + } + + static bool lo_debug(fuse_req_t req) +@@ -337,10 +357,18 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + { + int saverr; + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- int ifd = inode->fd; ++ struct lo_inode *inode; ++ int ifd; + int res; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ ifd = inode->fd; ++ + if (valid & FUSE_SET_ATTR_MODE) { + if (fi) { + res = fchmod(fi->fh, attr->st_mode); +@@ -470,6 +498,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + inode->dev = e->attr.st_dev; + + pthread_mutex_lock(&lo->mutex); ++ inode->fuse_ino = lo_add_inode_mapping(req, inode); + prev = &lo->root; + next = prev->next; + next->prev = inode; +@@ -478,7 +507,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + prev->next = inode; + pthread_mutex_unlock(&lo->mutex); + } +- e->ino = (uintptr_t)inode; ++ e->ino = inode->fuse_ino; + + if (lo_debug(req)) { + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +@@ -582,10 +611,16 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + { + int res; + int saverr; +- struct lo_inode *dir = lo_inode(req, parent); ++ struct lo_inode *dir; + struct fuse_entry_param e; + struct lo_cred old = {}; + ++ dir = lo_inode(req, parent); ++ if (!dir) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOMEM; + + saverr = lo_change_cred(req, &old); +@@ -663,10 +698,16 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + { + int res; + struct lo_data *lo = lo_data(req); +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + struct fuse_entry_param e; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + memset(&e, 0, sizeof(struct fuse_entry_param)); + e.attr_timeout = lo->timeout; + e.entry_timeout = lo->timeout; +@@ -684,7 
+725,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + pthread_mutex_lock(&lo->mutex); + inode->refcount++; + pthread_mutex_unlock(&lo->mutex); +- e.ino = (uintptr_t)inode; ++ e.ino = inode->fuse_ino; + + if (lo_debug(req)) { + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +@@ -750,10 +791,10 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) + next->prev = prev; + prev->next = next; + ++ lo_map_remove(&lo->ino_map, inode->fuse_ino); + pthread_mutex_unlock(&lo->mutex); + close(inode->fd); + free(inode); +- + } else { + pthread_mutex_unlock(&lo->mutex); + } +@@ -762,7 +803,12 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { + struct lo_data *lo = lo_data(req); +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; ++ ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ return; ++ } + + if (lo_debug(req)) { + fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", +@@ -1244,10 +1290,16 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + { + char *value = NULL; + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + ssize_t ret; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOSYS; + if (!lo_data(req)->xattr) { + goto out; +@@ -1306,10 +1358,16 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + { + char *value = NULL; + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + ssize_t ret; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOSYS; + if (!lo_data(req)->xattr) { + goto out; +@@ -1367,10 +1425,16 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + const char 
*value, size_t size, int flags) + { + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + ssize_t ret; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOSYS; + if (!lo_data(req)->xattr) { + goto out; +@@ -1400,10 +1464,16 @@ out: + static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + { + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + ssize_t ret; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOSYS; + if (!lo_data(req)->xattr) { + goto out; +@@ -1522,6 +1592,7 @@ int main(int argc, char *argv[]) + struct fuse_session *se; + struct fuse_cmdline_opts opts; + struct lo_data lo = { .debug = 0, .writeback = 0 }; ++ struct lo_map_elem *root_elem; + int ret = -1; + + /* Don't mask creation mode, kernel already did that */ +@@ -1530,8 +1601,19 @@ int main(int argc, char *argv[]) + pthread_mutex_init(&lo.mutex, NULL); + lo.root.next = lo.root.prev = &lo.root; + lo.root.fd = -1; ++ lo.root.fuse_ino = FUSE_ROOT_ID; + lo.cache = CACHE_NORMAL; + ++ /* ++ * Set up the ino map like this: ++ * [0] Reserved (will not be used) ++ * [1] Root inode ++ */ ++ lo_map_init(&lo.ino_map); ++ lo_map_reserve(&lo.ino_map, 0)->in_use = false; ++ root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); ++ root_elem->inode = &lo.root; ++ + if (fuse_parse_cmdline(&args, &opts) != 0) { + return 1; + } +@@ -1628,6 +1710,8 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + ++ lo_map_destroy(&lo.ino_map); ++ + if (lo.root.fd >= 0) { + close(lo.root.fd); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch new file mode 100644 index 0000000..4751f95 --- /dev/null +++ 
b/SOURCES/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch @@ -0,0 +1,182 @@ +From d56651e227bae83ee0cceb12bd91e3e9f6045ab3 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:13 +0100 +Subject: [PATCH 042/116] virtiofsd: passthrough_ll: add lo_map for ino/fh + indirection +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-39-dgilbert@redhat.com> +Patchwork-id: 93492 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 038/112] virtiofsd: passthrough_ll: add lo_map for ino/fh indirection +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +A layer of indirection is needed because passthrough_ll cannot expose +pointers or file descriptor numbers to untrusted clients. Malicious +clients could send invalid pointers or file descriptors in order to +crash or exploit the file system daemon. + +lo_map provides an integer key->value mapping. This will be used for +ino and fh fields in the patches that follow. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 25c135727b08dca90f00094e522a69170b13dfac) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 124 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 124 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 5e06179..e83a976 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -74,6 +74,21 @@ struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { + }; + #endif + ++struct lo_map_elem { ++ union { ++ /* Element values will go here... 
*/ ++ ssize_t freelist; ++ }; ++ bool in_use; ++}; ++ ++/* Maps FUSE fh or ino values to internal objects */ ++struct lo_map { ++ struct lo_map_elem *elems; ++ size_t nelems; ++ ssize_t freelist; ++}; ++ + struct lo_inode { + struct lo_inode *next; /* protected by lo->mutex */ + struct lo_inode *prev; /* protected by lo->mutex */ +@@ -130,6 +145,115 @@ static struct lo_data *lo_data(fuse_req_t req) + return (struct lo_data *)fuse_req_userdata(req); + } + ++__attribute__((unused)) static void lo_map_init(struct lo_map *map) ++{ ++ map->elems = NULL; ++ map->nelems = 0; ++ map->freelist = -1; ++} ++ ++__attribute__((unused)) static void lo_map_destroy(struct lo_map *map) ++{ ++ free(map->elems); ++} ++ ++static int lo_map_grow(struct lo_map *map, size_t new_nelems) ++{ ++ struct lo_map_elem *new_elems; ++ size_t i; ++ ++ if (new_nelems <= map->nelems) { ++ return 1; ++ } ++ ++ new_elems = realloc(map->elems, sizeof(map->elems[0]) * new_nelems); ++ if (!new_elems) { ++ return 0; ++ } ++ ++ for (i = map->nelems; i < new_nelems; i++) { ++ new_elems[i].freelist = i + 1; ++ new_elems[i].in_use = false; ++ } ++ new_elems[new_nelems - 1].freelist = -1; ++ ++ map->elems = new_elems; ++ map->freelist = map->nelems; ++ map->nelems = new_nelems; ++ return 1; ++} ++ ++__attribute__((unused)) static struct lo_map_elem * ++lo_map_alloc_elem(struct lo_map *map) ++{ ++ struct lo_map_elem *elem; ++ ++ if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) { ++ return NULL; ++ } ++ ++ elem = &map->elems[map->freelist]; ++ map->freelist = elem->freelist; ++ ++ elem->in_use = true; ++ ++ return elem; ++} ++ ++__attribute__((unused)) static struct lo_map_elem * ++lo_map_reserve(struct lo_map *map, size_t key) ++{ ++ ssize_t *prev; ++ ++ if (!lo_map_grow(map, key + 1)) { ++ return NULL; ++ } ++ ++ for (prev = &map->freelist; *prev != -1; ++ prev = &map->elems[*prev].freelist) { ++ if (*prev == key) { ++ struct lo_map_elem *elem = &map->elems[key]; ++ ++ *prev = elem->freelist; 
++ elem->in_use = true; ++ return elem; ++ } ++ } ++ return NULL; ++} ++ ++__attribute__((unused)) static struct lo_map_elem * ++lo_map_get(struct lo_map *map, size_t key) ++{ ++ if (key >= map->nelems) { ++ return NULL; ++ } ++ if (!map->elems[key].in_use) { ++ return NULL; ++ } ++ return &map->elems[key]; ++} ++ ++__attribute__((unused)) static void lo_map_remove(struct lo_map *map, ++ size_t key) ++{ ++ struct lo_map_elem *elem; ++ ++ if (key >= map->nelems) { ++ return; ++ } ++ ++ elem = &map->elems[key]; ++ if (!elem->in_use) { ++ return; ++ } ++ ++ elem->in_use = false; ++ ++ elem->freelist = map->freelist; ++ map->freelist = key; ++} ++ + static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + { + if (ino == FUSE_ROOT_ID) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch new file mode 100644 index 0000000..a3f7970 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch @@ -0,0 +1,52 @@ +From 86b4f2865f2ebd7e6b3d85beb66a9390eb46eb96 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:45 +0100 +Subject: [PATCH 074/116] virtiofsd: passthrough_ll: add renameat2 support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-71-dgilbert@redhat.com> +Patchwork-id: 93531 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 070/112] virtiofsd: passthrough_ll: add renameat2 support +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Signed-off-by: Miklos Szeredi +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit f0ab7d6f78a7d3c1c19fd81a91c9b1199f56c4f6) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 98114a3..18d69ab 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1099,7 +1099,17 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + if (flags) { ++#ifndef SYS_renameat2 + fuse_reply_err(req, EINVAL); ++#else ++ res = syscall(SYS_renameat2, lo_fd(req, parent), name, ++ lo_fd(req, newparent), newname, flags); ++ if (res == -1 && errno == ENOSYS) { ++ fuse_reply_err(req, EINVAL); ++ } else { ++ fuse_reply_err(req, res == -1 ? errno : 0); ++ } ++#endif + return; + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch new file mode 100644 index 0000000..dc87ef2 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch @@ -0,0 +1,138 @@ +From 079199c53f483f0051f994b195ebb595aec76a39 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:51 +0100 +Subject: [PATCH 080/116] virtiofsd: passthrough_ll: clean up cache related + options +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-77-dgilbert@redhat.com> +Patchwork-id: 93530 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 076/112] virtiofsd: passthrough_ll: clean up cache related options +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + + - Rename "cache=never" to "cache=none" to match 9p's similar option. 
+ + - Rename CACHE_NORMAL constant to CACHE_AUTO to match the "cache=auto" + option. + +Signed-off-by: Miklos Szeredi +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 230e777b5e250759ee0480fcc0e9ccfa2b082fba) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 5 ++++- + tools/virtiofsd/passthrough_ll.c | 20 ++++++++++---------- + 2 files changed, 14 insertions(+), 11 deletions(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 14f5d70..5672024 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -145,6 +145,9 @@ void fuse_cmdline_help(void) + " --syslog log to syslog (default stderr)\n" + " -f foreground operation\n" + " --daemonize run in background\n" ++ " -o cache= cache mode. could be one of \"auto, " ++ "always, none\"\n" ++ " default: auto\n" + " -o log_level= log level, default to \"info\"\n" + " level could be one of \"debug, " + "info, warn, err\"\n" +@@ -156,7 +159,7 @@ void fuse_cmdline_help(void) + " -o readdirplus|no_readdirplus\n" + " enable/disable readirplus\n" + " default: readdirplus except with " +- "cache=never\n" ++ "cache=none\n" + ); + } + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9e7191e..b40f287 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -101,8 +101,8 @@ struct lo_cred { + }; + + enum { +- CACHE_NEVER, +- CACHE_NORMAL, ++ CACHE_NONE, ++ CACHE_AUTO, + CACHE_ALWAYS, + }; + +@@ -138,8 +138,8 @@ static const struct fuse_opt lo_opts[] = { + { "no_xattr", offsetof(struct lo_data, xattr), 0 }, + { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, + { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, +- { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, +- { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, ++ { "cache=none", offsetof(struct lo_data, cache), 
CACHE_NONE }, ++ { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO }, + { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, + { "norace", offsetof(struct lo_data, norace), 1 }, + { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, +@@ -482,7 +482,7 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } +- if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) || ++ if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || + lo->readdirplus_clear) { + fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); + conn->want &= ~FUSE_CAP_READDIRPLUS; +@@ -1493,7 +1493,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + fi->fh = fh; + err = lo_do_lookup(req, parent, name, &e); + } +- if (lo->cache == CACHE_NEVER) { ++ if (lo->cache == CACHE_NONE) { + fi->direct_io = 1; + } else if (lo->cache == CACHE_ALWAYS) { + fi->keep_cache = 1; +@@ -1578,7 +1578,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + } + + fi->fh = fh; +- if (lo->cache == CACHE_NEVER) { ++ if (lo->cache == CACHE_NONE) { + fi->direct_io = 1; + } else if (lo->cache == CACHE_ALWAYS) { + fi->keep_cache = 1; +@@ -2395,7 +2395,7 @@ int main(int argc, char *argv[]) + lo.root.next = lo.root.prev = &lo.root; + lo.root.fd = -1; + lo.root.fuse_ino = FUSE_ROOT_ID; +- lo.cache = CACHE_NORMAL; ++ lo.cache = CACHE_AUTO; + + /* + * Set up the ino map like this: +@@ -2470,11 +2470,11 @@ int main(int argc, char *argv[]) + } + if (!lo.timeout_set) { + switch (lo.cache) { +- case CACHE_NEVER: ++ case CACHE_NONE: + lo.timeout = 0.0; + break; + +- case CACHE_NORMAL: ++ case CACHE_AUTO: + lo.timeout = 1.0; + break; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch new file mode 100644 
index 0000000..c55eead --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch @@ -0,0 +1,154 @@ +From f93ea308351cbe2630d7ecf637c3b69894d84a11 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:13 +0000 +Subject: [PATCH 17/18] virtiofsd: passthrough_ll: cleanup getxattr/listxattr +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-7-dgilbert@redhat.com> +Patchwork-id: 94125 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/7] virtiofsd: passthrough_ll: cleanup getxattr/listxattr +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: Misono Tomohiro + +This is a cleanup patch to simplify the following xattr fix and +there is no functional changes. + +- Move memory allocation to head of the function +- Unify fgetxattr/flistxattr call for both size == 0 and + size != 0 case +- Remove redundant lo_inode_put call in error path + (Note: second call is ignored now since @inode is already NULL) + +Signed-off-by: Misono Tomohiro +Message-Id: <20200227055927.24566-2-misono.tomohiro@jp.fujitsu.com> +Acked-by: Vivek Goyal +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 16e15a73089102c3d8846792d514e769300fcc3c) +Signed-off-by: Danilo C. L. 
de Paula +--- + tools/virtiofsd/passthrough_ll.c | 54 ++++++++++++++++------------------------ + 1 file changed, 22 insertions(+), 32 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index c635fc8..50c7273 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2199,34 +2199,30 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + ++ if (size) { ++ value = malloc(size); ++ if (!value) { ++ goto out_err; ++ } ++ } ++ + sprintf(procname, "%i", inode->fd); + fd = openat(lo->proc_self_fd, procname, O_RDONLY); + if (fd < 0) { + goto out_err; + } + ++ ret = fgetxattr(fd, name, value, size); ++ if (ret == -1) { ++ goto out_err; ++ } + if (size) { +- value = malloc(size); +- if (!value) { +- goto out_err; +- } +- +- ret = fgetxattr(fd, name, value, size); +- if (ret == -1) { +- goto out_err; +- } + saverr = 0; + if (ret == 0) { + goto out; + } +- + fuse_reply_buf(req, value, ret); + } else { +- ret = fgetxattr(fd, name, NULL, 0); +- if (ret == -1) { +- goto out_err; +- } +- + fuse_reply_xattr(req, ret); + } + out_free: +@@ -2242,7 +2238,6 @@ out_free: + out_err: + saverr = errno; + out: +- lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + goto out_free; + } +@@ -2277,34 +2272,30 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + goto out; + } + ++ if (size) { ++ value = malloc(size); ++ if (!value) { ++ goto out_err; ++ } ++ } ++ + sprintf(procname, "%i", inode->fd); + fd = openat(lo->proc_self_fd, procname, O_RDONLY); + if (fd < 0) { + goto out_err; + } + ++ ret = flistxattr(fd, value, size); ++ if (ret == -1) { ++ goto out_err; ++ } + if (size) { +- value = malloc(size); +- if (!value) { +- goto out_err; +- } +- +- ret = flistxattr(fd, value, size); +- if (ret == -1) { +- goto out_err; +- } + saverr = 0; + if (ret == 0) { + goto out; + } +- + fuse_reply_buf(req, value, ret); + } else { +- ret = flistxattr(fd, NULL, 
0); +- if (ret == -1) { +- goto out_err; +- } +- + fuse_reply_xattr(req, ret); + } + out_free: +@@ -2320,7 +2311,6 @@ out_free: + out_err: + saverr = errno; + out: +- lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + goto out_free; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch new file mode 100644 index 0000000..98d00fc --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch @@ -0,0 +1,79 @@ +From 0f1d456fad4ba6a696eff8976b9fe8a0f251e1b5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:47 +0100 +Subject: [PATCH 076/116] virtiofsd: passthrough_ll: control readdirplus +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-73-dgilbert@redhat.com> +Patchwork-id: 93524 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 072/112] virtiofsd: passthrough_ll: control readdirplus +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Signed-off-by: Miklos Szeredi +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 59aef494be2d8d91055ff3f3a8eb13d9f32873d8) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 4 ++++ + tools/virtiofsd/passthrough_ll.c | 7 ++++++- + 2 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 6d50a46..14f5d70 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -153,6 +153,10 @@ void fuse_cmdline_help(void) + " allowed (default: 10)\n" + " -o norace disable racy fallback\n" + " default: false\n" ++ " -o readdirplus|no_readdirplus\n" ++ " enable/disable readirplus\n" ++ " default: readdirplus except with " ++ "cache=never\n" + ); + } + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 6480c51..8b1784f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -117,6 +117,8 @@ struct lo_data { + double timeout; + int cache; + int timeout_set; ++ int readdirplus_set; ++ int readdirplus_clear; + struct lo_inode root; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ + struct lo_map dirp_map; /* protected by lo->mutex */ +@@ -140,6 +142,8 @@ static const struct fuse_opt lo_opts[] = { + { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, + { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, + { "norace", offsetof(struct lo_data, norace), 1 }, ++ { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, ++ { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, + FUSE_OPT_END + }; + static bool use_syslog = false; +@@ -478,7 +482,8 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } +- if (lo->cache == CACHE_NEVER) { ++ if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) || ++ lo->readdirplus_clear) { + fuse_log(FUSE_LOG_DEBUG, 
"lo_init: disabling readdirplus\n"); + conn->want &= ~FUSE_CAP_READDIRPLUS; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch new file mode 100644 index 0000000..4b02779 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch @@ -0,0 +1,198 @@ +From af14ef1dba9356e566c9c7531b8fd23361c2b16d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:12 +0100 +Subject: [PATCH 041/116] virtiofsd: passthrough_ll: create new files in + caller's context +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-38-dgilbert@redhat.com> +Patchwork-id: 93488 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 037/112] virtiofsd: passthrough_ll: create new files in caller's context +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +We need to create files in the caller's context. Otherwise after +creating a file, the caller might not be able to do file operations on +that file. + +Changed effective uid/gid to caller's uid/gid, create file and then +switch back to uid/gid 0. + +Use syscall(setresuid, ...) otherwise glibc does some magic to change EUID +in all threads, which is not what we want. + +Signed-off-by: Vivek Goyal +Signed-off-by: Miklos Szeredi +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 929cfb7a9a1b101cdfc9ac19807ecab4c81a13e4) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 96 +++++++++++++++++++++++++++++++++++++--- + 1 file changed, 91 insertions(+), 5 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index cd27c09..5e06179 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -50,6 +50,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -83,6 +84,11 @@ struct lo_inode { + uint64_t refcount; /* protected by lo->mutex */ + }; + ++struct lo_cred { ++ uid_t euid; ++ gid_t egid; ++}; ++ + enum { + CACHE_NEVER, + CACHE_NORMAL, +@@ -383,6 +389,69 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + } + } + ++/* ++ * On some archs, setres*id is limited to 2^16 but they ++ * provide setres*id32 variants that allow 2^32. ++ * Others just let setres*id do 2^32 anyway. ++ */ ++#ifdef SYS_setresgid32 ++#define OURSYS_setresgid SYS_setresgid32 ++#else ++#define OURSYS_setresgid SYS_setresgid ++#endif ++ ++#ifdef SYS_setresuid32 ++#define OURSYS_setresuid SYS_setresuid32 ++#else ++#define OURSYS_setresuid SYS_setresuid ++#endif ++ ++/* ++ * Change to uid/gid of caller so that file is created with ++ * ownership of caller. ++ * TODO: What about selinux context? 
++ */ ++static int lo_change_cred(fuse_req_t req, struct lo_cred *old) ++{ ++ int res; ++ ++ old->euid = geteuid(); ++ old->egid = getegid(); ++ ++ res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1); ++ if (res == -1) { ++ return errno; ++ } ++ ++ res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1); ++ if (res == -1) { ++ int errno_save = errno; ++ ++ syscall(OURSYS_setresgid, -1, old->egid, -1); ++ return errno_save; ++ } ++ ++ return 0; ++} ++ ++/* Regain Privileges */ ++static void lo_restore_cred(struct lo_cred *old) ++{ ++ int res; ++ ++ res = syscall(OURSYS_setresuid, -1, old->euid, -1); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid); ++ exit(1); ++ } ++ ++ res = syscall(OURSYS_setresgid, -1, old->egid, -1); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid); ++ exit(1); ++ } ++} ++ + static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + const char *name, mode_t mode, dev_t rdev, + const char *link) +@@ -391,12 +460,21 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + int saverr; + struct lo_inode *dir = lo_inode(req, parent); + struct fuse_entry_param e; ++ struct lo_cred old = {}; + + saverr = ENOMEM; + ++ saverr = lo_change_cred(req, &old); ++ if (saverr) { ++ goto out; ++ } ++ + res = mknod_wrapper(dir->fd, name, link, mode, rdev); + + saverr = errno; ++ ++ lo_restore_cred(&old); ++ + if (res == -1) { + goto out; + } +@@ -794,26 +872,34 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + struct lo_data *lo = lo_data(req); + struct fuse_entry_param e; + int err; ++ struct lo_cred old = {}; + + if (lo_debug(req)) { + fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", + parent, name); + } + ++ err = lo_change_cred(req, &old); ++ if (err) { ++ goto out; ++ } ++ + fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); +- if (fd == -1) { +- return (void)fuse_reply_err(req, 
errno); +- } ++ err = fd == -1 ? errno : 0; ++ lo_restore_cred(&old); + +- fi->fh = fd; ++ if (!err) { ++ fi->fh = fd; ++ err = lo_do_lookup(req, parent, name, &e); ++ } + if (lo->cache == CACHE_NEVER) { + fi->direct_io = 1; + } else if (lo->cache == CACHE_ALWAYS) { + fi->keep_cache = 1; + } + +- err = lo_do_lookup(req, parent, name, &e); ++out: + if (err) { + fuse_reply_err(req, err); + } else { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch new file mode 100644 index 0000000..4a531a3 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch @@ -0,0 +1,50 @@ +From bbf92338e5e5eed796d511d2bd3c3686b7d1e5fd Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:46 +0100 +Subject: [PATCH 075/116] virtiofsd: passthrough_ll: disable readdirplus on + cache=never +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-72-dgilbert@redhat.com> +Patchwork-id: 93525 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 071/112] virtiofsd: passthrough_ll: disable readdirplus on cache=never +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +...because the attributes sent in the READDIRPLUS reply would be discarded +anyway. + +Signed-off-by: Miklos Szeredi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit ddcbabcb0ea177be3ec3500726b699c7c26ffd93) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 18d69ab..6480c51 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -478,6 +478,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } ++ if (lo->cache == CACHE_NEVER) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); ++ conn->want &= ~FUSE_CAP_READDIRPLUS; ++ } + } + + static void lo_getattr(fuse_req_t req, fuse_ino_t ino, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch new file mode 100644 index 0000000..00e11b4 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch @@ -0,0 +1,143 @@ +From 5e33269d5fbc4ba4614bab4a6b9e0ef759bebcb7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:10 +0100 +Subject: [PATCH 099/116] virtiofsd: passthrough_ll: fix refcounting on + remove/rename +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-96-dgilbert@redhat.com> +Patchwork-id: 93549 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 095/112] virtiofsd: passthrough_ll: fix refcounting on remove/rename +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Signed-off-by: Miklos Szeredi +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 9257e514d861afa759c36704e1904d43ca3fec88) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 50 +++++++++++++++++++++++++++++++++++++++- + 1 file changed, 49 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index c819b5f..e3a6d6b 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1140,17 +1140,42 @@ out_err: + fuse_reply_err(req, saverr); + } + ++static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, ++ const char *name) ++{ ++ int res; ++ struct stat attr; ++ ++ res = fstatat(lo_fd(req, parent), name, &attr, ++ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ return NULL; ++ } ++ ++ return lo_find(lo_data(req), &attr); ++} ++ + static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) + { + int res; ++ struct lo_inode *inode; ++ struct lo_data *lo = lo_data(req); ++ + if (!is_safe_path_component(name)) { + fuse_reply_err(req, EINVAL); + return; + } + ++ inode = lookup_name(req, parent, name); ++ if (!inode) { ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ + res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); + + fuse_reply_err(req, res == -1 ? 
errno : 0); ++ unref_inode_lolocked(lo, inode, 1); + } + + static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, +@@ -1158,12 +1183,23 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + unsigned int flags) + { + int res; ++ struct lo_inode *oldinode; ++ struct lo_inode *newinode; ++ struct lo_data *lo = lo_data(req); + + if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { + fuse_reply_err(req, EINVAL); + return; + } + ++ oldinode = lookup_name(req, parent, name); ++ newinode = lookup_name(req, newparent, newname); ++ ++ if (!oldinode) { ++ fuse_reply_err(req, EIO); ++ goto out; ++ } ++ + if (flags) { + #ifndef SYS_renameat2 + fuse_reply_err(req, EINVAL); +@@ -1176,26 +1212,38 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + fuse_reply_err(req, res == -1 ? errno : 0); + } + #endif +- return; ++ goto out; + } + + res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); + + fuse_reply_err(req, res == -1 ? errno : 0); ++out: ++ unref_inode_lolocked(lo, oldinode, 1); ++ unref_inode_lolocked(lo, newinode, 1); + } + + static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + { + int res; ++ struct lo_inode *inode; ++ struct lo_data *lo = lo_data(req); + + if (!is_safe_path_component(name)) { + fuse_reply_err(req, EINVAL); + return; + } + ++ inode = lookup_name(req, parent, name); ++ if (!inode) { ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ + res = unlinkat(lo_fd(req, parent), name, 0); + + fuse_reply_err(req, res == -1 ? 
errno : 0); ++ unref_inode_lolocked(lo, inode, 1); + } + + static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-passthrough_ll-use-hashtable.patch b/SOURCES/kvm-virtiofsd-passthrough_ll-use-hashtable.patch new file mode 100644 index 0000000..b0be1f9 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-passthrough_ll-use-hashtable.patch @@ -0,0 +1,211 @@ +From 44f4434b1305f6ff47b4f63fafcf39bcea9e4ceb Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:52 +0100 +Subject: [PATCH 081/116] virtiofsd: passthrough_ll: use hashtable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-78-dgilbert@redhat.com> +Patchwork-id: 93528 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 077/112] virtiofsd: passthrough_ll: use hashtable +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Improve performance of inode lookup by using a hash table. + +Signed-off-by: Miklos Szeredi +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit bfc50a6e06b10b2f9dbaf6c1a89dd523322e016f) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 81 ++++++++++++++++++++++------------------ + 1 file changed, 45 insertions(+), 36 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index b40f287..b176a31 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -84,13 +84,15 @@ struct lo_map { + ssize_t freelist; + }; + ++struct lo_key { ++ ino_t ino; ++ dev_t dev; ++}; ++ + struct lo_inode { +- struct lo_inode *next; /* protected by lo->mutex */ +- struct lo_inode *prev; /* protected by lo->mutex */ + int fd; + bool is_symlink; +- ino_t ino; +- dev_t dev; ++ struct lo_key key; + uint64_t refcount; /* protected by lo->mutex */ + fuse_ino_t fuse_ino; + }; +@@ -119,7 +121,8 @@ struct lo_data { + int timeout_set; + int readdirplus_set; + int readdirplus_clear; +- struct lo_inode root; /* protected by lo->mutex */ ++ struct lo_inode root; ++ GHashTable *inodes; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ + struct lo_map dirp_map; /* protected by lo->mutex */ + struct lo_map fd_map; /* protected by lo->mutex */ +@@ -573,7 +576,7 @@ retry: + } + goto fail_unref; + } +- if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) { ++ if (stat.st_dev != inode->key.dev || stat.st_ino != inode->key.ino) { + if (!retries) { + fuse_log(FUSE_LOG_WARNING, + "%s: failed to match last\n", __func__); +@@ -753,19 +756,20 @@ out_err: + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + { + struct lo_inode *p; +- struct lo_inode *ret = NULL; ++ struct lo_key key = { ++ .ino = st->st_ino, ++ .dev = st->st_dev, ++ }; + + pthread_mutex_lock(&lo->mutex); +- for (p = lo->root.next; p != &lo->root; p = p->next) { +- if (p->ino == st->st_ino && p->dev == st->st_dev) { +- assert(p->refcount > 0); +- ret = p; +- ret->refcount++; +- break; +- 
} ++ p = g_hash_table_lookup(lo->inodes, &key); ++ if (p) { ++ assert(p->refcount > 0); ++ p->refcount++; + } + pthread_mutex_unlock(&lo->mutex); +- return ret; ++ ++ return p; + } + + static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, +@@ -810,8 +814,6 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + close(newfd); + newfd = -1; + } else { +- struct lo_inode *prev, *next; +- + saverr = ENOMEM; + inode = calloc(1, sizeof(struct lo_inode)); + if (!inode) { +@@ -822,17 +824,12 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + inode->refcount = 1; + inode->fd = newfd; + newfd = -1; +- inode->ino = e->attr.st_ino; +- inode->dev = e->attr.st_dev; ++ inode->key.ino = e->attr.st_ino; ++ inode->key.dev = e->attr.st_dev; + + pthread_mutex_lock(&lo->mutex); + inode->fuse_ino = lo_add_inode_mapping(req, inode); +- prev = &lo->root; +- next = prev->next; +- next->prev = inode; +- inode->next = next; +- inode->prev = prev; +- prev->next = inode; ++ g_hash_table_insert(lo->inodes, &inode->key, inode); + pthread_mutex_unlock(&lo->mutex); + } + e->ino = inode->fuse_ino; +@@ -1162,14 +1159,8 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + assert(inode->refcount >= n); + inode->refcount -= n; + if (!inode->refcount) { +- struct lo_inode *prev, *next; +- +- prev = inode->prev; +- next = inode->next; +- next->prev = prev; +- prev->next = next; +- + lo_map_remove(&lo->ino_map, inode->fuse_ino); ++ g_hash_table_remove(lo->inodes, &inode->key); + pthread_mutex_unlock(&lo->mutex); + close(inode->fd); + free(inode); +@@ -1369,7 +1360,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + + /* Hide root's parent directory */ + if (dinode == &lo->root && strcmp(name, "..") == 0) { +- e.attr.st_ino = lo->root.ino; ++ e.attr.st_ino = lo->root.key.ino; + e.attr.st_mode = DT_DIR << 12; + } + +@@ -2370,11 +2361,26 @@ static void setup_root(struct 
lo_data *lo, struct lo_inode *root) + + root->is_symlink = false; + root->fd = fd; +- root->ino = stat.st_ino; +- root->dev = stat.st_dev; ++ root->key.ino = stat.st_ino; ++ root->key.dev = stat.st_dev; + root->refcount = 2; + } + ++static guint lo_key_hash(gconstpointer key) ++{ ++ const struct lo_key *lkey = key; ++ ++ return (guint)lkey->ino + (guint)lkey->dev; ++} ++ ++static gboolean lo_key_equal(gconstpointer a, gconstpointer b) ++{ ++ const struct lo_key *la = a; ++ const struct lo_key *lb = b; ++ ++ return la->ino == lb->ino && la->dev == lb->dev; ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2392,7 +2398,7 @@ int main(int argc, char *argv[]) + umask(0); + + pthread_mutex_init(&lo.mutex, NULL); +- lo.root.next = lo.root.prev = &lo.root; ++ lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); + lo.root.fd = -1; + lo.root.fuse_ino = FUSE_ROOT_ID; + lo.cache = CACHE_AUTO; +@@ -2522,6 +2528,9 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + ++ if (lo.inodes) { ++ g_hash_table_destroy(lo.inodes); ++ } + lo_map_destroy(&lo.fd_map); + lo_map_destroy(&lo.dirp_map); + lo_map_destroy(&lo.ino_map); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch b/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch new file mode 100644 index 0000000..68eb03e --- /dev/null +++ b/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch @@ -0,0 +1,54 @@ +From feb005dfeb15dd5ac5156c994f323ab4c573b1fc Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:24 +0100 +Subject: [PATCH 053/116] virtiofsd: prevent ".." escape in lo_do_lookup() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-50-dgilbert@redhat.com> +Patchwork-id: 93500 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 049/112] virtiofsd: prevent ".." 
escape in lo_do_lookup() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 854684bc0b3d63eb90b3abdfe471c2e4271ef176) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e375406..79d5966 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -624,12 +624,17 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + int res; + int saverr; + struct lo_data *lo = lo_data(req); +- struct lo_inode *inode; ++ struct lo_inode *inode, *dir = lo_inode(req, parent); + + memset(e, 0, sizeof(*e)); + e->attr_timeout = lo->timeout; + e->entry_timeout = lo->timeout; + ++ /* Do not allow escaping root directory */ ++ if (dir == &lo->root && strcmp(name, "..") == 0) { ++ name = "."; ++ } ++ + newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); + if (newfd == -1) { + goto out_err; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch b/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch new file mode 100644 index 0000000..5f97cbf --- /dev/null +++ b/SOURCES/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch @@ -0,0 +1,108 @@ +From 97e232e75bbc0032f4a309d248f383384612eafe Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:25 +0100 +Subject: [PATCH 054/116] virtiofsd: prevent ".." escape in lo_do_readdir() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-51-dgilbert@redhat.com> +Patchwork-id: 93507 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 050/112] virtiofsd: prevent ".." escape in lo_do_readdir() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Construct a fake dirent for the root directory's ".." entry. This hides +the parent directory from the FUSE client. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 752272da2b68a2312f0e11fc5303015a6c3ee1ac) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 36 ++++++++++++++++++++++-------------- + 1 file changed, 22 insertions(+), 14 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 79d5966..e3d65c3 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1149,19 +1149,25 @@ out_err: + static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, struct fuse_file_info *fi, int plus) + { ++ struct lo_data *lo = lo_data(req); + struct lo_dirp *d; ++ struct lo_inode *dinode; + char *buf = NULL; + char *p; + size_t rem = size; +- int err = ENOMEM; ++ int err = EBADF; + +- (void)ino; ++ dinode = lo_inode(req, ino); ++ if (!dinode) { ++ goto error; ++ } + + d = lo_dirp(req, fi); + if (!d) { + goto error; + } + ++ err = ENOMEM; + buf = calloc(1, size); + if (!buf) { + goto error; +@@ -1192,15 +1198,21 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + } + nextoff = d->entry->d_off; + name = d->entry->d_name; ++ + fuse_ino_t entry_ino = 0; ++ struct fuse_entry_param e = (struct fuse_entry_param){ ++ .attr.st_ino = d->entry->d_ino, ++ .attr.st_mode = d->entry->d_type << 12, ++ }; ++ ++ /* Hide root's parent directory */ ++ if (dinode == &lo->root && strcmp(name, "..") == 0) { ++ e.attr.st_ino = 
lo->root.ino; ++ e.attr.st_mode = DT_DIR << 12; ++ } ++ + if (plus) { +- struct fuse_entry_param e; +- if (is_dot_or_dotdot(name)) { +- e = (struct fuse_entry_param){ +- .attr.st_ino = d->entry->d_ino, +- .attr.st_mode = d->entry->d_type << 12, +- }; +- } else { ++ if (!is_dot_or_dotdot(name)) { + err = lo_do_lookup(req, ino, name, &e); + if (err) { + goto error; +@@ -1210,11 +1222,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + + entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); + } else { +- struct stat st = { +- .st_ino = d->entry->d_ino, +- .st_mode = d->entry->d_type << 12, +- }; +- entsize = fuse_add_direntry(req, p, rem, name, &st, nextoff); ++ entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff); + } + if (entsize > rem) { + if (entry_ino != 0) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch b/SOURCES/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch new file mode 100644 index 0000000..be7c120 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch @@ -0,0 +1,103 @@ +From 249c02ae54739dc5894ee1b2905bbe8f1e79e909 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:20 +0100 +Subject: [PATCH 109/116] virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-106-dgilbert@redhat.com> +Patchwork-id: 93562 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 105/112] virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +When running with multiple threads it can be tricky to handle +FUSE_INIT/FUSE_DESTROY in parallel with other request types or in +parallel with themselves. 
Serialize FUSE_INIT and FUSE_DESTROY so that +malicious clients cannot trigger race conditions. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit cdc497c6925be745bc895355bd4674a17a4b2a8b) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 1 + + tools/virtiofsd/fuse_lowlevel.c | 18 ++++++++++++++++++ + 2 files changed, 19 insertions(+) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index a20854f..1447d86 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -61,6 +61,7 @@ struct fuse_session { + struct fuse_req list; + struct fuse_req interrupts; + pthread_mutex_t lock; ++ pthread_rwlock_t init_rwlock; + int got_destroy; + int broken_splice_nonblock; + uint64_t notify_ctr; +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index dab6a31..79a4031 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2428,6 +2428,19 @@ void fuse_session_process_buf_int(struct fuse_session *se, + req->ctx.pid = in->pid; + req->ch = ch; + ++ /* ++ * INIT and DESTROY requests are serialized, all other request types ++ * run in parallel. This prevents races between FUSE_INIT and ordinary ++ * requests, FUSE_INIT and FUSE_INIT, FUSE_INIT and FUSE_DESTROY, and ++ * FUSE_DESTROY and FUSE_DESTROY. 
++ */ ++ if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT || ++ in->opcode == FUSE_DESTROY) { ++ pthread_rwlock_wrlock(&se->init_rwlock); ++ } else { ++ pthread_rwlock_rdlock(&se->init_rwlock); ++ } ++ + err = EIO; + if (!se->got_init) { + enum fuse_opcode expected; +@@ -2485,10 +2498,13 @@ void fuse_session_process_buf_int(struct fuse_session *se, + } else { + fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter); + } ++ ++ pthread_rwlock_unlock(&se->init_rwlock); + return; + + reply_err: + fuse_reply_err(req, err); ++ pthread_rwlock_unlock(&se->init_rwlock); + } + + #define LL_OPTION(n, o, v) \ +@@ -2531,6 +2547,7 @@ void fuse_session_destroy(struct fuse_session *se) + se->op.destroy(se->userdata); + } + } ++ pthread_rwlock_destroy(&se->init_rwlock); + pthread_mutex_destroy(&se->lock); + free(se->cuse_data); + if (se->fd != -1) { +@@ -2610,6 +2627,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + list_init_req(&se->list); + list_init_req(&se->interrupts); + fuse_mutex_init(&se->lock); ++ pthread_rwlock_init(&se->init_rwlock, NULL); + + memcpy(&se->op, op, op_size); + se->owner = getuid(); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch b/SOURCES/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch new file mode 100644 index 0000000..8eabede --- /dev/null +++ b/SOURCES/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch @@ -0,0 +1,149 @@ +From 69c6a829f8136a8c95ccdf480f2fd0173d64b6ec Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:05 +0100 +Subject: [PATCH 094/116] virtiofsd: prevent fv_queue_thread() vs virtio_loop() + races +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-91-dgilbert@redhat.com> +Patchwork-id: 93544 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 090/112] virtiofsd: prevent fv_queue_thread() vs virtio_loop() races +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +We call into libvhost-user from the virtqueue handler thread and the +vhost-user message processing thread without a lock. There is nothing +protecting the virtqueue handler thread if the vhost-user message +processing thread changes the virtqueue or memory table while it is +running. + +This patch introduces a read-write lock. Virtqueue handler threads are +readers. The vhost-user message processing thread is a writer. This +will allow concurrency for multiqueue in the future while protecting +against fv_queue_thread() vs virtio_loop() races. + +Note that the critical sections could be made smaller but it would be +more invasive and require libvhost-user changes. Let's start simple and +improve performance later, if necessary. Another option would be an +RCU-style approach with lighter-weight primitives. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e7b337326d594b71b07cd6dbb332c49c122c80a4) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 34 +++++++++++++++++++++++++++++++++- + 1 file changed, 33 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index fb8d6d1..f6242f9 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -59,6 +59,18 @@ struct fv_VuDev { + struct fuse_session *se; + + /* ++ * Either handle virtqueues or vhost-user protocol messages. Don't do ++ * both at the same time since that could lead to race conditions if ++ * virtqueues or memory tables change while another thread is accessing ++ * them. 
++ * ++ * The assumptions are: ++ * 1. fv_queue_thread() reads/writes to virtqueues and only reads VuDev. ++ * 2. virtio_loop() reads/writes virtqueues and VuDev. ++ */ ++ pthread_rwlock_t vu_dispatch_rwlock; ++ ++ /* + * The following pair of fields are only accessed in the main + * virtio_loop + */ +@@ -415,6 +427,8 @@ static void *fv_queue_thread(void *opaque) + qi->qidx, qi->kick_fd); + while (1) { + struct pollfd pf[2]; ++ int ret; ++ + pf[0].fd = qi->kick_fd; + pf[0].events = POLLIN; + pf[0].revents = 0; +@@ -461,6 +475,9 @@ static void *fv_queue_thread(void *opaque) + fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); + break; + } ++ /* Mutual exclusion with virtio_loop() */ ++ ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ assert(ret == 0); /* there is no possible error case */ + /* out is from guest, in is too guest */ + unsigned int in_bytes, out_bytes; + vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); +@@ -469,6 +486,7 @@ static void *fv_queue_thread(void *opaque) + "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", + __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); + ++ + while (1) { + bool allocated_bufv = false; + struct fuse_bufvec bufv; +@@ -597,6 +615,8 @@ static void *fv_queue_thread(void *opaque) + free(elem); + elem = NULL; + } ++ ++ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); + } + out: + pthread_mutex_destroy(&ch.lock); +@@ -711,6 +731,8 @@ int virtio_loop(struct fuse_session *se) + + while (!fuse_session_exited(se)) { + struct pollfd pf[1]; ++ bool ok; ++ int ret; + pf[0].fd = se->vu_socketfd; + pf[0].events = POLLIN; + pf[0].revents = 0; +@@ -735,7 +757,15 @@ int virtio_loop(struct fuse_session *se) + } + assert(pf[0].revents & POLLIN); + fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__); +- if (!vu_dispatch(&se->virtio_dev->dev)) { ++ /* Mutual exclusion with fv_queue_thread() */ ++ ret = pthread_rwlock_wrlock(&se->virtio_dev->vu_dispatch_rwlock); ++ 
assert(ret == 0); /* there is no possible error case */ ++ ++ ok = vu_dispatch(&se->virtio_dev->dev); ++ ++ pthread_rwlock_unlock(&se->virtio_dev->vu_dispatch_rwlock); ++ ++ if (!ok) { + fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__); + break; + } +@@ -877,6 +907,7 @@ int virtio_session_mount(struct fuse_session *se) + + se->vu_socketfd = data_sock; + se->virtio_dev->se = se; ++ pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL); + vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch, + fv_remove_watch, &fv_iface); + +@@ -892,6 +923,7 @@ void virtio_session_close(struct fuse_session *se) + } + + free(se->virtio_dev->qi); ++ pthread_rwlock_destroy(&se->virtio_dev->vu_dispatch_rwlock); + free(se->virtio_dev); + se->virtio_dev = NULL; + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch b/SOURCES/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch new file mode 100644 index 0000000..acafa41 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch @@ -0,0 +1,147 @@ +From 2e58ff6978f8433fc8672d2e357c6f0f5f36d24f Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:07 +0100 +Subject: [PATCH 096/116] virtiofsd: prevent races with lo_dirp_put() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-93-dgilbert@redhat.com> +Patchwork-id: 93546 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 092/112] virtiofsd: prevent races with lo_dirp_put() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Introduce lo_dirp_put() so that FUSE_RELEASEDIR does not cause +use-after-free races with other threads that are accessing lo_dirp. + +Also make lo_releasedir() atomic to prevent FUSE_RELEASEDIR racing with +itself. This prevents double-frees. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit acefdde73b403576a241ebd8dbe8431ddc0d9442) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 41 ++++++++++++++++++++++++++++++++++------ + 1 file changed, 35 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 690edbc..2d703b5 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1284,11 +1284,28 @@ static void lo_readlink(fuse_req_t req, fuse_ino_t ino) + } + + struct lo_dirp { ++ gint refcount; + DIR *dp; + struct dirent *entry; + off_t offset; + }; + ++static void lo_dirp_put(struct lo_dirp **dp) ++{ ++ struct lo_dirp *d = *dp; ++ ++ if (!d) { ++ return; ++ } ++ *dp = NULL; ++ ++ if (g_atomic_int_dec_and_test(&d->refcount)) { ++ closedir(d->dp); ++ free(d); ++ } ++} ++ ++/* Call lo_dirp_put() on the return value when no longer needed */ + static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) + { + struct lo_data *lo = lo_data(req); +@@ -1296,6 +1313,9 @@ static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) + + pthread_mutex_lock(&lo->mutex); + elem = lo_map_get(&lo->dirp_map, fi->fh); ++ if (elem) { ++ g_atomic_int_inc(&elem->dirp->refcount); ++ } + pthread_mutex_unlock(&lo->mutex); + if (!elem) { + return NULL; +@@ -1331,6 +1351,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, + d->offset = 0; + d->entry = NULL; + ++ g_atomic_int_set(&d->refcount, 1); /* paired with lo_releasedir() */ + pthread_mutex_lock(&lo->mutex); + fh = lo_add_dirp_mapping(req, d); + pthread_mutex_unlock(&lo->mutex); +@@ -1364,7 +1385,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, struct fuse_file_info *fi, int plus) + { + struct lo_data *lo = lo_data(req); +- struct lo_dirp *d; ++ struct lo_dirp *d = NULL; + struct lo_inode 
*dinode; + char *buf = NULL; + char *p; +@@ -1454,6 +1475,8 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + + err = 0; + error: ++ lo_dirp_put(&d); ++ + /* + * If there's an error, we can only signal it if we haven't stored + * any entries yet - otherwise we'd end up with wrong lookup +@@ -1484,22 +1507,25 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) + { + struct lo_data *lo = lo_data(req); ++ struct lo_map_elem *elem; + struct lo_dirp *d; + + (void)ino; + +- d = lo_dirp(req, fi); +- if (!d) { ++ pthread_mutex_lock(&lo->mutex); ++ elem = lo_map_get(&lo->dirp_map, fi->fh); ++ if (!elem) { ++ pthread_mutex_unlock(&lo->mutex); + fuse_reply_err(req, EBADF); + return; + } + +- pthread_mutex_lock(&lo->mutex); ++ d = elem->dirp; + lo_map_remove(&lo->dirp_map, fi->fh); + pthread_mutex_unlock(&lo->mutex); + +- closedir(d->dp); +- free(d); ++ lo_dirp_put(&d); /* paired with lo_opendir() */ ++ + fuse_reply_err(req, 0); + } + +@@ -1710,6 +1736,9 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + } else { + res = fsync(fd); + } ++ ++ lo_dirp_put(&d); ++ + fuse_reply_err(req, res == -1 ? errno : 0); + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch b/SOURCES/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch new file mode 100644 index 0000000..056559d --- /dev/null +++ b/SOURCES/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch @@ -0,0 +1,469 @@ +From 5c9bbd00e8f8c944d9e8e22e7d1cf08cb8fddd6b Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:37 +0100 +Subject: [PATCH 066/116] virtiofsd: print log only when priority is high + enough +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-63-dgilbert@redhat.com> +Patchwork-id: 93518 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 062/112] virtiofsd: print log only when priority is high enough +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Eryu Guan + +Introduce "-o log_level=" command line option to specify current log +level (priority), valid values are "debug info warn err", e.g. + + ./virtiofsd -o log_level=debug ... + +So only log priority higher than "debug" will be printed to +stderr/syslog. And the default level is info. + +The "-o debug"/"-d" options are kept, and imply debug log level. + +Signed-off-by: Eryu Guan +dgilbert: Reworked for libfuse's log_func +Signed-off-by: Dr. David Alan Gilbert +with fix by: +Signed-off-by: Xiao Yang +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit d240314a1a18a1d914af1b5763fe8c9a572e6409) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 75 ++++++++++--------------- + tools/virtiofsd/fuse_lowlevel.h | 1 + + tools/virtiofsd/helper.c | 8 ++- + tools/virtiofsd/passthrough_ll.c | 118 ++++++++++++++++----------------------- + 4 files changed, 87 insertions(+), 115 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 6ceb33d..a7a1968 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -158,19 +158,17 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, + struct fuse_out_header *out = iov[0].iov_base; + + out->len = iov_length(iov, count); +- if (se->debug) { +- if (out->unique == 0) { +- fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, +- out->len); +- } else if (out->error) { +- fuse_log(FUSE_LOG_DEBUG, +- " unique: %llu, error: %i (%s), outsize: %i\n", +- (unsigned long long)out->unique, out->error, +- strerror(-out->error), 
out->len); +- } else { +- fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", +- (unsigned long long)out->unique, out->len); +- } ++ if (out->unique == 0) { ++ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, ++ out->len); ++ } else if (out->error) { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, error: %i (%s), outsize: %i\n", ++ (unsigned long long)out->unique, out->error, ++ strerror(-out->error), out->len); ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", ++ (unsigned long long)out->unique, out->len); + } + + if (fuse_lowlevel_is_virtio(se)) { +@@ -1662,10 +1660,8 @@ static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, + return; + } + +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", +- (unsigned long long)arg->unique); +- } ++ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", ++ (unsigned long long)arg->unique); + + req->u.i.unique = arg->unique; + +@@ -1901,13 +1897,10 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + } + } + +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); +- if (arg->major == 7 && arg->minor >= 6) { +- fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); +- fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", +- arg->max_readahead); +- } ++ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); ++ if (arg->major == 7 && arg->minor >= 6) { ++ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); ++ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", arg->max_readahead); + } + se->conn.proto_major = arg->major; + se->conn.proto_minor = arg->minor; +@@ -2116,19 +2109,14 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + outarg.congestion_threshold = se->conn.congestion_threshold; + outarg.time_gran = se->conn.time_gran; + +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, +- outarg.minor); +- fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); +- 
fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", +- outarg.max_readahead); +- fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); +- fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", +- outarg.max_background); +- fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", +- outarg.congestion_threshold); +- fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); +- } ++ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); ++ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); ++ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", outarg.max_readahead); ++ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); ++ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", outarg.max_background); ++ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", ++ outarg.congestion_threshold); ++ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); + + send_reply_ok(req, &outarg, outargsize); + } +@@ -2407,14 +2395,11 @@ void fuse_session_process_buf_int(struct fuse_session *se, + in = fuse_mbuf_iter_advance(&iter, sizeof(*in)); + assert(in); /* caller guarantees the input buffer is large enough */ + +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, +- "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, " +- "pid: %u\n", +- (unsigned long long)in->unique, +- opname((enum fuse_opcode)in->opcode), in->opcode, +- (unsigned long long)in->nodeid, buf->size, in->pid); +- } ++ fuse_log( ++ FUSE_LOG_DEBUG, ++ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", ++ (unsigned long long)in->unique, opname((enum fuse_opcode)in->opcode), ++ in->opcode, (unsigned long long)in->nodeid, buf->size, in->pid); + + req = fuse_ll_alloc_req(se); + if (req == NULL) { +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index f2750bc..138041e 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1796,6 +1796,7 @@ struct fuse_cmdline_opts { + int show_help; + 
int print_capabilities; + int syslog; ++ int log_level; + unsigned int max_idle_threads; + }; + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 9692ef9..6d50a46 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -34,7 +34,6 @@ + t, offsetof(struct fuse_cmdline_opts, p), v \ + } + +- + static const struct fuse_opt fuse_helper_opts[] = { + FUSE_HELPER_OPT("-h", show_help), + FUSE_HELPER_OPT("--help", show_help), +@@ -55,6 +54,10 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), + FUSE_HELPER_OPT("--syslog", syslog), ++ FUSE_HELPER_OPT_VALUE("log_level=debug", log_level, FUSE_LOG_DEBUG), ++ FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO), ++ FUSE_HELPER_OPT_VALUE("log_level=warn", log_level, FUSE_LOG_WARNING), ++ FUSE_HELPER_OPT_VALUE("log_level=err", log_level, FUSE_LOG_ERR), + FUSE_OPT_END + }; + +@@ -142,6 +145,9 @@ void fuse_cmdline_help(void) + " --syslog log to syslog (default stderr)\n" + " -f foreground operation\n" + " --daemonize run in background\n" ++ " -o log_level= log level, default to \"info\"\n" ++ " level could be one of \"debug, " ++ "info, warn, err\"\n" + " -o max_idle_threads the maximum number of idle worker " + "threads\n" + " allowed (default: 10)\n" +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 0372aca..ff6910f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -37,6 +37,7 @@ + + #include "qemu/osdep.h" + #include "fuse_virtio.h" ++#include "fuse_log.h" + #include "fuse_lowlevel.h" + #include + #include +@@ -140,6 +141,7 @@ static const struct fuse_opt lo_opts[] = { + FUSE_OPT_END + }; + static bool use_syslog = false; ++static int current_log_level; + + static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); + +@@ -458,11 +460,6 @@ static int lo_fd(fuse_req_t 
req, fuse_ino_t ino) + return inode ? inode->fd : -1; + } + +-static bool lo_debug(fuse_req_t req) +-{ +- return lo_data(req)->debug != 0; +-} +- + static void lo_init(void *userdata, struct fuse_conn_info *conn) + { + struct lo_data *lo = (struct lo_data *)userdata; +@@ -472,15 +469,11 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + } + + if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { +- if (lo->debug) { +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); + conn->want |= FUSE_CAP_WRITEBACK_CACHE; + } + if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { +- if (lo->debug) { +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } + } +@@ -823,10 +816,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + } + e->ino = inode->fuse_ino; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long)parent, name, (unsigned long long)e->ino); +- } ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, ++ name, (unsigned long long)e->ino); + + return 0; + +@@ -843,10 +834,8 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + struct fuse_entry_param e; + int err; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", +- parent, name); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent, ++ name); + + /* + * Don't use is_safe_path_component(), allow "." and ".." 
for NFS export +@@ -971,10 +960,8 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long)parent, name, (unsigned long long)e.ino); +- } ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, ++ name, (unsigned long long)e.ino); + + fuse_reply_entry(req, &e); + return; +@@ -1074,10 +1061,8 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + pthread_mutex_unlock(&lo->mutex); + e.ino = inode->fuse_ino; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long)parent, name, (unsigned long long)e.ino); +- } ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, ++ name, (unsigned long long)e.ino); + + fuse_reply_entry(req, &e); + return; +@@ -1171,11 +1156,9 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + return; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", +- (unsigned long long)ino, (unsigned long long)inode->refcount, +- (unsigned long long)nlookup); +- } ++ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", ++ (unsigned long long)ino, (unsigned long long)inode->refcount, ++ (unsigned long long)nlookup); + + unref_inode(lo, inode, nlookup); + } +@@ -1445,10 +1428,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + int err; + struct lo_cred old = {}; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", +- parent, name); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", parent, ++ name); + + if (!is_safe_path_component(name)) { + fuse_reply_err(req, EINVAL); +@@ -1525,10 +1506,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + char buf[64]; + struct lo_data *lo = lo_data(req); + +- if (lo_debug(req)) { +- 
fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, +- fi->flags); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, ++ fi->flags); + + /* + * With writeback cache, kernel may send read requests even +@@ -1644,12 +1623,10 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, + { + struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, +- "lo_read(ino=%" PRIu64 ", size=%zd, " +- "off=%lu)\n", +- ino, size, (unsigned long)offset); +- } ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_read(ino=%" PRIu64 ", size=%zd, " ++ "off=%lu)\n", ++ ino, size, (unsigned long)offset); + + buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; + buf.buf[0].fd = lo_fi_fd(req, fi); +@@ -1671,11 +1648,9 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + out_buf.buf[0].fd = lo_fi_fd(req, fi); + out_buf.buf[0].pos = off; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, +- "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, +- out_buf.buf[0].size, (unsigned long)off); +- } ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, ++ out_buf.buf[0].size, (unsigned long)off); + + /* + * If kill_priv is set, drop CAP_FSETID which should lead to kernel +@@ -1774,11 +1749,8 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, +- "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name, +- size); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ++ ino, name, size); + + if (inode->is_symlink) { + /* Sorry, no race free way to getxattr on symlink. 
*/ +@@ -1852,10 +1824,8 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", +- ino, size); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, ++ size); + + if (inode->is_symlink) { + /* Sorry, no race free way to listxattr on symlink. */ +@@ -1929,11 +1899,8 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, +- "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", +- ino, name, value, size); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ++ ", name=%s value=%s size=%zd)\n", ino, name, value, size); + + if (inode->is_symlink) { + /* Sorry, no race free way to setxattr on symlink. */ +@@ -1978,10 +1945,8 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", +- ino, name); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, ++ name); + + if (inode->is_symlink) { + /* Sorry, no race free way to setxattr on symlink. */ +@@ -2303,6 +2268,10 @@ static void setup_nofile_rlimit(void) + + static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + { ++ if (current_log_level < level) { ++ return; ++ } ++ + if (use_syslog) { + int priority = LOG_ERR; + switch (level) { +@@ -2401,8 +2370,19 @@ int main(int argc, char *argv[]) + return 1; + } + ++ /* ++ * log_level is 0 if not configured via cmd options (0 is LOG_EMERG, ++ * and we don't use this log level). 
++ */ ++ if (opts.log_level != 0) { ++ current_log_level = opts.log_level; ++ } + lo.debug = opts.debug; ++ if (lo.debug) { ++ current_log_level = FUSE_LOG_DEBUG; ++ } + lo.root.refcount = 2; ++ + if (lo.source) { + struct stat stat; + int res; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-process-requests-in-a-thread-pool.patch b/SOURCES/kvm-virtiofsd-process-requests-in-a-thread-pool.patch new file mode 100644 index 0000000..87fff99 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-process-requests-in-a-thread-pool.patch @@ -0,0 +1,533 @@ +From b0db5e666aaa43eadff3e60a1ada704f33b03074 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:19 +0100 +Subject: [PATCH 108/116] virtiofsd: process requests in a thread pool +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-105-dgilbert@redhat.com> +Patchwork-id: 93554 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 104/112] virtiofsd: process requests in a thread pool +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Introduce a thread pool so that fv_queue_thread() just pops +VuVirtqElements and hands them to the thread pool. For the time being +only one worker thread is allowed since passthrough_ll.c is not +thread-safe yet. Future patches will lift this restriction so that +multiple FUSE requests can be processed in parallel. + +The main new concept is struct FVRequest, which contains both +VuVirtqElement and struct fuse_chan. We now have fv_VuDev for a device, +fv_QueueInfo for a virtqueue, and FVRequest for a request. Some of +fv_QueueInfo's fields are moved into FVRequest because they are +per-request. The name FVRequest conforms to QEMU coding style and I +expect the struct fv_* types will be renamed in a future refactoring. + +This patch series is not optimal. 
fbuf reuse is dropped so each request +does malloc(se->bufsize), but there is no clean and cheap way to keep +this with a thread pool. The vq_lock mutex is held for longer than +necessary, especially during the eventfd_write() syscall. Performance +can be improved in the future. + +prctl(2) had to be added to the seccomp whitelist because glib invokes +it. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit a3d756c5aecccc4c0e51060a7e2f1c87bf8f1180) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 359 +++++++++++++++++++++++------------------- + 1 file changed, 201 insertions(+), 158 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index f6242f9..0dcf2ef 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -22,6 +22,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -37,17 +38,28 @@ + struct fv_VuDev; + struct fv_QueueInfo { + pthread_t thread; ++ /* ++ * This lock protects the VuVirtq preventing races between ++ * fv_queue_thread() and fv_queue_worker(). 
++ */ ++ pthread_mutex_t vq_lock; ++ + struct fv_VuDev *virtio_dev; + + /* Our queue index, corresponds to array position */ + int qidx; + int kick_fd; + int kill_fd; /* For killing the thread */ ++}; + +- /* The element for the command currently being processed */ +- VuVirtqElement *qe; ++/* A FUSE request */ ++typedef struct { ++ VuVirtqElement elem; ++ struct fuse_chan ch; ++ ++ /* Used to complete requests that involve no reply */ + bool reply_sent; +-}; ++} FVRequest; + + /* + * We pass the dev element into libvhost-user +@@ -191,8 +203,11 @@ static void copy_iov(struct iovec *src_iov, int src_count, + int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int count) + { +- VuVirtqElement *elem; +- VuVirtq *q; ++ FVRequest *req = container_of(ch, FVRequest, ch); ++ struct fv_QueueInfo *qi = ch->qi; ++ VuDev *dev = &se->virtio_dev->dev; ++ VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ VuVirtqElement *elem = &req->elem; + int ret = 0; + + assert(count >= 1); +@@ -205,11 +220,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + + /* unique == 0 is notification, which we don't support */ + assert(out->unique); +- /* For virtio we always have ch */ +- assert(ch); +- assert(!ch->qi->reply_sent); +- elem = ch->qi->qe; +- q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; ++ assert(!req->reply_sent); + + /* The 'in' part of the elem is to qemu */ + unsigned int in_num = elem->in_num; +@@ -236,9 +247,15 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + } + + copy_iov(iov, count, in_sg, in_num, tosend_len); +- vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); +- vu_queue_notify(&se->virtio_dev->dev, q); +- ch->qi->reply_sent = true; ++ ++ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ pthread_mutex_lock(&qi->vq_lock); ++ vu_queue_push(dev, q, elem, tosend_len); ++ vu_queue_notify(dev, q); ++ pthread_mutex_unlock(&qi->vq_lock); ++ 
pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ ++ req->reply_sent = true; + + err: + return ret; +@@ -254,9 +271,12 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int count, struct fuse_bufvec *buf, + size_t len) + { ++ FVRequest *req = container_of(ch, FVRequest, ch); ++ struct fv_QueueInfo *qi = ch->qi; ++ VuDev *dev = &se->virtio_dev->dev; ++ VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ VuVirtqElement *elem = &req->elem; + int ret = 0; +- VuVirtqElement *elem; +- VuVirtq *q; + + assert(count >= 1); + assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); +@@ -275,11 +295,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + /* unique == 0 is notification which we don't support */ + assert(out->unique); + +- /* For virtio we always have ch */ +- assert(ch); +- assert(!ch->qi->reply_sent); +- elem = ch->qi->qe; +- q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; ++ assert(!req->reply_sent); + + /* The 'in' part of the elem is to qemu */ + unsigned int in_num = elem->in_num; +@@ -395,33 +411,175 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + + ret = 0; + +- vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); +- vu_queue_notify(&se->virtio_dev->dev, q); ++ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ pthread_mutex_lock(&qi->vq_lock); ++ vu_queue_push(dev, q, elem, tosend_len); ++ vu_queue_notify(dev, q); ++ pthread_mutex_unlock(&qi->vq_lock); ++ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); + + err: + if (ret == 0) { +- ch->qi->reply_sent = true; ++ req->reply_sent = true; + } + + return ret; + } + ++/* Process one FVRequest in a thread pool */ ++static void fv_queue_worker(gpointer data, gpointer user_data) ++{ ++ struct fv_QueueInfo *qi = user_data; ++ struct fuse_session *se = qi->virtio_dev->se; ++ struct VuDev *dev = &qi->virtio_dev->dev; ++ FVRequest *req = data; ++ VuVirtqElement *elem = &req->elem; ++ 
struct fuse_buf fbuf = {}; ++ bool allocated_bufv = false; ++ struct fuse_bufvec bufv; ++ struct fuse_bufvec *pbufv; ++ ++ assert(se->bufsize > sizeof(struct fuse_in_header)); ++ ++ /* ++ * An element contains one request and the space to send our response ++ * They're spread over multiple descriptors in a scatter/gather set ++ * and we can't trust the guest to keep them still; so copy in/out. ++ */ ++ fbuf.mem = malloc(se->bufsize); ++ assert(fbuf.mem); ++ ++ fuse_mutex_init(&req->ch.lock); ++ req->ch.fd = -1; ++ req->ch.qi = qi; ++ ++ /* The 'out' part of the elem is from qemu */ ++ unsigned int out_num = elem->out_num; ++ struct iovec *out_sg = elem->out_sg; ++ size_t out_len = iov_size(out_sg, out_num); ++ fuse_log(FUSE_LOG_DEBUG, ++ "%s: elem %d: with %d out desc of length %zd\n", ++ __func__, elem->index, out_num, out_len); ++ ++ /* ++ * The elem should contain a 'fuse_in_header' (in to fuse) ++ * plus the data based on the len in the header. ++ */ ++ if (out_len < sizeof(struct fuse_in_header)) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", ++ __func__, elem->index); ++ assert(0); /* TODO */ ++ } ++ if (out_len > se->bufsize) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", __func__, ++ elem->index); ++ assert(0); /* TODO */ ++ } ++ /* Copy just the first element and look at it */ ++ copy_from_iov(&fbuf, 1, out_sg); ++ ++ pbufv = NULL; /* Compiler thinks an unitialised path */ ++ if (out_num > 2 && ++ out_sg[0].iov_len == sizeof(struct fuse_in_header) && ++ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && ++ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { ++ /* ++ * For a write we don't actually need to copy the ++ * data, we can just do it straight out of guest memory ++ * but we must still copy the headers in case the guest ++ * was nasty and changed them while we were using them. 
++ */ ++ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); ++ ++ /* copy the fuse_write_in header afte rthe fuse_in_header */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; ++ ++ /* Allocate the bufv, with space for the rest of the iov */ ++ pbufv = malloc(sizeof(struct fuse_bufvec) + ++ sizeof(struct fuse_buf) * (out_num - 2)); ++ if (!pbufv) { ++ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", ++ __func__); ++ goto out; ++ } ++ ++ allocated_bufv = true; ++ pbufv->count = 1; ++ pbufv->buf[0] = fbuf; ++ ++ size_t iovindex, pbufvindex; ++ iovindex = 2; /* 2 headers, separate iovs */ ++ pbufvindex = 1; /* 2 headers, 1 fusebuf */ ++ ++ for (; iovindex < out_num; iovindex++, pbufvindex++) { ++ pbufv->count++; ++ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ ++ pbufv->buf[pbufvindex].flags = 0; ++ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; ++ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; ++ } ++ } else { ++ /* Normal (non fast write) path */ ++ ++ /* Copy the rest of the buffer */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, out_num - 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_len; ++ ++ /* TODO! 
Endianness of header */ ++ ++ /* TODO: Add checks for fuse_session_exited */ ++ bufv.buf[0] = fbuf; ++ bufv.count = 1; ++ pbufv = &bufv; ++ } ++ pbufv->idx = 0; ++ pbufv->off = 0; ++ fuse_session_process_buf_int(se, pbufv, &req->ch); ++ ++out: ++ if (allocated_bufv) { ++ free(pbufv); ++ } ++ ++ /* If the request has no reply, still recycle the virtqueue element */ ++ if (!req->reply_sent) { ++ struct VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ ++ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", __func__, ++ elem->index); ++ ++ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ pthread_mutex_lock(&qi->vq_lock); ++ vu_queue_push(dev, q, elem, 0); ++ vu_queue_notify(dev, q); ++ pthread_mutex_unlock(&qi->vq_lock); ++ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ } ++ ++ pthread_mutex_destroy(&req->ch.lock); ++ free(fbuf.mem); ++ free(req); ++} ++ + /* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { + struct fv_QueueInfo *qi = opaque; + struct VuDev *dev = &qi->virtio_dev->dev; + struct VuVirtq *q = vu_get_queue(dev, qi->qidx); +- struct fuse_session *se = qi->virtio_dev->se; +- struct fuse_chan ch; +- struct fuse_buf fbuf; ++ GThreadPool *pool; + +- fbuf.mem = NULL; +- fbuf.flags = 0; +- +- fuse_mutex_init(&ch.lock); +- ch.fd = (int)0xdaff0d111; +- ch.qi = qi; ++ pool = g_thread_pool_new(fv_queue_worker, qi, 1 /* TODO max_threads */, ++ TRUE, NULL); ++ if (!pool) { ++ fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); ++ return NULL; ++ } + + fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, + qi->qidx, qi->kick_fd); +@@ -478,6 +636,7 @@ static void *fv_queue_thread(void *opaque) + /* Mutual exclusion with virtio_loop() */ + ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); + assert(ret == 0); /* there is no possible error case */ ++ pthread_mutex_lock(&qi->vq_lock); + /* out is from guest, in is too 
guest */ + unsigned int in_bytes, out_bytes; + vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); +@@ -486,141 +645,22 @@ static void *fv_queue_thread(void *opaque) + "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", + __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); + +- + while (1) { +- bool allocated_bufv = false; +- struct fuse_bufvec bufv; +- struct fuse_bufvec *pbufv; +- +- /* +- * An element contains one request and the space to send our +- * response They're spread over multiple descriptors in a +- * scatter/gather set and we can't trust the guest to keep them +- * still; so copy in/out. +- */ +- VuVirtqElement *elem = vu_queue_pop(dev, q, sizeof(VuVirtqElement)); +- if (!elem) { ++ FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest)); ++ if (!req) { + break; + } + +- qi->qe = elem; +- qi->reply_sent = false; ++ req->reply_sent = false; + +- if (!fbuf.mem) { +- fbuf.mem = malloc(se->bufsize); +- assert(fbuf.mem); +- assert(se->bufsize > sizeof(struct fuse_in_header)); +- } +- /* The 'out' part of the elem is from qemu */ +- unsigned int out_num = elem->out_num; +- struct iovec *out_sg = elem->out_sg; +- size_t out_len = iov_size(out_sg, out_num); +- fuse_log(FUSE_LOG_DEBUG, +- "%s: elem %d: with %d out desc of length %zd\n", __func__, +- elem->index, out_num, out_len); +- +- /* +- * The elem should contain a 'fuse_in_header' (in to fuse) +- * plus the data based on the len in the header. 
+- */ +- if (out_len < sizeof(struct fuse_in_header)) { +- fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", +- __func__, elem->index); +- assert(0); /* TODO */ +- } +- if (out_len > se->bufsize) { +- fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", +- __func__, elem->index); +- assert(0); /* TODO */ +- } +- /* Copy just the first element and look at it */ +- copy_from_iov(&fbuf, 1, out_sg); +- +- if (out_num > 2 && +- out_sg[0].iov_len == sizeof(struct fuse_in_header) && +- ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && +- out_sg[1].iov_len == sizeof(struct fuse_write_in)) { +- /* +- * For a write we don't actually need to copy the +- * data, we can just do it straight out of guest memory +- * but we must still copy the headers in case the guest +- * was nasty and changed them while we were using them. +- */ +- fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); +- +- /* copy the fuse_write_in header after the fuse_in_header */ +- fbuf.mem += out_sg->iov_len; +- copy_from_iov(&fbuf, 1, out_sg + 1); +- fbuf.mem -= out_sg->iov_len; +- fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; +- +- /* Allocate the bufv, with space for the rest of the iov */ +- allocated_bufv = true; +- pbufv = malloc(sizeof(struct fuse_bufvec) + +- sizeof(struct fuse_buf) * (out_num - 2)); +- if (!pbufv) { +- vu_queue_unpop(dev, q, elem, 0); +- free(elem); +- fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", +- __func__); +- goto out; +- } +- +- pbufv->count = 1; +- pbufv->buf[0] = fbuf; +- +- size_t iovindex, pbufvindex; +- iovindex = 2; /* 2 headers, separate iovs */ +- pbufvindex = 1; /* 2 headers, 1 fusebuf */ +- +- for (; iovindex < out_num; iovindex++, pbufvindex++) { +- pbufv->count++; +- pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ +- pbufv->buf[pbufvindex].flags = 0; +- pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; +- pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; +- } +- } else { +- /* Normal (non fast write) 
path */ +- +- /* Copy the rest of the buffer */ +- fbuf.mem += out_sg->iov_len; +- copy_from_iov(&fbuf, out_num - 1, out_sg + 1); +- fbuf.mem -= out_sg->iov_len; +- fbuf.size = out_len; +- +- /* TODO! Endianness of header */ +- +- /* TODO: Add checks for fuse_session_exited */ +- bufv.buf[0] = fbuf; +- bufv.count = 1; +- pbufv = &bufv; +- } +- pbufv->idx = 0; +- pbufv->off = 0; +- fuse_session_process_buf_int(se, pbufv, &ch); +- +- if (allocated_bufv) { +- free(pbufv); +- } +- +- if (!qi->reply_sent) { +- fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", +- __func__, elem->index); +- /* I think we've still got to recycle the element */ +- vu_queue_push(dev, q, elem, 0); +- vu_queue_notify(dev, q); +- } +- qi->qe = NULL; +- free(elem); +- elem = NULL; ++ g_thread_pool_push(pool, req, NULL); + } + ++ pthread_mutex_unlock(&qi->vq_lock); + pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); + } +-out: +- pthread_mutex_destroy(&ch.lock); +- free(fbuf.mem); ++ ++ g_thread_pool_free(pool, FALSE, TRUE); + + return NULL; + } +@@ -643,6 +683,7 @@ static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) + fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n", + __func__, qidx, ret); + } ++ pthread_mutex_destroy(&ourqi->vq_lock); + close(ourqi->kill_fd); + ourqi->kick_fd = -1; + free(vud->qi[qidx]); +@@ -696,6 +737,8 @@ static void fv_queue_set_started(VuDev *dev, int qidx, bool started) + + ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE); + assert(ourqi->kill_fd != -1); ++ pthread_mutex_init(&ourqi->vq_lock, NULL); ++ + if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { + fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", + __func__, qidx); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch b/SOURCES/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch new file mode 100644 index 0000000..181e32d --- /dev/null +++ 
b/SOURCES/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch @@ -0,0 +1,159 @@ +From a8a1835a82510be7d2d6edcc28a60e506a2cedad Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:46 +0100 +Subject: [PATCH 015/116] virtiofsd: remove mountpoint dummy argument +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-12-dgilbert@redhat.com> +Patchwork-id: 93466 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 011/112] virtiofsd: remove mountpoint dummy argument +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Classic FUSE file system daemons take a mountpoint argument but +virtiofsd exposes a vhost-user UNIX domain socket instead. The +mountpoint argument is not used by virtiofsd but the user is still +required to pass a dummy argument on the command-line. + +Remove the mountpoint argument to clean up the command-line. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 67aab02272f6cb47c56420f60b370c184961b5ca) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 2 +- + tools/virtiofsd/fuse_lowlevel.h | 4 +--- + tools/virtiofsd/helper.c | 20 +++----------------- + tools/virtiofsd/passthrough_ll.c | 12 ++---------- + 4 files changed, 7 insertions(+), 31 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 5c9cb52..2f32c68 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2455,7 +2455,7 @@ out1: + return NULL; + } + +-int fuse_session_mount(struct fuse_session *se, const char *mountpoint) ++int fuse_session_mount(struct fuse_session *se) + { + int fd; + +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index adb9054..8d8909b 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1863,7 +1863,6 @@ struct fuse_cmdline_opts { + int foreground; + int debug; + int nodefault_subtype; +- char *mountpoint; + int show_version; + int show_help; + unsigned int max_idle_threads; +@@ -1924,12 +1923,11 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + /** + * Mount a FUSE file system. + * +- * @param mountpoint the mount point path + * @param se session object + * + * @return 0 on success, -1 on failure. + **/ +-int fuse_session_mount(struct fuse_session *se, const char *mountpoint); ++int fuse_session_mount(struct fuse_session *se); + + /** + * Enter a single threaded, blocking event loop. 
+diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5711dd2..5e6f205 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -140,27 +140,13 @@ void fuse_cmdline_help(void) + static int fuse_helper_opt_proc(void *data, const char *arg, int key, + struct fuse_args *outargs) + { ++ (void)data; + (void)outargs; +- struct fuse_cmdline_opts *opts = data; + + switch (key) { + case FUSE_OPT_KEY_NONOPT: +- if (!opts->mountpoint) { +- if (fuse_mnt_parse_fuse_fd(arg) != -1) { +- return fuse_opt_add_opt(&opts->mountpoint, arg); +- } +- +- char mountpoint[PATH_MAX] = ""; +- if (realpath(arg, mountpoint) == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: bad mount point `%s': %s\n", arg, +- strerror(errno)); +- return -1; +- } +- return fuse_opt_add_opt(&opts->mountpoint, mountpoint); +- } else { +- fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); +- return -1; +- } ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); ++ return -1; + + default: + /* Pass through unknown options */ +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index c5850ef..9377718 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1297,7 +1297,7 @@ int main(int argc, char *argv[]) + return 1; + } + if (opts.show_help) { +- printf("usage: %s [options] <mountpoint>\n\n", argv[0]); ++ printf("usage: %s [options]\n\n", argv[0]); + fuse_cmdline_help(); + fuse_lowlevel_help(); + ret = 0; +@@ -1308,13 +1308,6 @@ int main(int argc, char *argv[]) + goto err_out1; + } + +- if (opts.mountpoint == NULL) { +- printf("usage: %s [options] <mountpoint>\n", argv[0]); +- printf(" %s --help\n", argv[0]); +- ret = 1; +- goto err_out1; +- } +- + if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { + return 1; + } +@@ -1374,7 +1367,7 @@ int main(int argc, char *argv[]) + goto err_out2; + } + +- if (fuse_session_mount(se, opts.mountpoint) != 0) { ++ if (fuse_session_mount(se) != 0) { + goto err_out3; + } + +@@ -1393,7 
+1386,6 @@ err_out3: + err_out2: + fuse_session_destroy(se); + err_out1: +- free(opts.mountpoint); + fuse_opt_free_args(&args); + + if (lo.root.fd >= 0) { +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-remove-unused-notify-reply-support.patch b/SOURCES/kvm-virtiofsd-remove-unused-notify-reply-support.patch new file mode 100644 index 0000000..98fb968 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-remove-unused-notify-reply-support.patch @@ -0,0 +1,294 @@ +From e5534c0d4b866f61dbafa8d2422a24ab956189c1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:47 +0100 +Subject: [PATCH 016/116] virtiofsd: remove unused notify reply support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-13-dgilbert@redhat.com> +Patchwork-id: 93467 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 012/112] virtiofsd: remove unused notify reply support +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Notify reply support is unused by virtiofsd. The code would need to be +updated to validate input buffer sizes. Remove this unused code since +changes to it are untestable. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 64c6f408a29ef03e9b8da9f5a5d8fd511b0d801e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 147 +--------------------------------------- + tools/virtiofsd/fuse_lowlevel.h | 47 ------------- + 2 files changed, 1 insertion(+), 193 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 2f32c68..eb0ec49 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -31,12 +31,6 @@ + #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) + #define OFFSET_MAX 0x7fffffffffffffffLL + +-#define container_of(ptr, type, member) \ +- ({ \ +- const typeof(((type *)0)->member) *__mptr = (ptr); \ +- (type *)((char *)__mptr - offsetof(type, member)); \ +- }) +- + struct fuse_pollhandle { + uint64_t kh; + struct fuse_session *se; +@@ -1862,52 +1856,6 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + send_reply_ok(req, NULL, 0); + } + +-static void list_del_nreq(struct fuse_notify_req *nreq) +-{ +- struct fuse_notify_req *prev = nreq->prev; +- struct fuse_notify_req *next = nreq->next; +- prev->next = next; +- next->prev = prev; +-} +- +-static void list_add_nreq(struct fuse_notify_req *nreq, +- struct fuse_notify_req *next) +-{ +- struct fuse_notify_req *prev = next->prev; +- nreq->next = next; +- nreq->prev = prev; +- prev->next = nreq; +- next->prev = nreq; +-} +- +-static void list_init_nreq(struct fuse_notify_req *nreq) +-{ +- nreq->next = nreq; +- nreq->prev = nreq; +-} +- +-static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg, const struct fuse_buf *buf) +-{ +- struct fuse_session *se = req->se; +- struct fuse_notify_req *nreq; +- struct fuse_notify_req *head; +- +- pthread_mutex_lock(&se->lock); +- head = &se->notify_list; +- for (nreq = head->next; nreq != head; nreq = nreq->next) { +- if (nreq->unique == req->unique) { +- list_del_nreq(nreq); +- break; +- } +- } 
+- pthread_mutex_unlock(&se->lock); +- +- if (nreq != head) { +- nreq->reply(nreq, req, nodeid, inarg, buf); +- } +-} +- + static int send_notify_iov(struct fuse_session *se, int notify_code, + struct iovec *iov, int count) + { +@@ -2059,95 +2007,6 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + return res; + } + +-struct fuse_retrieve_req { +- struct fuse_notify_req nreq; +- void *cookie; +-}; +- +-static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, fuse_req_t req, +- fuse_ino_t ino, const void *inarg, +- const struct fuse_buf *ibuf) +-{ +- struct fuse_session *se = req->se; +- struct fuse_retrieve_req *rreq = +- container_of(nreq, struct fuse_retrieve_req, nreq); +- const struct fuse_notify_retrieve_in *arg = inarg; +- struct fuse_bufvec bufv = { +- .buf[0] = *ibuf, +- .count = 1, +- }; +- +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { +- bufv.buf[0].mem = PARAM(arg); +- } +- +- bufv.buf[0].size -= +- sizeof(struct fuse_in_header) + sizeof(struct fuse_notify_retrieve_in); +- +- if (bufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); +- fuse_reply_none(req); +- goto out; +- } +- bufv.buf[0].size = arg->size; +- +- if (se->op.retrieve_reply) { +- se->op.retrieve_reply(req, rreq->cookie, ino, arg->offset, &bufv); +- } else { +- fuse_reply_none(req); +- } +-out: +- free(rreq); +-} +- +-int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +- size_t size, off_t offset, void *cookie) +-{ +- struct fuse_notify_retrieve_out outarg; +- struct iovec iov[2]; +- struct fuse_retrieve_req *rreq; +- int err; +- +- if (!se) { +- return -EINVAL; +- } +- +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { +- return -ENOSYS; +- } +- +- rreq = malloc(sizeof(*rreq)); +- if (rreq == NULL) { +- return -ENOMEM; +- } +- +- pthread_mutex_lock(&se->lock); +- rreq->cookie = cookie; +- rreq->nreq.unique = se->notify_ctr++; +- rreq->nreq.reply = 
fuse_ll_retrieve_reply; +- list_add_nreq(&rreq->nreq, &se->notify_list); +- pthread_mutex_unlock(&se->lock); +- +- outarg.notify_unique = rreq->nreq.unique; +- outarg.nodeid = ino; +- outarg.offset = offset; +- outarg.size = size; +- outarg.padding = 0; +- +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); +- +- err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); +- if (err) { +- pthread_mutex_lock(&se->lock); +- list_del_nreq(&rreq->nreq); +- pthread_mutex_unlock(&se->lock); +- free(rreq); +- } +- +- return err; +-} +- + void *fuse_req_userdata(fuse_req_t req) + { + return req->se->userdata; +@@ -2226,7 +2085,7 @@ static struct { + [FUSE_POLL] = { do_poll, "POLL" }, + [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, + [FUSE_DESTROY] = { do_destroy, "DESTROY" }, +- [FUSE_NOTIFY_REPLY] = { (void *)1, "NOTIFY_REPLY" }, ++ [FUSE_NOTIFY_REPLY] = { NULL, "NOTIFY_REPLY" }, + [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, + [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" }, + [FUSE_RENAME2] = { do_rename2, "RENAME2" }, +@@ -2333,8 +2192,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, + inarg = (void *)&in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) { + do_write_buf(req, in->nodeid, inarg, buf); +- } else if (in->opcode == FUSE_NOTIFY_REPLY) { +- do_notify_reply(req, in->nodeid, inarg, buf); + } else { + fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); + } +@@ -2437,8 +2294,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + + list_init_req(&se->list); + list_init_req(&se->interrupts); +- list_init_nreq(&se->notify_list); +- se->notify_ctr = 1; + fuse_mutex_init(&se->lock); + + memcpy(&se->op, op, op_size); +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 8d8909b..12a84b4 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1085,21 +1085,6 @@ struct fuse_lowlevel_ops { + off_t off, struct fuse_file_info *fi); + + 
/** +- * Callback function for the retrieve request +- * +- * Valid replies: +- * fuse_reply_none +- * +- * @param req request handle +- * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() +- * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() +- * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() +- * @param bufv the buffer containing the returned data +- */ +- void (*retrieve_reply)(fuse_req_t req, void *cookie, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv); +- +- /** + * Forget about multiple inodes + * + * See description of the forget function for more +@@ -1726,38 +1711,6 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + off_t offset, struct fuse_bufvec *bufv, + enum fuse_buf_copy_flags flags); +-/** +- * Retrieve data from the kernel buffers +- * +- * Retrieve data in the kernel buffers belonging to the given inode. +- * If successful then the retrieve_reply() method will be called with +- * the returned data. +- * +- * Only present pages are returned in the retrieve reply. Retrieving +- * stops when it finds a non-present page and only data prior to that +- * is returned. +- * +- * If this function returns an error, then the retrieve will not be +- * completed and no reply will be sent. +- * +- * This function doesn't change the dirty state of pages in the kernel +- * buffer. For dirty pages the write() method will be called +- * regardless of having been retrieved previously. +- * +- * Added in FUSE protocol version 7.15. If the kernel does not support +- * this (or a newer) version, the function will return -ENOSYS and do +- * nothing. 
+- * +- * @param se the session object +- * @param ino the inode number +- * @param size the number of bytes to retrieve +- * @param offset the starting offset into the file to retrieve from +- * @param cookie user data to supply to the reply callback +- * @return zero for success, -errno for failure +- */ +-int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +- size_t size, off_t offset, void *cookie); +- + + /* + * Utility functions +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch b/SOURCES/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch new file mode 100644 index 0000000..97a0db3 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch @@ -0,0 +1,139 @@ +From e01a6e68d799ed2af0ca3b04d75818ba62b18682 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:08 +0100 +Subject: [PATCH 097/116] virtiofsd: rename inode->refcount to inode->nlookup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-94-dgilbert@redhat.com> +Patchwork-id: 93547 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 093/112] virtiofsd: rename inode->refcount to inode->nlookup +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +This reference counter plays a specific role in the FUSE protocol. It's +not a generic object reference counter and the FUSE kernel code calls it +"nlookup". + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 1222f015558fc34cea02aa3a5a92de608c82cec8) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 37 +++++++++++++++++++++++++------------ + 1 file changed, 25 insertions(+), 12 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 2d703b5..c819b5f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -99,7 +99,20 @@ struct lo_inode { + int fd; + bool is_symlink; + struct lo_key key; +- uint64_t refcount; /* protected by lo->mutex */ ++ ++ /* ++ * This counter keeps the inode alive during the FUSE session. ++ * Incremented when the FUSE inode number is sent in a reply ++ * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is ++ * released by requests like FUSE_FORGET, FUSE_RMDIR, FUSE_RENAME, etc. ++ * ++ * Note that this value is untrusted because the client can manipulate ++ * it arbitrarily using FUSE_FORGET requests. ++ * ++ * Protected by lo->mutex. 
++ */ ++ uint64_t nlookup; ++ + fuse_ino_t fuse_ino; + pthread_mutex_t plock_mutex; + GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ +@@ -568,7 +581,7 @@ retry: + if (last == path) { + p = &lo->root; + pthread_mutex_lock(&lo->mutex); +- p->refcount++; ++ p->nlookup++; + pthread_mutex_unlock(&lo->mutex); + } else { + *last = '\0'; +@@ -786,8 +799,8 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + pthread_mutex_lock(&lo->mutex); + p = g_hash_table_lookup(lo->inodes, &key); + if (p) { +- assert(p->refcount > 0); +- p->refcount++; ++ assert(p->nlookup > 0); ++ p->nlookup++; + } + pthread_mutex_unlock(&lo->mutex); + +@@ -855,7 +868,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + inode->is_symlink = S_ISLNK(e->attr.st_mode); +- inode->refcount = 1; ++ inode->nlookup = 1; + inode->fd = newfd; + newfd = -1; + inode->key.ino = e->attr.st_ino; +@@ -1112,7 +1125,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + } + + pthread_mutex_lock(&lo->mutex); +- inode->refcount++; ++ inode->nlookup++; + pthread_mutex_unlock(&lo->mutex); + e.ino = inode->fuse_ino; + +@@ -1193,9 +1206,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + } + + pthread_mutex_lock(&lo->mutex); +- assert(inode->refcount >= n); +- inode->refcount -= n; +- if (!inode->refcount) { ++ assert(inode->nlookup >= n); ++ inode->nlookup -= n; ++ if (!inode->nlookup) { + lo_map_remove(&lo->ino_map, inode->fuse_ino); + g_hash_table_remove(lo->inodes, &inode->key); + if (g_hash_table_size(inode->posix_locks)) { +@@ -1216,7 +1229,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) + struct lo_inode *inode = value; + struct lo_data *lo = user_data; + +- inode->refcount = 0; ++ inode->nlookup = 0; + lo_map_remove(&lo->ino_map, inode->fuse_ino); + close(inode->fd); + +@@ -1241,7 +1254,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, 
uint64_t nlookup) + } + + fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", +- (unsigned long long)ino, (unsigned long long)inode->refcount, ++ (unsigned long long)ino, (unsigned long long)inode->nlookup, + (unsigned long long)nlookup); + + unref_inode_lolocked(lo, inode, nlookup); +@@ -2609,7 +2622,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) + root->fd = fd; + root->key.ino = stat.st_ino; + root->key.dev = stat.st_dev; +- root->refcount = 2; ++ root->nlookup = 2; + } + + static guint lo_key_hash(gconstpointer key) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch b/SOURCES/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch new file mode 100644 index 0000000..95858f8 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch @@ -0,0 +1,94 @@ +From cfa4550f926e7a07757853f94273f2d1589cb9d3 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:48 +0100 +Subject: [PATCH 077/116] virtiofsd: rename unref_inode() to + unref_inode_lolocked() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-74-dgilbert@redhat.com> +Patchwork-id: 93526 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 073/112] virtiofsd: rename unref_inode() to unref_inode_lolocked() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Signed-off-by: Miklos Szeredi +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 95d2715791c60b5dc2d22e4eb7b83217273296fa) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 8b1784f..de12e75 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -148,8 +148,8 @@ static const struct fuse_opt lo_opts[] = { + }; + static bool use_syslog = false; + static int current_log_level; +- +-static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); ++static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, ++ uint64_t n); + + static struct { + pthread_mutex_t mutex; +@@ -586,7 +586,7 @@ retry: + return 0; + + fail_unref: +- unref_inode(lo, p, 1); ++ unref_inode_lolocked(lo, p, 1); + fail: + if (retries) { + retries--; +@@ -624,7 +624,7 @@ fallback: + res = lo_parent_and_name(lo, inode, path, &parent); + if (res != -1) { + res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); +- unref_inode(lo, parent, 1); ++ unref_inode_lolocked(lo, parent, 1); + } + + return res; +@@ -1027,7 +1027,7 @@ fallback: + res = lo_parent_and_name(lo, inode, path, &parent); + if (res != -1) { + res = linkat(parent->fd, path, dfd, name, 0); +- unref_inode(lo, parent, 1); ++ unref_inode_lolocked(lo, parent, 1); + } + + return res; +@@ -1141,7 +1141,8 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + fuse_reply_err(req, res == -1 ? 
errno : 0); + } + +-static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) ++static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, ++ uint64_t n) + { + if (!inode) { + return; +@@ -1181,7 +1182,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + (unsigned long long)ino, (unsigned long long)inode->refcount, + (unsigned long long)nlookup); + +- unref_inode(lo, inode, nlookup); ++ unref_inode_lolocked(lo, inode, nlookup); + } + + static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-sandbox-mount-namespace.patch b/SOURCES/kvm-virtiofsd-sandbox-mount-namespace.patch new file mode 100644 index 0000000..ab6f751 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-sandbox-mount-namespace.patch @@ -0,0 +1,166 @@ +From c7ae38df696e4be432fd418c670dcea892b910a7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:27 +0100 +Subject: [PATCH 056/116] virtiofsd: sandbox mount namespace +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-53-dgilbert@redhat.com> +Patchwork-id: 93504 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 052/112] virtiofsd: sandbox mount namespace +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Use a mount namespace with the shared directory tree mounted at "/" and +no other mounts. + +This prevents symlink escape attacks because symlink targets are +resolved only against the shared directory and cannot go outside it. + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Peng Tao +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 5baa3b8e95064c2434bd9e2f312edd5e9ae275dc) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 89 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 89 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e2e2211..0570453 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -50,6 +50,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1943,6 +1944,58 @@ static void print_capabilities(void) + printf("}\n"); + } + ++/* This magic is based on lxc's lxc_pivot_root() */ ++static void setup_pivot_root(const char *source) ++{ ++ int oldroot; ++ int newroot; ++ ++ oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); ++ if (oldroot < 0) { ++ fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); ++ exit(1); ++ } ++ ++ newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC); ++ if (newroot < 0) { ++ fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source); ++ exit(1); ++ } ++ ++ if (fchdir(newroot) < 0) { ++ fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); ++ exit(1); ++ } ++ ++ if (syscall(__NR_pivot_root, ".", ".") < 0) { ++ fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n"); ++ exit(1); ++ } ++ ++ if (fchdir(oldroot) < 0) { ++ fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n"); ++ exit(1); ++ } ++ ++ if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n"); ++ exit(1); ++ } ++ ++ if (umount2(".", MNT_DETACH) < 0) { ++ fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n"); ++ exit(1); ++ } ++ ++ if (fchdir(newroot) < 0) { ++ fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); ++ exit(1); ++ } ++ ++ close(newroot); ++ close(oldroot); ++} ++ + static void setup_proc_self_fd(struct lo_data *lo) + { + lo->proc_self_fd = open("/proc/self/fd", O_PATH); +@@ -1952,6 +2005,39 @@ static void setup_proc_self_fd(struct lo_data *lo) + } + } + ++/* 
++ * Make the source directory our root so symlinks cannot escape and no other ++ * files are accessible. ++ */ ++static void setup_mount_namespace(const char *source) ++{ ++ if (unshare(CLONE_NEWNS) != 0) { ++ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n"); ++ exit(1); ++ } ++ ++ if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n"); ++ exit(1); ++ } ++ ++ if (mount(source, source, NULL, MS_BIND, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); ++ exit(1); ++ } ++ ++ setup_pivot_root(source); ++} ++ ++/* ++ * Lock down this process to prevent access to other processes or files outside ++ * source directory. This reduces the impact of arbitrary code execution bugs. ++ */ ++static void setup_sandbox(struct lo_data *lo) ++{ ++ setup_mount_namespace(lo->source); ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2052,6 +2138,7 @@ int main(int argc, char *argv[]) + } + + lo.root.fd = open(lo.source, O_PATH); ++ + if (lo.root.fd == -1) { + fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); + exit(1); +@@ -2075,6 +2162,8 @@ int main(int argc, char *argv[]) + /* Must be after daemonize to get the right /proc/self/fd */ + setup_proc_self_fd(&lo); + ++ setup_sandbox(&lo); ++ + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch b/SOURCES/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch new file mode 100644 index 0000000..e54248c --- /dev/null +++ b/SOURCES/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch @@ -0,0 +1,93 @@ +From 4cc435b3a8a9a419cc85ee883d5184f810f91e52 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:34 +0100 +Subject: [PATCH 063/116] virtiofsd: set maximum RLIMIT_NOFILE limit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-60-dgilbert@redhat.com> +Patchwork-id: 93516 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 059/112] virtiofsd: set maximum RLIMIT_NOFILE limit +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +virtiofsd can exceed the default open file descriptor limit easily on +most systems. Take advantage of the fact that it runs as root to raise +the limit. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 01a6dc95ec7f71eeff9963fe3cb03d85225fba3e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index d53cb1e..c281d81 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -53,6 +53,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -2268,6 +2269,35 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) + setup_seccomp(); + } + ++/* Raise the maximum number of open file descriptors */ ++static void setup_nofile_rlimit(void) ++{ ++ const rlim_t max_fds = 1000000; ++ struct rlimit rlim; ++ ++ if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { ++ fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n"); ++ exit(1); ++ } ++ ++ if (rlim.rlim_cur >= max_fds) { ++ return; /* nothing to do */ ++ } ++ ++ rlim.rlim_cur = max_fds; ++ rlim.rlim_max = max_fds; ++ ++ if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { ++ /* Ignore SELinux 
denials */ ++ if (errno == EPERM) { ++ return; ++ } ++ ++ fuse_log(FUSE_LOG_ERR, "setrlimit(RLIMIT_NOFILE): %m\n"); ++ exit(1); ++ } ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2389,6 +2419,8 @@ int main(int argc, char *argv[]) + + fuse_daemonize(opts.foreground); + ++ setup_nofile_rlimit(); ++ + /* Must be before sandbox since it wants /proc */ + setup_capng(); + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch b/SOURCES/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch new file mode 100644 index 0000000..be6b244 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch @@ -0,0 +1,72 @@ +From 06a24b54c94345b436d888a48b92fafa967c3d58 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:25 +0100 +Subject: [PATCH 114/116] virtiofsd: stop all queue threads on exit in + virtio_loop() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-111-dgilbert@redhat.com> +Patchwork-id: 93564 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 110/112] virtiofsd: stop all queue threads on exit in virtio_loop() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Eryu Guan + +On guest graceful shutdown, virtiofsd receives VHOST_USER_GET_VRING_BASE +request from VMM and shuts down virtqueues by calling fv_set_started(), +which joins fv_queue_thread() threads. So when virtio_loop() returns, +there should be no thread is still accessing data in fuse session and/or +virtio dev. + +But on abnormal exit, e.g. guest got killed for whatever reason, +vhost-user socket is closed and virtio_loop() breaks out the main loop +and returns to main(). 
But it's possible fv_queue_worker()s are still +working and accessing fuse session and virtio dev, which results in +crash or use-after-free. + +Fix it by stopping fv_queue_thread()s before virtio_loop() returns, +to make sure there's no-one could access fuse session and virtio dev. + +Reported-by: Qingming Su +Signed-off-by: Eryu Guan +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 9883df8ccae6d744a0c8d9cbf9d62b1797d70ebd) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 9f65823..80a6e92 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -815,6 +815,19 @@ int virtio_loop(struct fuse_session *se) + } + } + ++ /* ++ * Make sure all fv_queue_thread()s quit on exit, as we're about to ++ * free virtio dev and fuse session, no one should access them anymore. ++ */ ++ for (int i = 0; i < se->virtio_dev->nqueues; i++) { ++ if (!se->virtio_dev->qi[i]) { ++ continue; ++ } ++ ++ fuse_log(FUSE_LOG_INFO, "%s: Stopping queue %d thread\n", __func__, i); ++ fv_queue_cleanup_thread(se->virtio_dev, i); ++ } ++ + fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); + + return 0; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch b/SOURCES/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch new file mode 100644 index 0000000..f595ffa --- /dev/null +++ b/SOURCES/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch @@ -0,0 +1,83 @@ +From 1744329bcba4a3e1a82cec3b1a34b3fbf0a9d7cf Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:54 +0100 +Subject: [PATCH 083/116] virtiofsd: support nanosecond resolution for file + timestamp +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-80-dgilbert@redhat.com> +Patchwork-id: 93535 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 079/112] virtiofsd: support nanosecond resolution for file timestamp +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Jiufei Xue + +Define HAVE_STRUCT_STAT_ST_ATIM to 1 if `st_atim' is member of `struct +stat' which means support nanosecond resolution for the file timestamp +fields. + +Signed-off-by: Jiufei Xue +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 8a792b034d4b315251fd842bb4c73a133aa1368f) +Signed-off-by: Miroslav Rezanina +--- + configure | 16 ++++++++++++++++ + tools/virtiofsd/fuse_misc.h | 1 + + 2 files changed, 17 insertions(+) + +diff --git a/configure b/configure +index 7831618..5120c14 100755 +--- a/configure ++++ b/configure +@@ -5218,6 +5218,19 @@ if compile_prog "" "" ; then + strchrnul=yes + fi + ++######################################### ++# check if we have st_atim ++ ++st_atim=no ++cat > $TMPC << EOF ++#include ++#include ++int main(void) { return offsetof(struct stat, st_atim); } ++EOF ++if compile_prog "" "" ; then ++ st_atim=yes ++fi ++ + ########################################## + # check if trace backend exists + +@@ -6919,6 +6932,9 @@ fi + if test "$strchrnul" = "yes" ; then + echo "HAVE_STRCHRNUL=y" >> $config_host_mak + fi ++if test "$st_atim" = "yes" ; then ++ echo "HAVE_STRUCT_STAT_ST_ATIM=y" >> $config_host_mak ++fi + if test "$byteswap_h" = "yes" ; then + echo "CONFIG_BYTESWAP_H=y" >> $config_host_mak + fi +diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h +index f252baa..5c618ce 100644 +--- a/tools/virtiofsd/fuse_misc.h ++++ b/tools/virtiofsd/fuse_misc.h +@@ -7,6 +7,7 @@ + */ + + #include ++#include "config-host.h" + + /* + * Versioned symbols cannot be used in some cases because it +-- +1.8.3.1 + diff --git 
a/SOURCES/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch b/SOURCES/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch new file mode 100644 index 0000000..1bae1bf --- /dev/null +++ b/SOURCES/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch @@ -0,0 +1,82 @@ +From 7bc27a767bc8c78b1bca46bbe5e1d53dcd7173b4 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:18 +0100 +Subject: [PATCH 107/116] virtiofsd: use fuse_buf_writev to replace + fuse_buf_write for better performance +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-104-dgilbert@redhat.com> +Patchwork-id: 93558 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 103/112] virtiofsd: use fuse_buf_writev to replace fuse_buf_write for better performance +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: piaojun + +fuse_buf_writev() only handles the normal write in which src is buffer +and dest is fd. Specially if src buffer represents guest physical +address that can't be mapped by the daemon process, IO must be bounced +back to the VMM to do it by fuse_buf_copy(). + +Signed-off-by: Jun Piao +Suggested-by: Dr. David Alan Gilbert +Suggested-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit c465bba2c90a810f6e71e4f2646b1b4ee4b478de) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 37befeb..27c1377 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -34,7 +34,6 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv) + return size; + } + +-__attribute__((unused)) + static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, + struct fuse_bufvec *in_buf) + { +@@ -262,12 +261,29 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) + + ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) + { +- size_t copied = 0; ++ size_t copied = 0, i; + + if (dstv == srcv) { + return fuse_buf_size(dstv); + } + ++ /* ++ * use writev to improve bandwidth when all the ++ * src buffers already mapped by the daemon ++ * process ++ */ ++ for (i = 0; i < srcv->count; i++) { ++ if (srcv->buf[i].flags & FUSE_BUF_IS_FD) { ++ break; ++ } ++ } ++ if ((i == srcv->count) && (dstv->count == 1) && ++ (dstv->idx == 0) && ++ (dstv->buf[0].flags & FUSE_BUF_IS_FD)) { ++ dstv->buf[0].pos += dstv->off; ++ return fuse_buf_writev(&dstv->buf[0], srcv); ++ } ++ + for (;;) { + const struct fuse_buf *src = fuse_bufvec_current(srcv); + const struct fuse_buf *dst = fuse_bufvec_current(dstv); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch b/SOURCES/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch new file mode 100644 index 0000000..feffb5e --- /dev/null +++ b/SOURCES/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch @@ -0,0 +1,56 @@ +From 1724f54070d33d8070ba2d22c8fac87ea65814c1 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:04 +0100 +Subject: [PATCH 093/116] virtiofsd: use fuse_lowlevel_is_virtio() in + fuse_session_destroy() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-90-dgilbert@redhat.com> +Patchwork-id: 93540 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 089/112] virtiofsd: use fuse_lowlevel_is_virtio() in fuse_session_destroy() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +vu_socket_path is NULL when --fd=FDNUM was used. Use +fuse_lowlevel_is_virtio() instead. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 620e9d8d9cee6df7fe71168dea950dba0cc21a4a) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 70568d2..dab6a31 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2537,12 +2537,13 @@ void fuse_session_destroy(struct fuse_session *se) + close(se->fd); + } + +- if (se->vu_socket_path) { ++ if (fuse_lowlevel_is_virtio(se)) { + virtio_session_close(se); +- free(se->vu_socket_path); +- se->vu_socket_path = NULL; + } + ++ free(se->vu_socket_path); ++ se->vu_socket_path = NULL; ++ + free(se); + } + +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch b/SOURCES/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch new file mode 100644 index 0000000..f250ed7 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch @@ -0,0 +1,390 @@ +From bce5070d1aada88154b811a08eec1586ab24fce5 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:26 +0100 +Subject: [PATCH 055/116] virtiofsd: use /proc/self/fd/ O_PATH file descriptor +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-52-dgilbert@redhat.com> +Patchwork-id: 93506 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 051/112] virtiofsd: use /proc/self/fd/ O_PATH file descriptor +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Sandboxing will remove /proc from the mount namespace so we can no +longer build string paths into "/proc/self/fd/...". + +Keep an O_PATH file descriptor so we can still re-open fds via +/proc/self/fd. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 9f59d175e2ca96f0b87f534dba69ea547dd35945) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 130 +++++++++++++++++++++++++++++++-------- + 1 file changed, 103 insertions(+), 27 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e3d65c3..e2e2211 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -110,6 +110,9 @@ struct lo_data { + struct lo_map ino_map; /* protected by lo->mutex */ + struct lo_map dirp_map; /* protected by lo->mutex */ + struct lo_map fd_map; /* protected by lo->mutex */ ++ ++ /* An O_PATH file descriptor to /proc/self/fd/ */ ++ int proc_self_fd; + }; + + static const struct fuse_opt lo_opts[] = { +@@ -379,9 +382,9 @@ static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, + int res; + + retry: +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); + +- res = readlink(procname, path, PATH_MAX); ++ res = readlinkat(lo->proc_self_fd, procname, path, PATH_MAX); 
+ if (res < 0) { + fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__); + goto fail_noretry; +@@ -477,9 +480,9 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, + } + return res; + } +- sprintf(path, "/proc/self/fd/%i", inode->fd); ++ sprintf(path, "%i", inode->fd); + +- return utimensat(AT_FDCWD, path, tv, 0); ++ return utimensat(lo->proc_self_fd, path, tv, 0); + + fallback: + res = lo_parent_and_name(lo, inode, path, &parent); +@@ -535,8 +538,8 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + if (fi) { + res = fchmod(fd, attr->st_mode); + } else { +- sprintf(procname, "/proc/self/fd/%i", ifd); +- res = chmod(procname, attr->st_mode); ++ sprintf(procname, "%i", ifd); ++ res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0); + } + if (res == -1) { + goto out_err; +@@ -552,11 +555,23 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + } + } + if (valid & FUSE_SET_ATTR_SIZE) { ++ int truncfd; ++ + if (fi) { +- res = ftruncate(fd, attr->st_size); ++ truncfd = fd; + } else { +- sprintf(procname, "/proc/self/fd/%i", ifd); +- res = truncate(procname, attr->st_size); ++ sprintf(procname, "%i", ifd); ++ truncfd = openat(lo->proc_self_fd, procname, O_RDWR); ++ if (truncfd < 0) { ++ goto out_err; ++ } ++ } ++ ++ res = ftruncate(truncfd, attr->st_size); ++ if (!fi) { ++ saverr = errno; ++ close(truncfd); ++ errno = saverr; + } + if (res == -1) { + goto out_err; +@@ -874,9 +889,9 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, + return res; + } + +- sprintf(path, "/proc/self/fd/%i", inode->fd); ++ sprintf(path, "%i", inode->fd); + +- return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW); ++ return linkat(lo->proc_self_fd, path, dfd, name, AT_SYMLINK_FOLLOW); + + fallback: + res = lo_parent_and_name(lo, inode, path, &parent); +@@ -1404,8 +1419,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + fi->flags &= 
~O_APPEND; + } + +- sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); +- fd = open(buf, fi->flags & ~O_NOFOLLOW); ++ sprintf(buf, "%i", lo_fd(req, ino)); ++ fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); + if (fd == -1) { + return (void)fuse_reply_err(req, errno); + } +@@ -1458,7 +1473,6 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) + { + int res; +- (void)ino; + int fd; + char *buf; + +@@ -1466,12 +1480,14 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + (void *)fi); + + if (!fi) { +- res = asprintf(&buf, "/proc/self/fd/%i", lo_fd(req, ino)); ++ struct lo_data *lo = lo_data(req); ++ ++ res = asprintf(&buf, "%i", lo_fd(req, ino)); + if (res == -1) { + return (void)fuse_reply_err(req, errno); + } + +- fd = open(buf, O_RDWR); ++ fd = openat(lo->proc_self_fd, buf, O_RDWR); + free(buf); + if (fd == -1) { + return (void)fuse_reply_err(req, errno); +@@ -1587,11 +1603,13 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + size_t size) + { ++ struct lo_data *lo = lo_data(req); + char *value = NULL; + char procname[64]; + struct lo_inode *inode; + ssize_t ret; + int saverr; ++ int fd = -1; + + inode = lo_inode(req, ino); + if (!inode) { +@@ -1616,7 +1634,11 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ goto out_err; ++ } + + if (size) { + value = malloc(size); +@@ -1624,7 +1646,7 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out_err; + } + +- ret = getxattr(procname, name, value, size); ++ ret = fgetxattr(fd, name, value, size); + if (ret == -1) { + goto out_err; + } +@@ -1635,7 +1657,7 @@ static void lo_getxattr(fuse_req_t req, 
fuse_ino_t ino, const char *name, + + fuse_reply_buf(req, value, ret); + } else { +- ret = getxattr(procname, name, NULL, 0); ++ ret = fgetxattr(fd, name, NULL, 0); + if (ret == -1) { + goto out_err; + } +@@ -1644,6 +1666,10 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + } + out_free: + free(value); ++ ++ if (fd >= 0) { ++ close(fd); ++ } + return; + + out_err: +@@ -1655,11 +1681,13 @@ out: + + static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + { ++ struct lo_data *lo = lo_data(req); + char *value = NULL; + char procname[64]; + struct lo_inode *inode; + ssize_t ret; + int saverr; ++ int fd = -1; + + inode = lo_inode(req, ino); + if (!inode) { +@@ -1683,7 +1711,11 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + goto out; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ goto out_err; ++ } + + if (size) { + value = malloc(size); +@@ -1691,7 +1723,7 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + goto out_err; + } + +- ret = listxattr(procname, value, size); ++ ret = flistxattr(fd, value, size); + if (ret == -1) { + goto out_err; + } +@@ -1702,7 +1734,7 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + + fuse_reply_buf(req, value, ret); + } else { +- ret = listxattr(procname, NULL, 0); ++ ret = flistxattr(fd, NULL, 0); + if (ret == -1) { + goto out_err; + } +@@ -1711,6 +1743,10 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + } + out_free: + free(value); ++ ++ if (fd >= 0) { ++ close(fd); ++ } + return; + + out_err: +@@ -1724,9 +1760,11 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + const char *value, size_t size, int flags) + { + char procname[64]; ++ struct lo_data *lo = lo_data(req); + struct lo_inode *inode; + ssize_t ret; + int saverr; ++ int fd = -1; + + 
inode = lo_inode(req, ino); + if (!inode) { +@@ -1751,21 +1789,31 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, procname, O_RDWR); ++ if (fd < 0) { ++ saverr = errno; ++ goto out; ++ } + +- ret = setxattr(procname, name, value, size, flags); ++ ret = fsetxattr(fd, name, value, size, flags); + saverr = ret == -1 ? errno : 0; + + out: ++ if (fd >= 0) { ++ close(fd); ++ } + fuse_reply_err(req, saverr); + } + + static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + { + char procname[64]; ++ struct lo_data *lo = lo_data(req); + struct lo_inode *inode; + ssize_t ret; + int saverr; ++ int fd = -1; + + inode = lo_inode(req, ino); + if (!inode) { +@@ -1789,12 +1837,20 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + goto out; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, procname, O_RDWR); ++ if (fd < 0) { ++ saverr = errno; ++ goto out; ++ } + +- ret = removexattr(procname, name); ++ ret = fremovexattr(fd, name); + saverr = ret == -1 ? 
errno : 0; + + out: ++ if (fd >= 0) { ++ close(fd); ++ } + fuse_reply_err(req, saverr); + } + +@@ -1887,12 +1943,25 @@ static void print_capabilities(void) + printf("}\n"); + } + ++static void setup_proc_self_fd(struct lo_data *lo) ++{ ++ lo->proc_self_fd = open("/proc/self/fd", O_PATH); ++ if (lo->proc_self_fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); ++ exit(1); ++ } ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); + struct fuse_session *se; + struct fuse_cmdline_opts opts; +- struct lo_data lo = { .debug = 0, .writeback = 0 }; ++ struct lo_data lo = { ++ .debug = 0, ++ .writeback = 0, ++ .proc_self_fd = -1, ++ }; + struct lo_map_elem *root_elem; + int ret = -1; + +@@ -2003,6 +2072,9 @@ int main(int argc, char *argv[]) + + fuse_daemonize(opts.foreground); + ++ /* Must be after daemonize to get the right /proc/self/fd */ ++ setup_proc_self_fd(&lo); ++ + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); + +@@ -2018,6 +2090,10 @@ err_out1: + lo_map_destroy(&lo.dirp_map); + lo_map_destroy(&lo.ino_map); + ++ if (lo.proc_self_fd >= 0) { ++ close(lo.proc_self_fd); ++ } ++ + if (lo.root.fd >= 0) { + close(lo.root.fd); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch b/SOURCES/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch new file mode 100644 index 0000000..d60a902 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch @@ -0,0 +1,137 @@ +From 6877a6c456178d6c1ca9a0ffaabaa7e51105b2ac Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:22 +0100 +Subject: [PATCH 051/116] virtiofsd: validate input buffer sizes in + do_write_buf() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-48-dgilbert@redhat.com> +Patchwork-id: 93501 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 047/112] virtiofsd: validate input buffer sizes in do_write_buf() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +There is a small change in behavior: if fuse_write_in->size doesn't +match the input buffer size then the request is failed. Previously +write requests with 1 fuse_buf element would truncate to +fuse_write_in->size. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 0ba8c3c6fce8fe949d59c1fd84d98d220ef9e759) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 49 +++++++++++++++++++++++++---------------- + 1 file changed, 30 insertions(+), 19 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 7e10995..611e8b0 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1003,8 +1003,8 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, +- struct fuse_bufvec *ibufv) ++static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter, struct fuse_bufvec *ibufv) + { + struct fuse_session *se = req->se; + struct fuse_bufvec *pbufv = ibufv; +@@ -1012,28 +1012,27 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, + .buf[0] = ibufv->buf[0], + .count = 1, + }; +- struct fuse_write_in *arg = (struct fuse_write_in *)inarg; ++ struct fuse_write_in *arg; ++ size_t arg_size = sizeof(*arg); + struct fuse_file_info fi; + + memset(&fi, 0, sizeof(fi)); ++ ++ arg = fuse_mbuf_iter_advance(iter, arg_size); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ fi.lock_owner = 
arg->lock_owner; ++ fi.flags = arg->flags; + fi.fh = arg->fh; + fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; + + if (ibufv->count == 1) { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- if (!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)) { +- tmpbufv.buf[0].mem = PARAM(arg); +- } +- tmpbufv.buf[0].size -= +- sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); +- if (tmpbufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: do_write_buf: buffer size too small\n"); +- fuse_reply_err(req, EIO); +- return; +- } +- tmpbufv.buf[0].size = arg->size; ++ assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)); ++ tmpbufv.buf[0].mem = ((char *)arg) + arg_size; ++ tmpbufv.buf[0].size -= sizeof(struct fuse_in_header) + arg_size; + pbufv = &tmpbufv; + } else { + /* +@@ -1043,6 +1042,13 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, + ibufv->buf[0].size = 0; + } + ++ if (fuse_buf_size(pbufv) != arg->size) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: do_write_buf: buffer size doesn't match arg->size\n"); ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ + se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); + } + +@@ -2052,12 +2058,17 @@ void fuse_session_process_buf_int(struct fuse_session *se, + struct fuse_chan *ch) + { + const struct fuse_buf *buf = bufv->buf; ++ struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf); + struct fuse_in_header *in; + const void *inarg; + struct fuse_req *req; + int err; + +- in = buf->mem; ++ /* The first buffer must be a memory buffer */ ++ assert(!(buf->flags & FUSE_BUF_IS_FD)); ++ ++ in = fuse_mbuf_iter_advance(&iter, sizeof(*in)); ++ assert(in); /* caller guarantees the input buffer is large enough */ + + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, +@@ -2129,7 +2140,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, + + inarg = (void *)&in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) { +- do_write_buf(req, in->nodeid, inarg, bufv); ++ do_write_buf(req, 
in->nodeid, &iter, bufv); + } else { + fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); + } +-- +1.8.3.1 + diff --git a/SOURCES/kvm-virtiofsd-validate-path-components.patch b/SOURCES/kvm-virtiofsd-validate-path-components.patch new file mode 100644 index 0000000..b35aed7 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-validate-path-components.patch @@ -0,0 +1,164 @@ +From 69ac47502848c37ca3ede00f432c0675d9eef42c Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:18 +0100 +Subject: [PATCH 047/116] virtiofsd: validate path components +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-44-dgilbert@redhat.com> +Patchwork-id: 93498 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 043/112] virtiofsd: validate path components +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Several FUSE requests contain single path components. A correct FUSE +client sends well-formed path components but there is currently no input +validation in case something went wrong or the client is malicious. + +Refuse ".", "..", and paths containing '/' when we expect a path +component. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 25dae28c58d7e706b5d5db99042c9db3cef2e657) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 59 ++++++++++++++++++++++++++++++++++++---- + 1 file changed, 53 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index ac380ef..e375406 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -133,6 +133,21 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); + + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); + ++static int is_dot_or_dotdot(const char *name) ++{ ++ return name[0] == '.' && ++ (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); ++} ++ ++/* Is `path` a single path component that is not "." or ".."? */ ++static int is_safe_path_component(const char *path) ++{ ++ if (strchr(path, '/')) { ++ return 0; ++ } ++ ++ return !is_dot_or_dotdot(path); ++} + + static struct lo_data *lo_data(fuse_req_t req) + { +@@ -681,6 +696,15 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + parent, name); + } + ++ /* ++ * Don't use is_safe_path_component(), allow "." and ".." for NFS export ++ * support. 
++ */ ++ if (strchr(name, '/')) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + err = lo_do_lookup(req, parent, name, &e); + if (err) { + fuse_reply_err(req, err); +@@ -762,6 +786,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + struct fuse_entry_param e; + struct lo_cred old = {}; + ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + dir = lo_inode(req, parent); + if (!dir) { + fuse_reply_err(req, EBADF); +@@ -863,6 +892,11 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + struct fuse_entry_param e; + int saverr; + ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + inode = lo_inode(req, ino); + if (!inode) { + fuse_reply_err(req, EBADF); +@@ -904,6 +938,10 @@ out_err: + static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) + { + int res; ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); + +@@ -916,6 +954,11 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + { + int res; + ++ if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + if (flags) { + fuse_reply_err(req, EINVAL); + return; +@@ -930,6 +973,11 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + { + int res; + ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + res = unlinkat(lo_fd(req, parent), name, 0); + + fuse_reply_err(req, res == -1 ? errno : 0); +@@ -1093,12 +1141,6 @@ out_err: + fuse_reply_err(req, error); + } + +-static int is_dot_or_dotdot(const char *name) +-{ +- return name[0] == '.' && +- (name[1] == '\0' || (name[1] == '.' 
&& name[2] == '\0')); +-} +- + static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, struct fuse_file_info *fi, int plus) + { +@@ -1248,6 +1290,11 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + parent, name); + } + ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + err = lo_change_cred(req, &old); + if (err) { + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch b/SOURCES/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch new file mode 100644 index 0000000..20add81 --- /dev/null +++ b/SOURCES/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch @@ -0,0 +1,56 @@ +From 247987aa987b7332eb501e00c440079b9e8e1fe7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:52 +0100 +Subject: [PATCH 021/116] vitriofsd/passthrough_ll: fix fallocate() ifdefs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-18-dgilbert@redhat.com> +Patchwork-id: 93471 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 017/112] vitriofsd/passthrough_ll: fix fallocate() ifdefs +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Xiao Yang + +1) Use correct CONFIG_FALLOCATE macro to check if fallocate() is supported.(i.e configure + script sets CONFIG_FALLOCATE intead of HAVE_FALLOCATE if fallocate() is supported) +2) Replace HAVE_POSIX_FALLOCATE with CONFIG_POSIX_FALLOCATE. + +Signed-off-by: Xiao Yang +Signed-off-by: Dr. 
David Alan Gilbert + Merged from two of Xiao Yang's patches +(cherry picked from commit 9776457ca6f05d5900e27decb1dba2ffddf95a22) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 322a889..6c4da18 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -975,13 +975,13 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, + int err = EOPNOTSUPP; + (void)ino; + +-#ifdef HAVE_FALLOCATE ++#ifdef CONFIG_FALLOCATE + err = fallocate(fi->fh, mode, offset, length); + if (err < 0) { + err = errno; + } + +-#elif defined(HAVE_POSIX_FALLOCATE) ++#elif defined(CONFIG_POSIX_FALLOCATE) + if (mode) { + fuse_reply_err(req, EOPNOTSUPP); + return; +-- +1.8.3.1 + diff --git a/SOURCES/kvm-x86.conf b/SOURCES/kvm-x86.conf new file mode 100644 index 0000000..3f7842a --- /dev/null +++ b/SOURCES/kvm-x86.conf @@ -0,0 +1,12 @@ +# Setting modprobe kvm_intel/kvm_amd nested = 1 +# only enables Nested Virtualization until the next reboot or +# module reload. Uncomment the option applicable +# to your system below to enable the feature permanently. +# +# User changes in this file are preserved across upgrades. +# +# For Intel +#options kvm_intel nested=1 +# +# For AMD +#options kvm_amd nested=1 diff --git a/SOURCES/kvm-xhci-recheck-slot-status.patch b/SOURCES/kvm-xhci-recheck-slot-status.patch new file mode 100644 index 0000000..8bcbc2c --- /dev/null +++ b/SOURCES/kvm-xhci-recheck-slot-status.patch @@ -0,0 +1,77 @@ +From ab87c0ed2a8f0a626099261a3028bc34cfac3929 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 14 Jan 2020 20:23:31 +0000 +Subject: [PATCH 5/5] xhci: recheck slot status +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200114202331.51831-3-dgilbert@redhat.com> +Patchwork-id: 93345 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] xhci: recheck slot status +Bugzilla: 1790844 +RH-Acked-by: Peter Xu +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Gerd Hoffmann + +From: Gerd Hoffmann + +Factor out slot status check into a helper function. Add an additional +check after completing transfers. This is needed in case a guest +queues multiple transfers in a row and a device unplug happens while +qemu processes them. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1786413 +Signed-off-by: Gerd Hoffmann +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 20200107083606.12393-1-kraxel@redhat.com +(cherry picked from commit 236846a019c4f7aa3111026fc9a1fe09684c8978) +Signed-off-by: Danilo C. L. de Paula +--- + hw/usb/hcd-xhci.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index d2b9744..646c78c 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -1861,6 +1861,13 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, + xhci_kick_epctx(epctx, streamid); + } + ++static bool xhci_slot_ok(XHCIState *xhci, int slotid) ++{ ++ return (xhci->slots[slotid - 1].uport && ++ xhci->slots[slotid - 1].uport->dev && ++ xhci->slots[slotid - 1].uport->dev->attached); ++} ++ + static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) + { + XHCIState *xhci = epctx->xhci; +@@ -1878,9 +1885,7 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) + + /* If the device has been detached, but the guest has not noticed this + yet the 2 above checks will succeed, but we must NOT continue */ +- if (!xhci->slots[epctx->slotid - 1].uport || +- !xhci->slots[epctx->slotid - 1].uport->dev || +- !xhci->slots[epctx->slotid - 1].uport->dev->attached) { ++ if (!xhci_slot_ok(xhci, epctx->slotid)) { + return; + } + +@@ -1987,6 +1992,10 @@ static void 
xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) + } else { + xhci_fire_transfer(xhci, xfer, epctx); + } ++ if (!xhci_slot_ok(xhci, epctx->slotid)) { ++ /* surprise removal -> stop processing */ ++ break; ++ } + if (xfer->complete) { + /* update ring dequeue ptr */ + xhci_set_ep_state(xhci, epctx, stctx, epctx->state); +-- +1.8.3.1 + diff --git a/SOURCES/kvm-xics-Don-t-deassert-outputs.patch b/SOURCES/kvm-xics-Don-t-deassert-outputs.patch new file mode 100644 index 0000000..08ed724 --- /dev/null +++ b/SOURCES/kvm-xics-Don-t-deassert-outputs.patch @@ -0,0 +1,52 @@ +From 99b6ee4b7f63ea49e5b73f61bbf68f67252f27da Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 21 Jan 2020 05:16:12 +0000 +Subject: [PATCH 02/15] xics: Don't deassert outputs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20200121051613.388295-3-dgibson@redhat.com> +Patchwork-id: 93430 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 2/3] xics: Don't deassert outputs +Bugzilla: 1776638 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth + +From: Greg Kurz + +The correct way to do this is to deassert the input pins on the CPU side. +This is the case since a previous change. + +Signed-off-by: Greg Kurz +Message-Id: <157548862298.3650476.1228720391270249433.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit 4febcdd88f08422a66a1aa0dc55e1472abed3c4b) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/intc/xics.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/hw/intc/xics.c b/hw/intc/xics.c +index e7ac9ba..72c5dca 100644 +--- a/hw/intc/xics.c ++++ b/hw/intc/xics.c +@@ -289,9 +289,6 @@ void icp_reset(ICPState *icp) + icp->pending_priority = 0xff; + icp->mfrr = 0xff; + +- /* Make all outputs are deasserted */ +- qemu_set_irq(icp->output, 0); +- + if (kvm_irqchip_in_kernel()) { + Error *local_err = NULL; + +-- +1.8.3.1 + diff --git a/SOURCES/kvm.conf b/SOURCES/kvm.conf new file mode 100644 index 0000000..24e60e9 --- /dev/null +++ b/SOURCES/kvm.conf @@ -0,0 +1,3 @@ +# +# User changes in this file are preserved across upgrades. +# diff --git a/SOURCES/qemu-ga.sysconfig b/SOURCES/qemu-ga.sysconfig new file mode 100644 index 0000000..67bad0c --- /dev/null +++ b/SOURCES/qemu-ga.sysconfig @@ -0,0 +1,19 @@ +# This is a systemd environment file, not a shell script. +# It provides settings for "/lib/systemd/system/qemu-guest-agent.service". + +# Comma-separated blacklist of RPCs to disable, or empty list to enable all. +# +# You can get the list of RPC commands using "qemu-ga --blacklist='?'". +# There should be no spaces between commas and commands in the blacklist. +BLACKLIST_RPC=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status + +# Fsfreeze hook script specification. +# +# FSFREEZE_HOOK_PATHNAME=/dev/null : disables the feature. +# +# FSFREEZE_HOOK_PATHNAME=/path/to/executable : enables the feature with the +# specified binary or shell script. +# +# FSFREEZE_HOOK_PATHNAME= : enables the feature with the +# default value (invoke "qemu-ga --help" to interrogate). 
+FSFREEZE_HOOK_PATHNAME=/etc/qemu-ga/fsfreeze-hook diff --git a/SOURCES/qemu-guest-agent.service b/SOURCES/qemu-guest-agent.service new file mode 100644 index 0000000..b33e951 --- /dev/null +++ b/SOURCES/qemu-guest-agent.service @@ -0,0 +1,20 @@ +[Unit] +Description=QEMU Guest Agent +BindsTo=dev-virtio\x2dports-org.qemu.guest_agent.0.device +After=dev-virtio\x2dports-org.qemu.guest_agent.0.device +IgnoreOnIsolate=True + +[Service] +UMask=0077 +EnvironmentFile=/etc/sysconfig/qemu-ga +ExecStart=/usr/bin/qemu-ga \ + --method=virtio-serial \ + --path=/dev/virtio-ports/org.qemu.guest_agent.0 \ + --blacklist=${BLACKLIST_RPC} \ + -F${FSFREEZE_HOOK_PATHNAME} +StandardError=syslog +Restart=always +RestartSec=0 + +[Install] +WantedBy=dev-virtio\x2dports-org.qemu.guest_agent.0.device diff --git a/SOURCES/qemu-pr-helper.service b/SOURCES/qemu-pr-helper.service new file mode 100644 index 0000000..a1d27b0 --- /dev/null +++ b/SOURCES/qemu-pr-helper.service @@ -0,0 +1,15 @@ +[Unit] +Description=Persistent Reservation Daemon for QEMU + +[Service] +WorkingDirectory=/tmp +Type=simple +ExecStart=/usr/bin/qemu-pr-helper +PrivateTmp=yes +ProtectSystem=strict +ReadWritePaths=/var/run +RestrictAddressFamilies=AF_UNIX +Restart=always +RestartSec=0 + +[Install] diff --git a/SOURCES/qemu-pr-helper.socket b/SOURCES/qemu-pr-helper.socket new file mode 100644 index 0000000..9d7c3e5 --- /dev/null +++ b/SOURCES/qemu-pr-helper.socket @@ -0,0 +1,9 @@ +[Unit] +Description=Persistent Reservation Daemon for QEMU + +[Socket] +ListenStream=/run/qemu-pr-helper.sock +SocketMode=0600 + +[Install] +WantedBy=multi-user.target diff --git a/SOURCES/udev-kvm-check.c b/SOURCES/udev-kvm-check.c new file mode 100644 index 0000000..cb0ecba --- /dev/null +++ b/SOURCES/udev-kvm-check.c @@ -0,0 +1,172 @@ +/* + * udev-kvm-check.c + * + * Copyright 2018 Red Hat, Inc. 
+ * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#include +#include +#include +#include +#include + +#define DEFAULT 0 +#define FACILITY "kvm" +#define SYSCONFIG_KVM "/etc/sysconfig/kvm" + +#define COUNT_MSG \ + "%d %s now active" + +#define SUBSCRIPTION_MSG \ + "%d %s now active; your Red Hat Enterprise Linux subscription" \ + " limit is %d guests. Please review your Red Hat Enterprise Linux" \ + " subscription agreement or contact your Red Hat" \ + " support representative for more information. You" \ + " may review the Red Hat Enterprise subscription" \ + " limits at http://www.redhat.com/rhel-virt-limits" + +int get_threshold_from_file(FILE *fp) +{ + static const char key[] = "THRESHOLD="; + int pos = 0; + int thres; + int ch; + +start: + /* State START - at beginning of line, search for beginning of "THRESHOLD=" + * string. 
+ */ + ch = getc(fp); + if (ch == EOF) { + return DEFAULT; + } + if (isspace(ch)) { + goto start; + } + if (ch == 'T') { + pos = 1; + goto key; + } + goto eol; + +eol: + /* State EOL - loop until end of line */ + ch = getc(fp); + if (ch == EOF) { + return DEFAULT; + } + if (ch == '\n') { + goto start; + } + goto eol; + +key: + /* State KEY - match "THRESHOLD=" string, go to THRESHOLD if found */ + ch = getc(fp); + if (ch == EOF) { + return DEFAULT; + } + if (ch == key[pos]) { + pos++; + if (key[pos] == 0) { + goto threshold; + } else { + goto key; + } + } + goto eol; + +threshold: + /* State THRESHOLD - parse number using fscanf, expect comment or space + * or EOL. + */ + ch = getc(fp); + if (ch == EOF) { + return DEFAULT; + } + if (!isdigit(ch)) { + goto eol; + } + ungetc(ch, fp); + if (fscanf(fp, "%d", &thres) != 1) { + return DEFAULT; + } + ch = getc(fp); + if (ch == '#' || ch == EOF || ch == '\n' || isspace(ch)) { + return thres; + } + goto eol; +} + +int get_threshold() +{ + FILE *fp = fopen(SYSCONFIG_KVM, "r"); + int val; + + if (!fp) { + return DEFAULT; + } + + val = get_threshold_from_file(fp); + fclose (fp); + return val; +} + +const char *guest(int count) +{ + return (count == 1 ? 
"guest" : "guests"); +} + +void emit_count_message(int count) +{ + openlog(FACILITY, LOG_CONS, LOG_USER); + syslog(LOG_INFO, COUNT_MSG, count, guest(count)); + closelog(); +} + +void emit_subscription_message(int count, int threshold) +{ + openlog(FACILITY, LOG_CONS, LOG_USER); + syslog(LOG_WARNING, SUBSCRIPTION_MSG, count, guest(count), threshold); + closelog(); +} + +int main(int argc, char **argv) +{ + int count, threshold; + + if (argc < 3) + exit(1); + + count = atoi(argv[1]); + threshold = get_threshold(); + + if (!strcmp(argv[2], "create")) { + if (threshold == 0) { + emit_count_message(count); + } else if (count > threshold) { + emit_subscription_message(count, threshold); + } + } else { + if (count >= threshold) { + emit_count_message(count); + } + } + + return 0; +} diff --git a/SOURCES/vhost.conf b/SOURCES/vhost.conf new file mode 100644 index 0000000..68d6d7f --- /dev/null +++ b/SOURCES/vhost.conf @@ -0,0 +1,3 @@ +# Increase default vhost memory map limit to match +# KVM's memory slot limit +options vhost max_mem_regions=509 diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec new file mode 100644 index 0000000..ba806ba --- /dev/null +++ b/SPECS/qemu-kvm.spec @@ -0,0 +1,3179 @@ +%global SLOF_gittagdate 20191022 +%global SLOF_gittagcommit 899d9883 + +%global have_usbredir 1 +%global have_spice 1 +%global have_opengl 1 +%global have_fdt 0 +%global have_gluster 1 +%global have_kvm_setup 0 +%global have_memlock_limits 0 + +%ifnarch %{ix86} x86_64 + %global have_usbredir 0 +%endif + +%ifnarch s390x + %global have_librdma 1 +%else + %global have_librdma 0 +%endif + +%ifarch %{ix86} + %global kvm_target i386 +%endif +%ifarch x86_64 + %global kvm_target x86_64 +%else + %global have_spice 0 + %global have_opengl 0 + %global have_gluster 0 +%endif +%ifarch %{power64} + %global kvm_target ppc64 + %global have_fdt 1 + %global have_kvm_setup 1 + %global have_memlock_limits 1 +%endif +%ifarch s390x + %global kvm_target s390x + %global have_kvm_setup 1 +%endif 
+%ifarch ppc + %global kvm_target ppc + %global have_fdt 1 +%endif +%ifarch aarch64 + %global kvm_target aarch64 + %global have_fdt 1 +%endif + +#Versions of various parts: + +%global requires_all_modules \ +Requires: %{name}-block-curl = %{epoch}:%{version}-%{release} \ +%if %{have_gluster} \ +Requires: %{name}-block-gluster = %{epoch}:%{version}-%{release} \ +%endif \ +Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \ +Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ +Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} + +# Macro to properly setup RHEL/RHEV conflict handling +%define rhev_ma_conflicts() \ +Obsoletes: %1-ma \ +Obsoletes: %1-rhev + +Summary: QEMU is a machine emulator and virtualizer +Name: qemu-kvm +Version: 4.2.0 +Release: 19%{?dist} +# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped +Epoch: 15 +License: GPLv2 and GPLv2+ and CC-BY +Group: Development/Tools +URL: http://www.qemu.org/ +ExclusiveArch: x86_64 %{power64} aarch64 s390x + + +Source0: http://wiki.qemu.org/download/qemu-4.2.0.tar.xz + +# KSM control scripts +Source4: ksm.service +Source5: ksm.sysconfig +Source6: ksmctl.c +Source7: ksmtuned.service +Source8: ksmtuned +Source9: ksmtuned.conf +Source10: qemu-guest-agent.service +Source11: 99-qemu-guest-agent.rules +Source12: bridge.conf +Source13: qemu-ga.sysconfig +Source21: kvm-setup +Source22: kvm-setup.service +Source23: 85-kvm.preset +Source26: vhost.conf +Source27: kvm.conf +Source28: 95-kvm-memlock.conf +Source30: kvm-s390x.conf +Source31: kvm-x86.conf +Source32: qemu-pr-helper.service +Source33: qemu-pr-helper.socket +Source34: 81-kvm-rhel.rules +Source35: udev-kvm-check.c +Source36: README.tests + + +Patch0005: 0005-Initial-redhat-build.patch +Patch0006: 0006-Enable-disable-devices-for-RHEL.patch +Patch0007: 0007-Machine-type-related-general-changes.patch +Patch0008: 0008-Add-aarch64-machine-types.patch +Patch0009: 0009-Add-ppc64-machine-types.patch +Patch0010: 
0010-Add-s390x-machine-types.patch +Patch0011: 0011-Add-x86_64-machine-types.patch +Patch0012: 0012-Enable-make-check.patch +Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0014: 0014-Add-support-statement-to-help-output.patch +Patch0015: 0015-globally-limit-the-maximum-number-of-CPUs.patch +Patch0016: 0016-Add-support-for-simpletrace.patch +Patch0017: 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch +Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0020: 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0021: 0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch +# For bz#1741345 - Remove the "cpu64-rhel6" CPU from qemu-kvm +Patch22: kvm-i386-Remove-cpu64-rhel6-CPU-model.patch +# For bz#1772774 - qemu-kvm core dump during migration+reboot ( Assertion `mem->dirty_bmap' failed ) +Patch23: kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch +# For bz#1733893 - Boot a guest with "-prom-env 'auto-boot?=false'", SLOF failed to enter the boot entry after input "boot" followed by "0 > " on VNC +Patch24: kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch +# For bz#1782678 - qemu core dump after hot-unplugging the XXV710/XL710 PF +Patch25: kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch +# For bz#1789301 - virtio-blk/scsi: fix notification suppression during AioContext polling +Patch26: kvm-virtio-don-t-enable-notifications-during-polling.patch +# For bz#1790844 - USB related fixes +Patch27: kvm-usbredir-Prevent-recursion-in-usbredir_write.patch +# For bz#1790844 - USB related fixes +Patch28: kvm-xhci-recheck-slot-status.patch +# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] +Patch29: kvm-tcp_emu-Fix-oob-access.patch +# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while 
emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] +Patch30: kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch +# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] +Patch31: kvm-slirp-use-correct-size-while-emulating-commands.patch +# For bz#1559846 - Nested KVM: limit VMX features according to CPU models - Fast Train +Patch32: kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch +# For bz#1725084 - aarch64: support dumping SVE registers +Patch33: kvm-target-arm-arch_dump-Add-SVE-notes.patch +# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic +Patch34: kvm-vhost-Add-names-to-section-rounded-warning.patch +# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic +Patch35: kvm-vhost-Only-align-sections-for-vhost-user.patch +# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic +Patch36: kvm-vhost-coding-style-fix.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch37: kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch38: kvm-vhost-user-fs-remove-vhostfd-property.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch39: kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch40: kvm-virtiofsd-Pull-in-upstream-headers.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch41: kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch42: kvm-virtiofsd-Add-auxiliary-.c-s.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch43: kvm-virtiofsd-Add-fuse_lowlevel.c.patch +# For bz#1694164 - virtio-fs: host<->guest shared 
file system (qemu) +Patch44: kvm-virtiofsd-Add-passthrough_ll.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch45: kvm-virtiofsd-Trim-down-imported-files.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch46: kvm-virtiofsd-Format-imported-files-to-qemu-style.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch47: kvm-virtiofsd-remove-mountpoint-dummy-argument.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch48: kvm-virtiofsd-remove-unused-notify-reply-support.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch49: kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch50: kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch51: kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch52: kvm-virtiofsd-Trim-out-compatibility-code.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch53: kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch54: kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch55: kvm-virtiofsd-Add-options-for-virtio.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch56: kvm-virtiofsd-add-o-source-PATH-to-help-output.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch57: kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch58: kvm-virtiofsd-Start-wiring-up-vhost-user.patch +# For bz#1694164 - virtio-fs: host<->guest 
shared file system (qemu) +Patch59: kvm-virtiofsd-Add-main-virtio-loop.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch60: kvm-virtiofsd-get-set-features-callbacks.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch61: kvm-virtiofsd-Start-queue-threads.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch62: kvm-virtiofsd-Poll-kick_fd-for-queue.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch63: kvm-virtiofsd-Start-reading-commands-from-queue.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch64: kvm-virtiofsd-Send-replies-to-messages.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch65: kvm-virtiofsd-Keep-track-of-replies.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch66: kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch67: kvm-virtiofsd-Fast-path-for-virtio-read.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch68: kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch69: kvm-virtiofsd-make-f-foreground-the-default.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch70: kvm-virtiofsd-add-vhost-user.json-file.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch71: kvm-virtiofsd-add-print-capabilities-option.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch72: kvm-virtiofs-Add-maintainers-entry.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch73: kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch74: 
kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch75: kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch76: kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch77: kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch78: kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch79: kvm-virtiofsd-validate-path-components.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch80: kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch81: kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch82: kvm-virtiofsd-add-fuse_mbuf_iter-API.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch83: kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch84: kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch85: kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch86: kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch87: kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch88: kvm-virtiofsd-sandbox-mount-namespace.patch +# For bz#1694164 - 
virtio-fs: host<->guest shared file system (qemu) +Patch89: kvm-virtiofsd-move-to-an-empty-network-namespace.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch90: kvm-virtiofsd-move-to-a-new-pid-namespace.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch91: kvm-virtiofsd-add-seccomp-whitelist.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch92: kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch93: kvm-virtiofsd-cap-ng-helpers.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch94: kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch95: kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch96: kvm-virtiofsd-fix-libfuse-information-leaks.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch97: kvm-virtiofsd-add-syslog-command-line-option.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch98: kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch99: kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch100: kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch101: kvm-virtiofsd-Handle-reinit.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch102: kvm-virtiofsd-Handle-hard-reboot.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch103: kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch +# For bz#1694164 - virtio-fs: host<->guest shared 
file system (qemu) +Patch104: kvm-vhost-user-Print-unexpected-slave-message-types.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch105: kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch106: kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch107: kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch108: kvm-virtiofsd-passthrough_ll-control-readdirplus.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch109: kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch110: kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch111: kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch112: kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch113: kvm-virtiofsd-passthrough_ll-use-hashtable.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch114: kvm-virtiofsd-Clean-up-inodes-on-destroy.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch115: kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch116: kvm-virtiofsd-fix-error-handling-in-main.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch117: kvm-virtiofsd-cleanup-allocated-resource-in-se.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch118: 
kvm-virtiofsd-fix-memory-leak-on-lo.source.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch119: kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch120: kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch121: kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch122: kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch123: kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch124: kvm-virtiofsd-Support-remote-posix-locks.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch125: kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch126: kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch127: kvm-virtiofsd-make-lo_release-atomic.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch128: kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch129: kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch130: kvm-libvhost-user-Fix-some-memtable-remap-cases.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch131: kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch132: kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch +# For bz#1694164 - 
virtio-fs: host<->guest shared file system (qemu) +Patch133: kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch134: kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch135: kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch136: kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch137: kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch138: kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch139: kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch140: kvm-virtiofsd-process-requests-in-a-thread-pool.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch141: kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch142: kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch143: kvm-virtiofsd-add-thread-pool-size-NUM-option.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch144: kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch145: kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch146: kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system 
(qemu) +Patch147: kvm-virtiofsd-add-some-options-to-the-help-message.patch +# For bz#1776638 - Guest failed to boot up after system_reset 20 times +Patch148: kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch +# For bz#1776638 - Guest failed to boot up after system_reset 20 times +Patch149: kvm-xics-Don-t-deassert-outputs.patch +# For bz#1776638 - Guest failed to boot up after system_reset 20 times +Patch150: kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch +# For bz#1787395 - qemu-trace-stap list : TypeError: startswith first arg must be bytes or a tuple of bytes, not str +Patch151: kvm-trace-update-qemu-trace-stap-to-Python-3.patch +# For bz#1794503 - CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-av-8.2.0] +Patch153: kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch +# For bz#1787444 - Broken postcopy migration with vTPM device +Patch154: kvm-tpm-ppi-page-align-PPI-RAM.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch155: kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch156: kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch157: kvm-tests-arm-cpu-features-Check-feature-default-values.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch158: kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch159: kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch +# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs +Patch160: kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch +# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs +Patch161: 
kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch +# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs +Patch162: kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch +# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) +# For bz#1787291 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) [rhel-8.1.0.z] +# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) +# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) +Patch163: kvm-i386-Resolve-CPU-models-to-v1-by-default.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch164: kvm-iotests-Support-job-complete-in-run_job.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch165: kvm-iotests-Create-VM.blockdev_create.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch166: kvm-block-Activate-recursively-even-for-already-active-n.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch167: kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch168: kvm-iotests-Test-external-snapshot-with-VM-state.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch169: kvm-iotests.py-Let-wait_migration-wait-even-more.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with 
dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch170: kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch171: kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch172: 
kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch173: kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch174: kvm-backup-top-Begin-drain-earlier.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with 
one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch175: kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch176: kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch177: kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest 
installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch178: kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch +# For bz#1801320 - aarch64: backport query-cpu-model-expansion and adjvtime document fixes +Patch179: kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch +# For bz#1801320 - aarch64: backport query-cpu-model-expansion and adjvtime document fixes +Patch180: kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch +# For bz#1796240 - Enable hw accelerated cache-count-flush by default for POWER9 DD2.3 cpus +Patch181: kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch +# For bz#1798994 - CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0] +Patch182: kvm-util-add-slirp_fmt-helpers.patch +# For bz#1798994 - CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0] +Patch183: kvm-tcp_emu-fix-unsafe-snprintf-usages.patch +# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device +Patch184: kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch +# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device +Patch185: kvm-virtio-make-virtio_delete_queue-idempotent.patch +# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device +Patch186: kvm-virtio-reset-region-cache-when-on-queue-deletion.patch +# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug 
virtio-net-pci device +Patch187: kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch +# For bz#1805334 - vhost-user/50-qemu-gpu.json is not valid JSON +Patch188: kvm-vhost-user-gpu-Drop-trailing-json-comma.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch189: kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch190: kvm-target-i386-add-a-ucode-rev-property.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch191: kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch192: kvm-target-i386-fix-TCG-UCODE_REV-access.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch193: kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch194: kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch +# For bz#1703907 - [upstream]QEMU coredump when converting to qcow2: external data file images on block devices with copy_offloading +Patch195: kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch +# For bz#1794692 - Mirror block job stops making progress +Patch196: kvm-mirror-Store-MirrorOp.co-for-debuggability.patch +# For bz#1794692 - Mirror block job stops making progress +Patch197: kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch +# For bz#1782529 - Windows Update Enablement with default smbios strings in qemu +Patch198: kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute 
migrate_cancel) +Patch199: kvm-migration-multifd-clean-pages-after-filling-packet.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch200: kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch201: kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch202: kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch203: kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch204: kvm-qemu-file-Don-t-do-IO-after-shutdown.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch205: kvm-migration-Don-t-send-data-if-we-have-stopped.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch206: kvm-migration-Create-migration_is_running.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch207: kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch208: kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch +# For bz#1797064 - virtiofsd: Fixes 
+Patch209: kvm-virtiofsd-Remove-fuse_req_getgroups.patch +# For bz#1797064 - virtiofsd: Fixes +Patch210: kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch +# For bz#1797064 - virtiofsd: Fixes +Patch211: kvm-virtiofsd-load_capng-missing-unlock.patch +# For bz#1797064 - virtiofsd: Fixes +Patch212: kvm-virtiofsd-do_read-missing-NULL-check.patch +# For bz#1797064 - virtiofsd: Fixes +Patch213: kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch +# For bz#1797064 - virtiofsd: Fixes +Patch214: kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch +# For bz#1797064 - virtiofsd: Fixes +Patch215: kvm-virtiofsd-Fix-xattr-operations.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch216: kvm-block-nbd-Fix-hang-in-.bdrv_close.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch217: kvm-block-Generic-file-creation-fallback.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch218: kvm-file-posix-Drop-hdev_co_create_opts.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch219: kvm-iscsi-Drop-iscsi_co_create_opts.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch220: kvm-iotests-Add-test-for-image-creation-fallback.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch221: kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch222: kvm-iotests-Use-complete_and_wait-in-155.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch223: 
kvm-block-Introduce-bdrv_reopen_commit_post-step.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch224: kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch225: kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch226: kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch227: kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch228: kvm-block-Make-bdrv_get_cumulative_perm-public.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch229: kvm-block-Relax-restrictions-for-blockdev-snapshot.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch230: kvm-iotests-Fix-run_job-with-use_log-False.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch231: kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror 
+Patch232: kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch233: kvm-iotests-Add-iothread-cases-to-155.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch234: kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch +# For bz#1809380 - guest hang during reboot process after migration from RHEl7.8 to RHEL8.2.0. +Patch235: kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch +# For bz#1814336 - [POWER9] QEMU migration-test triggers a kernel warning +Patch236: kvm-migration-Rate-limit-inside-host-pages.patch +# For bz#1811670 - Unneeded qemu-guest-agent dependency on pixman +Patch237: kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch +# For bz#1816007 - qemu-img convert failed to convert with block device as target +Patch238: kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch +# For bz#1816007 - qemu-img convert failed to convert with block device as target +Patch239: kvm-block-trickle-down-the-fallback-image-creation-funct.patch +# For bz#1794692 - Mirror block job stops making progress +Patch240: kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch +# For bz#1794692 - Mirror block job stops making progress +Patch241: kvm-mirror-Wait-only-for-in-flight-operations.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch242: kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch243: kvm-replication-assert-we-own-context-before-job_cancel_.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch244: kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch +# For bz#1817621 - Crash and deadlock 
with block jobs when using io-threads +Patch245: kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch246: kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch247: kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch +# For bz#1822682 - QEMU-4.2 fails to start a VM on Azure +Patch248: kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch + +BuildRequires: wget +BuildRequires: rpm-build +BuildRequires: zlib-devel +BuildRequires: glib2-devel +BuildRequires: which +BuildRequires: gnutls-devel +BuildRequires: cyrus-sasl-devel +BuildRequires: libtool +BuildRequires: libaio-devel +BuildRequires: rsync +BuildRequires: python3-devel +BuildRequires: pciutils-devel +BuildRequires: libiscsi-devel +BuildRequires: ncurses-devel +BuildRequires: libattr-devel +BuildRequires: libusbx-devel >= 1.0.22 +%if %{have_usbredir} +BuildRequires: usbredir-devel >= 0.7.1 +%endif +BuildRequires: texinfo +BuildRequires: python3-sphinx +%if %{have_spice} +BuildRequires: spice-protocol >= 0.12.12 +BuildRequires: spice-server-devel >= 0.12.8 +BuildRequires: libcacard-devel +# For smartcard NSS support +BuildRequires: nss-devel +%endif +BuildRequires: libseccomp-devel >= 2.4.0 +# For network block driver +BuildRequires: libcurl-devel +BuildRequires: libssh-devel +BuildRequires: librados-devel +BuildRequires: librbd-devel +%if %{have_gluster} +# For gluster block driver +BuildRequires: glusterfs-api-devel >= 3.6.0 +BuildRequires: glusterfs-devel +%endif +# We need both because the 'stap' binary is probed for by configure +BuildRequires: systemtap +BuildRequires: systemtap-sdt-devel +# For VNC PNG support +BuildRequires: libpng-devel +# For uuid generation +BuildRequires: libuuid-devel +# For BlueZ device support +BuildRequires: bluez-libs-devel +# For Braille device support +BuildRequires: 
brlapi-devel +# For test suite +BuildRequires: check-devel +# For virtiofs +BuildRequires: libcap-ng-devel +# Hard requirement for version >= 1.3 +BuildRequires: pixman-devel +# Documentation requirement +BuildRequires: perl-podlators +BuildRequires: texinfo +BuildRequires: python3-sphinx +# For rdma +%if 0%{?have_librdma} +BuildRequires: rdma-core-devel +%endif +%if %{have_fdt} +BuildRequires: libfdt-devel >= 1.4.3 +%endif +# iasl and cpp for acpi generation (not a hard requirement as we can use +# pre-compiled files, but it's better to use this) +%ifarch %{ix86} x86_64 +BuildRequires: iasl +BuildRequires: cpp +%endif +# For compressed guest memory dumps +BuildRequires: lzo-devel snappy-devel +# For NUMA memory binding +%ifnarch s390x +BuildRequires: numactl-devel +%endif +BuildRequires: libgcrypt-devel +# qemu-pr-helper multipath support (requires libudev too) +BuildRequires: device-mapper-multipath-devel +BuildRequires: systemd-devel +# used by qemu-bridge-helper and qemu-pr-helper +BuildRequires: libcap-ng-devel + +BuildRequires: diffutils +%ifarch x86_64 +BuildRequires: libpmem-devel +Requires: libpmem +%endif + +# qemu-keymap +BuildRequires: pkgconfig(xkbcommon) + +# For s390-pgste flag +%ifarch s390x +BuildRequires: binutils >= 2.27-16 +%endif + +%if %{have_opengl} +BuildRequires: pkgconfig(epoxy) +BuildRequires: pkgconfig(libdrm) +BuildRequires: pkgconfig(gbm) +Requires: mesa-libGL +Requires: mesa-libEGL +Requires: mesa-dri-drivers +%endif + +BuildRequires: perl-Test-Harness + +Requires: qemu-kvm-core = %{epoch}:%{version}-%{release} +%rhev_ma_conflicts qemu-kvm + +%{requires_all_modules} + +%define qemudocdir %{_docdir}/%{name} + +%description +qemu-kvm is an open source virtualizer that provides hardware +emulation for the KVM hypervisor. qemu-kvm acts as a virtual +machine monitor together with the KVM kernel modules, and emulates the +hardware for a full system such as a PC and its associated peripherals. 
+ + +%package -n qemu-kvm-core +Summary: qemu-kvm core components +Requires: qemu-img = %{epoch}:%{version}-%{release} +%ifarch %{ix86} x86_64 +Requires: seabios-bin >= 1.10.2-1 +Requires: sgabios-bin +Requires: edk2-ovmf +%endif +%ifarch aarch64 +Requires: edk2-aarch64 +%endif + +%ifnarch aarch64 s390x +Requires: seavgabios-bin >= 1.12.0-3 +Requires: ipxe-roms-qemu >= 20170123-1 +%endif +%ifarch %{power64} +Requires: SLOF >= %{SLOF_gittagdate}-1.git%{SLOF_gittagcommit} +%endif +Requires: %{name}-common = %{epoch}:%{version}-%{release} +Requires: libseccomp >= 2.4.0 +# For compressed guest memory dumps +Requires: lzo snappy +%if %{have_gluster} +Requires: glusterfs-api >= 3.6.0 +%endif +%if %{have_kvm_setup} +Requires(post): systemd-units +Requires(preun): systemd-units + %ifarch %{power64} +Requires: powerpc-utils + %endif +%endif +Requires: libusbx >= 1.0.19 +%if %{have_usbredir} +Requires: usbredir >= 0.7.1 +%endif + +%rhev_ma_conflicts qemu-kvm + +%description -n qemu-kvm-core +qemu-kvm is an open source virtualizer that provides hardware +emulation for the KVM hypervisor. qemu-kvm acts as a virtual +machine monitor together with the KVM kernel modules, and emulates the +hardware for a full system such as a PC and its associated peripherals. + + +%package -n qemu-img +Summary: QEMU command line tool for manipulating disk images +Group: Development/Tools + +%rhev_ma_conflicts qemu-img + +%description -n qemu-img +This package provides a command line tool for manipulating disk images. + +%package -n qemu-kvm-common +Summary: QEMU common files needed by all QEMU targets +Group: Development/Tools +Requires(post): /usr/bin/getent +Requires(post): /usr/sbin/groupadd +Requires(post): /usr/sbin/useradd +Requires(post): systemd-units +Requires(preun): systemd-units +Requires(postun): systemd-units + +%rhev_ma_conflicts qemu-kvm-common + +%description -n qemu-kvm-common +qemu-kvm is an open source virtualizer that provides hardware emulation for +the KVM hypervisor. 
+ +This package provides documentation and auxiliary programs used with qemu-kvm. + + +%package -n qemu-guest-agent +Summary: QEMU guest agent +Requires(post): systemd-units +Requires(preun): systemd-units +Requires(postun): systemd-units + +%description -n qemu-guest-agent +qemu-kvm is an open source virtualizer that provides hardware emulation for +the KVM hypervisor. + +This package provides an agent to run inside guests, which communicates +with the host over a virtio-serial channel named "org.qemu.guest_agent.0" + +This package does not need to be installed on the host OS. + +%package tests +Summary: tests for the qemu-kvm package +Requires: %{name} = %{epoch}:%{version}-%{release} + +%define testsdir %{_libdir}/%{name}/tests-src + +%description tests +The qemu-kvm-tests rpm contains tests that can be used to verify +the functionality of the installed qemu-kvm package + +Install this package if you want access to the avocado_qemu +tests, or qemu-iotests. + +%package block-curl +Summary: QEMU CURL block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-curl +This package provides the additional CURL block driver for QEMU. + +Install this package if you want to access remote disks over +http, https, ftp and other transports provided by the CURL library. + + +%if %{have_gluster} +%package block-gluster +Summary: QEMU Gluster block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description block-gluster +This package provides the additional Gluster block driver for QEMU. + +Install this package if you want to access remote Gluster storage. +%endif + + +%package block-iscsi +Summary: QEMU iSCSI block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-iscsi +This package provides the additional iSCSI block driver for QEMU. + +Install this package if you want to access iSCSI volumes. 
+ + +%package block-rbd +Summary: QEMU Ceph/RBD block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-rbd +This package provides the additional Ceph/RBD block driver for QEMU. + +Install this package if you want to access remote Ceph volumes +using the rbd protocol. + + +%package block-ssh +Summary: QEMU SSH block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-ssh +This package provides the additional SSH block driver for QEMU. + +Install this package if you want to access remote disks using +the Secure Shell (SSH) protocol. + + +%prep +%setup -n qemu-%{version} +%autopatch -p1 + +%build +%global buildarch %{kvm_target}-softmmu + +# --build-id option is used for giving info to the debug packages. +buildldflags="VL_LDFLAGS=-Wl,--build-id" + +%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle + +%if 0%{have_gluster} + %global block_drivers_list %{block_drivers_list},gluster +%endif + +./configure \ + --prefix="%{_prefix}" \ + --libdir="%{_libdir}" \ + --sysconfdir="%{_sysconfdir}" \ + --interp-prefix=%{_prefix}/qemu-%M \ + --localstatedir="%{_localstatedir}" \ + --docdir="%{qemudocdir}" \ + --libexecdir="%{_libexecdir}" \ + --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ + --extra-cflags="%{optflags}" \ + --with-pkgversion="%{name}-%{version}-%{release}" \ + --with-confsuffix=/"%{name}" \ + --firmwarepath=%{_prefix}/share/qemu-firmware \ +%if 0%{have_fdt} + --enable-fdt \ +%else + --disable-fdt \ + %endif +%if 0%{have_gluster} + --enable-glusterfs \ +%else + --disable-glusterfs \ +%endif + --enable-guest-agent \ +%ifnarch s390x + --enable-numa \ +%else + --disable-numa \ +%endif + --enable-rbd \ +%if 0%{have_librdma} + --enable-rdma \ +%else + --disable-rdma \ +%endif + --disable-pvrdma \ + --enable-seccomp \ +%if 0%{have_spice} + --enable-spice \ + --enable-smartcard \ +%else + --disable-spice \ 
+ --disable-smartcard \ +%endif +%if 0%{have_opengl} + --enable-opengl \ +%else + --disable-opengl \ +%endif +%if 0%{have_usbredir} + --enable-usb-redir \ +%else + --disable-usb-redir \ +%endif + --disable-tcmalloc \ +%ifarch x86_64 + --enable-libpmem \ +%else + --disable-libpmem \ +%endif + --enable-vhost-user \ +%ifarch %{ix86} x86_64 + --enable-avx2 \ +%else + --disable-avx2 \ +%endif + --python=%{__python3} \ + --target-list="%{buildarch}" \ + --block-drv-rw-whitelist=%{block_drivers_list} \ + --audio-drv-list= \ + --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ + --with-coroutine=ucontext \ + --tls-priority=NORMAL \ + --disable-bluez \ + --disable-brlapi \ + --enable-cap-ng \ + --enable-coroutine-pool \ + --enable-curl \ + --disable-curses \ + --disable-debug-tcg \ + --enable-docs \ + --disable-gtk \ + --enable-kvm \ + --enable-libiscsi \ + --disable-libnfs \ + --enable-libssh \ + --enable-libusb \ + --disable-bzip2 \ + --enable-linux-aio \ + --disable-live-block-migration \ + --enable-lzo \ + --enable-pie \ + --disable-qom-cast-debug \ + --disable-sdl \ + --enable-snappy \ + --disable-sparse \ + --disable-strip \ + --enable-tpm \ + --enable-trace-backend=dtrace \ + --disable-vde \ + --disable-vhost-scsi \ + --disable-vxhs \ + --disable-virtfs \ + --disable-vnc-jpeg \ + --disable-vte \ + --enable-vnc-png \ + --enable-vnc-sasl \ + --enable-werror \ + --disable-xen \ + --disable-xfsctl \ + --enable-gnutls \ + --enable-gcrypt \ + --disable-nettle \ + --enable-attr \ + --disable-bsd-user \ + --disable-cocoa \ + --enable-debug-info \ + --disable-guest-agent-msi \ + --disable-hax \ + --disable-jemalloc \ + --disable-linux-user \ + --enable-modules \ + --disable-netmap \ + --disable-replication \ + --enable-system \ + --enable-tools \ + --disable-user \ + --enable-vhost-net \ + --enable-vhost-vsock \ + --enable-vnc \ + --enable-mpath \ + --disable-xen-pci-passthrough \ + --enable-tcg \ + --with-git=git \ + --disable-sanitizers \ + --disable-hvf \ + --disable-whpx 
\ + --enable-malloc-trim \ + --disable-membarrier \ + --disable-vhost-crypto \ + --disable-libxml2 \ + --enable-capstone \ + --disable-git-update \ + --disable-crypto-afalg \ + --disable-debug-mutex \ + --disable-bochs \ + --disable-cloop \ + --disable-dmg \ + --disable-qcow1 \ + --disable-vdi \ + --disable-vvfat \ + --disable-qed \ + --disable-parallels \ + --disable-sheepdog \ + --disable-auth-pam \ + --enable-iconv \ + --disable-lzfse \ + --enable-vhost-kernel \ + --disable-virglrenderer \ + --without-default-devices + +echo "config-host.mak contents:" +echo "===" +cat config-host.mak +echo "===" + +make V=1 %{?_smp_mflags} $buildldflags + +# Setup back compat qemu-kvm binary and generate its SystemTap tapsets (stap, log-stap, simpletrace-stap) +%{__python3} scripts/tracetool.py --backends=dtrace --format=stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace-events-all > qemu-kvm.stp + +%{__python3} scripts/tracetool.py --backends=dtrace --format=log-stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace-events-all > qemu-kvm-log.stp + +%{__python3} scripts/tracetool.py --backends=dtrace --format=simpletrace-stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace-events-all > qemu-kvm-simpletrace.stp + +cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm + +gcc %{SOURCE6} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl +gcc %{SOURCE35} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o udev-kvm-check + +%install +%define _udevdir %(pkg-config --variable=udevdir udev) +%define _udevrulesdir %{_udevdir}/rules.d + +install -D -p -m 0644 %{SOURCE4} $RPM_BUILD_ROOT%{_unitdir}/ksm.service +install -D -p -m 0644 %{SOURCE5} $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/ksm +install -D -p -m 0755 ksmctl $RPM_BUILD_ROOT%{_libexecdir}/ksmctl + +install -D -p -m 0644 %{SOURCE7} $RPM_BUILD_ROOT%{_unitdir}/ksmtuned.service +install -D -p -m 0755 %{SOURCE8} $RPM_BUILD_ROOT%{_sbindir}/ksmtuned +install -D -p -m 0644 %{SOURCE9} $RPM_BUILD_ROOT%{_sysconfdir}/ksmtuned.conf
+install -D -p -m 0644 %{SOURCE26} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/vhost.conf +%ifarch s390x + install -D -p -m 0644 %{SOURCE30} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%else +%ifarch %{ix86} x86_64 + install -D -p -m 0644 %{SOURCE31} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%else + install -D -p -m 0644 %{SOURCE27} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%endif +%endif + +mkdir -p $RPM_BUILD_ROOT%{_bindir}/ +mkdir -p $RPM_BUILD_ROOT%{_udevrulesdir}/ +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name} + +# Create new directories and put them all under tests-src +mkdir -p $RPM_BUILD_ROOT%{testsdir}/python +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/acceptance +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests +mkdir -p $RPM_BUILD_ROOT%{testsdir}/scripts/qmp + +install -p -m 0755 udev-kvm-check $RPM_BUILD_ROOT%{_udevdir} +install -p -m 0644 %{SOURCE34} $RPM_BUILD_ROOT%{_udevrulesdir} + +install -m 0644 scripts/dump-guest-memory.py \ + $RPM_BUILD_ROOT%{_datadir}/%{name} + +# Install avocado_qemu tests +cp -R tests/acceptance/* $RPM_BUILD_ROOT%{testsdir}/tests/acceptance/ + +# Install qemu.py and qmp/ scripts required to run avocado_qemu tests +cp -R python/qemu $RPM_BUILD_ROOT%{testsdir}/python +cp -R scripts/qmp/* $RPM_BUILD_ROOT%{testsdir}/scripts/qmp +install -p -m 0755 tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ + +# Install qemu-iotests +cp -R tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ +# Avoid ambiguous 'python' interpreter name +find $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env python+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env python+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/python+%{__python3}+' {} \; + +install -p -m 0644 %{SOURCE36} 
$RPM_BUILD_ROOT%{testsdir}/README + +make DESTDIR=$RPM_BUILD_ROOT \ + sharedir="%{_datadir}/%{name}" \ + datadir="%{_datadir}/%{name}" \ + install + +mkdir -p $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset + +# Install qemu-guest-agent service and udev rules +install -m 0644 %{_sourcedir}/qemu-guest-agent.service %{buildroot}%{_unitdir} +install -m 0644 %{_sourcedir}/qemu-ga.sysconfig %{buildroot}%{_sysconfdir}/sysconfig/qemu-ga +install -m 0644 %{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevrulesdir} + +# - the fsfreeze hook script: +install -D --preserve-timestamps \ + scripts/qemu-guest-agent/fsfreeze-hook \ + $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook + +# - the directory for user scripts: +mkdir $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook.d + +# - and the fsfreeze script samples: +mkdir --parents $RPM_BUILD_ROOT%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ +install --preserve-timestamps --mode=0644 \ + scripts/qemu-guest-agent/fsfreeze-hook.d/*.sample \ + $RPM_BUILD_ROOT%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ + +# - Install dedicated log directory: +mkdir -p -v $RPM_BUILD_ROOT%{_localstatedir}/log/qemu-ga/ + +mkdir -p $RPM_BUILD_ROOT%{_bindir} +install -c -m 0755 qemu-ga ${RPM_BUILD_ROOT}%{_bindir}/qemu-ga + +mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8 + +install -m 0755 qemu-kvm $RPM_BUILD_ROOT%{_libexecdir}/ +install -m 0644 qemu-kvm.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -m 0644 qemu-kvm-log.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -m 0644 qemu-kvm-simpletrace.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ + +rm $RPM_BUILD_ROOT/%{_datadir}/applications/qemu.desktop +rm $RPM_BUILD_ROOT%{_bindir}/qemu-system-%{kvm_target} +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}.stp +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-simpletrace.stp +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-log.stp +rm 
$RPM_BUILD_ROOT%{_bindir}/elf2dmp + +# Install simpletrace +install -m 0755 scripts/simpletrace.py $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py +# Avoid ambiguous 'python' interpreter name +sed -i -e '1 s/python/python3/' $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool scripts/tracetool/*.py +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend scripts/tracetool/backend/*.py +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py + +mkdir -p $RPM_BUILD_ROOT%{qemudocdir} +install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} Changelog README.rst README.systemtap COPYING COPYING.LIB LICENSE docs/interop/qmp-spec.txt +chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man1/* +chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man8/* + +install -D -p -m 0644 qemu.sasl $RPM_BUILD_ROOT%{_sysconfdir}/sasl2/%{name}.conf + +# Provided by package openbios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-ppc +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc32 +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc64 +# Provided by package SLOF +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/slof.bin + +# Remove unpackaged files. 
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/palcode-clipper +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/petalogix*.dtb +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/bamboo.dtb +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/ppc_rom.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-zipl.rom +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot.e500 +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu_vga.ndrv +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/skiboot.lid + +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-ccw.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/hppa-firmware.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/canyonlands.dtb +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot-sam460-20100605.bin + +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/firmware +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-*.fd +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-licenses.txt + +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-sifive_u-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu-nsis.bmp + +rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/ui-spice-app.so + +%ifarch s390x + # Use the s390-ccw.img that we've just built, not the pre-built one + install -m 0644 pc-bios/s390-ccw/s390-ccw.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ +%else + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-netboot.img +%endif + +%ifnarch x86_64 + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/kvmvapic.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/linuxboot.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/multiboot.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pvh.bin +%endif + +# Remove sparc files +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,tcx.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,cgthree.bin + +# Remove ivshmem example programs +rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-client 
+rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-server + +# Remove efi roms +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/efi*.rom + +# Provided by package ipxe +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pxe*rom +# Provided by package vgabios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/vgabios*bin +# Provided by package seabios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/bios*.bin +# Provided by package sgabios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/sgabios.bin + +# the pxe gpxe images will be symlinks to the images on +# /usr/share/ipxe, as QEMU doesn't know how to look +# for other paths, yet. +pxe_link() { + ln -s ../ipxe.efi/$2.rom %{buildroot}%{_datadir}/%{name}/efi-$1.rom +} + +%ifnarch aarch64 s390x +pxe_link e1000 8086100e +pxe_link ne2k_pci 10ec8029 +pxe_link pcnet 10222000 +pxe_link rtl8139 10ec8139 +pxe_link virtio 1af41000 +pxe_link e1000e 808610d3 +%endif + +rom_link() { + ln -s $1 %{buildroot}%{_datadir}/%{name}/$2 +} + +%ifnarch aarch64 s390x + rom_link ../seavgabios/vgabios-isavga.bin vgabios.bin + rom_link ../seavgabios/vgabios-cirrus.bin vgabios-cirrus.bin + rom_link ../seavgabios/vgabios-qxl.bin vgabios-qxl.bin + rom_link ../seavgabios/vgabios-stdvga.bin vgabios-stdvga.bin + rom_link ../seavgabios/vgabios-vmware.bin vgabios-vmware.bin + rom_link ../seavgabios/vgabios-virtio.bin vgabios-virtio.bin + rom_link ../seavgabios/vgabios-ramfb.bin vgabios-ramfb.bin + rom_link ../seavgabios/vgabios-bochs-display.bin vgabios-bochs-display.bin +%endif +%ifarch x86_64 + rom_link ../seabios/bios.bin bios.bin + rom_link ../seabios/bios-256k.bin bios-256k.bin + rom_link ../sgabios/sgabios.bin sgabios.bin +%endif + +%if 0%{have_kvm_setup} + install -D -p -m 755 %{SOURCE21} $RPM_BUILD_ROOT%{_prefix}/lib/systemd/kvm-setup + install -D -p -m 644 %{SOURCE22} $RPM_BUILD_ROOT%{_unitdir}/kvm-setup.service + install -D -p -m 644 %{SOURCE23} $RPM_BUILD_ROOT%{_presetdir}/85-kvm.preset +%endif + +%if 0%{have_memlock_limits} + install -D -p -m 644 %{SOURCE28} 
$RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/95-kvm-memlock.conf +%endif + +# Install rules to use the bridge helper with libvirt's virbr0 +install -D -m 0644 %{SOURCE12} $RPM_BUILD_ROOT%{_sysconfdir}/%{name}/bridge.conf + +# Install qemu-pr-helper service +install -m 0644 %{_sourcedir}/qemu-pr-helper.service %{buildroot}%{_unitdir} +install -m 0644 %{_sourcedir}/qemu-pr-helper.socket %{buildroot}%{_unitdir} + +find $RPM_BUILD_ROOT -name '*.la' -or -name '*.a' | xargs rm -f + +# We need to make the block device modules executable else +# RPM won't pick up their dependencies. +chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/block-*.so + +# Remove buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo + +# Remove spec +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs + +%check +export DIFF=diff; make check V=1 + +%post -n qemu-kvm-core +# load kvm modules now, so we can make sure no reboot is needed. +# If there's already a kvm module installed, we don't mess with it +%udev_rules_update +sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : + udevadm trigger --subsystem-match=misc --sysname-match=kvm --action=add || : +%if %{have_kvm_setup} + systemctl daemon-reload # Make sure it sees the new presets and unitfile + %systemd_post kvm-setup.service + if systemctl is-enabled kvm-setup.service > /dev/null; then + systemctl start kvm-setup.service || : # best effort: a failed start must not fail the scriptlet + fi +%endif + +%if %{have_kvm_setup} +%preun -n qemu-kvm-core +%systemd_preun kvm-setup.service +%endif + +%post -n qemu-kvm-common +%systemd_post ksm.service +%systemd_post ksmtuned.service + +getent group kvm >/dev/null || groupadd -g 36 -r kvm +getent group qemu >/dev/null || groupadd -g 107 -r qemu +getent passwd qemu >/dev/null || \ +useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ + -c "qemu user" qemu + +%preun -n qemu-kvm-common +%systemd_preun ksm.service +%systemd_preun ksmtuned.service + +%postun -n qemu-kvm-common +%systemd_postun_with_restart ksm.service +%systemd_postun_with_restart
ksmtuned.service + +%files +# Deliberately empty + + +%files -n qemu-kvm-common +%defattr(-,root,root) +%dir %{qemudocdir} +%doc %{qemudocdir}/Changelog +%doc %{qemudocdir}/README.rst +%doc %{qemudocdir}/qemu-doc.html +%doc %{qemudocdir}/COPYING +%doc %{qemudocdir}/COPYING.LIB +%doc %{qemudocdir}/LICENSE +%doc %{qemudocdir}/README.systemtap +%doc %{qemudocdir}/qmp-spec.txt +%doc %{qemudocdir}/qemu-doc.txt +%doc %{qemudocdir}/qemu-ga-ref.html +%doc %{qemudocdir}/qemu-ga-ref.txt +%doc %{qemudocdir}/qemu-qmp-ref.html +%doc %{qemudocdir}/qemu-qmp-ref.txt +%doc %{qemudocdir}/interop/* +%{_mandir}/man7/qemu-qmp-ref.7* +%{_mandir}/man7/qemu-cpu-models.7* +%{_bindir}/qemu-keymap +%{_bindir}/qemu-pr-helper +%{_bindir}/qemu-edid +%{_bindir}/qemu-trace-stap +%{_unitdir}/qemu-pr-helper.service +%{_unitdir}/qemu-pr-helper.socket +%{_mandir}/man7/qemu-ga-ref.7* + +%dir %{_datadir}/%{name}/ +%{_datadir}/%{name}/keymaps/ +%{_mandir}/man1/%{name}.1* +%{_mandir}/man1/qemu-trace-stap.1* +%{_mandir}/man7/qemu-block-drivers.7* +%attr(4755, -, -) %{_libexecdir}/qemu-bridge-helper +%config(noreplace) %{_sysconfdir}/sasl2/%{name}.conf +%{_unitdir}/ksm.service +%{_libexecdir}/ksmctl +%config(noreplace) %{_sysconfdir}/sysconfig/ksm +%{_unitdir}/ksmtuned.service +%{_sbindir}/ksmtuned +%{_udevdir}/udev-kvm-check +%{_udevrulesdir}/81-kvm-rhel.rules +%ghost %{_sysconfdir}/kvm +%config(noreplace) %{_sysconfdir}/ksmtuned.conf +%dir %{_sysconfdir}/%{name} +%config(noreplace) %{_sysconfdir}/%{name}/bridge.conf +%config(noreplace) %{_sysconfdir}/modprobe.d/vhost.conf +%config(noreplace) %{_sysconfdir}/modprobe.d/kvm.conf +%{_datadir}/%{name}/simpletrace.py* +%{_datadir}/%{name}/tracetool/*.py* +%{_datadir}/%{name}/tracetool/backend/*.py* +%{_datadir}/%{name}/tracetool/format/*.py* + +%files -n qemu-kvm-core +%defattr(-,root,root) +%ifarch x86_64 + %{_datadir}/%{name}/bios.bin + %{_datadir}/%{name}/bios-256k.bin + %{_datadir}/%{name}/linuxboot.bin + %{_datadir}/%{name}/multiboot.bin + 
%{_datadir}/%{name}/kvmvapic.bin + %{_datadir}/%{name}/sgabios.bin + %{_datadir}/%{name}/pvh.bin +%endif +%ifarch s390x + %{_datadir}/%{name}/s390-ccw.img + %{_datadir}/%{name}/s390-netboot.img +%endif +%ifnarch aarch64 s390x + %{_datadir}/%{name}/vgabios.bin + %{_datadir}/%{name}/vgabios-cirrus.bin + %{_datadir}/%{name}/vgabios-qxl.bin + %{_datadir}/%{name}/vgabios-stdvga.bin + %{_datadir}/%{name}/vgabios-vmware.bin + %{_datadir}/%{name}/vgabios-virtio.bin + %{_datadir}/%{name}/vgabios-ramfb.bin + %{_datadir}/%{name}/vgabios-bochs-display.bin + %{_datadir}/%{name}/efi-e1000.rom + %{_datadir}/%{name}/efi-e1000e.rom + %{_datadir}/%{name}/efi-virtio.rom + %{_datadir}/%{name}/efi-pcnet.rom + %{_datadir}/%{name}/efi-rtl8139.rom + %{_datadir}/%{name}/efi-ne2k_pci.rom +%endif +%{_datadir}/icons/* +%{_datadir}/%{name}/linuxboot_dma.bin +%{_datadir}/%{name}/dump-guest-memory.py* +%{_libexecdir}/qemu-kvm +%{_datadir}/systemtap/tapset/qemu-kvm.stp +%{_datadir}/systemtap/tapset/qemu-kvm-log.stp +%{_datadir}/%{name}/trace-events-all +%{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp +%{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp +%{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf +%if 0%{have_kvm_setup} + %{_prefix}/lib/systemd/kvm-setup + %{_unitdir}/kvm-setup.service + %{_presetdir}/85-kvm.preset +%endif +%if 0%{have_memlock_limits} + %{_sysconfdir}/security/limits.d/95-kvm-memlock.conf +%endif +%{_libexecdir}/virtiofsd +%{_datadir}/%{name}/vhost-user/50-qemu-virtiofsd.json + +%files -n qemu-img +%defattr(-,root,root) +%{_bindir}/qemu-img +%{_bindir}/qemu-io +%{_bindir}/qemu-nbd +%{_mandir}/man1/qemu-img.1* +%{_mandir}/man8/qemu-nbd.8* + +%files -n qemu-guest-agent +%defattr(-,root,root,-) +%doc COPYING README.rst +%{_bindir}/qemu-ga +%{_mandir}/man8/qemu-ga.8* +%{_unitdir}/qemu-guest-agent.service +%{_udevrulesdir}/99-qemu-guest-agent.rules +%config(noreplace) %{_sysconfdir}/sysconfig/qemu-ga +%{_sysconfdir}/qemu-ga +%{_datadir}/%{name}/qemu-ga +%dir 
%{_localstatedir}/log/qemu-ga + +%files tests +%{testsdir} + +%files block-curl +%{_libdir}/qemu-kvm/block-curl.so + +%if %{have_gluster} +%files block-gluster +%{_libdir}/qemu-kvm/block-gluster.so +%endif + +%files block-iscsi +%{_libdir}/qemu-kvm/block-iscsi.so + +%files block-rbd +%{_libdir}/qemu-kvm/block-rbd.so + +%files block-ssh +%{_libdir}/qemu-kvm/block-ssh.so + + +%changelog +* Mon Apr 27 2020 Danilo C. L. de Paula - 4.2.0 +- Resolves: bz#1810193 + (Upgrade components in virt:rhel module:stream for RHEL-8.3 release) + +* Fri Feb 21 2020 Danilo Cesar Lemes de Paula - 2.12.0-99.el8 +- kvm-slirp-disable-tcp_emu.patch [bz#1791677] +- kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch [bz#1790308] +- Resolves: bz#1790308 + (qemu-kvm core dump when do L1 guest live migration with L2 guest running) +- Resolves: bz#1791677 + (QEMU: Slirp: disable emulation of tcp programs like ftp IRC etc. [rhel-8]) + +* Mon Feb 10 2020 Danilo Cesar Lemes de Paula - 2.12.0-98.el8 +- kvm-iscsi-Avoid-potential-for-get_status-overflow.patch [bz#1794501] +- kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch [bz#1794501] +- kvm-clean-up-callback-when-del-virtqueue.patch [bz#1708480] +- kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch [bz#1708480] +- kvm-virtio-reset-region-cache-when-on-queue-deletion.patch [bz#1708480] +- kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch [bz#1708480] +- Resolves: bz#1708480 + ([Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device) +- Resolves: bz#1794501 + (CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-8.2.0]) + +* Fri Jan 24 2020 Miroslav Rezanina - 2.12.0-97.el8 +- kvm-exec-Fix-MAP_RAM-for-cached-access.patch [bz#1769613] +- kvm-virtio-Return-true-from-virtio_queue_empty-if-broken.patch [bz#1769613] +- kvm-usbredir-Prevent-recursion-in-usbredir_write.patch [bz#1752320] +- kvm-xhci-recheck-slot-status.patch [bz#1752320] +- 
kvm-tcp_emu-Fix-oob-access.patch [bz#1791566] +- kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch [bz#1791566] +- kvm-slirp-use-correct-size-while-emulating-commands.patch [bz#1791566] +- Resolves: bz#1752320 + (vm gets stuck when migrate vm back and forth with remote-viewer trying to connect) +- Resolves: bz#1769613 + ([SEV] kexec mays hang at "[sda] Synchronizing SCSI cache " before switching to new kernel) +- Resolves: bz#1791566 + (CVE-2020-7039 virt:rhel/qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-8.2.0]) + +* Tue Jan 07 2020 Danilo Cesar Lemes de Paula - 2.12.0-96.el8 +- kvm-i386-Remove-cpu64-rhel6-CPU-model.patch [bz#1741346] +- Resolves: bz#1741346 + (Remove the "cpu64-rhel6" CPU from qemu-kvm) + +* Thu Jan 02 2020 Danilo Cesar Lemes de Paula - 2.12.0-95.el8 +- kvm-virtio-gpu-block-both-2d-and-3d-rendering.patch [bz#1674324] +- kvm-x86-Intel-AVX512_BF16-feature-enabling.patch [bz#1642541] +- Resolves: bz#1642541 + ([Intel 8.2 Feature] qemu-kvm Enable BFloat16 data type support) +- Resolves: bz#1674324 + (With , qemu either refuses to start completely or spice-server crashes afterwards) + +* Wed Dec 18 2019 Danilo Cesar Lemes de Paula - 2.12.0-94.el8 +- kvm-util-mmap-alloc-Add-a-is_pmem-parameter-to-qemu_ram_.patch [bz#1539282] +- kvm-mmap-alloc-unfold-qemu_ram_mmap.patch [bz#1539282] +- kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch [bz#1539282] +- kvm-util-mmap-alloc-support-MAP_SYNC-in-qemu_ram_mmap.patch [bz#1539282] +- kvm-x86-cpu-Enable-MOVDIRI-cpu-feature.patch [bz#1634827] +- kvm-x86-cpu-Enable-MOVDIR64B-cpu-feature.patch [bz#1634827] +- kvm-add-call-to-qemu_add_opts-for-overcommit-option.patch [bz#1634827] +- kvm-support-overcommit-cpu-pm-on-off.patch [bz#1634827] +- kvm-i386-cpu-make-cpu-host-support-monitor-mwait.patch [] +- kvm-x86-cpu-Add-support-for-UMONITOR-UMWAIT-TPAUSE.patch [bz#1634827] +- kvm-target-i386-Add-support-for-save-load-IA32_UMWAIT_CO.patch [bz#1634827] +- 
Resolves: bz#1539282 + ([Intel 8.2 Feature][Crystal Ridge] Support MAP_SYNC - qemu-kvm) +- Resolves: bz#1634827 + ([Intel 8.2 Feat] KVM Enable SnowRidge Accelerator Interface Architecture (AIA) - qemu) + +* Wed Dec 11 2019 Danilo Cesar Lemes de Paula - 2.12.0-93.el8 +- kvm-target-i386-Export-TAA_NO-bit-to-guests.patch [bz#1771971] +- kvm-target-i386-add-support-for-MSR_IA32_TSX_CTRL.patch [bz#1771971] +- Resolves: bz#1771971 + (CVE-2019-11135 virt:rhel/qemu-kvm: hw: TSX Transaction Asynchronous Abort (TAA) [rhel-8.2.0]) + +* Mon Dec 02 2019 Danilo Cesar Lemes de Paula - 2.12.0-92.el8 +- kvm-x86-cpu-use-FeatureWordArray-to-define-filtered_feat.patch [bz#1689270] +- kvm-i386-Add-x-force-features-option-for-testing.patch [bz#1689270] +- kvm-target-i386-define-a-new-MSR-based-feature-word-FEAT.patch [bz#1689270] +- kvm-i386-display-known-CPUID-features-linewrapped-in-alp.patch [bz#1689270] +- kvm-target-i386-kvm-kvm_get_supported_msrs-cleanup.patch [bz#1689270] +- kvm-target-i386-handle-filtered_features-in-a-new-functi.patch [bz#1689270] +- kvm-target-i386-introduce-generic-feature-dependency-mec.patch [bz#1689270] +- kvm-target-i386-expand-feature-words-to-64-bits.patch [bz#1689270] +- kvm-target-i386-add-VMX-definitions.patch [bz#1689270] +- kvm-vmxcap-correct-the-name-of-the-variables.patch [bz#1689270] +- kvm-target-i386-add-VMX-features.patch [bz#1689270] +- kvm-target-i386-work-around-KVM_GET_MSRS-bug-for-seconda.patch [bz#1689270] +- kvm-target-i386-adjust-for-missing-VMX-features.patch [bz#1689270] +- kvm-target-i386-add-VMX-features-to-named-CPU-models.patch [bz#1689270] +- kvm-target-i386-add-VMX-features-to-named-CPU-models-RHE.patch [bz#1689270] +- kvm-vhost-fix-vhost_log-size-overflow-during-migration.patch [bz#1776808] +- Resolves: bz#1689270 + (Nested KVM: limit VMX features according to CPU models - Slow Train) +- Resolves: bz#1776808 + (qemu-kvm crashes when Windows VM is migrated with multiqueue) + +* Wed Nov 27 2019 Danilo Cesar Lemes de Paula - 
2.12.0-91.el8 +- kvm-qapi-fill-in-CpuInfoFast.arch-in-query-cpus-fast.patch [bz#1730969] +- kvm-curl-Keep-pointer-to-the-CURLState-in-CURLSocket.patch [bz#1744602] +- kvm-curl-Keep-socket-until-the-end-of-curl_sock_cb.patch [bz#1744602] +- kvm-curl-Check-completion-in-curl_multi_do.patch [bz#1744602] +- kvm-curl-Pass-CURLSocket-to-curl_multi_do.patch [bz#1744602] +- kvm-curl-Report-only-ready-sockets.patch [bz#1744602] +- kvm-curl-Handle-success-in-multi_check_completion.patch [bz#1744602] +- kvm-curl-Check-curl_multi_add_handle-s-return-code.patch [bz#1744602] +- Resolves: bz#1730969 + ([ppc] qmp: The 'arch' value returned by the command 'query-cpus-fast' does not match) +- Resolves: bz#1744602 + (qemu-img gets stuck when stream-converting from http) + +* Tue Nov 12 2019 Danilo Cesar Lemes de Paula - 2.12.0-90.el8 +- kvm-i386-Don-t-print-warning-if-phys-bits-was-set-automa.patch [bz#1719127] +- kvm-Disable-CONFIG_I2C-and-CONFIG_IOH3420.patch [bz#1693140] +- kvm-usb-drop-unnecessary-usb_device_post_load-checks.patch [bz#1757482] +- kvm-pc-bios-s390-ccw-define-loadparm-length.patch [bz#1664376] +- kvm-pc-bios-s390-ccw-net-Use-diag308-to-reset-machine-be.patch [bz#1664376] +- kvm-s390-bios-decouple-cio-setup-from-virtio.patch [bz#1664376] +- kvm-s390-bios-decouple-common-boot-logic-from-virtio.patch [bz#1664376] +- kvm-s390-bios-Clean-up-cio.h.patch [bz#1664376] +- kvm-s390-bios-Decouple-channel-i-o-logic-from-virtio.patch [bz#1664376] +- kvm-s390-bios-Map-low-core-memory.patch [bz#1664376] +- kvm-s390-bios-ptr2u32-and-u32toptr.patch [bz#1664376] +- kvm-s390-bios-Support-for-running-format-0-1-channel-pro.patch [bz#1664376] +- kvm-s390-bios-cio-error-handling.patch [bz#1664376] +- kvm-s390-bios-Extend-find_dev-for-non-virtio-devices.patch [bz#1664376] +- kvm-s390-bios-Factor-finding-boot-device-out-of-virtio-c.patch [bz#1664376] +- kvm-s390-bios-Refactor-virtio-to-run-channel-programs-vi.patch [bz#1664376] +- 
kvm-s390-bios-Use-control-unit-type-to-determine-boot-me.patch [bz#1664376] +- kvm-s390-bios-Add-channel-command-codes-structs-needed-f.patch [bz#1664376] +- kvm-s390-bios-Support-booting-from-real-dasd-device.patch [bz#1664376] +- kvm-s390-bios-Use-control-unit-type-to-find-bootable-dev.patch [bz#1664376] +- kvm-s390x-vfio-ap-Implement-hot-plug-unplug-of-vfio-ap-d.patch [bz#1660906] +- Resolves: bz#1660906 + ([IBM 8.2 FEAT] KVM s390x: Crypto Passthrough Hotplug - qemu part) +- Resolves: bz#1664376 + ([IBM 8.2 FEAT] CCW IPL Support (kvm) - qemu part) +- Resolves: bz#1693140 + (aarch64: qemu: remove smbus_eeprom and i2c from config) +- Resolves: bz#1719127 + ([Intel 8.2 Bug] warning shown when boot VM with “–cpu host” or “–cpu other mode” on ICX platform (physical)) +- Resolves: bz#1757482 + (Fail to migrate a rhel6.10-mt7.6 guest with dimm device) + +* Mon Oct 14 2019 Danilo Cesar Lemes de Paula - 2.12.0-89.el8 +- kvm-accel-use-g_strsplit-for-parsing-accelerator-names.patch [bz#1749022] +- kvm-opts-don-t-silently-truncate-long-parameter-keys.patch [bz#1749022] +- kvm-opts-don-t-silently-truncate-long-option-values.patch [bz#1749022] +- kvm-i386-fix-regression-parsing-multiboot-initrd-modules.patch [bz#1749022] +- kvm-i386-only-parse-the-initrd_filename-once-for-multibo.patch [bz#1749022] +- kvm-opts-remove-redundant-check-for-NULL-parameter.patch [bz#1749022] +- kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch [bz#1749724] +- kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch [bz#1708459] +- kvm-s390x-cpumodel-Rework-CPU-feature-definition.patch [bz#1660909] +- kvm-s390x-cpumodel-Set-up-CPU-model-for-AQIC-interceptio.patch [bz#1660909] +- kvm-ccid-Fix-dwProtocols-advertisement-of-T-0.patch [bz#1746361] +- kvm-s390-PCI-fix-IOMMU-region-init.patch [bz#1754643] +- kvm-fw_cfg-Improve-error-message-when-can-t-load-splash-.patch [bz#1607367] +- kvm-fw_cfg-Fix-boot-bootsplash-error-checking.patch [bz#1607367] +- 
kvm-fw_cfg-Fix-boot-reboot-timeout-error-checking.patch [bz#1607367] +- kvm-hw-nvram-fw_cfg-Store-reboot-timeout-as-little-endia.patch [bz#1607367] +- kvm-intel_iommu-Correct-caching-mode-error-message.patch [bz#1738440] +- kvm-intel_iommu-Sanity-check-vfio-pci-config-on-machine-.patch [bz#1738440] +- kvm-qdev-machine-Introduce-hotplug_allowed-hook.patch [bz#1738440] +- kvm-pc-q35-Disallow-vfio-pci-hotplug-without-VT-d-cachin.patch [bz#1738440] +- kvm-intel_iommu-Remove-the-caching-mode-check-during-fla.patch [bz#1738440] +- kvm-pseries-do-not-allow-memory-less-cpu-less-NUMA-node.patch [bz#1651474] +- Resolves: bz#1607367 + (After boot failed, guest should not reboot when set reboot-timeout < -1) +- Resolves: bz#1651474 + (RHEL8.0 Beta - [4.18.0-32.el8.ppc64le] Guest VM crashes during vcpu hotplug with specific numa configuration (kvm)) +- Resolves: bz#1660909 + ([IBM 8.2 FEAT] KVM s390x: Crypto Passthrough Interrupt Support - qemu part) +- Resolves: bz#1708459 + (qemu-kvm core dumped when repeat "system_reset" multiple times during guest boot) +- Resolves: bz#1738440 + (For intel-iommu, qemu shows conflict behaviors between booting a guest with vfio and hot plugging vfio device) +- Resolves: bz#1746361 + (ccid: Fix incorrect dwProtocol advertisement of T=0) +- Resolves: bz#1749022 + (Please backport 950c4e6c94b1 ("opts: don't silently truncate long option values", 2018-05-09)) +- Resolves: bz#1749724 + (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-8]) +- Resolves: bz#1754643 + (RHEL8.1 Snapshot3 - Passthrough PCI card goes into error state if used in domain (kvm)) + +* Fri Sep 13 2019 Danilo Cesar Lemes de Paula - 2.12.0-88.el8 +- Revert fix for bz#1749724 - this got delayed to 8.2 + (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-8]) + +* Tue Sep 03 2019 Danilo Cesar Lemes de Paula - 2.12.0-86.el8 +- kvm-Do-not-run-iotests-on-brew-build.patch [bz#1742819] +- 
kvm-target-ppc-spapr-Add-workaround-option-to-SPAPR_CAP_.patch [bz#1744415] +- kvm-target-ppc-spapr-Add-SPAPR_CAP_CCF_ASSIST.patch [bz#1744415] +- kvm-i386-x86_cpu_list_feature_names-function.patch [bz#1747185] +- kvm-i386-unavailable-features-QOM-property.patch [bz#1747185] +- kvm-file-posix-Handle-undetectable-alignment.patch [bz#1738839] +- kvm-iotests-Tweak-221-sizing-for-different-hole-granular.patch [bz#1738839] +- kvm-iotests-Filter-175-s-allocation-information.patch [bz#1738839] +- kvm-block-posix-Always-allocate-the-first-block.patch [bz#1738839] +- kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch [bz#1738839] +- Resolves: bz#1738839 + (I/O error when virtio-blk disk is backed by a raw image on 4k disk) +- Resolves: bz#1742819 + (Remove iotests from qemu-kvm builds [RHEL 8.1.0]) +- Resolves: bz#1744415 + (Backport support for count cache flush Spectre v2 mitigation [slow train]) +- Resolves: bz#1747185 + ("filtered-features" QOM property is not available) + +* Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 2.12.0-85.el8 +- kvm-console-Avoid-segfault-in-screendump.patch [bz#1684383] +- kvm-usb-hub-clear-suspend-on-detach.patch [bz#1619661] +- kvm-qemu-img-fix-regression-copying-secrets-during-conve.patch [bz#1727821] +- Resolves: bz#1619661 + (the attach hub on one hub still exits in device manager after unhotplug) +- Resolves: bz#1684383 + (qemu crashed when take screenshot for 2nd head of virtio video device if the display not opened by virt-viewer) +- Resolves: bz#1727821 + (Failed to convert a source image to the qcow2 image encrypted by luks) + +* Fri Aug 16 2019 Danilo Cesar Lemes de Paula - 2.12.0-84.el8 +- kvm-vnc-detect-and-optimize-pageflips.patch [bz#1727033] +- kvm-block-backend-Make-blk_inc-dec_in_flight-public.patch [bz#1716349] +- kvm-virtio-blk-Increase-in_flight-for-request-restart-BH.patch [bz#1716349] +- kvm-block-Fix-AioContext-switch-for-drained-node.patch [bz#1716349] +- 
kvm-test-bdrv-drain-AioContext-switch-in-drained-section.patch [bz#1716349] +- kvm-block-Use-normal-drain-for-bdrv_set_aio_context.patch [bz#1716349] +- kvm-block-Fix-AioContext-switch-for-bs-drv-NULL.patch [bz#1716347] +- kvm-iothread-fix-crash-with-invalid-properties.patch [bz#1687541] +- kvm-iothread-replace-init_done_cond-with-a-semaphore.patch [bz#1687541] +- kvm-RHEL-disable-hostmem-memfd.patch [bz#1740797] +- Resolves: bz#1687541 + (qemu aborted when start guest with a big iothreads) +- Resolves: bz#1716347 + (Qemu Core dump when quit vm that's in status "paused(io-error)" with data plane enabled) +- Resolves: bz#1716349 + (qemu with iothreads enabled crashes on resume after enospc pause for disk extension) +- Resolves: bz#1727033 + (vnc server should detect page-flips and avoid sending fullscreen updates then.) +- Resolves: bz#1740797 + (Disable memfd in QEMU) + +* Thu Aug 01 2019 Danilo Cesar Lemes de Paula - 2.12.0-83.el8 +- kvm-hw-block-pflash_cfi01-Add-missing-DeviceReset-handle.patch [bz#1707192] +- kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch [bz#1678979] +- kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch [bz#1678979] +- kvm-nbd-client-Lower-min_block-for-block-status-unaligne.patch [bz#1678979] +- kvm-nbd-client-Reject-inaccessible-tail-of-inconsistent-.patch [bz#1678979] +- kvm-nbd-client-Support-qemu-img-convert-from-unaligned-s.patch [bz#1678979] +- kvm-block-Add-bdrv_get_request_alignment.patch [bz#1678979] +- kvm-nbd-server-Advertise-actual-minimum-block-size.patch [bz#1678979] +- kvm-slirp-check-sscanf-result-when-emulating-ident.patch [bz#1727642] +- kvm-slirp-fix-big-little-endian-conversion-in-ident-prot.patch [bz#1727642] +- kvm-slirp-ensure-there-is-enough-space-in-mbuf-to-null-t.patch [bz#1727642] +- kvm-slirp-don-t-manipulate-so_rcv-in-tcp_emu.patch [bz#1727642] +- kvm-tap-set-vhostfd-passed-from-qemu-cli-to-non-blocking.patch [bz#1732642] +- kvm-Fix-heap-overflow-in-ip_reass-on-big-packet-input.patch [bz#1734751] 
+- Resolves: bz#1678979 + (qemu-img convert abort when converting image with unaligned size (qemu-img: block/io.c:2134: bdrv_co_block_status: Assertion `*pnum && (((*pnum) % (align)) == 0) && align > offset - aligned_offset\' failed)) +- Resolves: bz#1707192 + (implement missing reset handler for cfi.pflash01 - slow train) +- Resolves: bz#1727642 + (CVE-2019-6778 qemu-kvm: QEMU: slirp: heap buffer overflow in tcp_emu()) +- Resolves: bz#1732642 + (enable the virtio-net frontend to work with the vhost-net backend in SEV guests) +- Resolves: bz#1734751 + (CVE-2019-14378 qemu-kvm: QEMU: slirp: heap buffer overflow during packet reassembly [rhel-8.1.0]) + +* Tue Jul 23 2019 Danilo Cesar Lemes de Paula - 2.12.0-82.el8 +- kvm-i386-Add-new-model-of-Cascadelake-Server.patch [bz#1629906] +- kvm-i386-Update-stepping-of-Cascadelake-Server.patch [bz#1629906] +- kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch [bz#1629906] +- kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-NEW.patch [bz#1629906] +- kvm-i386-Disable-OSPKE-on-CPU-model-definitions-NEW.patch [bz#1629906] +- kvm-block-ssh-Convert-from-DPRINTF-macro-to-trace-events.patch [bz#1513367] +- kvm-block-ssh-Do-not-report-read-write-flush-errors-to-t.patch [bz#1513367] +- kvm-qemu-iotests-Fix-paths-for-NFS.patch [bz#1513367] +- kvm-qemu-iotests-Filter-NFS-paths.patch [bz#1513367] +- kvm-iotests-Filter-SSH-paths.patch [bz#1513367] +- kvm-block-ssh-Implement-.bdrv_refresh_filename.patch [bz#1513367] +- kvm-iotests-Use-Python-byte-strings-where-appropriate.patch [bz#1513367] +- kvm-iotests-Unify-log-outputs-between-Python-2-and-3.patch [bz#1513367] +- kvm-ssh-switch-from-libssh2-to-libssh.patch [bz#1513367] +- kvm-redhat-switch-from-libssh2-to-libssh.patch [bz#1513367] +- kvm-block-gluster-limit-the-transfer-size-to-512-MiB.patch [bz#1728657] +- kvm-s390-cpumodel-fix-description-for-the-new-vector-fac.patch [bz#1729975] +- kvm-s390x-cpumodel-remove-esort-from-the-default-model.patch [bz#1729975] +- 
kvm-s390x-cpumodel-also-change-name-of-vxbeh.patch [bz#1729975] +- kvm-s390x-cpumodel-change-internal-name-of-vxpdeh-to-mat.patch [bz#1729975] +- kvm-target-i386-sev-Do-not-unpin-ram-device-memory-regio.patch [bz#1728958] +- kvm-i386-Save-EFER-for-32-bit-targets.patch [bz#1689269] +- kvm-target-i386-rename-HF_SVMI_MASK-to-HF_GUEST_MASK.patch [bz#1689269] +- kvm-target-i386-kvm-add-VMX-migration-blocker.patch [bz#1689269] +- kvm-target-i386-kvm-just-return-after-migrate_add_blocke.patch [bz#1689269] +- kvm-target-i386-kvm-Delete-VMX-migration-blocker-on-vCPU.patch [bz#1689269] +- kvm-Introduce-kvm_arch_destroy_vcpu.patch [bz#1689269] +- kvm-target-i386-kvm-Use-symbolic-constant-for-DB-BP-exce.patch [bz#1689269] +- kvm-target-i386-kvm-Re-inject-DB-to-guest-with-updated-D.patch [bz#1689269] +- kvm-target-i386-kvm-Block-migration-for-vCPUs-exposed-wi.patch [bz#1689269] +- kvm-target-i386-kvm-do-not-initialize-padding-fields.patch [bz#1689269] +- kvm-linux-headers-synchronize-generic-and-x86-KVM-header.patch [bz#1689269] +- kvm-vmstate-Add-support-for-kernel-integer-types.patch [bz#1689269] +- kvm-target-i386-kvm-Add-support-for-save-and-restore-nes.patch [bz#1689269] +- kvm-target-i386-kvm-Add-support-for-KVM_CAP_EXCEPTION_PA.patch [bz#1689269] +- kvm-target-i386-kvm-Add-nested-migration-blocker-only-wh.patch [bz#1689269] +- kvm-target-i386-kvm-Demand-nested-migration-kernel-capab.patch [bz#1689269] +- kvm-target-i386-skip-KVM_GET-SET_NESTED_STATE-if-VMX-dis.patch [bz#1689269] +- kvm-i386-kvm-Do-not-sync-nested-state-during-runtime.patch [bz#1689269] +- Resolves: bz#1513367 + (qemu with libssh) +- Resolves: bz#1629906 + ([Intel 8.1 Feat] qemu-kvm Introduce Cascade Lake (CLX) cpu model) +- Resolves: bz#1689269 + (Nested KVM: support for migration of nested hypervisors - Slow Train) +- Resolves: bz#1728657 + ('qemu-io write' to a raw image over libgfapi fails) +- Resolves: bz#1728958 + (Hot unplug vfio-pci NIC devices from sev guest will cause qemu-kvm: 
sev_ram_block_removed: failed to unregister region) +- Resolves: bz#1729975 + (RHEL 8.1 Pre-Beta - Fix for hardware CPU Model) + +* Mon Jul 08 2019 Miroslav Rezanina - 2.12.0-81.el8 +- kvm-target-i386-add-MDS-NO-feature.patch [bz#1714792] +- kvm-virtio-gpu-pass-down-VirtIOGPU-pointer-to-a-bunch-of.patch [bz#1531543] +- kvm-virtio-gpu-add-iommu-support.patch [bz#1531543] +- kvm-virtio-gpu-fix-unmap-in-error-path.patch [bz#1531543] +- Resolves: bz#1531543 + ([RFE] add iommu support to virtio-gpu) +- Resolves: bz#1714792 + ([Intel 8.1 FEAT] MDS_NO exposure to guest) + +* Tue Jul 02 2019 Danilo Cesar Lemes de Paula - 2.12.0-80.el8 +- kvm-qxl-check-release-info-object.patch [bz#1712705] +- kvm-iotests-Make-182-do-without-device_add.patch [bz#1707598] +- Resolves: bz#1707598 + (qemu-iotest 182 fails without device hotplugging support) +- Resolves: bz#1712705 + (CVE-2019-12155 qemu-kvm: QEMU: qxl: null pointer dereference while releasing spice resources [rhel-8]) + +* Fri Jun 28 2019 Danilo de Paula - 15:2.12.0-79 +- Rebuild all virt packages to fix RHEL's upgrade path +- Resolves: rhbz#1695587 + (Ensure modular RPM upgrade path) + +* Thu Jun 20 2019 Miroslav Rezanina - 2.12.0-78.el8 +- kvm-gluster-Handle-changed-glfs_ftruncate-signature.patch [bz#1721983] +- kvm-gluster-the-glfs_io_cbk-callback-function-pointer-ad.patch [bz#1721983] +- Resolves: bz#1721983 + (qemu-kvm can't be build with new gluster version (6.0.6)) + +* Thu Jun 13 2019 Danilo Cesar Lemes de Paula - 2.12.0-77.el8 +- kvm-i386-Make-arch_capabilities-migratable.patch [bz#1709970] +- kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch [bz#1710662] +- kvm-linux-headers-Update-for-NVLink2-passthrough-downstr.patch [bz#1710662] +- kvm-pci-Move-NVIDIA-vendor-id-to-the-rest-of-ids.patch [bz#1710662] +- kvm-vfio-quirks-Add-common-quirk-alloc-helper.patch [bz#1710662] +- kvm-vfio-Make-vfio_get_region_info_cap-public.patch [bz#1710662] +- kvm-spapr-Support-NVIDIA-V100-GPU-with-NVLink2.patch [bz#1710662] 
+- kvm-qemu-kvm.spec-bump-libseccomp-2.4.0.patch [bz#1719578] +- Resolves: bz#1709970 + ([Intel 8.1 Bug] [KVM][CLX] CPUID_7_0_EDX_ARCH_CAPABILITIES is not enabled in VM - qemu-kvm) +- Resolves: bz#1710662 + ([IBM 8.1 FEAT] POWER9 - Virt: qemu: NVLink2 passthru to guest - Nvidia Volta (GPU) (kvm)) +- Resolves: bz#1719578 + (VM failed to start with error "failed to install seccomp syscall filter in the kernel") + +* Tue Jun 11 2019 Danilo Cesar Lemes de Paula - 2.12.0-76.el8 +- kvm-Introduce-new-no_guest_reset-parameter-for-usb-host-.patch [bz#1713677] +- kvm-usb-call-reset-handler-before-updating-state.patch [bz#1713677] +- kvm-usb-host-skip-reset-for-untouched-devices.patch [bz#1713677] +- kvm-usb-host-avoid-libusb_set_configuration-calls.patch [bz#1713677] +- kvm-virtio-scsi-Move-BlockBackend-back-to-the-main-AioCo.patch [bz#1673396 bz#1673401] +- kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch [bz#1673396 bz#1673401] +- kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch [bz#1673396 bz#1673401] +- kvm-Disable-VXHS-support.patch [bz#1714933] +- Resolves: bz#1673396 + (qemu-kvm core dumped after hotplug the deleted disk with iothread parameter) +- Resolves: bz#1673401 + (Qemu core dump when start guest with two disks using same drive) +- Resolves: bz#1713677 + (Detached device when trying to upgrade USB device firmware when in doing USB Passthrough via QEMU) +- Resolves: bz#1714933 + (Disable VXHS in qemu-kvm) + +* Fri May 24 2019 Danilo Cesar Lemes de Paula - 2.12.0-75.el8 +- kvm-s390x-cpumodel-enum-type-S390FeatGroup-now-gets-gene.patch [bz#1660912] +- kvm-linux-headers-update-against-Linux-5.2-rc1.patch [bz#1660912] +- kvm-s390x-cpumodel-ignore-csske-for-expansion.patch [bz#1660912] +- kvm-s390x-cpumodel-Miscellaneous-Instruction-Extensions-.patch [bz#1660912] +- kvm-s390x-cpumodel-msa9-facility.patch [bz#1660912] +- kvm-s390x-cpumodel-vector-enhancements.patch [bz#1660912] +- kvm-s390x-cpumodel-enhanced-sort-facility.patch [bz#1660912] 
+- kvm-s390x-cpumodel-add-Deflate-conversion-facility.patch [bz#1660912] +- kvm-s390x-cpumodel-add-gen15-defintions.patch [bz#1660912] +- kvm-s390x-cpumodel-wire-up-8561-and-8562-as-gen15-machin.patch [bz#1660912] +- kvm-spice-set-device-address-and-device-display-ID-in-QX.patch [bz#1712946] +- kvm-hw-pci-Add-missing-include.patch [bz#1712946] +- Resolves: bz#1660912 + ([IBM 8.1 FEAT] KVM s390x: Add hardware CPU Model - qemu part) +- Resolves: bz#1712946 + (qemu-kvm build is broken due to spice_qxl_set_max_monitors being deprecated) + +* Mon May 20 2019 Danilo Cesar Lemes de Paula - 2.12.0-74.el8 +- kvm-x86-cpu-Enable-CLDEMOTE-Demote-Cache-Line-cpu-featur.patch [bz#1696436] +- kvm-memory-Fix-the-memory-region-type-assignment-order.patch [bz#1667249] +- kvm-target-i386-sev-Do-not-pin-the-ram-device-memory-reg.patch [bz#1667249] +- kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch [bz#1673010] +- kvm-target-i386-define-md-clear-bit.patch [bz#1703302 bz#1703308] +- Resolves: bz#1667249 + (Fail to launch AMD SEV VM with assigned PCI device) +- Resolves: bz#1673010 + (Local VM and migrated VM on the same host can run with same RAW file as visual disk source while without shareable configured or lock manager enabled) +- Resolves: bz#1696436 + ([Intel 8.0 Feat] KVM Enabling SnowRidge new NIs - qemu-kvm) +- Resolves: bz#1703302 + (CVE-2018-12130 virt:rhel/qemu-kvm: hardware: Microarchitectural Fill Buffer Data Sampling (MFBDS) [rhel-8]) +- Resolves: bz#1703308 + (CVE-2018-12127 virt:rhel/qemu-kvm: hardware: Micro-architectural Load Port Data Sampling - Information Leak (MLPDS) [rhel-8]) + +* Tue May 14 2019 Danilo Cesar Lemes de Paula - 2.12.0-73.el8 +- kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch [bz#1561761] +- kvm-i386-Disable-OSPKE-on-CPU-model-definitions.patch [bz#1561761] +- Resolves: bz#1561761 + ([Intel 8.1 Feat] qemu-kvm Introduce Icelake cpu model) + +* Tue May 14 2019 Danilo Cesar Lemes de Paula - 2.12.0-72.el8 +- 
kvm-Use-KVM_GET_MSR_INDEX_LIST-for-MSR_IA32_ARCH_CAP.patch [bz#1707706] +- kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch [bz#1707706] +- Resolves: bz#1707706 + (/builddir/build/BUILD/qemu-2.12.0/target/i386/kvm.c:2031: kvm_put_msrs: Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed.) + +* Wed May 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-71.el8 +- kvm-s390-bios-Skip-bootmap-signature-entries.patch [bz#1683275] +- Resolves: bz#1683275 + ([IBM 8.1 FEAT] KVM: Secure Linux Boot Toleration (qemu)) + +* Tue May 07 2019 Danilo Cesar Lemes de Paula - 2.12.0-70.el8 +- kvm-i386-Add-new-MSR-indices-for-IA32_PRED_CMD-and-IA32_.patch [bz#1561761] +- kvm-i386-Add-CPUID-bit-and-feature-words-for-IA32_ARCH_C.patch [bz#1561761] +- kvm-i386-Add-CPUID-bit-for-PCONFIG.patch [bz#1561761] +- kvm-i386-Add-CPUID-bit-for-WBNOINVD.patch [bz#1561761] +- kvm-i386-Add-new-CPU-model-Icelake-Server-Client.patch [bz#1561761] +- kvm-Add-support-to-KVM_GET_MSR_FEATURE_INDEX_LIST-an.patch [bz#1561761] +- kvm-x86-Data-structure-changes-to-support-MSR-based-feat.patch [bz#1561761] +- kvm-x86-define-a-new-MSR-based-feature-word-FEATURE_WORD.patch [bz#1561761] +- kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch [bz#1561761] +- kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch [bz#1561761] +- Resolves: bz#1561761 + ([Intel 8.1 Feat] qemu-kvm Introduce Icelake cpu model) + +* Fri May 03 2019 Danilo Cesar Lemes de Paula - 2.12.0-69.el8 +- kvm-tests-crypto-Use-the-IEC-binary-prefix-definitions.patch [bz#1680231] +- kvm-crypto-expand-algorithm-coverage-for-cipher-benchmar.patch [bz#1680231] +- kvm-crypto-remove-code-duplication-in-tweak-encrypt-decr.patch [bz#1680231] +- kvm-crypto-introduce-a-xts_uint128-data-type.patch [bz#1680231] +- kvm-crypto-convert-xts_tweak_encdec-to-use-xts_uint128-t.patch [bz#1680231] +- kvm-crypto-convert-xts_mult_x-to-use-xts_uint128-type.patch [bz#1680231] +- kvm-crypto-annotate-xts_tweak_encdec-as-inlineable.patch [bz#1680231] +- 
kvm-crypto-refactor-XTS-cipher-mode-test-suite.patch [bz#1680231] +- kvm-crypto-add-testing-for-unaligned-buffers-with-XTS-ci.patch [bz#1680231] +- Resolves: bz#1680231 + (severe performance impact using luks format) + +* Mon Apr 29 2019 Danilo Cesar Lemes de Paula - 2.12.0-68.el8 +- kvm-s390x-ipl-Try-to-detect-Linux-vs-non-Linux-for-initi.patch [bz#1699070] +- kvm-loader-Check-access-size-when-calling-rom_ptr-to-avo.patch [bz#1699070] +- kvm-hw-s390x-Use-the-IEC-binary-prefix-definitions.patch [bz#1699070] +- kvm-s390x-storage-attributes-fix-CMMA_BLOCK_SIZE-usage.patch [bz#1699070] +- kvm-s390x-cpumodel-fix-segmentation-fault-when-baselinin.patch [bz#1699070] +- kvm-hw-s390x-s390-pci-bus-Convert-sysbus-init-function-t.patch [bz#1699070] +- kvm-s390x-pci-properly-fail-if-the-zPCI-device-cannot-be.patch [bz#1699070] +- kvm-s390x-pci-rename-hotplug-handler-callbacks.patch [bz#1699070] +- kvm-s390-avoid-potential-null-dereference-in-s390_pcihos.patch [bz#1699070] +- kvm-s390x-pci-Send-correct-event-on-hotplug.patch [bz#1699070] +- kvm-s390x-pci-Set-the-iommu-region-size-mpcifc-request.patch [bz#1699070] +- kvm-s390x-pci-Always-delete-and-free-the-release_timer.patch [bz#1699070] +- kvm-s390x-pci-Ignore-the-unplug-call-if-we-already-have-.patch [bz#1699070] +- kvm-s390x-pci-Use-hotplug_dev-instead-of-looking-up-the-.patch [bz#1699070] +- kvm-s390x-pci-Move-some-hotplug-checks-to-the-pre_plug-h.patch [bz#1699070] +- kvm-s390x-pci-Introduce-unplug-requests-and-split-unplug.patch [bz#1699070] +- kvm-s390x-pci-Drop-release-timer-and-replace-it-with-a-f.patch [bz#1699070] +- kvm-s390x-pci-mark-zpci-devices-as-unmigratable.patch [bz#1699070] +- kvm-s390x-pci-Fix-primary-bus-number-for-PCI-bridges.patch [bz#1699070] +- kvm-s390x-pci-Fix-hotplugging-of-PCI-bridges.patch [bz#1699070] +- kvm-s390x-pci-Warn-when-adding-PCI-devices-without-the-z.patch [bz#1699070] +- kvm-s390x-pci-Unplug-remaining-requested-devices-on-pcih.patch [bz#1699070] +- 
kvm-s390x-refactor-reset-reipl-handling.patch [bz#1699070] +- kvm-s390-ipl-fix-ipl-with-no-reboot.patch [bz#1699070] +- Resolves: bz#1699070 + (Backport s390x-related fixes for qemu-kvm) + +* Tue Apr 23 2019 Danilo Cesar Lemes de Paula - 2.12.0-67.el8 +- kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch [bz#1693116] +- Resolves: bz#1693116 + (CVE-2018-20815 qemu-kvm: QEMU: device_tree: heap buffer overflow while loading device tree blob [rhel-8.0]) + +* Mon Apr 15 2019 Danilo Cesar Lemes de Paula - 2.12.0-66.el8 +- kvm-iotests-153-Fix-dead-code.patch [bz#1694148] +- kvm-file-posix-Include-filename-in-locking-error-message.patch [bz#1694148] +- kvm-file-posix-Skip-effectiveless-OFD-lock-operations.patch [bz#1694148] +- kvm-file-posix-Drop-s-lock_fd.patch [bz#1694148] +- kvm-tests-Add-unit-tests-for-image-locking.patch [bz#1694148] +- kvm-file-posix-Fix-shared-locks-on-reopen-commit.patch [bz#1694148] +- kvm-iotests-Test-file-posix-locking-and-reopen.patch [bz#1694148] +- kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch [bz#1694148] +- kvm-hostmem-file-remove-object-id-from-pmem-error-messag.patch [bz#1687596] +- kvm-redhat-setting-target-release-to-rhel-8.1.0.patch [] +- kvm-redhat-removing-iotest-182.patch [] +- Resolves: bz#1687596 + ([Intel 8.1 BUG][KVM][Crystal Ridge]object_get_canonical_path_component: assertion failed: (obj->parent != NULL)) +- Resolves: bz#1694148 + (QEMU image locking needn't double open fd number, and it should not fail when attempting to release locks) + +* Tue Apr 09 2019 Danilo Cesar Lemes de Paula - 2.12.0-65.el8 +- kvm-s390x-cpumodel-mepochptff-warn-when-no-mepoch-and-re.patch [bz#1664371] +- kvm-s390x-cpumodel-add-z14-GA2-model.patch [bz#1664371] +- kvm-redhat-s390x-cpumodel-enable-mepoch-by-default-for-z.patch [bz#1664371] +- kvm-intel_iommu-fix-operator-in-vtd_switch_address_space.patch [bz#1662272] +- kvm-intel_iommu-reset-intr_enabled-when-system-reset.patch [bz#1662272] +- kvm-pci-msi-export-msi_is_masked.patch 
[bz#1662272] +- kvm-i386-kvm-ignore-masked-irqs-when-update-msi-routes.patch [bz#1662272] +- Resolves: bz#1662272 + (Boot guest with device assignment+vIOMMU, qemu prompts "vtd_interrupt_remap_msi: MSI address low 32 bit invalid: 0x0" when first rebooting guest) +- Resolves: bz#1664371 + ([IBM 8.1 FEAT] Update hardware CPU Model z14 (kvm) - qemu part) + +* Mon Apr 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-64.el8 +- kvm-doc-fix-the-configuration-path.patch [bz#1645411] +- kvm-Increase-number-of-iotests-being-run-as-a-part-of-RH.patch [bz#1664463] +- kvm-Load-kvm-module-during-boot.patch [bz#1676907 bz#1685995] +- kvm-qemu-kvm.spec.template-Update-pyton-path-to-system-i.patch [] +- Resolves: bz#1645411 + (the "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong) +- Resolves: bz#1664463 + (Modify iotest behavior to include luks and nbd and fail build if iotests fail) +- Resolves: bz#1676907 + (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) +- Resolves: bz#1685995 + (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) + +* Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 2.12.0-63.el8 +- kvm-scsi-generic-avoid-possible-out-of-bounds-access-to-.patch [bz#1668162] +- Resolves: bz#1668162 + (CVE-2019-6501 qemu-kvm: QEMU: scsi-generic: possible OOB access while handling inquiry request [rhel-8]) + +* Mon Feb 25 2019 Danilo Cesar Lemes de Paula - 2.12.0-62.el8 +- kvm-slirp-check-data-length-while-emulating-ident-functi.patch [bz#1669069] +- Resolves: bz#1669069 + (CVE-2019-6778 qemu-kvm: QEMU: slirp: heap buffer overflow in tcp_emu() [rhel-8.0]) + +* Mon Feb 11 2019 Danilo Cesar Lemes de Paula - 2.12.0-61.el8 +- kvm-qemu-ga-make-get-fsinfo-work-over-pci-bridges.patch [bz#1666952] +- kvm-qga-fix-driver-leak-in-guest-get-fsinfo.patch [bz#1666952] +- Resolves: bz#1666952 + (qemu-guest-agent does not parse PCI bridge links in 
"build_guest_fsinfo_for_real_device" (q35)) + +* Mon Jan 28 2019 Danilo Cesar Lemes de Paula - 2.12.0-60.el8 +- kvm-ne2000-fix-possible-out-of-bound-access-in-ne2000_re.patch [bz#1636784] +- kvm-rtl8139-fix-possible-out-of-bound-access.patch [bz#1636784] +- kvm-pcnet-fix-possible-buffer-overflow.patch [bz#1636784] +- kvm-net-ignore-packet-size-greater-than-INT_MAX.patch [bz#1636784] +- kvm-net-drop-too-large-packet-early.patch [bz#1636784] +- kvm-net-hub-suppress-warnings-of-no-host-network-for-qte.patch [bz#1636784] +- kvm-virtio-net-test-accept-variable-length-argument-in-p.patch [bz#1636784] +- kvm-virtio-net-test-remove-unused-macro.patch [bz#1636784] +- kvm-virtio-net-test-add-large-tx-buffer-test.patch [bz#1636784] +- kvm-s390x-Return-specification-exception-for-unimplement.patch [bz#1668261] +- kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch [bz#1665844] +- Resolves: bz#1636784 + (CVE-2018-17963 qemu-kvm: Qemu: net: ignore packets with large size [rhel-8]) +- Resolves: bz#1665844 + (Guest quit with error when hotunplug cpu) +- Resolves: bz#1668261 + ([RHEL8] Backport diag308 stable exception fix (qemu-kvm)) + +* Thu Jan 24 2019 Danilo Cesar Lemes de Paula - 2.12.0-59.el8 +- kvm-hw-scsi-cleanups-before-VPD-BL-emulation.patch [bz#1639957] +- kvm-hw-scsi-centralize-SG_IO-calls-into-single-function.patch [bz#1639957] +- kvm-hw-scsi-add-VPD-Block-Limits-emulation.patch [bz#1639957] +- kvm-scsi-disk-Block-Device-Characteristics-emulation-fix.patch [bz#1639957] +- kvm-scsi-generic-keep-VPD-page-list-sorted.patch [bz#1639957] +- kvm-scsi-generic-avoid-out-of-bounds-access-to-VPD-page-.patch [bz#1639957] +- kvm-scsi-generic-avoid-invalid-access-to-struct-when-emu.patch [bz#1639957] +- kvm-scsi-generic-do-not-do-VPD-emulation-for-sense-other.patch [bz#1639957] +- Resolves: bz#1639957 + ([RHEL.8] scsi host device passthrough limits IO writes - slow train) + +* Mon Jan 21 2019 Danilo Cesar Lemes de Paula - 2.12.0-58.el8 +- 
kvm-block-Update-flags-in-bdrv_set_read_only.patch [bz#1644996] +- kvm-block-Add-auto-read-only-option.patch [bz#1644996] +- kvm-rbd-Close-image-in-qemu_rbd_open-error-path.patch [bz#1644996] +- kvm-block-Require-auto-read-only-for-existing-fallbacks.patch [bz#1644996] +- kvm-nbd-Support-auto-read-only-option.patch [bz#1644996] +- kvm-file-posix-Support-auto-read-only-option.patch [bz#1644996] +- kvm-curl-Support-auto-read-only-option.patch [bz#1644996] +- kvm-gluster-Support-auto-read-only-option.patch [bz#1644996] +- kvm-iscsi-Support-auto-read-only-option.patch [bz#1644996] +- kvm-block-Make-auto-read-only-on-default-for-drive.patch [bz#1644996] +- kvm-qemu-iotests-Test-auto-read-only-with-drive-and-bloc.patch [bz#1644996] +- kvm-block-Fix-update-of-BDRV_O_AUTO_RDONLY-in-update_fla.patch [bz#1644996] +- kvm-qemu-img-Add-C-option-for-convert-with-copy-offloadi.patch [bz#1623082] +- kvm-iotests-Add-test-for-qemu-img-convert-C-compatibilit.patch [bz#1623082] +- Resolves: bz#1623082 + ([rhel.8.0]Target files for 'qemu-img convert' do not support thin_provisoning with iscsi/nfs backend) +- Resolves: bz#1644996 + (block-commit can't be used with -blockdev) + +* Fri Jan 11 2019 Danilo Cesar Lemes de Paula - 2.12.0-57.el8 +- kvm-qemu-kvm.spec.template-Update-files-for-tests-rpm-to.patch [bz#1601107] + +* Fri Jan 11 2019 Danilo Cesar Lemes de Paula - 2.12.0-56.el8 +- kvm-Run-iotests-as-part-of-the-build-process.patch [bz#1661026] +- kvm-Introduce-the-qemu-kvm-tests-rpm.patch [bz#1601107] +- Resolves: bz#1601107 + (qemu-kvm packaging: make running qemu-iotests more robust) +- Resolves: bz#1661026 + (Run iotests as part of build process) + +* Tue Jan 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-55.el8 +- kvm-block-Don-t-inactivate-children-before-parents.patch [bz#1659395] +- kvm-iotests-Test-migration-with-blockdev.patch [bz#1659395] +- Resolves: bz#1659395 + (src qemu core dump when do migration ( block device node-name changed after change cdrom) - Slow Train) + +* Tue 
Jan 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-54.el8 +- kvm-s390x-tcg-avoid-overflows-in-time2tod-tod2time.patch [bz#1653569] +- kvm-s390x-kvm-pass-values-instead-of-pointers-to-kvm_s39.patch [bz#1653569] +- kvm-s390x-tod-factor-out-TOD-into-separate-device.patch [bz#1653569] +- kvm-s390x-tcg-drop-tod_basetime.patch [bz#1653569] +- kvm-s390x-tcg-properly-implement-the-TOD.patch [bz#1653569] +- kvm-s390x-tcg-SET-CLOCK-COMPARATOR-can-clear-CKC-interru.patch [bz#1653569] +- kvm-s390x-tcg-implement-SET-CLOCK.patch [bz#1653569] +- kvm-s390x-tcg-rearm-the-CKC-timer-during-migration.patch [bz#1653569] +- kvm-s390x-tcg-fix-locking-problem-with-tcg_s390_tod_upda.patch [bz#1653569] +- kvm-hw-s390x-Include-the-tod-qemu-also-for-builds-with-d.patch [bz#1653569] +- kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch [bz#1653569] +- kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch [bz#1653569] +- kvm-migration-discard-non-migratable-RAMBlocks.patch [bz#1539285] +- kvm-vfio-pci-do-not-set-the-PCIDevice-has_rom-attribute.patch [bz#1539285] +- kvm-memory-exec-Expose-all-memory-block-related-flags.patch [bz#1539285] +- kvm-memory-exec-switch-file-ram-allocation-functions-to-.patch [bz#1539285] +- kvm-configure-add-libpmem-support.patch [bz#1539285] +- kvm-hostmem-file-add-the-pmem-option.patch [bz#1539285] +- kvm-mem-nvdimm-ensure-write-persistence-to-PMEM-in-label.patch [bz#1539285] +- kvm-migration-ram-Add-check-and-info-message-to-nvdimm-p.patch [bz#1539285] +- kvm-migration-ram-ensure-write-persistence-on-loading-al.patch [bz#1539285] +- Resolves: bz#1539285 + ([Intel 8.0 Bug] [KVM][Crystal Ridge] Lack of data persistence guarantee of QEMU writes to host PMEM) +- Resolves: bz#1653569 + (Stress guest and stop it, then do live migration, guest hit call trace on destination end) + +* Tue Jan 08 2019 Danilo Cesar Lemes de Paula - 2.12.0-53.el8 +- kvm-ui-add-qapi-parser-for-display.patch [bz#1652871] +- kvm-ui-switch-trivial-displays-to-qapi-parser.patch [bz#1652871] +- 
kvm-qapi-Add-rendernode-display-option-for-egl-headless.patch [bz#1652871] +- kvm-ui-Allow-specifying-rendernode-display-option-for-eg.patch [bz#1652871] +- kvm-qapi-add-query-display-options-command.patch [bz#1652871] +- Resolves: bz#1652871 + (QEMU doesn't expose rendernode option for egl-headless display type) + +* Fri Jan 04 2019 Danilo Cesar Lemes de Paula - 2.12.0-52.el8 +- kvm-Add-edk2-Requires-to-qemu-kvm.patch [bz#1654276] +- Resolves: bz#1654276 + (qemu-kvm: Should depend on the architecture-appropriate guest firmware) + +* Mon Dec 24 2018 Danilo Cesar Lemes de Paula - 2.12.0-51.el8 +- kvm-x86-host-phys-bits-limit-option.patch [bz#1598284] +- kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch [bz#1598284] +- kvm-i386-do-not-migrate-MSR_SMI_COUNT-on-machine-types-2.patch [bz#1659565] +- kvm-pc-x-migrate-smi-count-to-PC_RHEL_COMPAT.patch [bz#1659565] +- kvm-slow-train-kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF.patch [bz#1656829] +- Resolves: bz#1598284 + ([Intel 8.0 Alpha] physical bits should < 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters.) 
+- Resolves: bz#1656829 + (8->7 migration failed: qemu-kvm: error: failed to set MSR 0x4b564d02 to 0x27fc13285) +- Resolves: bz#1659565 + (machine type: required compat flag x-migrate-smi-count=off) + +* Tue Dec 18 2018 Danilo Cesar Lemes de Paula - 2.12.0-51 +- kvm-Add-edk2-Requires-to-qemu-kvm.patch [bz#1654276] +- Resolves: bz#1654276 + (qemu-kvm: Should depend on the architecture-appropriate guest firmware) + +* Mon Dec 17 2018 Danilo Cesar Lemes de Paula - +- kvm-redhat-enable-tpmdev-passthrough.patch [bz#1654486] +- Resolves: bz#1654486 + ([RFE] enable TPM passthrough at compile time (qemu-kvm)) + +* Fri Dec 14 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-48 +- kvm-redhat-use-autopatch-instead-of-PATCHAPPLY.patch [bz#1613128] +- kvm-redhat-Removing-some-unused-build-flags-in-the-spec-.patch [bz#1613128] +- kvm-redhat-Fixing-rhev-ma-conflicts.patch [bz#1613126] +- kvm-redhat-Remove-_smp_mflags-cleanup-workaround-for-s39.patch [bz#1613128] +- kvm-redhat-Removing-dead-code-from-the-spec-file.patch [bz#1613128] +- kvm-i386-Add-stibp-flag-name.patch [bz#1639446] +- kvm-Add-functional-acceptance-tests-infrastructure.patch [bz#1655807] +- kvm-scripts-qemu.py-allow-adding-to-the-list-of-extra-ar.patch [bz#1655807] +- kvm-Acceptance-tests-add-quick-VNC-tests.patch [bz#1655807] +- kvm-scripts-qemu.py-introduce-set_console-method.patch [bz#1655807] +- kvm-Acceptance-tests-add-Linux-kernel-boot-and-console-c.patch [bz#1655807] +- kvm-Bootstrap-Python-venv-for-tests.patch [bz#1655807] +- kvm-Acceptance-tests-add-make-rule-for-running-them.patch [bz#1655807] +- Resolves: bz#1613126 + (Check and fix qemu-kvm-rhev and qemu-kvm-ma conflicts in qemu-kvm for rhel-8) +- Resolves: bz#1613128 + (Spec file clean up) +- Resolves: bz#1639446 + (Cross migration from RHEL7.5 to RHEL8 shouldn't fail with cpu flag stibp [qemu-kvm]) +- Resolves: bz#1655807 + (Backport avocado-qemu tests for QEMU 2.12) + +* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-47 +- 
kvm-Disable-CONFIG_IPMI-and-CONFIG_I2C-for-ppc64.patch [bz#1640044] +- kvm-Disable-CONFIG_CAN_BUS-and-CONFIG_CAN_SJA1000.patch [bz#1640042] +- Resolves: bz#1640042 + (Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 config switches) +- Resolves: bz#1640044 + (Disable CONFIG_I2C and CONFIG_IPMI in default-configs/ppc64-softmmu.mak) + +* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-46 +- kvm-qcow2-Give-the-refcount-cache-the-minimum-possible-s.patch [bz#1656507] +- kvm-docs-Document-the-new-default-sizes-of-the-qcow2-cac.patch [bz#1656507] +- kvm-qcow2-Fix-Coverity-warning-when-calculating-the-refc.patch [bz#1656507] +- kvm-include-Add-IEC-binary-prefixes-in-qemu-units.h.patch [bz#1656507] +- kvm-qcow2-Options-documentation-fixes.patch [bz#1656507] +- kvm-include-Add-a-lookup-table-of-sizes.patch [bz#1656507] +- kvm-qcow2-Make-sizes-more-humanly-readable.patch [bz#1656507] +- kvm-qcow2-Avoid-duplication-in-setting-the-refcount-cach.patch [bz#1656507] +- kvm-qcow2-Assign-the-L2-cache-relatively-to-the-image-si.patch [bz#1656507] +- kvm-qcow2-Increase-the-default-upper-limit-on-the-L2-cac.patch [bz#1656507] +- kvm-qcow2-Resize-the-cache-upon-image-resizing.patch [bz#1656507] +- kvm-qcow2-Set-the-default-cache-clean-interval-to-10-min.patch [bz#1656507] +- kvm-qcow2-Explicit-number-replaced-by-a-constant.patch [bz#1656507] +- kvm-block-backend-Set-werror-rerror-defaults-in-blk_new.patch [bz#1657637] +- kvm-qcow2-Fix-cache-clean-interval-documentation.patch [bz#1656507] +- Resolves: bz#1656507 + ([RHEL.8] qcow2 cache is too small) +- Resolves: bz#1657637 + (Wrong werror default for -device drive=) + +* Thu Dec 06 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-45 +- kvm-target-ppc-add-basic-support-for-PTCR-on-POWER9.patch [bz#1639069] +- kvm-linux-headers-Update-for-nested-KVM-HV-downstream-on.patch [bz#1639069] +- kvm-target-ppc-Add-one-reg-id-for-ptcr.patch [bz#1639069] +- kvm-ppc-spapr_caps-Add-SPAPR_CAP_NESTED_KVM_HV.patch [bz#1639069] +- 
kvm-Re-enable-CONFIG_HYPERV_TESTDEV.patch [bz#1651195] +- kvm-qxl-use-guest_monitor_config-for-local-renderer.patch [bz#1610163] +- kvm-Declare-cirrus-vga-as-deprecated.patch [bz#1651994] +- kvm-Do-not-build-bluetooth-support.patch [bz#1654651] +- kvm-vfio-helpers-Fix-qemu_vfio_open_pci-crash.patch [bz#1645840] +- kvm-balloon-Allow-multiple-inhibit-users.patch [bz#1650272] +- kvm-Use-inhibit-to-prevent-ballooning-without-synchr.patch [bz#1650272] +- kvm-vfio-Inhibit-ballooning-based-on-group-attachment-to.patch [bz#1650272] +- kvm-vfio-ccw-pci-Allow-devices-to-opt-in-for-ballooning.patch [bz#1650272] +- kvm-vfio-pci-Handle-subsystem-realpath-returning-NULL.patch [bz#1650272] +- kvm-vfio-pci-Fix-failure-to-close-file-descriptor-on-err.patch [bz#1650272] +- kvm-postcopy-Synchronize-usage-of-the-balloon-inhibitor.patch [bz#1650272] +- Resolves: bz#1610163 + (guest shows border blurred screen with some resolutions when qemu boot with -device qxl-vga ,and guest on rhel7.6 has no such question) +- Resolves: bz#1639069 + ([IBM 8.0 FEAT] POWER9 - Nested virtualization in RHEL8.0 KVM for ppc64le - qemu-kvm side) +- Resolves: bz#1645840 + (Qemu core dump when hotplug nvme:// drive via -blockdev) +- Resolves: bz#1650272 + (Ballooning is incompatible with vfio assigned devices, but not prevented) +- Resolves: bz#1651195 + (Re-enable hyperv-testdev device) +- Resolves: bz#1651994 + (Declare the "Cirrus VGA" device emulation of QEMU as deprecated in RHEL8) +- Resolves: bz#1654651 + (Qemu: hw: bt: keep bt/* objects from building [rhel-8.0]) + +* Tue Nov 27 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-44 +- kvm-block-Make-more-block-drivers-compile-time-configura.patch [bz#1598842 bz#1598842] +- kvm-RHEL8-Add-disable-configure-options-to-qemu-spec-fil.patch [bz#1598842] +- Resolves: bz#1598842 + (Compile out unused block drivers) + +* Mon Nov 26 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-43 +- kvm-configure-add-test-for-libudev.patch [bz#1636185] +- 
kvm-qga-linux-report-disk-serial-number.patch [bz#1636185] +- kvm-qga-linux-return-disk-device-in-guest-get-fsinfo.patch [bz#1636185] +- kvm-qemu-error-introduce-error-warn-_report_once.patch [bz#1625173] +- kvm-intel-iommu-start-to-use-error_report_once.patch [bz#1625173] +- kvm-intel-iommu-replace-more-vtd_err_-traces.patch [bz#1625173] +- kvm-intel_iommu-introduce-vtd_reset_caches.patch [bz#1625173] +- kvm-intel_iommu-better-handling-of-dmar-state-switch.patch [bz#1625173] +- kvm-intel_iommu-move-ce-fetching-out-when-sync-shadow.patch [bz#1625173 bz#1629616] +- kvm-intel_iommu-handle-invalid-ce-for-shadow-sync.patch [bz#1625173 bz#1629616] +- kvm-block-remove-bdrv_dirty_bitmap_make_anon.patch [bz#1518989] +- kvm-block-simplify-code-around-releasing-bitmaps.patch [bz#1518989] +- kvm-hbitmap-Add-advance-param-to-hbitmap_iter_next.patch [bz#1518989] +- kvm-test-hbitmap-Add-non-advancing-iter_next-tests.patch [bz#1518989] +- kvm-block-dirty-bitmap-Add-bdrv_dirty_iter_next_area.patch [bz#1518989] +- kvm-blockdev-backup-add-bitmap-argument.patch [bz#1518989] +- kvm-dirty-bitmap-switch-assert-fails-to-errors-in-bdrv_m.patch [bz#1518989] +- kvm-dirty-bitmap-rename-bdrv_undo_clear_dirty_bitmap.patch [bz#1518989] +- kvm-dirty-bitmap-make-it-possible-to-restore-bitmap-afte.patch [bz#1518989] +- kvm-blockdev-rename-block-dirty-bitmap-clear-transaction.patch [bz#1518989] +- kvm-qapi-add-transaction-support-for-x-block-dirty-bitma.patch [bz#1518989] +- kvm-block-dirty-bitmaps-add-user_locked-status-checker.patch [bz#1518989] +- kvm-block-dirty-bitmaps-fix-merge-permissions.patch [bz#1518989] +- kvm-block-dirty-bitmaps-allow-clear-on-disabled-bitmaps.patch [bz#1518989] +- kvm-block-dirty-bitmaps-prohibit-enable-disable-on-locke.patch [bz#1518989] +- kvm-block-backup-prohibit-backup-from-using-in-use-bitma.patch [bz#1518989] +- kvm-nbd-forbid-use-of-frozen-bitmaps.patch [bz#1518989] +- kvm-bitmap-Update-count-after-a-merge.patch [bz#1518989] +- 
kvm-iotests-169-drop-deprecated-autoload-parameter.patch [bz#1518989] +- kvm-block-qcow2-improve-error-message-in-qcow2_inactivat.patch [bz#1518989] +- kvm-bloc-qcow2-drop-dirty_bitmaps_loaded-state-variable.patch [bz#1518989] +- kvm-dirty-bitmaps-clean-up-bitmaps-loading-and-migration.patch [bz#1518989] +- kvm-iotests-improve-169.patch [bz#1518989] +- kvm-iotests-169-add-cases-for-source-vm-resuming.patch [bz#1518989] +- kvm-pc-dimm-turn-alignment-assert-into-check.patch [bz#1630116] +- Resolves: bz#1518989 + (RFE: QEMU Incremental live backup) +- Resolves: bz#1625173 + ([NVMe Device Assignment] Guest could not boot up with q35+iommu) +- Resolves: bz#1629616 + (boot guest with q35+vIOMMU+ device assignment, qemu terminal shows "qemu-kvm: VFIO_UNMAP_DMA: -22" when return assigned network devices from vfio driver to ixgbe in guest) +- Resolves: bz#1630116 + (pc_dimm_get_free_addr: assertion failed: (QEMU_ALIGN_UP(address_space_start, align) == address_space_start)) +- Resolves: bz#1636185 + ([RFE] Report disk device name and serial number (qemu-guest-agent on Linux)) + +* Mon Nov 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-42.el8 +- kvm-luks-Allow-share-rw-on.patch [bz#1629701] +- kvm-redhat-reenable-gluster-support.patch [bz#1599340] +- kvm-redhat-bump-libusb-requirement.patch [bz#1627970] +- Resolves: bz#1599340 + (Reenable glusterfs in qemu-kvm once BZ#1567292 gets fixed) +- Resolves: bz#1627970 + (symbol lookup error: /usr/libexec/qemu-kvm: undefined symbol: libusb_set_option) +- Resolves: bz#1629701 + ("share-rw=on" does not work for luks format image - Fast Train) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-41.el8 +- kvm-block-rbd-pull-out-qemu_rbd_convert_options.patch [bz#1635585] +- kvm-block-rbd-Attempt-to-parse-legacy-filenames.patch [bz#1635585] +- kvm-block-rbd-add-deprecation-documentation-for-filename.patch [bz#1635585] +- kvm-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch [bz#1635585] +- Resolves: bz#1635585 + (rbd json 
format of 7.6 is incompatible with 7.5) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-40.el8 +- kvm-vnc-call-sasl_server_init-only-when-required.patch [bz#1609327] +- kvm-nbd-server-fix-NBD_CMD_CACHE.patch [bz#1636142] +- kvm-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch [bz#1636142] +- kvm-test-bdrv-drain-bdrv_drain-works-with-cross-AioConte.patch [bz#1637976] +- kvm-block-Use-bdrv_do_drain_begin-end-in-bdrv_drain_all.patch [bz#1637976] +- kvm-block-Remove-recursive-parameter-from-bdrv_drain_inv.patch [bz#1637976] +- kvm-block-Don-t-manually-poll-in-bdrv_drain_all.patch [bz#1637976] +- kvm-tests-test-bdrv-drain-bdrv_drain_all-works-in-corout.patch [bz#1637976] +- kvm-block-Avoid-unnecessary-aio_poll-in-AIO_WAIT_WHILE.patch [bz#1637976] +- kvm-block-Really-pause-block-jobs-on-drain.patch [bz#1637976] +- kvm-block-Remove-bdrv_drain_recurse.patch [bz#1637976] +- kvm-test-bdrv-drain-Add-test-for-node-deletion.patch [bz#1637976] +- kvm-block-Drain-recursively-with-a-single-BDRV_POLL_WHIL.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-node-deletion-in-subtree-recurs.patch [bz#1637976] +- kvm-block-Don-t-poll-in-parent-drain-callbacks.patch [bz#1637976] +- kvm-test-bdrv-drain-Graph-change-through-parent-callback.patch [bz#1637976] +- kvm-block-Defer-.bdrv_drain_begin-callback-to-polling-ph.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-that-bdrv_drain_invoke-doesn-t-.patch [bz#1637976] +- kvm-block-Allow-AIO_WAIT_WHILE-with-NULL-ctx.patch [bz#1637976] +- kvm-block-Move-bdrv_drain_all_begin-out-of-coroutine-con.patch [bz#1637976] +- kvm-block-ignore_bds_parents-parameter-for-drain-functio.patch [bz#1637976] +- kvm-block-Allow-graph-changes-in-bdrv_drain_all_begin-en.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-graph-changes-in-drain_all-sect.patch [bz#1637976] +- kvm-block-Poll-after-drain-on-attaching-a-node.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-bdrv_append-to-drained-node.patch [bz#1637976] +- 
kvm-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch [bz#1637976] +- kvm-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch [bz#1637976] +- kvm-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch [bz#1637976] +- kvm-job-Fix-missing-locking-due-to-mismerge.patch [bz#1637976] +- kvm-blockjob-Wake-up-BDS-when-job-becomes-idle.patch [bz#1637976] +- kvm-aio-wait-Increase-num_waiters-even-in-home-thread.patch [bz#1637976] +- kvm-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch [bz#1637976] +- kvm-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch [bz#1637976] +- kvm-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch [bz#1637976] +- kvm-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch [bz#1637976] +- kvm-block-backend-Add-.drained_poll-callback.patch [bz#1637976] +- kvm-block-backend-Fix-potential-double-blk_delete.patch [bz#1637976] +- kvm-block-backend-Decrease-in_flight-only-after-callback.patch [bz#1637976] +- kvm-blockjob-Lie-better-in-child_job_drained_poll.patch [bz#1637976] +- kvm-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch [bz#1637976] +- kvm-job-Avoid-deadlocks-in-job_completed_txn_abort.patch [bz#1637976] +- kvm-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch [bz#1637976] +- kvm-test-bdrv-drain-Fix-outdated-comments.patch [bz#1637976] +- kvm-block-Use-a-single-global-AioWait.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-draining-job-source-child-and-p.patch [bz#1637976] +- kvm-qemu-img-Fix-assert-when-mapping-unaligned-raw-file.patch [bz#1639374] +- kvm-iotests-Add-test-221-to-catch-qemu-img-map-regressio.patch [bz#1639374] +- Resolves: bz#1609327 + (qemu-kvm[37046]: Could not find keytab file: /etc/qemu/krb5.tab: Unknown error 49408) +- Resolves: bz#1636142 + (qemu NBD_CMD_CACHE flaws impacting non-qemu NBD clients) +- Resolves: bz#1637976 
+ (Crashes and hangs with iothreads vs. block jobs) +- Resolves: bz#1639374 + (qemu-img map 'Aborted (core dumped)' when specifying a plain file) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-39.el8 +- kvm-linux-headers-update.patch [bz#1508142] +- kvm-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch [bz#1508142] +- kvm-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch [bz#1508142] +- kvm-s390x-ap-base-Adjunct-Processor-AP-object-model.patch [bz#1508142] +- kvm-s390x-vfio-ap-Introduce-VFIO-AP-device.patch [bz#1508142] +- kvm-s390-doc-detailed-specifications-for-AP-virtualizati.patch [bz#1508142] +- Resolves: bz#1508142 + ([IBM 8.0 FEAT] KVM: Guest-dedicated Crypto Adapters - qemu part) + +* Mon Oct 15 2018 Danilo Cesar Lemes de Paula - 2.12.0-38.el8 +- kvm-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch [bz#1609235] +- kvm-add-udev-kvm-check.patch [bz#1552663] +- kvm-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch [bz#1623085] +- kvm-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch [bz#1623085] +- kvm-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch [bz#1632622] +- kvm-aio-posix-compute-timeout-before-polling.patch [bz#1632622] +- kvm-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch [bz#1632622] +- kvm-intel-iommu-send-PSI-always-even-if-across-PDEs.patch [bz#1450712] +- kvm-intel-iommu-remove-IntelIOMMUNotifierNode.patch [bz#1450712] +- kvm-intel-iommu-add-iommu-lock.patch [bz#1450712] +- kvm-intel-iommu-only-do-page-walk-for-MAP-notifiers.patch [bz#1450712] +- kvm-intel-iommu-introduce-vtd_page_walk_info.patch [bz#1450712] +- kvm-intel-iommu-pass-in-address-space-when-page-walk.patch [bz#1450712] +- kvm-intel-iommu-trace-domain-id-during-page-walk.patch [bz#1450712] +- kvm-util-implement-simple-iova-tree.patch [bz#1450712] +- kvm-intel-iommu-rework-the-page-walk-logic.patch [bz#1450712] +- kvm-i386-define-the-ssbd-CPUID-feature-bit-CVE-2018-3639.patch [bz#1633928] +- Resolves: 
bz#1450712 + (Booting nested guest with vIOMMU, the assigned network devices can not receive packets (qemu)) +- Resolves: bz#1552663 + (81-kvm-rhel.rules is no longer part of initscripts) +- Resolves: bz#1609235 + (Win2016 guest can't recognize pc-dimm hotplugged to node 0) +- Resolves: bz#1623085 + (VM doesn't boot from HD) +- Resolves: bz#1632622 + (~40% virtio_blk disk performance drop for win2012r2 guest when comparing qemu-kvm-rhev-2.12.0-9 with qemu-kvm-rhev-2.12.0-12) +- Resolves: bz#1633928 + (CVE-2018-3639 qemu-kvm: hw: cpu: speculative store bypass [rhel-8.0]) + +* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - 2.12.0-37.el8 +- kvm-block-for-jobs-do-not-clear-user_paused-until-after-.patch [bz#1635583] +- kvm-iotests-Add-failure-matching-to-common.qemu.patch [bz#1635583] +- kvm-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch [bz#1635583] +- Resolves: bz#1635583 + (Quitting VM causes qemu core dump once the block mirror job paused for no enough target space) + +* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - 2.12.0-36.el8 +- kvm-check-Only-test-ivshm-when-it-is-compiled-in.patch [bz#1621817] +- kvm-Disable-ivshmem.patch [bz#1621817] +- kvm-mirror-Fail-gracefully-for-source-target.patch [bz#1637963] +- kvm-commit-Add-top-node-base-node-options.patch [bz#1637970] +- kvm-qemu-iotests-Test-commit-with-top-node-base-node.patch [bz#1637970] +- Resolves: bz#1621817 + (Disable IVSHMEM in RHEL 8) +- Resolves: bz#1637963 + (Segfault on 'blockdev-mirror' with same node as source and target) +- Resolves: bz#1637970 + (allow using node-names with block-commit) + +* Thu Oct 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-35.el8 +- kvm-redhat-make-the-plugins-executable.patch [bz#1638304] +- Resolves: bz#1638304 + (the driver packages lack all the library Requires) + +* Thu Oct 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-34.el8 +- kvm-seccomp-allow-sched_setscheduler-with-SCHED_IDLE-pol.patch [bz#1618356] +- 
kvm-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch [bz#1618356] +- kvm-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch [bz#1618356] +- kvm-configure-require-libseccomp-2.2.0.patch [bz#1618356] +- kvm-seccomp-set-the-seccomp-filter-to-all-threads.patch [bz#1618356] +- kvm-memory-cleanup-side-effects-of-memory_region_init_fo.patch [bz#1600365] +- Resolves: bz#1600365 + (QEMU core dumped when hotplug memory exceeding host hugepages and with discard-data=yes) +- Resolves: bz#1618356 + (qemu-kvm: Qemu: seccomp: blacklist is not applied to all threads [rhel-8]) + +* Fri Oct 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-33.el8 +- kvm-migration-postcopy-Clear-have_listen_thread.patch [bz#1608765] +- kvm-migration-cleanup-in-error-paths-in-loadvm.patch [bz#1608765] +- kvm-jobs-change-start-callback-to-run-callback.patch [bz#1632939] +- kvm-jobs-canonize-Error-object.patch [bz#1632939] +- kvm-jobs-add-exit-shim.patch [bz#1632939] +- kvm-block-commit-utilize-job_exit-shim.patch [bz#1632939] +- kvm-block-mirror-utilize-job_exit-shim.patch [bz#1632939] +- kvm-jobs-utilize-job_exit-shim.patch [bz#1632939] +- kvm-block-backup-make-function-variables-consistently-na.patch [bz#1632939] +- kvm-jobs-remove-ret-argument-to-job_completed-privatize-.patch [bz#1632939] +- kvm-jobs-remove-job_defer_to_main_loop.patch [bz#1632939] +- kvm-block-commit-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-mirror-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-stream-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-commit-refactor-commit-to-use-job-callbacks.patch [bz#1632939] +- kvm-block-mirror-don-t-install-backing-chain-on-abort.patch [bz#1632939] +- kvm-block-mirror-conservative-mirror_exit-refactor.patch [bz#1632939] +- kvm-block-stream-refactor-stream-to-use-job-callbacks.patch [bz#1632939] +- kvm-tests-blockjob-replace-Blockjob-with-Job.patch [bz#1632939] +- kvm-tests-test-blockjob-remove-exit-callback.patch [bz#1632939] +- 
kvm-tests-test-blockjob-txn-move-.exit-to-.clean.patch [bz#1632939] +- kvm-jobs-remove-.exit-callback.patch [bz#1632939] +- kvm-qapi-block-commit-expose-new-job-properties.patch [bz#1632939] +- kvm-qapi-block-mirror-expose-new-job-properties.patch [bz#1632939] +- kvm-qapi-block-stream-expose-new-job-properties.patch [bz#1632939] +- kvm-block-backup-qapi-documentation-fixup.patch [bz#1632939] +- kvm-blockdev-document-transactional-shortcomings.patch [bz#1632939] +- Resolves: bz#1608765 + (After postcopy migration, do savevm and loadvm, guest hang and call trace) +- Resolves: bz#1632939 + (qemu blockjobs other than backup do not support job-finalize or job-dismiss) + +* Fri Sep 28 2018 Danilo Cesar Lemes de Paula - 2.12.0-32.el8 +- kvm-Re-enable-disabled-Hyper-V-enlightenments.patch [bz#1625185] +- kvm-Fix-annocheck-issues.patch [bz#1624164] +- kvm-exec-check-that-alignment-is-a-power-of-two.patch [bz#1630746] +- kvm-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch [bz#1575925] +- Resolves: bz#1575925 + ("SSL: no alternative certificate subject name matches target host name" error even though sslverify = off) +- Resolves: bz#1624164 + (Review annocheck distro flag failures in qemu-kvm) +- Resolves: bz#1625185 + (Re-enable disabled Hyper-V enlightenments) +- Resolves: bz#1630746 + (qemu_ram_mmap: Assertion `is_power_of_2(align)' failed) + +* Tue Sep 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-31.el8 +- kvm-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch [bz#1619804] +- kvm-redhat-enable-opengl-add-build-and-runtime-deps.patch [bz#1618412] +- Resolves: bz#1618412 + (Enable opengl (for intel vgpu display)) +- Resolves: bz#1619804 + (kernel panic in init_amd_cacheinfo) + +* Wed Sep 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-30.el8 +- kvm-redhat-Disable-vhost-crypto.patch [bz#1625668] +- Resolves: bz#1625668 + (Decide if we should disable 'vhost-crypto' or not) + +* Wed Sep 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-29.el8 +- 
kvm-target-i386-sev-fix-memory-leaks.patch [bz#1615717] +- kvm-i386-Fix-arch_query_cpu_model_expansion-leak.patch [bz#1615717] +- kvm-redhat-Update-build-configuration.patch [bz#1573156] +- Resolves: bz#1573156 + (Update build configure for QEMU 2.12.0) +- Resolves: bz#1615717 + (Memory leaks) + +* Tue Sep 04 2018 Danilo Cesar Lemes de Paula - 2.12.0-28.el8 +- kvm-e1000e-Do-not-auto-clear-ICR-bits-which-aren-t-set-i.patch [bz#1596024] +- kvm-e1000e-Prevent-MSI-MSI-X-storms.patch [bz#1596024] +- kvm-Drop-build_configure.sh-and-Makefile.local-files.patch [] +- kvm-Fix-subject-line-in-.gitpublish.patch [] +- Resolves: bz#1596024 + (The network link can't be detected on guest when the guest uses e1000e model type) + +* Wed Aug 29 2018 Danilo Cesar Lemes de Paula - 2.12.0-27.el8 +- kvm-Fix-libusb-1.0.22-deprecated-libusb_set_debug-with-l.patch [bz#1622656] +- Resolves: bz#1622656 + (qemu-kvm fails to build due to libusb_set_debug being deprecated) + +* Fri Aug 17 2018 Danilo Cesar Lemes de Paula - 2.12.0-26.el8 +- kvm-redhat-remove-extra-in-rhel_rhev_conflicts-macro.patch [bz#1618752] +- Resolves: bz#1618752 + (qemu-kvm can't be installed in RHEL-8 as it Conflicts with itself.) + +* Thu Aug 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-25.el8 +- kvm-Migration-TLS-Fix-crash-due-to-double-cleanup.patch [bz#1594384] +- Resolves: bz#1594384 + (2.12 migration fixes) + +* Tue Aug 14 2018 Danilo Cesar Lemes de Paula - 2.12.0-24.el8 +- kvm-Add-qemu-keymap-to-qemu-kvm-common.patch [bz#1593117] +- Resolves: bz#1593117 + (add qemu-keymap utility) + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-23.el8 +- Fixing an issue with some old command in the spec file + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-22.el8 +- Fix an issue with the build_configure script. 
+- Resolves: bz#1425820 + (Improve QEMU packaging layout with modularization of the block layer) + + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-20.el8 +- kvm-migration-stop-compressing-page-in-migration-thread.patch [bz#1594384] +- kvm-migration-stop-compression-to-allocate-and-free-memo.patch [bz#1594384] +- kvm-migration-stop-decompression-to-allocate-and-free-me.patch [bz#1594384] +- kvm-migration-detect-compression-and-decompression-error.patch [bz#1594384] +- kvm-migration-introduce-control_save_page.patch [bz#1594384] +- kvm-migration-move-some-code-to-ram_save_host_page.patch [bz#1594384] +- kvm-migration-move-calling-control_save_page-to-the-comm.patch [bz#1594384] +- kvm-migration-move-calling-save_zero_page-to-the-common-.patch [bz#1594384] +- kvm-migration-introduce-save_normal_page.patch [bz#1594384] +- kvm-migration-remove-ram_save_compressed_page.patch [bz#1594384] +- kvm-migration-block-dirty-bitmap-fix-memory-leak-in-dirt.patch [bz#1594384] +- kvm-migration-fix-saving-normal-page-even-if-it-s-been-c.patch [bz#1594384] +- kvm-migration-update-index-field-when-delete-or-qsort-RD.patch [bz#1594384] +- kvm-migration-introduce-decompress-error-check.patch [bz#1594384] +- kvm-migration-Don-t-activate-block-devices-if-using-S.patch [bz#1594384] +- kvm-migration-not-wait-RDMA_CM_EVENT_DISCONNECTED-event-.patch [bz#1594384] +- kvm-migration-block-dirty-bitmap-fix-dirty_bitmap_load.patch [bz#1594384] +- kvm-s390x-add-RHEL-7.6-machine-type-for-ccw.patch [bz#1595718] +- kvm-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch [bz#1595718] +- kvm-linux-headers-asm-s390-kvm.h-header-sync.patch [bz#1612938] +- kvm-s390x-kvm-add-etoken-facility.patch [bz#1612938] +- Resolves: bz#1594384 + (2.12 migration fixes) +- Resolves: bz#1595718 + (Add ppa15/bpb to the default cpu model for z196 and higher in the 7.6 s390-ccw-virtio machine) +- Resolves: bz#1612938 + (Add etoken support to qemu-kvm for s390x KVM guests) + +* Fri Aug 10 2018 Danilo Cesar 
Lemes de Paula - 2.12.0-18.el8 + Mass import from RHEL 7.6 qemu-kvm-rhev, including fixes to the following BZs: + +- kvm-AArch64-Add-virt-rhel7.6-machine-type.patch [bz#1558723] +- kvm-cpus-Fix-event-order-on-resume-of-stopped-guest.patch [bz#1566153] +- kvm-qemu-img-Check-post-truncation-size.patch [bz#1523065] +- kvm-vga-catch-depth-0.patch [bz#1575541] +- kvm-Fix-x-hv-max-vps-compat-value-for-7.4-machine-type.patch [bz#1583959] +- kvm-ccid-card-passthru-fix-regression-in-realize.patch [bz#1584984] +- kvm-Use-4-MB-vram-for-cirrus.patch [bz#1542080] +- kvm-spapr_pci-Remove-unhelpful-pagesize-warning.patch [bz#1505664] +- kvm-rpm-Add-nvme-VFIO-driver-to-rw-whitelist.patch [bz#1416180] +- kvm-qobject-Use-qobject_to-instead-of-type-cast.patch [bz#1557995] +- kvm-qobject-Ensure-base-is-at-offset-0.patch [bz#1557995] +- kvm-qobject-use-a-QObjectBase_-struct.patch [bz#1557995] +- kvm-qobject-Replace-qobject_incref-QINCREF-qobject_decre.patch [bz#1557995] +- kvm-qobject-Modify-qobject_ref-to-return-obj.patch [bz#1557995] +- kvm-rbd-Drop-deprecated-drive-parameter-filename.patch [bz#1557995] +- kvm-iscsi-Drop-deprecated-drive-parameter-filename.patch [bz#1557995] +- kvm-block-Add-block-specific-QDict-header.patch [bz#1557995] +- kvm-qobject-Move-block-specific-qdict-code-to-block-qdic.patch [bz#1557995] +- kvm-block-Fix-blockdev-for-certain-non-string-scalars.patch [bz#1557995] +- kvm-block-Fix-drive-for-certain-non-string-scalars.patch [bz#1557995] +- kvm-block-Clean-up-a-misuse-of-qobject_to-in-.bdrv_co_cr.patch [bz#1557995] +- kvm-block-Factor-out-qobject_input_visitor_new_flat_conf.patch [bz#1557995] +- kvm-block-Make-remaining-uses-of-qobject-input-visitor-m.patch [bz#1557995] +- kvm-block-qdict-Simplify-qdict_flatten_qdict.patch [bz#1557995] +- kvm-block-qdict-Tweak-qdict_flatten_qdict-qdict_flatten_.patch [bz#1557995] +- kvm-block-qdict-Clean-up-qdict_crumple-a-bit.patch [bz#1557995] +- kvm-block-qdict-Simplify-qdict_is_list-some.patch [bz#1557995] +- 
kvm-check-block-qdict-Rename-qdict_flatten-s-variables-f.patch [bz#1557995] +- kvm-check-block-qdict-Cover-flattening-of-empty-lists-an.patch [bz#1557995] +- kvm-block-Fix-blockdev-blockdev-add-for-empty-objects-an.patch [bz#1557995] +- kvm-rbd-New-parameter-auth-client-required.patch [bz#1557995] +- kvm-rbd-New-parameter-key-secret.patch [bz#1557995] +- kvm-block-mirror-honor-ratelimit-again.patch [bz#1572856] +- kvm-block-mirror-Make-cancel-always-cancel-pre-READY.patch [bz#1572856] +- kvm-iotests-Add-test-for-cancelling-a-mirror-job.patch [bz#1572856] +- kvm-iotests-Split-214-off-of-122.patch [bz#1518738] +- kvm-block-Add-COR-filter-driver.patch [bz#1518738] +- kvm-block-BLK_PERM_WRITE-includes-._UNCHANGED.patch [bz#1518738] +- kvm-block-Add-BDRV_REQ_WRITE_UNCHANGED-flag.patch [bz#1518738] +- kvm-block-Set-BDRV_REQ_WRITE_UNCHANGED-for-COR-writes.patch [bz#1518738] +- kvm-block-quorum-Support-BDRV_REQ_WRITE_UNCHANGED.patch [bz#1518738] +- kvm-block-Support-BDRV_REQ_WRITE_UNCHANGED-in-filters.patch [bz#1518738] +- kvm-iotests-Clean-up-wrap-image-in-197.patch [bz#1518738] +- kvm-iotests-Copy-197-for-COR-filter-driver.patch [bz#1518738] +- kvm-iotests-Add-test-for-COR-across-nodes.patch [bz#1518738] +- kvm-qemu-io-Use-purely-string-blockdev-options.patch [bz#1576598] +- kvm-qemu-img-Use-only-string-options-in-img_open_opts.patch [bz#1576598] +- kvm-iotests-Add-test-for-U-force-share-conflicts.patch [bz#1576598] +- kvm-qemu-io-Drop-command-functions-return-values.patch [bz#1519617] +- kvm-qemu-io-Let-command-functions-return-error-code.patch [bz#1519617] +- kvm-qemu-io-Exit-with-error-when-a-command-failed.patch [bz#1519617] +- kvm-iotests.py-Add-qemu_io_silent.patch [bz#1519617] +- kvm-iotests-Let-216-make-use-of-qemu-io-s-exit-code.patch [bz#1519617] +- kvm-qcow2-Repair-OFLAG_COPIED-when-fixing-leaks.patch [bz#1527085] +- kvm-iotests-Repairing-error-during-snapshot-deletion.patch [bz#1527085] +- kvm-block-Make-bdrv_is_writable-public.patch [bz#1588039] +- 
kvm-qcow2-Do-not-mark-inactive-images-corrupt.patch [bz#1588039] +- kvm-iotests-Add-case-for-a-corrupted-inactive-image.patch [bz#1588039] +- kvm-main-loop-drop-spin_counter.patch [bz#1168213] +- kvm-target-ppc-Factor-out-the-parsing-in-kvmppc_get_cpu_.patch [bz#1560847] +- kvm-target-ppc-Don-t-require-private-l1d-cache-on-POWER8.patch [bz#1560847] +- kvm-ppc-spapr_caps-Don-t-disable-cap_cfpc-on-POWER8-by-d.patch [bz#1560847] +- kvm-qxl-fix-local-renderer-crash.patch [bz#1567733] +- kvm-qemu-img-Amendment-support-implies-create_opts.patch [bz#1537956] +- kvm-block-Add-Error-parameter-to-bdrv_amend_options.patch [bz#1537956] +- kvm-qemu-option-Pull-out-Supported-options-print.patch [bz#1537956] +- kvm-qemu-img-Add-print_amend_option_help.patch [bz#1537956] +- kvm-qemu-img-Recognize-no-creation-support-in-o-help.patch [bz#1537956] +- kvm-iotests-Test-help-option-for-unsupporting-formats.patch [bz#1537956] +- kvm-iotests-Rework-113.patch [bz#1537956] +- kvm-qemu-img-Resolve-relative-backing-paths-in-rebase.patch [bz#1569835] +- kvm-iotests-Add-test-for-rebasing-with-relative-paths.patch [bz#1569835] +- kvm-qemu-img-Special-post-backing-convert-handling.patch [bz#1527898] +- kvm-iotests-Test-post-backing-convert-target-behavior.patch [bz#1527898] +- kvm-migration-calculate-expected_downtime-with-ram_bytes.patch [bz#1564576] +- kvm-sheepdog-Fix-sd_co_create_opts-memory-leaks.patch [bz#1513543] +- kvm-qemu-iotests-reduce-chance-of-races-in-185.patch [bz#1513543] +- kvm-blockjob-do-not-cancel-timer-in-resume.patch [bz#1513543] +- kvm-nfs-Fix-error-path-in-nfs_options_qdict_to_qapi.patch [bz#1513543] +- kvm-nfs-Remove-processed-options-from-QDict.patch [bz#1513543] +- kvm-blockjob-drop-block_job_pause-resume_all.patch [bz#1513543] +- kvm-blockjob-expose-error-string-via-query.patch [bz#1513543] +- kvm-blockjob-Fix-assertion-in-block_job_finalize.patch [bz#1513543] +- kvm-blockjob-Wrappers-for-progress-counter-access.patch [bz#1513543] +- 
kvm-blockjob-Move-RateLimit-to-BlockJob.patch [bz#1513543] +- kvm-blockjob-Implement-block_job_set_speed-centrally.patch [bz#1513543] +- kvm-blockjob-Introduce-block_job_ratelimit_get_delay.patch [bz#1513543] +- kvm-blockjob-Add-block_job_driver.patch [bz#1513543] +- kvm-blockjob-Update-block-job-pause-resume-documentation.patch [bz#1513543] +- kvm-blockjob-Improve-BlockJobInfo.offset-len-documentati.patch [bz#1513543] +- kvm-job-Create-Job-JobDriver-and-job_create.patch [bz#1513543] +- kvm-job-Rename-BlockJobType-into-JobType.patch [bz#1513543] +- kvm-job-Add-JobDriver.job_type.patch [bz#1513543] +- kvm-job-Add-job_delete.patch [bz#1513543] +- kvm-job-Maintain-a-list-of-all-jobs.patch [bz#1513543] +- kvm-job-Move-state-transitions-to-Job.patch [bz#1513543] +- kvm-job-Add-reference-counting.patch [bz#1513543] +- kvm-job-Move-cancelled-to-Job.patch [bz#1513543] +- kvm-job-Add-Job.aio_context.patch [bz#1513543] +- kvm-job-Move-defer_to_main_loop-to-Job.patch [bz#1513543] +- kvm-job-Move-coroutine-and-related-code-to-Job.patch [bz#1513543] +- kvm-job-Add-job_sleep_ns.patch [bz#1513543] +- kvm-job-Move-pause-resume-functions-to-Job.patch [bz#1513543] +- kvm-job-Replace-BlockJob.completed-with-job_is_completed.patch [bz#1513543] +- kvm-job-Move-BlockJobCreateFlags-to-Job.patch [bz#1513543] +- kvm-blockjob-Split-block_job_event_pending.patch [bz#1513543] +- kvm-job-Add-job_event_.patch [bz#1513543] +- kvm-job-Move-single-job-finalisation-to-Job.patch [bz#1513543] +- kvm-job-Convert-block_job_cancel_async-to-Job.patch [bz#1513543] +- kvm-job-Add-job_drain.patch [bz#1513543] +- kvm-job-Move-.complete-callback-to-Job.patch [bz#1513543] +- kvm-job-Move-job_finish_sync-to-Job.patch [bz#1513543] +- kvm-job-Switch-transactions-to-JobTxn.patch [bz#1513543] +- kvm-job-Move-transactions-to-Job.patch [bz#1513543] +- kvm-job-Move-completion-and-cancellation-to-Job.patch [bz#1513543] +- kvm-block-Cancel-job-in-bdrv_close_all-callers.patch [bz#1513543] +- kvm-job-Add-job_yield.patch 
[bz#1513543] +- kvm-job-Add-job_dismiss.patch [bz#1513543] +- kvm-job-Add-job_is_ready.patch [bz#1513543] +- kvm-job-Add-job_transition_to_ready.patch [bz#1513543] +- kvm-job-Move-progress-fields-to-Job.patch [bz#1513543] +- kvm-job-Introduce-qapi-job.json.patch [bz#1513543] +- kvm-job-Add-JOB_STATUS_CHANGE-QMP-event.patch [bz#1513543] +- kvm-job-Add-lifecycle-QMP-commands.patch [bz#1513543] +- kvm-job-Add-query-jobs-QMP-command.patch [bz#1513543] +- kvm-blockjob-Remove-BlockJob.driver.patch [bz#1513543] +- kvm-iotests-Move-qmp_to_opts-to-VM.patch [bz#1513543] +- kvm-qemu-iotests-Test-job-with-block-jobs.patch [bz#1513543] +- kvm-vdi-Fix-vdi_co_do_create-return-value.patch [bz#1513543] +- kvm-vhdx-Fix-vhdx_co_create-return-value.patch [bz#1513543] +- kvm-job-Add-error-message-for-failing-jobs.patch [bz#1513543] +- kvm-block-create-Make-x-blockdev-create-a-job.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.get_qmp_events_filtered.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.qmp_log.patch [bz#1513543] +- kvm-qemu-iotests-Add-iotests.img_info_log.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.run_job.patch [bz#1513543] +- kvm-qemu-iotests-iotests.py-helper-for-non-file-protocol.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-206-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-207-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-210-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-211-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-212-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-213-for-blockdev-create-job.patch [bz#1513543] +- kvm-block-create-Mark-blockdev-create-stable.patch [bz#1513543] +- kvm-jobs-fix-stale-wording.patch [bz#1513543] +- kvm-jobs-fix-verb-references-in-docs.patch [bz#1513543] +- kvm-iotests-Fix-219-s-timing.patch [bz#1513543] +- kvm-iotests-improve-pause_job.patch [bz#1513543] +- kvm-rpm-Whitelist-copy-on-read-block-driver.patch 
[bz#1518738] +- kvm-rpm-add-throttle-driver-to-rw-whitelist.patch [bz#1591076] +- kvm-usb-host-skip-open-on-pending-postload-bh.patch [bz#1572851] +- kvm-i386-Define-the-Virt-SSBD-MSR-and-handling-of-it-CVE.patch [bz#1574216] +- kvm-i386-define-the-AMD-virt-ssbd-CPUID-feature-bit-CVE-.patch [bz#1574216] +- kvm-block-file-posix-Pass-FD-to-locking-helpers.patch [bz#1519144] +- kvm-block-file-posix-File-locking-during-creation.patch [bz#1519144] +- kvm-iotests-Add-creation-test-to-153.patch [bz#1519144] +- kvm-vhost-user-add-Net-prefix-to-internal-state-structur.patch [bz#1526645] +- kvm-virtio-support-setting-memory-region-based-host-noti.patch [bz#1526645] +- kvm-vhost-user-support-receiving-file-descriptors-in-sla.patch [bz#1526645] +- kvm-osdep-add-wait.h-compat-macros.patch [bz#1526645] +- kvm-vhost-user-bridge-support-host-notifier.patch [bz#1526645] +- kvm-vhost-allow-backends-to-filter-memory-sections.patch [bz#1526645] +- kvm-vhost-user-allow-slave-to-send-fds-via-slave-channel.patch [bz#1526645] +- kvm-vhost-user-introduce-shared-vhost-user-state.patch [bz#1526645] +- kvm-vhost-user-support-registering-external-host-notifie.patch [bz#1526645] +- kvm-libvhost-user-support-host-notifier.patch [bz#1526645] +- kvm-block-Introduce-API-for-copy-offloading.patch [bz#1482537] +- kvm-raw-Check-byte-range-uniformly.patch [bz#1482537] +- kvm-raw-Implement-copy-offloading.patch [bz#1482537] +- kvm-qcow2-Implement-copy-offloading.patch [bz#1482537] +- kvm-file-posix-Implement-bdrv_co_copy_range.patch [bz#1482537] +- kvm-iscsi-Query-and-save-device-designator-when-opening.patch [bz#1482537] +- kvm-iscsi-Create-and-use-iscsi_co_wait_for_task.patch [bz#1482537] +- kvm-iscsi-Implement-copy-offloading.patch [bz#1482537] +- kvm-block-backend-Add-blk_co_copy_range.patch [bz#1482537] +- kvm-qemu-img-Convert-with-copy-offloading.patch [bz#1482537] +- kvm-qcow2-Fix-src_offset-in-copy-offloading.patch [bz#1482537] +- kvm-iscsi-Don-t-blindly-use-designator-length-in-respons.patch 
[bz#1482537] +- kvm-file-posix-Fix-EINTR-handling.patch [bz#1482537] +- kvm-usb-storage-Add-rerror-werror-properties.patch [bz#1595180] +- kvm-numa-clarify-error-message-when-node-index-is-out-of.patch [bz#1578381] +- kvm-qemu-iotests-Update-026.out.nocache-reference-output.patch [bz#1528541] +- kvm-qcow2-Free-allocated-clusters-on-write-error.patch [bz#1528541] +- kvm-qemu-iotests-Test-qcow2-not-leaking-clusters-on-writ.patch [bz#1528541] +- kvm-qemu-options-Add-missing-newline-to-accel-help-text.patch [bz#1586313] +- kvm-xhci-fix-guest-triggerable-assert.patch [bz#1594135] +- kvm-virtio-gpu-tweak-scanout-disable.patch [bz#1589634] +- kvm-virtio-gpu-update-old-resource-too.patch [bz#1589634] +- kvm-virtio-gpu-disable-scanout-when-backing-resource-is-.patch [bz#1589634] +- kvm-block-Don-t-silently-truncate-node-names.patch [bz#1549654] +- kvm-pr-helper-fix-socket-path-default-in-help.patch [bz#1533158] +- kvm-pr-helper-fix-assertion-failure-on-failed-multipath-.patch [bz#1533158] +- kvm-pr-manager-helper-avoid-SIGSEGV-when-writing-to-the-.patch [bz#1533158] +- kvm-pr-manager-put-stubs-in-.c-file.patch [bz#1533158] +- kvm-pr-manager-add-query-pr-managers-QMP-command.patch [bz#1533158] +- kvm-pr-manager-helper-report-event-on-connection-disconn.patch [bz#1533158] +- kvm-pr-helper-avoid-error-on-PR-IN-command-with-zero-req.patch [bz#1533158] +- kvm-pr-helper-Rework-socket-path-handling.patch [bz#1533158] +- kvm-pr-manager-helper-fix-memory-leak-on-event.patch [bz#1533158] +- kvm-object-fix-OBJ_PROP_LINK_UNREF_ON_RELEASE-ambivalenc.patch [bz#1556678] +- kvm-usb-hcd-xhci-test-add-a-test-for-ccid-hotplug.patch [bz#1556678] +- kvm-Revert-usb-release-the-created-buses.patch [bz#1556678] +- kvm-file-posix-Fix-creation-locking.patch [bz#1599335] +- kvm-file-posix-Unlock-FD-after-creation.patch [bz#1599335] +- kvm-ahci-trim-signatures-on-raise-lower.patch [bz#1584914] +- kvm-ahci-fix-PxCI-register-race.patch [bz#1584914] +- kvm-ahci-don-t-schedule-unnecessary-BH.patch 
[bz#1584914] +- kvm-qcow2-Fix-qcow2_truncate-error-return-value.patch [bz#1595173] +- kvm-block-Convert-.bdrv_truncate-callback-to-coroutine_f.patch [bz#1595173] +- kvm-qcow2-Remove-coroutine-trampoline-for-preallocate_co.patch [bz#1595173] +- kvm-block-Move-bdrv_truncate-implementation-to-io.c.patch [bz#1595173] +- kvm-block-Use-tracked-request-for-truncate.patch [bz#1595173] +- kvm-file-posix-Make-.bdrv_co_truncate-asynchronous.patch [bz#1595173] +- kvm-block-Fix-copy-on-read-crash-with-partial-final-clus.patch [bz#1590640] +- kvm-block-fix-QEMU-crash-with-scsi-hd-and-drive_del.patch [bz#1599515] +- kvm-virtio-rng-process-pending-requests-on-DRIVER_OK.patch [bz#1576743] +- kvm-file-posix-specify-expected-filetypes.patch [bz#1525829] +- kvm-iotests-add-test-226-for-file-driver-types.patch [bz#1525829] +- kvm-block-dirty-bitmap-add-lock-to-bdrv_enable-disable_d.patch [bz#1207657] +- kvm-qapi-add-x-block-dirty-bitmap-enable-disable.patch [bz#1207657] +- kvm-qmp-transaction-support-for-x-block-dirty-bitmap-ena.patch [bz#1207657] +- kvm-qapi-add-x-block-dirty-bitmap-merge.patch [bz#1207657] +- kvm-qapi-add-disabled-parameter-to-block-dirty-bitmap-ad.patch [bz#1207657] +- kvm-block-dirty-bitmap-add-bdrv_enable_dirty_bitmap_lock.patch [bz#1207657] +- kvm-dirty-bitmap-fix-double-lock-on-bitmap-enabling.patch [bz#1207657] +- kvm-block-qcow2-bitmap-fix-free_bitmap_clusters.patch [bz#1207657] +- kvm-qcow2-add-overlap-check-for-bitmap-directory.patch [bz#1207657] +- kvm-blockdev-enable-non-root-nodes-for-backup-source.patch [bz#1207657] +- kvm-iotests-add-222-to-test-basic-fleecing.patch [bz#1207657] +- kvm-qcow2-Remove-dead-check-on-ret.patch [bz#1207657] +- kvm-block-Move-request-tracking-to-children-in-copy-offl.patch [bz#1207657] +- kvm-block-Fix-parameter-checking-in-bdrv_co_copy_range_i.patch [bz#1207657] +- kvm-block-Honour-BDRV_REQ_NO_SERIALISING-in-copy-range.patch [bz#1207657] +- kvm-backup-Use-copy-offloading.patch [bz#1207657] +- 
kvm-block-backup-disable-copy-offloading-for-backup.patch [bz#1207657] +- kvm-iotests-222-Don-t-run-with-luks.patch [bz#1207657] +- kvm-block-io-fix-copy_range.patch [bz#1207657] +- kvm-block-split-flags-in-copy_range.patch [bz#1207657] +- kvm-block-add-BDRV_REQ_SERIALISING-flag.patch [bz#1207657] +- kvm-block-backup-fix-fleecing-scheme-use-serialized-writ.patch [bz#1207657] +- kvm-nbd-server-Reject-0-length-block-status-request.patch [bz#1207657] +- kvm-nbd-server-fix-trace.patch [bz#1207657] +- kvm-nbd-server-refactor-NBDExportMetaContexts.patch [bz#1207657] +- kvm-nbd-server-add-nbd_meta_empty_or_pattern-helper.patch [bz#1207657] +- kvm-nbd-server-implement-dirty-bitmap-export.patch [bz#1207657] +- kvm-qapi-new-qmp-command-nbd-server-add-bitmap.patch [bz#1207657] +- kvm-docs-interop-add-nbd.txt.patch [bz#1207657] +- kvm-nbd-server-introduce-NBD_CMD_CACHE.patch [bz#1207657] +- kvm-nbd-server-Silence-gcc-false-positive.patch [bz#1207657] +- kvm-nbd-server-Fix-dirty-bitmap-logic-regression.patch [bz#1207657] +- kvm-nbd-server-fix-nbd_co_send_block_status.patch [bz#1207657] +- kvm-nbd-client-Add-x-dirty-bitmap-to-query-bitmap-from-s.patch [bz#1207657] +- kvm-iotests-New-test-223-for-exporting-dirty-bitmap-over.patch [bz#1207657] +- kvm-hw-char-serial-Only-retry-if-qemu_chr_fe_write-retur.patch [bz#1592817] +- kvm-hw-char-serial-retry-write-if-EAGAIN.patch [bz#1592817] +- kvm-throttle-groups-fix-hang-when-group-member-leaves.patch [bz#1535914] +- kvm-Disable-aarch64-devices-reappeared-after-2.12-rebase.patch [bz#1586357] +- kvm-Disable-split-irq-device.patch [bz#1586357] +- kvm-Disable-AT24Cx-i2c-eeprom.patch [bz#1586357] +- kvm-Disable-CAN-bus-devices.patch [bz#1586357] +- kvm-Disable-new-superio-devices.patch [bz#1586357] +- kvm-Disable-new-pvrdma-device.patch [bz#1586357] +- kvm-qdev-add-HotplugHandler-post_plug-callback.patch [bz#1607891] +- kvm-virtio-scsi-fix-hotplug-reset-vs-event-race.patch [bz#1607891] +- kvm-e1000-Fix-tso_props-compat-for-82540em.patch 
[bz#1608778] +- kvm-slirp-correct-size-computation-while-concatenating-m.patch [bz#1586255] +- kvm-s390x-sclp-fix-maxram-calculation.patch [bz#1595740] +- kvm-redhat-Make-gitpublish-profile-the-default-one.patch [bz#1425820] +- Resolves: bz#1168213 + (main-loop: WARNING: I/O thread spun for 1000 iterations while doing stream block device.) +- Resolves: bz#1207657 + (RFE: QEMU Incremental live backup - push and pull modes) +- Resolves: bz#1416180 + (QEMU VFIO based block driver for NVMe devices) +- Resolves: bz#1425820 + (Improve QEMU packaging layout with modularization of the block layer) +- Resolves: bz#1482537 + ([RFE] qemu-img copy-offloading (convert command)) +- Resolves: bz#1505664 + ("qemu-kvm: System page size 0x1000000 is not enabled in page_size_mask (0x11000). Performance may be slow" show up while using hugepage as guest's memory) +- Resolves: bz#1513543 + ([RFE] Add block job to create format on a storage device) +- Resolves: bz#1518738 + (Add 'copy-on-read' filter driver for use with blockdev-add) +- Resolves: bz#1519144 + (qemu-img: image locking doesn't cover image creation) +- Resolves: bz#1519617 + (The exit code should be non-zero when qemu-io reports an error) +- Resolves: bz#1523065 + ("qemu-img resize" should fail to decrease the size of logical partition/lvm/iSCSI image with raw format) +- Resolves: bz#1525829 + (can not boot up a scsi-block passthrough disk via -blockdev with error "cannot get SG_IO version number: Operation not supported. 
Is this a SCSI device?") +- Resolves: bz#1526645 + ([Intel 7.6 FEAT] vHost Data Plane Acceleration (vDPA) - vhost user client - qemu-kvm-rhev) +- Resolves: bz#1527085 + (The copied flag should be updated during '-r leaks') +- Resolves: bz#1527898 + ([RFE] qemu-img should leave cluster unallocated if it's read as zero throughout the backing chain) +- Resolves: bz#1528541 + (qemu-img check reports tons of leaked clusters after re-start nfs service to resume writing data in guest) +- Resolves: bz#1533158 + (QEMU support for libvirtd restarting qemu-pr-helper) +- Resolves: bz#1535914 + (Disable io throttling for one member disk of a group during io will induce the other one hang with io) +- Resolves: bz#1537956 + (RFE: qemu-img amend should list the true supported options) +- Resolves: bz#1542080 + (Qemu core dump at cirrus_invalidate_region) +- Resolves: bz#1549654 + (Reject node-names which would be truncated by the block layer commands) +- Resolves: bz#1556678 + (Hot plug usb-ccid for the 2nd time with the same ID as the 1st time failed) +- Resolves: bz#1557995 + (QAPI schema for RBD storage misses the 'password-secret' option) +- Resolves: bz#1558723 + (Create RHEL-7.6 QEMU machine type for AArch64) +- Resolves: bz#1560847 + ([Power8][FW b0320a_1812.861][rhel7.5rc2 3.10.0-861.el7.ppc64le][qemu-kvm-{ma,rhev}-2.10.0-21.el7_5.1.ppc64le] KVM guest does not default to ori type flush even with pseries-rhel7.5.0-sxxm) +- Resolves: bz#1564576 + (Pegas 1.1 - Require to backport qemu-kvm patch that fixes expected_downtime calculation during migration) +- Resolves: bz#1566153 + (IOERROR pause code lost after resuming a VM while I/O error is still present) +- Resolves: bz#1567733 + (qemu abort when migrate during guest reboot) +- Resolves: bz#1569835 + (qemu-img get wrong backing file path after rebasing image with relative path) +- Resolves: bz#1572851 + (Core dumped after migration when with usb-host) +- Resolves: bz#1572856 + ('block-job-cancel' can not cancel a 
"drive-mirror" job) +- Resolves: bz#1574216 + (CVE-2018-3639 qemu-kvm-rhev: hw: cpu: speculative store bypass [rhel-7.6]) +- Resolves: bz#1575541 + (qemu core dump while installing win10 guest) +- Resolves: bz#1576598 + (Segfault in qemu-io and qemu-img with -U --image-opts force-share=off) +- Resolves: bz#1576743 + (virtio-rng hangs when running on recent (2.x) QEMU versions) +- Resolves: bz#1578381 + (Error message need update when specify numa distance with node index >=128) +- Resolves: bz#1583959 + (Incorrect vcpu count limit for 7.4 machine types for windows guests) +- Resolves: bz#1584914 + (SATA emulator lags and hangs) +- Resolves: bz#1584984 + (Vm starts failed with 'passthrough' smartcard) +- Resolves: bz#1586255 + (CVE-2018-11806 qemu-kvm-rhev: QEMU: slirp: heap buffer overflow while reassembling fragmented datagrams [rhel-7.6]) +- Resolves: bz#1586313 + (-smp option is not easily found in the output of qemu help) +- Resolves: bz#1586357 + (Disable new devices in 2.12) +- Resolves: bz#1588039 + (Possible assertion failure in qemu when a corrupted image is used during an incoming migration) +- Resolves: bz#1589634 + (Migration failed when rebooting guest with multiple virtio videos) +- Resolves: bz#1590640 + (qemu-kvm: block/io.c:1098: bdrv_co_do_copy_on_readv: Assertion `skip_bytes < pnum' failed.) 
+- Resolves: bz#1591076 + (The driver of 'throttle' is not whitelisted) +- Resolves: bz#1592817 + (Retrying on serial_xmit if the pipe is broken may compromise the Guest) +- Resolves: bz#1594135 + (system_reset many times linux guests cause qemu process Aborted) +- Resolves: bz#1595173 + (blockdev-create is blocking) +- Resolves: bz#1595180 + (Can't set rerror/werror with usb-storage) +- Resolves: bz#1595740 + (RHEL-Alt-7.6 - qemu has error during migration of larger guests) +- Resolves: bz#1599335 + (Image creation locking is too tight and is not properly released) +- Resolves: bz#1599515 + (qemu core-dump with aio_read via hmp (util/qemu-thread-posix.c:64: qemu_mutex_lock_impl: Assertion `mutex->initialized' failed)) +- Resolves: bz#1607891 + (Hotplug events are sometimes lost with virtio-scsi + iothread) +- Resolves: bz#1608778 + (qemu/migration: migrate failed from RHEL.7.6 to RHEL.7.5 with e1000-82540em) + +* Mon Aug 06 2018 Danilo Cesar Lemes de Paula - 2.12.0-17.el8 +- kvm-linux-headers-Update-to-include-KVM_CAP_S390_HPAGE_1.patch [bz#1610906] +- kvm-s390x-Enable-KVM-huge-page-backing-support.patch [bz#1610906] +- kvm-redhat-s390x-add-hpage-1-to-kvm.conf.patch [bz#1610906] +- Resolves: bz#1610906 + ([IBM 8.0 FEAT] KVM: Huge Pages - libhugetlbfs Enablement - qemu-kvm part) + +* Tue Jul 31 2018 Danilo Cesar Lemes de Paula - 2.12.0-16.el8 +- kvm-spapr-Correct-inverted-test-in-spapr_pc_dimm_node.patch [bz#1601671] +- kvm-osdep-powerpc64-align-memory-to-allow-2MB-radix-THP-.patch [bz#1601317] +- kvm-RHEL-8.0-Add-pseries-rhel7.6.0-sxxm-machine-type.patch [bz#1595501] +- kvm-i386-Helpers-to-encode-cache-information-consistentl.patch [bz#1597739] +- kvm-i386-Add-cache-information-in-X86CPUDefinition.patch [bz#1597739] +- kvm-i386-Initialize-cache-information-for-EPYC-family-pr.patch [bz#1597739] +- kvm-i386-Add-new-property-to-control-cache-info.patch [bz#1597739] +- kvm-i386-Clean-up-cache-CPUID-code.patch [bz#1597739] +- 
kvm-i386-Populate-AMD-Processor-Cache-Information-for-cp.patch [bz#1597739] +- kvm-i386-Add-support-for-CPUID_8000_001E-for-AMD.patch [bz#1597739] +- kvm-i386-Fix-up-the-Node-id-for-CPUID_8000_001E.patch [bz#1597739] +- kvm-i386-Enable-TOPOEXT-feature-on-AMD-EPYC-CPU.patch [bz#1597739] +- kvm-i386-Remove-generic-SMT-thread-check.patch [bz#1597739] +- kvm-i386-Allow-TOPOEXT-to-be-enabled-on-older-kernels.patch [bz#1597739] +- Resolves: bz#1595501 + (Create pseries-rhel7.6.0-sxxm machine type) +- Resolves: bz#1597739 + (AMD EPYC/Zen SMT support for KVM / QEMU guest (qemu-kvm)) +- Resolves: bz#1601317 + (RHEL8.0 - qemu patch to align memory to allow 2MB THP) +- Resolves: bz#1601671 + (After rebooting guest,all the hot plug memory will be assigned to the 1st numa node.) + +* Tue Jul 24 2018 Danilo Cesar Lemes de Paula - 2.12.0-15.el8 +- kvm-spapr-Add-ibm-max-associativity-domains-property.patch [bz#1599593] +- kvm-Revert-spapr-Don-t-allow-memory-hotplug-to-memory-le.patch [bz#1599593] +- kvm-simpletrace-Convert-name-from-mapping-record-to-str.patch [bz#1594969] +- kvm-tests-fix-TLS-handshake-failure-with-TLS-1.3.patch [bz#1602403] +- Resolves: bz#1594969 + (simpletrace.py fails when running with Python 3) +- Resolves: bz#1599593 + (User can't hotplug memory to less memory numa node on rhel8) +- Resolves: bz#1602403 + (test-crypto-tlssession unit test fails with assertions) + +* Mon Jul 09 2018 Danilo Cesar Lemes de Paula - 2.12.0-14.el8 +- kvm-vfio-pci-Default-display-option-to-off.patch [bz#1590511] +- kvm-python-futurize-f-libfuturize.fixes.fix_print_with_i.patch [bz#1571533] +- kvm-python-futurize-f-lib2to3.fixes.fix_except.patch [bz#1571533] +- kvm-Revert-Defining-a-shebang-for-python-scripts.patch [bz#1571533] +- kvm-spec-Fix-ambiguous-python-interpreter-name.patch [bz#1571533] +- kvm-qemu-ga-blacklisting-guest-exec-and-guest-exec-statu.patch [bz#1518132] +- kvm-redhat-rewrap-build_configure.sh-cmdline-for-the-rh-.patch +- 
kvm-redhat-remove-the-VTD-LIVE_BLOCK_OPS-and-RHV-options.patch +- kvm-redhat-fix-the-rh-env-prep-target-s-dependency-on-th.patch +- kvm-redhat-remove-dead-code-related-to-s390-not-s390x.patch +- kvm-redhat-sync-compiler-flags-from-the-spec-file-to-rh-.patch +- kvm-redhat-sync-guest-agent-enablement-and-tcmalloc-usag.patch +- kvm-redhat-fix-up-Python-3-dependency-for-building-QEMU.patch +- kvm-redhat-fix-up-Python-dependency-for-SRPM-generation.patch +- kvm-redhat-disable-glusterfs-dependency-support-temporar.patch +- Resolves: bz#1518132 + (Ensure file access RPCs are disabled by default) +- Resolves: bz#1571533 + (Convert qemu-kvm python scripts to python3) +- Resolves: bz#1590511 + (Fails to start guest with Intel vGPU device) + +* Thu Jun 21 2018 Danilo C. L. de Paula - 2.12.0-13.el8 +- Resolves: bz#1508137 + ([IBM 8.0 FEAT] KVM: Interactive Bootloader (qemu)) +- Resolves: bz#1513558 + (Remove RHEL6 machine types) +- Resolves: bz#1568600 + (pc-i440fx-rhel7.6.0 and pc-q35-rhel7.6.0 machine types (x86)) +- Resolves: bz#1570029 + ([IBM 8.0 FEAT] KVM: 3270 Connectivity - qemu part) +- Resolves: bz#1578855 + (Enable Native Ceph support on non x86_64 CPUs) +- Resolves: bz#1585651 + (RHEL 7.6 new pseries machine type (ppc64le)) +- Resolves: bz#1592337 + ([IBM 8.0 FEAT] KVM: CPU Model z14 ZR1 (qemu-kvm)) + +* Tue May 15 2018 Danilo C. L. de Paula - 2.12.0-11.el8.1 +- Resolves: bz#1576468 + (Enable vhost_user in qemu-kvm 2.12) + +* Wed May 09 2018 Danilo de Paula - 2.12.0-11.el8 +- Resolves: bz#1574406 + ([RHEL 8][qemu-kvm] Failed to find romfile "efi-virtio.rom") +- Resolves: bz#1569675 + (Backwards compatibility of pc-*-rhel7.5.0 and older machine-types) +- Resolves: bz#1576045 + (Fix build issue by using python3) +- Resolves: bz#1571145 + (qemu-kvm segfaults on RHEL 8 when run guestfsd under TCG) + +* Fri Apr 20 2018 Danilo de Paula - 2.12.0-10.el +- Fixing some issues with packaging. 
+- Rebasing to 2.12.0-rc4 + +* Fri Apr 13 2018 Danilo de Paula - 2.11.0-7.el8 +- Bumping epoch for RHEL8 and dropping self-obsoleting + +* Thu Apr 12 2018 Danilo de Paula - 2.11.0-6.el8 +- Rebuilding + +* Mon Mar 05 2018 Danilo de Paula - 2.11.0-5.el8 +- Prepare building on RHEL-8.0